import urllib2 archive_file = urllib2.urlopen('https://raw.githubusercontent.com/HappyPenguin/OpenScience/master/FSLmailinglist_archive_April2014.txt') archive_lines = archive_file.readlines() archive_lines = [ line.rstrip() for line in archive_lines] for line in archive_lines[:10]: print line len(archive_lines) archive_messages_n = [ x.rsplit('(',1)[1].rstrip(' messages)') for x in archive_lines ] print archive_messages_n[:10] import numpy as np array = np.array(archive_messages_n, dtype=np.int) print array.sum()