import operator
# Load the movie table from a tab-separated file whose first row holds the
# column names.
with open("imdb.tsv", "r") as txt:
    # Trim surrounding whitespace (including the line break) from every line.
    lines = [line.strip() for line in txt]
# Drop blank lines (e.g. an empty line at the end of the file): they would
# otherwise turn into one-field rows and fail the column lookups below.
lines = [line for line in lines if line]
# Break each line into its tab-separated fields.
tuples = [line.split("\t") for line in lines]
# Remove the column names from the results.
attributes = tuples.pop(0)
# Sort the rows by their fifth field (the country name).
tuples = sorted(tuples, key=operator.itemgetter(4))
# Build one list per column, trimming stray whitespace around each field.
# The loop variable is called `row` so the builtin `tuple` is not shadowed.
ranks = [int(row[0].strip()) for row in tuples]
titles = [row[1].strip() for row in tuples]
years = [int(row[2].strip()) for row in tuples]
directors = [row[3].strip() for row in tuples]
countries = [row[4].strip() for row in tuples]
# First chart: one blue dot per movie, release year against ranking.
# NOTE(review): figsize, title, xlabel, ylabel and scatter are not defined in
# the visible file — presumably provided by an IPython pylab session; confirm.
figsize(6, 6)
ylabel("Ranking (lower is better)")
xlabel("Year of Release")
title("Best 30 Movies of All Time")
# The trailing semicolon is kept from the original; presumably it suppresses
# the echoed return value when this runs as the last line of a notebook cell.
scatter(years, ranks, marker="o", s=100, c="blue");
# Collect every country name once, in sorted order; sorted() accepts the set
# directly, so no intermediate list is needed.
unique_countries = sorted(set(countries))
# Bounds for matching a ranking position to a dot size.
# NOTE(review): the plotting code visible in this file draws every dot with
# s=100 and never reads these values — confirm they are used elsewhere before
# removing them.
min_dot_size = 100
max_dot_size = 500
dot_size_range = max_dot_size - min_dot_size
min_rank = 1
max_rank = max(ranks)
# Define plot properties.
# The figure size is requested once, before any other plotting call.
# Assuming figsize is IPython's pylab helper (it only sets the default size
# for figures created afterwards — confirm), calling it after title(), as the
# code did before, came too late to affect this figure.
figsize(10, 6)
title("Best 30 Movies of All Time by Country")
xlabel("Year of Release")
ylabel("Ranking (lower is better)")
grid(True, which="both")
xlim((1920, 2040))
# Colormap from which each country's dot color is taken.
# NOTE(review): matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and
# removed in 3.9; on newer versions use matplotlib.colormaps["Paired"].
color_map = matplotlib.cm.get_cmap("Paired")
# Plot the dots of every country as a separate series so that each country
# gets its own color and its own legend entry.
country_count = len(unique_countries)
for country_to_plot_index, country_to_plot in enumerate(unique_countries):
    # Keep only the years / ranks of the movies from the current country.
    years_to_plot = [y for y, c in zip(years, countries) if c == country_to_plot]
    ranks_to_plot = [r for r, c in zip(ranks, countries) if c == country_to_plot]
    # Position of this country in the colormap: index / count, i.e. evenly
    # spaced values in [0, 1). float() keeps the division from truncating
    # under Python 2.
    rgba = color_map(float(country_to_plot_index) / country_count)
    scatter(years_to_plot,
            ranks_to_plot,
            # Passed as a one-row list: a bare RGBA tuple would be read as
            # four per-point values whenever a country has exactly four
            # movies, coloring its dots incorrectly.
            c=[rgba],
            s=100,
            marker="o",
            label=country_to_plot)
# Trailing semicolon kept from the original; presumably it suppresses the
# echoed return value at the end of a notebook cell.
legend(loc="lower right", ncol=1);