import matplotlib.pyplot as plt import csv %matplotlib inline # Let's get a list of available files %ls -1 *.csv # Open up the sunspot data and read out the header sd = open("sunspot_num.csv", "rb") csv_reader = csv.DictReader(sd) csv_reader.fieldnames # Looks like a simple set of data of monthly counts # of sunspot activity. Read the rest of the data and # plot the sunspot data year = [] spot = [] for row in csv_reader: year.append(float(row["YEAR"]) + (float(row["MON"]) - 1)/12) spot.append(float(row["SSN"])) print "Found %d points" % len(year) # Plot the data plt.figure(figsize=(8,4)) plt.plot(year, spot) plt.title("Sunspot Count") plt.grid() # Open up the World Bank Data # I had the full data set for the demo, but for the repository, I only # included a scrubbed subset of the data I was using. If you'd like to # use the full data set, you can find it at the link I provide in the # "Resources" section. #wd = open("WDI_Data.csv", "rb") wd = open("WDI_Data_US_China.csv", "rb") csv_reader = csv.DictReader(wd) csv_reader.fieldnames # Looks like rows of data where each row represent an indicator # for a particular country. And we have data that can range # from 1960 to 2013. Let's gather all the rows for USA and China. # Be patient. This requires a little work. We will convert # each row into a dictionary that can be access by field name. usa = [] china = [] for row in csv_reader: if row["Country Code"] == "USA": usa.append({f: row[f] for f in csv_reader.fieldnames}) elif row["Country Code"] == "CHN": china.append({f: row[f] for f in csv_reader.fieldnames}) # How many indicators are there? print len(usa), len(china) # Whoa! That's a lot of data. Let's find all the rows that # have something to do with savings. 
def _rows_mentioning(rows, keyword):
    """Return the rows whose "Indicator Name" contains *keyword*.

    The match ignores case. Each row is a dict keyed by CSV field name.
    """
    needle = keyword.lower()
    return [row for row in rows if needle in row["Indicator Name"].lower()]


def _yearly_series(rows, indicator_code, first_year=1960, last_year=2013):
    """Collect the yearly values of one indicator.

    Scans every row whose "Indicator Code" equals *indicator_code* and
    reads the columns named after each year from *first_year* to
    *last_year* inclusive. Years whose cell is empty are skipped.

    Returns a ``(years, values)`` pair of parallel lists: the years as
    ints and the values as floats.
    """
    years = []
    values = []
    for row in rows:
        if row["Indicator Code"] != indicator_code:
            continue
        for y in range(first_year, last_year + 1):
            cell = row[str(y)]
            # Ignore years without data
            if cell:
                years.append(y)
                values.append(float(cell))
    return years, values


usa_saving = _rows_mentioning(usa, "savings")
china_saving = _rows_mentioning(china, "savings")
print("%d %d" % (len(usa_saving), len(china_saving)))

# That's better. Let's print out the savings indicators.
for s in usa_saving:
    print("%s %s %s" % (s["Indicator Name"], ", ", s["Indicator Code"]))

# Let's look at "Gross savings (% of GDP)"
# Using the Indicator Code
gross_savings_code = "NY.GNS.ICTR.ZS"
usa_year, usa_rate = _yearly_series(usa_saving, gross_savings_code)
china_year, china_rate = _yearly_series(china_saving, gross_savings_code)

# Both countries go on the same axes so the two curves can be compared.
plt.figure(figsize=(8, 4))
plt.title("Gross savings (% of GDP)")
plt.plot(usa_year, usa_rate, china_year, china_rate)
plt.grid()