import matplotlib.pyplot as plt
import csv

# IPython magic (not plain Python): configure matplotlib so figures are
# rendered inline in the notebook output.
%matplotlib inline

# Let's get a list of available files
# (IPython's `ls` alias: list the *.csv files in the working directory,
# one per line).
%ls -1 *.csv

# Open up the sunspot data, read out the header, then read every record.
# The "with" block guarantees the file is closed once the rows have been
# consumed, even if a row fails to parse.
# NOTE: binary mode ("rb") is what the Python 2 csv module expects.
year = []  # fractional year of each sample: YEAR + (MON - 1) / 12
spot = []  # value of the SSN column for the matching entry in `year`
with open("sunspot_num.csv", "rb") as sd:
    csv_reader = csv.DictReader(sd)

    # Accessing fieldnames reads the header row. Print it so the header
    # is visible here, not only when it is the last expression of a
    # notebook cell.
    print(csv_reader.fieldnames)

    # Looks like a simple set of data of monthly counts
    # of sunspot activity. Read the rest of the data.
    for row in csv_reader:
        # Place each month at the start of its slot within the year
        # (MON 1 -> +0/12, ..., MON 12 -> +11/12).
        year.append(float(row["YEAR"]) + (float(row["MON"]) - 1) / 12)
        spot.append(float(row["SSN"]))

print("Found %d points" % len(year))

# Draw the sunspot series (fractional year on x, count on y) on a
# fresh 8x4 figure, with a title and a grid.
sunspot_fig = plt.figure(figsize=(8, 4))
sunspot_axes = sunspot_fig.gca()
sunspot_axes.plot(year, spot)
sunspot_axes.set_title("Sunspot Count")
sunspot_axes.grid()

# Open up the World Bank Data
# I had the full data set for the demo, but for the repository, I only
# included a scrubbed subset of the data I was using. If you'd like to
# use the full data set, you can find it at the link I provide in the
# "Resources" section; then point wdi_path at "WDI_Data.csv" instead.
wdi_path = "WDI_Data_US_China.csv"

# Looks like rows of data where each row represents an indicator
# for a particular country. And we have data that can range
# from 1960 to 2013. Let's gather all the rows for USA and China.
# Be patient. This requires a little work.
usa = []
china = []

# Map each country code of interest to the list collecting its rows;
# rows for any other country are skipped.
rows_by_country = {"USA": usa, "CHN": china}

# The "with" block closes the file as soon as every row has been read.
# NOTE: binary mode ("rb") is what the Python 2 csv module expects.
with open(wdi_path, "rb") as wd:
    csv_reader = csv.DictReader(wd)

    # Accessing fieldnames reads the header row. Print it so the header
    # is visible here, not only when it is the last expression of a
    # notebook cell.
    print(csv_reader.fieldnames)

    for row in csv_reader:
        bucket = rows_by_country.get(row["Country Code"])
        if bucket is not None:
            # DictReader already yields each row as a dictionary keyed
            # by field name, so it can be stored as-is.
            bucket.append(row)

# How many indicators are there?
print("%d %d" % (len(usa), len(china)))

# Whoa! That's a lot of data. Narrow each country's rows down to the
# indicators whose name mentions "savings" (case-insensitive match).
usa_saving = [entry for entry in usa
              if "savings" in entry["Indicator Name"].lower()]
china_saving = [entry for entry in china
                if "savings" in entry["Indicator Name"].lower()]

# Report how many savings indicators each country has.
print("%d %d" % (len(usa_saving), len(china_saving)))

# That's better. List every USA savings indicator, one per line, as
# its name followed by its code.
for indicator in usa_saving:
    print("%s %s %s" % (indicator["Indicator Name"], ", ",
                        indicator["Indicator Code"]))

# Let's look at "Gross savings (% of GDP)"
# Using the Indicator Code
GROSS_SAVINGS_CODE = "NY.GNS.ICTR.ZS"


def _indicator_series(rows, indicator_code, first_year=1960, last_year=2013):
    """Return the yearly values of one indicator as two parallel lists.

    rows           -- row dictionaries keyed by field name; each must have
                      an "Indicator Code" entry and one entry per year
                      (the year as a string, e.g. "1960").
    indicator_code -- only rows whose "Indicator Code" equals this are used.
    first_year, last_year -- inclusive range of year columns to read.

    Returns (years, values): the int years that have data and the matching
    float values. Years whose cell is empty are ignored.
    """
    years = []
    values = []
    for row in rows:
        if row["Indicator Code"] != indicator_code:
            continue
        for yr in range(first_year, last_year + 1):
            cell = row[str(yr)]
            # Ignore years without data
            if cell:
                years.append(yr)
                values.append(float(cell))
    return years, values


usa_year, usa_rate = _indicator_series(usa_saving, GROSS_SAVINGS_CODE)
china_year, china_rate = _indicator_series(china_saving, GROSS_SAVINGS_CODE)
                
# Plot both countries' gross savings rate on one figure. Each series is
# labelled and a legend is drawn so the two curves can be told apart.
plt.figure(figsize=(8, 4))
plt.title("Gross savings (% of GDP)")
plt.plot(usa_year, usa_rate, label="USA")
plt.plot(china_year, china_rate, label="China")
plt.legend(loc="best")
plt.grid()