from IPython.core.display import HTML
# Load the notebook's custom stylesheet. The context manager closes the file
# as soon as its contents have been read instead of leaking the handle.
with open("Style.css") as style_file:
    styles = style_file.read()
HTML(styles)

import datetime as dt
from collections import defaultdict

import matplotlib.pyplot as plt
import pandas

# Set the default figure size. Only `matplotlib.pyplot` is imported (as `plt`),
# so the settings are reached through `plt` rather than the unbound name
# `matplotlib`.
plt.rcParams['figure.figsize'] = [8, 4]

# Set this variable to the directory where the GDELT data files are
PATH = "GDELT.1979-2012.reduced/"

# Peeking at the data:
!head -n 5 GDELT.1979-2012.reduced/2010.reduced.txt

# Count events per calendar month across every yearly file.
# Keys are datetimes pinned to the first day of the month; values are counts.
monthly_data = defaultdict(int)
count = 0    # Total number of records successfully counted.
skipped = 0  # Rows whose date field could not be parsed.

for file_year in range(1979, 2013):
    # print(file_year)  # Uncomment this line to see the program's progress.
    # The context manager closes each file once it has been read.
    with open(PATH + str(file_year) + ".reduced.txt") as f:
        next(f, None)  # Skip the header row (tolerates an empty file).
        for raw_row in f:
            # The date is the first tab-separated field, in YYYYMMDD format.
            date_str = raw_row.split("\t")[0]
            try:
                date = dt.datetime(int(date_str[:4]), int(date_str[4:6]), 1)
            except ValueError:
                # Non-numeric or out-of-range date: skip the row, but keep
                # track of how many were dropped instead of hiding them.
                skipped += 1
                continue
            monthly_data[date] += 1
            count += 1

# Single-argument print calls behave the same on Python 2 and Python 3.
print("Total rows processed: %d" % count)
print("Total months: %d" % len(monthly_data))
print("Rows skipped (unparseable date): %d" % skipped)

# Turn the monthly counts into a time series. Sorting the index guarantees the
# plot runs chronologically even if rows were not encountered in date order.
monthly_events = pandas.Series(monthly_data).sort_index()
monthly_events.plot()

# Monthly counts of quad category '1' (material cooperation) and '4'
# (material conflict) events, keyed by the first day of each month.
material_coop = defaultdict(int)
material_conf = defaultdict(int)

# Route each quad category of interest to the counter it feeds.
quad_counters = {'1': material_coop, '4': material_conf}

for file_year in range(1979, 2013):
    # The context manager closes each file once it has been read.
    with open(PATH + str(file_year) + ".reduced.txt") as f:
        next(f, None)  # Skip the header row (tolerates an empty file).
        for raw_row in f:
            row = raw_row.split("\t")
            if len(row) < 5:
                continue  # Truncated row: there is no quad category field.

            # Check the quadcat, and skip if not relevant:
            counter = quad_counters.get(row[4])
            if counter is None:
                continue

            # Get the date, which is in YYYYMMDD format:
            date_str = row[0]
            try:
                date = dt.datetime(int(date_str[:4]), int(date_str[4:6]), 1)
            except ValueError:
                continue  # Skip rows with an unparseable date.
            counter[date] += 1

# Convert both into time series:
monthly_coop = pandas.Series(material_coop)
monthly_conf = pandas.Series(material_conf)

# Join the time series together into a DataFrame. The two series are aligned
# on their dates; a month that appears in only one of them has no events of
# the other kind, so the resulting gap is filled with a count of zero.
trends = pandas.DataFrame({"Material_Cooperation": monthly_coop,
                           "Material_Conflict": monthly_conf}).fillna(0)
trends.plot()

# Collect every event in which one actor code starts with ISR and the other
# starts with PAL or PSE. Each entry of `data` is
# [year, month, day, actor1, actor2, quad_cat].
data = []
for file_year in range(1979, 2013):
    # The context manager closes each file once it has been read.
    with open(PATH + str(file_year) + ".reduced.txt") as f:
        next(f, None)  # Skip the header row, as the other passes do.
        for raw_row in f:
            row = raw_row.split("\t")
            if len(row) < 5:
                continue  # Truncated row: required fields are missing.

            # Only the first three characters of each actor code are compared.
            actor1 = row[1][:3]
            actor2 = row[2][:3]
            actors = (actor1, actor2)
            if "ISR" not in actors:
                continue
            if "PAL" not in actors and "PSE" not in actors:
                continue

            # The date is in YYYYMMDD format.
            date_str = row[0]
            try:
                year = int(date_str[:4])
                month = int(date_str[4:6])
                day = int(date_str[6:])
            except ValueError:
                continue  # Skip rows with an unparseable date.
            quad_cat = row[4]
            data.append([year, month, day, actor1, actor2, quad_cat])

# A single-argument print call behaves the same on Python 2 and Python 3.
print("Israeli-Palestinian Conflict Records: %d" % len(data))

# Load the collected records into a table, one column per field of each record.
record_columns = ["Year", "Month", "Day", "Actor1", "Actor2", "QuadCat"]
ilpalcon = pandas.DataFrame(data=data, columns=record_columns)
ilpalcon.head()

# Count events per (Year, Month) for each QuadCat value. Any non-key column
# works as `values` here because `len` only counts the rows in each group.
# `index=`/`columns=` replace the older `rows=`/`cols=` keyword names.
pivot = pandas.pivot_table(ilpalcon, values="Day", index=["Year", "Month"],
                           columns="QuadCat", aggfunc=len)
pivot = pivot.fillna(0) # Replace any missing data with zeros
pivot = pivot.reset_index() # Make Year and Month regular columns
pivot.head()

# date-generating function:
def get_date(x):
    """Return the first day of the month for a (year, month) pair.

    `x` is any sequence whose first item is the year and whose second item
    is the month; both are passed through int(), so float values are accepted.
    """
    return dt.datetime(year=int(x[0]), month=int(x[1]), day=1)


# Build the date column from explicit (Year, Month) pairs rather than applying
# the function to whole rows, which depended on positional indexing of a
# string-labelled row.
pivot["date"] = [get_date(pair) for pair in zip(pivot["Year"], pivot["Month"])]
pivot = pivot.set_index("date") # Set the new date column as the index

# Now we no longer need the Year and Month columns, so keep only the QuadCat
# columns. reindex (instead of plain selection) supplies an all-zero column
# for any category that never occurred, rather than raising a KeyError.
pivot = pivot.reindex(columns=["1", "2", "3", "4"], fill_value=0)
# Rename the QuadCat columns
pivot = pivot.rename(columns={"1": "Material Cooperation",
                              "2": "Verbal Cooperation",
                              "3": "Verbal Conflict",
                              "4": "Material Conflict"})

# Plot the monthly event counts for all four categories.
pivot.plot(figsize=(8, 4))

# Peace index: cooperative events minus conflict events for each month,
# accumulated one term at a time.
peace_index = pivot["Material Cooperation"] + pivot["Verbal Cooperation"]
peace_index = peace_index - pivot["Verbal Conflict"]
peace_index = peace_index - pivot["Material Conflict"]
pivot["Peace_Index"] = peace_index
pivot["Peace_Index"].plot(figsize=(8, 4))