#!/usr/bin/env python
# coding: utf-8
#
# Notebook export: load the `tao.csv` sample, measure the fraction of missing
# values per column (overall and per year), drop rows that lack humidity or
# temperature readings, and draw pairwise plots coloured by year.

# In[1]:

# `get_ipython` only exists inside an IPython/Jupyter session; guard the
# magic so this file can also be executed as a plain script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)


# In[45]:

1+1


# #here is a heading
#
# ##a smaller one
#
# ###an even smaller one
#
# ####even less important
#
# and here is some text
#
# hello world
#
# http://www.google.com
#
# [hello world](http://www.google.com)
#
# here is an equation $E = m c^2$
#
# $$ \alpha = \beta $$
#
# hello **bold**, nor *emph*

# In[11]:

#also https://dl.dropboxusercontent.com/u/75194/tao-full.csv
dfsmall = pd.read_csv("https://dl.dropboxusercontent.com/u/75194/tao.csv")
dfsmall.head()


# Primary question: Can we detect an El Nino event, based on sea surface temperature? What about other observed variables?

# In[13]:

# Drop the unnamed column that came along with the CSV (presumably a saved
# row index -- confirm against the file).
del dfsmall["Unnamed: 0"]
dfsmall.head()


# In[12]:

dfsmall.dtypes


# In[14]:

dfsmall.shape


# In[16]:

dfsmall.groupby('year').mean()  # look up groupby


# In[39]:

dfsmall.groupby('year').describe()


# In[19]:

dfsmall.columns


# In[20]:

# Fraction of missing values in each column over the whole frame.
for feat in dfsmall.columns:
    print(feat, dfsmall[feat].isnull().mean())


# In[27]:

# Same missing-value fraction, broken down by year.
for year, year_frame in dfsmall.groupby('year'):
    for feat in dfsmall.columns:
        print(year, feat, year_frame[feat].isnull().mean())


# In[28]:

# Keep only the rows that have a humidity reading.
dfsmallnew = dfsmall.dropna(subset=['humidity'])


# In[29]:

dfsmallnew.shape


# In[30]:

# Additionally require both temperature readings to be present.
dfsmallnew = dfsmallnew.dropna(subset=['air.temp', 'sea.surface.temp'])
dfsmallnew.shape


# In[31]:

dfsmallnew.head()


# In[32]:

dfsmallnew.groupby('year').count()


# In[34]:

# What happened here? This cell originally passed `alpha=0.3` directly to
# `sns.pairplot`, but `alpha` is not a `pairplot` parameter; options for the
# underlying plotting function have to be supplied through `plot_kws`.
# look up http://stanford.edu/~mwaskom/software/seaborn/tutorial/axis_grids.html
#
# `size=2.5` is omitted in the pairplot calls below: 2.5 is already the
# default, and the keyword was renamed to `height` in seaborn 0.9, so leaving
# it out works on either side of the rename.
sns.pairplot(dfsmallnew, hue="year", plot_kws={'alpha': 0.3});


# In[35]:

sns.pairplot(dfsmallnew, hue="year", plot_kws={'alpha': 0.4});


# In[36]:

# NOTE(review): `vars` shadows the builtin of the same name; the name is kept
# because cells outside this view may still refer to it.
vars = list(dfsmallnew.columns)
vars


# In[38]:

# Plot every column except the first one in `vars`.
sns.pairplot(dfsmallnew, hue="year", plot_kws={'alpha': 0.4}, vars=vars[1:]);


# In[40]:

g = sns.PairGrid(dfsmallnew, vars=["sea.surface.temp", "air.temp"], hue="year")
g.map(plt.scatter)


# In[43]:

# Scatter plots above the diagonal, kernel density estimates on and below it.
g = sns.PairGrid(dfsmallnew, vars=["sea.surface.temp", "air.temp"], hue="year")
g.map_upper(plt.scatter, alpha=0.3)
g.map_diag(sns.kdeplot, alpha=0.5)
g.map_lower(sns.kdeplot, alpha=0.5)


# In[ ]:

# The last cell held only the unfinished expression `dfsmall.` (a syntax
# error when executed); it is kept here as a comment.
# dfsmall.