#!/usr/bin/env python
# coding: utf-8

# # Essential Introduction to pandas
#
# Notebook export: each "In[n]" marker below is one notebook cell. Bare
# expressions (e.g. ``amzn.head()``) render their value in Jupyter and are
# no-ops when this file is run as a plain script.

# ## 1. First we will set up our environment in Jupyter
# ### If you don't have pandas_datareader, at the command line enter
# ###     conda install -c conda-forge pandas-datareader
# ### or, if you don't have Anaconda,
# ###     pip install pandas-datareader

# In[71]:

import numpy as np
import pandas as pd
# IPython magic: render matplotlib figures inline in the notebook.
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import pandas_datareader as pdr


# ## 2. Get data (5 years, GOOG AMZN)

# In[73]:

# Both date spellings are accepted as the start date by the reader.
amzn = pdr.get_data_yahoo('AMZN', '20121101')
goog = pdr.get_data_yahoo('GOOG', '2012-11-01')


# ## 3. View head, tail, info

# In[76]:

amzn.head()


# In[77]:

amzn.tail()


# In[79]:

# DataFrame.info() prints its report itself and returns None, so call it
# directly instead of wrapping it in print() (which would add "None None").
amzn.info()
goog.info()


# ## 4. Filtering using .loc, .iloc, index

# In[83]:

# Label-based lookup: row by date label, column by name.
amzn.loc['20121101', 'Open']


# In[85]:

# Position-based lookup: first row, first column.
amzn.iloc[0, 0]


# In[86]:

# Boolean mask on the index: rows after 2017-11-01.
amzn[amzn.index > '2017-11-01']


# In[ ]:


# ## 5. Filtering with criteria

# In[87]:

print('Mean: %.3f Std: %.3f' % (amzn['Volume'].mean(), amzn['Volume'].std()))


# In[88]:

# Threshold for "high volume": two standard deviations above the mean.
high = amzn['Volume'].mean() + 2 * amzn['Volume'].std()


# In[91]:

# Fraction of trading days whose volume exceeds the threshold.
len(amzn[amzn.Volume > high]) / len(amzn)


# In[ ]:


# ## 6. Adding Columns

# In[92]:

# Day-over-day percentage change of the closing price.
amzn['pct_chng'] = amzn.Close.pct_change()


# In[93]:

amzn.head()


# ## 7. Appending data

# In[97]:

# Partial-string indexing: all rows in November 2017.
goog_11 = goog.loc['2017-11']
amzn_11 = amzn.loc['2017-11']


# In[100]:

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way and works on old and new pandas versions alike.
pd.concat([goog_11, amzn_11]).tail()


# ## 8. Merging Columns from Different DataFrames

# In[101]:

# Columns are aligned on the shared date index.
close = pd.DataFrame({'AMZN': amzn.Close, 'GOOG': goog.Close})
close.head()


# In[ ]:


# In[ ]:


# ## 9. Grouping and sorting data

# In[105]:

# Monthly mean of GOOG volume from 2017 onwards.
# NOTE: pandas >= 2.2 prefers the 'ME' alias for month-end; 'M' is kept here
# for compatibility with older pandas versions.
goog['Volume'][goog.index > '2017'].resample('M').mean().plot()


# In[ ]:


# In[ ]:


# ## 10. Cumulative Sum and Rolling Calculations

# In[106]:

# Daily log returns; the first element is NaN because shift(1) has no
# previous row to compare against.
amzn_change = np.log(amzn.Close / amzn.Close.shift(1))
goog_change = np.log(goog.Close / goog.Close.shift(1))


# In[108]:

# Cumulative log return of both tickers, drawn by two consecutive plot calls.
amzn_change.cumsum().plot()
goog_change.cumsum().plot()


# ## 11. Create new DataFrame and save to csv

# In[111]:

# [1:] drops the leading NaN produced by the shift above.
new = pd.DataFrame({'AMZN': amzn_change[1:], 'GOOG': goog_change[1:]})
new.to_csv('stocks.csv')


# In[112]:

pd.read_csv('stocks.csv').head()


# In[ ]:


# ## 12. Change the index and remove columns
#

# In[113]:

# read_csv restores the dates as an ordinary 'Date' column; promote it back
# to the index.
new_stocks = pd.read_csv('stocks.csv')
new_stocks.index = new_stocks.Date
new_stocks.head()


# In[114]:

# The 'Date' column is now redundant with the index.
new_stocks.drop(['Date'], axis=1, inplace=True)
new_stocks.head()


# ## 13. Rename Columns

# In[120]:

new_stocks.rename(columns={'AMZN': 'AMZN_chng'}, inplace=True)
new_stocks.head()


# In[ ]:


# In[ ]: