import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import pandas_datareader as pdr
# Download the price history for both tickers via pandas_datareader's Yahoo
# reader. A single shared start date keeps the two requests aligned; the
# original spelled the same date two different ways ('20121101' and
# '2012-11-01'), which invites them drifting apart on a later edit.
START_DATE = '2012-11-01'
amzn = pdr.get_data_yahoo('AMZN', START_DATE)
goog = pdr.get_data_yahoo('GOOG', START_DATE)
# Preview the first rows of the AMZN frame.
amzn.head()
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2012-11-01 | 234.229996 | 234.550003 | 231.339996 | 232.139999 | 232.139999 | 3905100 |
2012-11-02 | 234.009995 | 237.399994 | 232.110001 | 232.419998 | 232.419998 | 3374700 |
2012-11-05 | 232.039993 | 234.830002 | 230.110001 | 234.330002 | 234.330002 | 1846100 |
2012-11-06 | 235.410004 | 237.750000 | 234.449997 | 237.559998 | 237.559998 | 2661300 |
2012-11-07 | 235.649994 | 235.660004 | 229.419998 | 232.059998 | 232.059998 | 3624200 |
# Preview the last five rows (the default row count, spelled out explicitly).
amzn.tail(n=5)
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2017-11-13 | 1123.000000 | 1139.900024 | 1122.339966 | 1129.170044 | 1129.170044 | 2918400 |
2017-11-14 | 1130.109985 | 1138.000000 | 1123.890015 | 1136.839966 | 1136.839966 | 3138400 |
2017-11-15 | 1127.010010 | 1131.750000 | 1121.630005 | 1126.689941 | 1126.689941 | 3928700 |
2017-11-16 | 1130.160034 | 1138.160034 | 1130.050049 | 1137.290039 | 1137.290039 | 2213300 |
2017-11-17 | 1138.280029 | 1138.800049 | 1125.810059 | 1129.880005 | 1129.880005 | 2345100 |
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping the calls in print() only appended a stray "None None" after
# the two reports. Call it directly, once per frame.
amzn.info()
goog.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 1272 entries, 2012-11-01 to 2017-11-17 Data columns (total 6 columns): Open 1272 non-null float64 High 1272 non-null float64 Low 1272 non-null float64 Close 1272 non-null float64 Adj Close 1272 non-null float64 Volume 1272 non-null int64 dtypes: float64(5), int64(1) memory usage: 69.6 KB <class 'pandas.core.frame.DataFrame'> DatetimeIndex: 1272 entries, 2012-11-01 to 2017-11-17 Data columns (total 6 columns): Open 1272 non-null float64 High 1272 non-null float64 Low 1272 non-null float64 Close 1272 non-null float64 Adj Close 1272 non-null float64 Volume 1272 non-null int64 dtypes: float64(5), int64(1) memory usage: 69.6 KB None None
# Label-based lookup: select the 'Open' value on the row whose date label
# matches the string '20121101' (the first row of the frame).
amzn.loc['20121101','Open']
234.229996
# Position-based scalar lookup: first row, first column.
amzn.iat[0, 0]
234.229996
# Keep only the rows whose index date is strictly later than 2017-11-01.
after_nov_first = amzn.index > '2017-11-01'
amzn.loc[after_nov_first]
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2017-11-02 | 1097.810059 | 1101.939941 | 1086.869995 | 1094.219971 | 1094.219971 | 3684900 |
2017-11-03 | 1091.150024 | 1112.680054 | 1088.520020 | 1111.599976 | 1111.599976 | 3751500 |
2017-11-06 | 1109.150024 | 1125.410034 | 1108.770020 | 1120.660034 | 1120.660034 | 3381100 |
2017-11-07 | 1124.739990 | 1130.599976 | 1117.500000 | 1123.170044 | 1123.170044 | 2689000 |
2017-11-08 | 1122.819946 | 1135.540039 | 1119.109985 | 1132.880005 | 1132.880005 | 2581500 |
2017-11-09 | 1125.959961 | 1129.619995 | 1115.770020 | 1129.130005 | 1129.130005 | 3732700 |
2017-11-10 | 1126.099976 | 1131.750000 | 1124.060059 | 1125.349976 | 1125.349976 | 2179900 |
2017-11-13 | 1123.000000 | 1139.900024 | 1122.339966 | 1129.170044 | 1129.170044 | 2918400 |
2017-11-14 | 1130.109985 | 1138.000000 | 1123.890015 | 1136.839966 | 1136.839966 | 3138400 |
2017-11-15 | 1127.010010 | 1131.750000 | 1121.630005 | 1126.689941 | 1126.689941 | 3928700 |
2017-11-16 | 1130.160034 | 1138.160034 | 1130.050049 | 1137.290039 | 1137.290039 | 2213300 |
2017-11-17 | 1138.280029 | 1138.800049 | 1125.810059 | 1129.880005 | 1129.880005 | 2345100 |
# Report the mean and standard deviation of AMZN trading volume.
volume = amzn['Volume']
print('Mean: %.3f Std: %.3f' % (volume.mean(), volume.std()))
Mean: 3684915.959 Std: 2130898.713
# Threshold for an unusually high volume day: mean plus two standard deviations.
volume = amzn['Volume']
high = volume.mean() + 2 * volume.std()
# Fraction of all rows whose volume exceeds that threshold.
len(amzn.loc[volume > high]) / len(amzn)
0.03459119496855346
# Add the row-over-row fractional change of the closing price as a new column.
# The first row has no previous close, so its value is NaN.
amzn['pct_chng'] = amzn['Close'].pct_change()
amzn.head(n=5)
Open | High | Low | Close | Adj Close | Volume | pct_chng | |
---|---|---|---|---|---|---|---|
Date | |||||||
2012-11-01 | 234.229996 | 234.550003 | 231.339996 | 232.139999 | 232.139999 | 3905100 | NaN |
2012-11-02 | 234.009995 | 237.399994 | 232.110001 | 232.419998 | 232.419998 | 3374700 | 0.001206 |
2012-11-05 | 232.039993 | 234.830002 | 230.110001 | 234.330002 | 234.330002 | 1846100 | 0.008218 |
2012-11-06 | 235.410004 | 237.750000 | 234.449997 | 237.559998 | 237.559998 | 2661300 | 0.013784 |
2012-11-07 | 235.649994 | 235.660004 | 229.419998 | 232.059998 | 232.059998 | 3624200 | -0.023152 |
# Slice out the November 2017 rows of each frame using a partial date string.
goog_11 = goog.loc['2017-11']
amzn_11 = amzn.loc['2017-11']
# Stack the two slices vertically. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; pd.concat is the supported equivalent.
# Only amzn carries 'pct_chng', so that column is NaN for the GOOG rows.
pd.concat([goog_11, amzn_11]).tail()
Adj Close | Close | High | Low | Open | Volume | pct_chng | |
---|---|---|---|---|---|---|---|
Date | |||||||
2017-11-13 | 1129.170044 | 1129.170044 | 1139.900024 | 1122.339966 | 1123.000000 | 2918400 | 0.003395 |
2017-11-14 | 1136.839966 | 1136.839966 | 1138.000000 | 1123.890015 | 1130.109985 | 3138400 | 0.006793 |
2017-11-15 | 1126.689941 | 1126.689941 | 1131.750000 | 1121.630005 | 1127.010010 | 3928700 | -0.008928 |
2017-11-16 | 1137.290039 | 1137.290039 | 1138.160034 | 1130.050049 | 1130.160034 | 2213300 | 0.009408 |
2017-11-17 | 1129.880005 | 1129.880005 | 1138.800049 | 1125.810059 | 1138.280029 | 2345100 | -0.006516 |
# Side-by-side closing prices, one column per ticker, aligned on the date index.
# The Amazon column was keyed 'AZMN' (transposed letters); use the real ticker
# 'AMZN' so the label matches the symbol used everywhere else in the notebook.
close = pd.DataFrame({'AMZN': amzn.Close, 'GOOG': goog.Close})
close.head()
AZMN | GOOG | |
---|---|---|
Date | ||
2012-11-01 | 232.139999 | 341.573395 |
2012-11-02 | 232.419998 | 341.737335 |
2012-11-05 | 234.330002 | 339.273346 |
2012-11-06 | 237.559998 | 338.657349 |
2012-11-07 | 232.059998 | 331.404541 |
# GOOG volume for rows dated after the start of 2017, averaged per calendar
# month and plotted as a line.
goog_volume_2017 = goog.loc[goog.index > '2017', 'Volume']
goog_volume_2017.resample('M').mean().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1110b4208>
# Log return per row: natural log of (close / previous row's close).
amzn_change = np.log(amzn['Close'].div(amzn['Close'].shift(1)))
goog_change = np.log(goog['Close'].div(goog['Close'].shift(1)))
# Plot the cumulative log return of both tickers on the same axes.
ax = amzn_change.cumsum().plot()
goog_change.cumsum().plot(ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x111103d30>
# Combine both log-return series, skipping the first row (NaN, since it has
# no previous close), then round-trip the result through a CSV file.
new = pd.DataFrame({
    'AMZN': amzn_change.iloc[1:],
    'GOOG': goog_change.iloc[1:],
})
new.to_csv('stocks.csv')
pd.read_csv('stocks.csv').head()
Date | AMZN | GOOG | |
---|---|---|---|
0 | 2012-11-02 | 0.001205 | 0.000480 |
1 | 2012-11-05 | 0.008184 | -0.007236 |
2 | 2012-11-06 | 0.013690 | -0.001817 |
3 | 2012-11-07 | -0.023424 | -0.021649 |
4 | 2012-11-08 | -0.020505 | -0.022481 |
# Reload the saved returns and use the 'Date' column as the index, while
# keeping 'Date' as a regular column as well.
new_stocks = pd.read_csv('stocks.csv')
new_stocks = new_stocks.set_index('Date', drop=False)
new_stocks.head()
Date | AMZN | GOOG | |
---|---|---|---|
Date | |||
2012-11-02 | 2012-11-02 | 0.001205 | 0.000480 |
2012-11-05 | 2012-11-05 | 0.008184 | -0.007236 |
2012-11-06 | 2012-11-06 | 0.013690 | -0.001817 |
2012-11-07 | 2012-11-07 | -0.023424 | -0.021649 |
2012-11-08 | 2012-11-08 | -0.020505 | -0.022481 |
# Remove the 'Date' column in place; the dates remain available as the index.
del new_stocks['Date']
new_stocks.head()
AMZN | GOOG | |
---|---|---|
Date | ||
2012-11-02 | 0.001205 | 0.000480 |
2012-11-05 | 0.008184 | -0.007236 |
2012-11-06 | 0.013690 | -0.001817 |
2012-11-07 | -0.023424 | -0.021649 |
2012-11-08 | -0.020505 | -0.022481 |
# Relabel the 'AMZN' column as 'AMZN_chng'; other columns are left unchanged.
new_stocks = new_stocks.rename(columns={'AMZN': 'AMZN_chng'})
new_stocks.head()
AMZN_chng | GOOG | |
---|---|---|
Date | ||
2012-11-02 | 0.001205 | 0.000480 |
2012-11-05 | 0.008184 | -0.007236 |
2012-11-06 | 0.013690 | -0.001817 |
2012-11-07 | -0.023424 | -0.021649 |
2012-11-08 | -0.020505 | -0.022481 |