# Exploratory pandas walkthrough, flattened from notebook cells: Series basics,
# a Yahoo price download, a CSV round-trip, returns, and correlation plots.
# Bare expressions such as ``df.head()`` only display a value when run
# cell-by-cell in IPython; executed as a plain script they are no-ops.
from IPython.core.display import HTML
HTML("")

import datetime
import itertools
import sys

import pandas as pd
# NOTE(review): pandas.io.data only exists in old pandas releases; newer ones
# moved the remote-data readers to the separate pandas-datareader package.
# Confirm the pandas version this script is pinned to before modernising.
import pandas.io.data
from pandas import Series, DataFrame
pd.__version__

import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rc('figure', figsize=(8, 7))
mpl.__version__

# --- Series basics: a labelled 1-D array that also behaves like a dict ------
labels = ['a', 'b', 'c', 'd', 'e']
s = Series([1, 2, 3, 4, 5], index=labels)
s
'b' in s
s['b']

mapping = s.to_dict()
mapping
Series(mapping)

# --- Download AAPL prices and round-trip them through a CSV file ------------
aapl = pd.io.data.get_data_yahoo('AAPL',
                                 start=datetime.datetime(2006, 10, 1),
                                 end=datetime.datetime(2012, 1, 1))
aapl.head()

aapl.to_csv('data/aapl_ohlc.csv')

# Show the first 10 lines of the file. This replaces the IPython-only
# ``!head data/aapl_ohlc.csv`` shell escape, which is a syntax error when the
# file is run as ordinary Python.
with open('data/aapl_ohlc.csv') as csv_file:
    sys.stdout.writelines(itertools.islice(csv_file, 10))

df = pd.read_csv('data/aapl_ohlc.csv', index_col='Date', parse_dates=True)
df.head()
df.index

# --- Indexing a time series by label and by position ------------------------
ts = df['Close'][-10:]
ts
type(ts)

date = ts.index[5]
date
ts[date]      # lookup by index label
ts.iloc[5]    # lookup by position; explicit .iloc instead of relying on
              # ts[5] falling back to positional access on a date index

# --- Column selection, derived columns, deletion ----------------------------
df[['Open', 'Close']].head()

df['diff'] = df.Open - df.Close
df.head()

del df['diff']
df.head()

# --- Moving average and simple returns --------------------------------------
close_px = df['Adj Close']
# NOTE(review): pd.rolling_mean is the legacy spelling; newer pandas expects
# close_px.rolling(40).mean(). Kept so the script stays consistent with the
# pandas.io.data-era API used above.
mavg = pd.rolling_mean(close_px, 40)
mavg[-10:]

# Simple one-period return computed by hand, then via the built-in helper.
rets = close_px / close_px.shift(1) - 1
rets.head()
close_px.pct_change().head()

close_px.plot(label='AAPL')
mavg.plot(label='mavg')
plt.legend()

# --- Several tickers at once: returns, scatter plots, correlation -----------
df = pd.io.data.get_data_yahoo(
    ['AAPL', 'GE', 'GOOG', 'IBM', 'KO', 'MSFT', 'PEP'],
    start=datetime.datetime(2010, 1, 1),
    end=datetime.datetime(2013, 1, 1))['Adj Close']
df.head()

rets = df.pct_change()

plt.scatter(rets.PEP, rets.KO)
plt.xlabel('Returns PEP')
plt.ylabel('Returns KO')

# NOTE(review): newer pandas exposes this as pd.plotting.scatter_matrix.
# The trailing semicolons suppress the cell's echoed return value in IPython.
pd.scatter_matrix(rets, diagonal='kde', figsize=(10, 10));

corr = rets.corr()
corr

# Correlation matrix as a heat map, with ticker names on both axes.
plt.imshow(corr, cmap='hot', interpolation='none')
plt.colorbar()
plt.xticks(range(len(corr)), corr.columns)
plt.yticks(range(len(corr)), corr.columns);

# --- Mean return vs. standard deviation per ticker, labelled ----------------
plt.scatter(rets.mean(), rets.std())
plt.xlabel('Expected returns')
plt.ylabel('Risk')
for label, x, y in zip(rets.columns, rets.mean(), rets.std()):
    plt.annotate(
        label,
        xy=(x, y),
        xytext=(20, -20),
        textcoords='offset points',
        ha='right',
        va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
        arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
# Build a price table one ticker at a time, then blank out a few cells to
# show how arithmetic and forward-filling treat missing values.
# Relies on ``pd`` and ``datetime`` imported earlier in the script.
import numpy as np  # np.nan is used below but numpy is not otherwise imported in the visible script

series_list = []
securities = ['AAPL', 'GOOG', 'IBM', 'MSFT']
for security in securities:
    s = pd.io.data.get_data_yahoo(security,
                                  start=datetime.datetime(2011, 10, 1),
                                  end=datetime.datetime(2013, 1, 1))['Adj Close']
    s.name = security  # Rename series to match security name
    series_list.append(s)

# Align the individual series on their index, one column per ticker.
df = pd.concat(series_list, axis=1)
df.head()

# Blank out cells chosen by row position and column label. The original used
# the mixed-mode ``df.ix`` indexer, which newer pandas no longer provides;
# ``.loc`` with labels taken from ``df.index`` selects the same rows
# (assumes the index labels are unique, as expected for a date index).
df.loc[df.index[0], 'AAPL'] = np.nan
df.loc[df.index[1], ['GOOG', 'IBM']] = np.nan
df.loc[df.index[1:4], 'MSFT'] = np.nan
df.head()

# Element-wise sum of two columns, shown after the cells above were set to NaN.
(df.AAPL + df.GOOG).head()

# Forward-fill: each gap takes the last preceding non-missing value.
df.ffill().head()