## FinanceDataReader 설치 (for 구글 Colab) !pip install -q finance-datareader %matplotlib inline import matplotlib.pyplot as plt plt.rcParams["figure.figsize"] = (14,8) plt.rcParams['font.size'] = 16 plt.rcParams['lines.linewidth'] = 2 plt.rcParams["axes.grid"] = True plt.rcParams['axes.axisbelow'] = True # S&P500 종목 리스트 import FinanceDataReader as fdr sp500 = fdr.StockListing('S&P500') sp500.head(10) # S&P500 종목은 실제 505개 len(sp500) # 가격 데이터 가져오기 예시 import FinanceDataReader as fdr aapl = fdr.DataReader('AAPL', '2010-01-01', '2019-12-31') aapl.head(10) aapl.tail(10) # 디렉토리가 없으면 생성 import os folder = "sp500_price" if not os.path.isdir(folder): os.mkdir(folder) import csv import pandas as pd for ix, row in sp500.iterrows(): sym, name = row['Symbol'], row['Name'] csv_fn = os.path.join(folder, f'{sym}.csv') if os.path.exists(csv_fn): print('skip', csv_fn) continue try: print(csv_fn, end=' ') df = fdr.DataReader(sym, '2019-01-01', '2019-12-31') df['Symbol'] = sym print(len(df), 'rows') df.to_csv(csv_fn, quoting=csv.QUOTE_MINIMAL) except ValueError as e: print(e) except IndexError as e: print(e) # 읽어서 확인 sym = 'MMM' csv_fn = os.path.join(folder, f'{sym}.csv') mmm = pd.read_csv(csv_fn, parse_dates=True, index_col='Date') mmm.head() mmm.tail() us500 = fdr.DataReader('US500', '2010-01-01', '2019-12-31') us500.head() df_plot = pd.DataFrame() df_plot['S&P500'] = us500['Close'] df_plot['AAPL'] = aapl['Close'] df_plot = df_plot / df_plot.iloc[0] - 1.0 df_plot.plot() changes = us500['Close'].pct_change() (changes + 1).product() - 1 changes = aapl['Close'].pct_change() (changes + 1).product() - 1 df_price = pd.DataFrame() for ix, row in sp500.iterrows(): try: sym, name = row['Symbol'], row['Name'] csv_fn = os.path.join(folder, f'{sym}.csv') print(csv_fn) df = pd.read_csv(csv_fn, parse_dates=True, index_col='Date') df_price[sym] = df['Close'] except FileNotFoundError as e: print(e) except ValueError as e: print(e) df_price.tail(10) df_price.columns len(df_price.columns) returns = (df_price['2019'].pct_change() + 1).product() - 1 returns = returns.sort_values(ascending=False) # Best 20 best20 = returns.head(20) best20 # Worst 20 worst20 = returns.tail(20)[::-1] worst20 # Best 10 best10 = returns.head(10) best10 df_price.loc['2019', best10.keys()].head(20) import matplotlib as mpl import numpy as np def make_colors(n, colormap=plt.cm.Spectral): return colormap(np.linspace(0.1, 1.0, n)) df_plot = pd.DataFrame() df_plot['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수 df_plot[best10.keys()] = df_price.loc['2019', best10.keys()] # 10개 종목 df_plot = df_plot / df_plot.iloc[0] - 1 df_plot.plot(color=make_colors(11)) worst10 = returns.tail(10)[::-1] df_plot = pd.DataFrame() df_plot['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수 df_plot[worst10.keys()] = df_price.loc['2019', worst10.keys()] # 10개 종목 df_plot = df_plot / df_plot.iloc[0] - 1 df_plot.plot(color=make_colors(11)) df = pd.DataFrame() df['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수 df[best20.keys()] = df_price.loc['2019', best20.keys()] df.head(20) # 종목간 상관관계 changes = df.pct_change() corr = changes.corr() corr.head(10) # 상관관계 히트맵 plt.figure(figsize=(16,8)) plt.grid(False) plt.imshow(corr, cmap='hot', interpolation='none') plt.colorbar() plt.xticks(range(len(corr)), corr.columns, rotation=90) plt.yticks(range(len(corr)), corr.columns) plt.show() # 상관계수 순위 만들기 (수익률 상위 20종목) import pandas as pd idx, vals = [], [] for ix, i in enumerate(corr.columns.values): for j in corr.columns.values[ix + 1:]: idx.append((i, j)) vals.append(corr[i][j]) ser = pd.Series(data=vals, index=idx) ser_ord = ser.sort_values(ascending=False) ser_ord[:20] # 지수와 상관관계 changes = df.pct_change() corr = changes.corr() corr['S&P500'].sort_values(ascending=False)[1:] plt.figure(figsize=(16,8)) plt.scatter(changes.mean(), changes.std()) plt.xlabel('returns') plt.ylabel('risk') plt.xlim(0.0005, 0.003) plt.ylim(0.005, 0.030) for label, x, y in zip(changes.columns, changes.mean(), changes.std()): plt.annotate( label, xy=(x, y), xytext=(30, -30), textcoords = 'offset points', ha = 'right', va = 'bottom', bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5), arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0')) df = pd.DataFrame() df['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수 df[df_price.columns] = df_price.loc['2019', df_price.columns] # 종목간 상관관계 changes = df.pct_change() corr = changes.corr() idx, vals = [], [] for ix, i in enumerate(corr.columns.values): for j in corr.columns.values[ix + 1:]: idx.append((i, j)) vals.append(corr[i][j]) ser = pd.Series(data=vals, index=idx) ser_ord = ser.sort_values(ascending=False) ser_ord[:40] df_price[['CMS', 'WEC']].plot() df_plot = df_price[['CMS', 'WEC']] df_plot = df_plot / (df_plot.max() - df_plot.min()) df_plot.plot() df_plot = df_price[['CMS', 'WEC']] df_plot = df_plot / df_plot.iloc[0] df_plot.plot() !zip -r sp500_price.zip sp500_price from google.colab import files files.download('sp500_price.zip')