S&P 500 가격 데이터 수집과 수익률 분석

이승준 [email protected]

2019 ~ 2020 FinanceData.KR

In [1]:
## FinanceDataReader 설치 (for 구글 Colab)

!pip install -q finance-datareader
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (14,8)
plt.rcParams['font.size'] = 16
plt.rcParams['lines.linewidth'] = 2
plt.rcParams["axes.grid"] = True
plt.rcParams['axes.axisbelow'] = True

S&P500 종목 리스트

In [3]:
# S&P500 종목 리스트
import FinanceDataReader as fdr

sp500 = fdr.StockListing('S&P500')
sp500.head(10)
Out[3]:
Symbol Name Sector Industry
0 MMM 3M Company Industrials Industrial Conglomerates
1 ABT Abbott Laboratories Health Care Health Care Equipment
2 ABBV AbbVie Inc. Health Care Pharmaceuticals
3 ABMD ABIOMED Inc Health Care Health Care Equipment
4 ACN Accenture plc Information Technology IT Consulting & Other Services
5 ATVI Activision Blizzard Communication Services Interactive Home Entertainment
6 ADBE Adobe Inc. Information Technology Application Software
7 AMD Advanced Micro Devices Inc Information Technology Semiconductors
8 AAP Advance Auto Parts Consumer Discretionary Automotive Retail
9 AES AES Corp Utilities Independent Power Producers & Energy Traders
In [4]:
# S&P500 종목은 실제 505개
len(sp500)
Out[4]:
505

데이터 수집과 저장

In [5]:
# 가격 데이터 가져오기 예시
import FinanceDataReader as fdr

aapl = fdr.DataReader('AAPL', '2010-01-01', '2019-12-31')
aapl.head(10)
Out[5]:
Close Open High Low Volume Change
Date
2010-01-04 30.57 30.49 30.64 30.34 123430000.0 0.0156
2010-01-05 30.63 30.66 30.80 30.46 150480000.0 0.0020
2010-01-06 30.14 30.63 30.75 30.11 138040000.0 -0.0160
2010-01-07 30.08 30.25 30.29 29.86 119280000.0 -0.0020
2010-01-08 30.28 30.04 30.29 29.87 111970000.0 0.0066
2010-01-11 30.02 30.40 30.43 29.78 115560000.0 -0.0086
2010-01-12 29.67 29.88 29.97 29.49 148610000.0 -0.0117
2010-01-13 30.09 29.70 30.13 29.16 151470000.0 0.0142
2010-01-14 29.92 30.02 30.07 29.86 108290000.0 -0.0056
2010-01-15 29.42 30.13 30.23 29.41 148580000.0 -0.0167
In [6]:
aapl.tail(10)
Out[6]:
Close Open High Low Volume Change
Date
2019-12-17 280.41 279.57 281.77 278.80 28580000.0 0.0020
2019-12-18 279.74 279.80 281.90 279.12 29020000.0 -0.0024
2019-12-19 280.02 279.50 281.18 278.95 24630000.0 0.0010
2019-12-20 279.44 282.23 282.65 278.56 69030000.0 -0.0021
2019-12-23 284.00 280.53 284.25 280.37 24680000.0 0.0163
2019-12-24 284.27 284.69 284.89 282.92 12120000.0 0.0010
2019-12-26 289.91 284.82 289.98 284.70 23330000.0 0.0198
2019-12-27 289.80 291.12 293.97 288.12 36590000.0 -0.0004
2019-12-30 291.52 289.46 292.69 285.22 36060000.0 0.0059
2019-12-31 293.65 289.93 293.68 289.52 25250000.0 0.0073

2010-01-01 ~ 2019-12-30 데이터 수집과 저장

CSV 파일로 저장하기

In [7]:
# 디렉토리가 없으면 생성
import os
folder = "sp500_price"
if not os.path.isdir(folder):
    os.mkdir(folder)
In [ ]:
import csv
import pandas as pd

for ix, row in sp500.iterrows():
    sym, name = row['Symbol'], row['Name']
    csv_fn = os.path.join(folder, f'{sym}.csv')
    if os.path.exists(csv_fn):
        print('skip', csv_fn)
        continue

    try:
      print(csv_fn, end=' ')
      df = fdr.DataReader(sym, '2019-01-01', '2019-12-31')
      df['Symbol'] = sym
      print(len(df), 'rows')
      df.to_csv(csv_fn, quoting=csv.QUOTE_MINIMAL)
    except ValueError as e:
      print(e)
    except IndexError as e:
      print(e)
In [9]:
# 읽어서 확인
sym = 'MMM'
csv_fn = os.path.join(folder, f'{sym}.csv')
mmm = pd.read_csv(csv_fn, parse_dates=True, index_col='Date')
mmm.head()
Out[9]:
Close Open High Low Volume Change Symbol
Date
2019-01-02 190.95 187.82 190.99 186.70 2480000.0 0.0022 MMM
2019-01-03 183.76 188.28 188.28 182.89 3360000.0 -0.0377 MMM
2019-01-04 191.32 186.75 191.98 186.03 3000000.0 0.0411 MMM
2019-01-07 190.88 191.36 192.30 188.66 2160000.0 -0.0023 MMM
2019-01-08 191.68 193.00 194.11 189.58 2480000.0 0.0042 MMM
In [10]:
mmm.tail()
Out[10]:
Close Open High Low Volume Change Symbol
Date
2019-12-24 176.68 178.27 178.67 176.04 1260000.0 -0.0100 MMM
2019-12-26 176.59 176.45 176.66 175.50 1440000.0 -0.0005 MMM
2019-12-27 177.26 176.65 177.54 175.62 1470000.0 0.0038 MMM
2019-12-30 175.83 177.26 178.96 175.59 1720000.0 -0.0081 MMM
2019-12-31 176.42 175.23 176.59 175.07 1570000.0 0.0034 MMM

S&P500 지수

In [11]:
us500 = fdr.DataReader('US500', '2010-01-01', '2019-12-31')
us500.head()
Out[11]:
Close Open High Low Volume Change
Date
2010-01-04 1132.99 1116.56 1133.87 1116.56 0.0 0.0160
2010-01-05 1136.52 1132.66 1136.63 1129.66 0.0 0.0031
2010-01-06 1137.14 1135.71 1139.19 1133.95 0.0 0.0005
2010-01-07 1141.69 1136.27 1142.46 1131.32 0.0 0.0040
2010-01-08 1144.98 1140.52 1145.39 1136.22 0.0 0.0029
In [12]:
df_plot = pd.DataFrame()

df_plot['S&P500'] = us500['Close']
df_plot['AAPL'] = aapl['Close']

df_plot = df_plot / df_plot.iloc[0] - 1.0
df_plot.plot()
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f29520d3d68>
In [13]:
changes = us500['Close'].pct_change()

(changes + 1).product() - 1
Out[13]:
1.8515520878383813
In [14]:
changes = aapl['Close'].pct_change()

(changes + 1).product() - 1
Out[14]:
8.605822701995441

S&P500 전체 종목의 가격

In [ ]:
df_price = pd.DataFrame() 

for ix, row in sp500.iterrows():
  try:
    sym, name = row['Symbol'], row['Name']
    csv_fn = os.path.join(folder, f'{sym}.csv')
    print(csv_fn)
    df = pd.read_csv(csv_fn, parse_dates=True, index_col='Date')
    df_price[sym] = df['Close']
  except FileNotFoundError as e:
    print(e)
  except ValueError as e:
    print(e)    
In [16]:
df_price.tail(10)
Out[16]:
MMM ABT ABBV ABMD ACN ATVI ADBE AMD AAP AES AFL A APD AKAM ALK ALB ARE ALXN ALGN ALLE ADS LNT ALL GOOGL GOOG MO AMZN AMCR AEE AAL AEP AXP AIG AMT AWK AMP ABC AME AMGN APH ... VFC VLO VAR VTR VRSN VRSK VZ VRTX VIAC V VNO VMC WRB WAB WMT WBA DIS WM WAT WEC WFC WELL WST WDC WU WRK WY WHR WMB WLTW WYNN XEL XRX XLNX XYL YUM ZBRA ZBH ZION ZTS
Date
2019-12-17 169.93 87.06 90.08 178.93 203.40 58.26 322.85 42.77 157.98 18.92 53.11 83.95 235.61 84.29 70.06 68.77 154.90 109.82 268.29 123.53 109.83 53.88 109.85 1354.89 1355.12 50.83 1790.66 10.81 75.61 28.28 93.22 124.95 51.79 211.93 120.90 167.34 83.15 99.20 242.84 106.90 ... 95.02 95.68 142.81 55.83 190.93 150.81 61.49 217.62 40.28 185.52 63.13 143.03 69.57 74.42 121.28 57.09 147.73 112.96 231.30 91.09 54.34 77.40 148.39 57.24 26.86 42.01 29.66 146.65 22.94 201.23 133.50 63.46 37.08 96.00 77.98 99.62 253.44 149.86 51.51 127.10
2019-12-18 169.03 86.93 89.33 177.01 205.76 59.02 324.38 42.30 158.03 19.17 52.91 83.43 233.11 84.90 69.03 68.90 158.14 109.20 272.61 123.61 110.45 54.28 109.65 1351.91 1352.62 51.36 1784.03 10.77 76.03 28.00 93.75 124.26 51.97 217.95 121.23 167.26 83.55 98.96 241.94 106.22 ... 96.50 94.51 142.10 56.03 190.15 148.84 60.86 219.35 41.15 184.90 64.30 143.43 69.12 74.77 119.86 57.20 146.26 112.58 229.00 91.66 53.67 78.39 149.30 57.23 26.87 42.14 29.84 146.59 23.08 201.71 138.71 63.79 37.29 96.93 77.91 98.74 255.43 148.88 51.35 126.99
2019-12-19 172.15 87.35 88.77 169.43 208.30 59.13 327.63 42.83 156.48 19.55 52.99 84.51 233.06 84.51 69.20 69.45 159.82 111.24 273.75 123.68 110.08 54.22 111.21 1356.44 1356.04 50.79 1792.28 10.80 75.45 28.55 93.97 124.83 51.93 224.20 121.66 167.28 83.80 99.56 241.84 107.05 ... 96.90 94.59 141.86 56.62 190.20 148.19 60.80 217.78 41.95 186.54 65.34 141.71 68.74 76.10 120.08 57.75 146.15 112.48 231.87 92.18 53.63 79.05 149.44 58.77 27.17 42.24 30.01 143.90 23.50 203.85 139.84 63.85 37.03 96.29 77.59 99.82 253.13 148.80 51.49 128.74
2019-12-20 175.37 86.66 89.29 162.89 211.10 59.22 327.61 44.15 158.12 19.81 53.14 85.19 235.00 86.86 69.34 70.36 159.24 109.75 272.89 124.20 111.29 54.95 111.58 1351.22 1349.59 51.13 1786.50 10.77 76.57 29.04 94.90 125.77 51.54 227.74 122.72 168.09 85.67 99.78 243.06 107.15 ... 97.24 95.27 142.01 57.45 192.45 149.68 62.07 220.69 42.39 188.00 64.99 141.77 68.93 76.41 120.29 58.39 146.88 113.16 233.28 92.85 53.33 80.40 150.84 61.11 27.43 42.48 30.15 147.41 23.80 205.64 138.05 64.15 37.35 97.74 78.61 100.59 252.49 150.32 51.63 132.68
2019-12-23 178.47 87.35 90.25 170.44 210.83 58.89 328.95 45.46 159.97 19.82 52.54 85.26 235.14 85.91 69.29 72.43 159.21 110.46 278.14 123.42 111.47 54.30 110.88 1350.63 1348.84 51.55 1793.00 10.91 75.64 29.30 93.75 124.49 50.97 226.97 121.39 167.28 85.60 100.14 243.03 107.26 ... 98.35 95.24 142.80 57.48 192.43 149.55 61.40 219.98 42.19 187.08 65.09 140.36 68.40 77.86 119.03 58.57 144.68 112.91 232.99 91.48 53.81 79.85 151.21 62.70 27.14 43.18 30.14 145.48 23.94 201.58 140.23 63.02 37.35 98.90 78.95 99.81 256.73 151.24 51.34 132.37
2019-12-24 176.68 87.28 89.85 172.30 211.61 58.81 329.64 46.54 160.57 19.76 52.76 85.30 235.05 85.73 69.09 72.32 160.10 110.28 277.89 123.87 110.99 54.27 111.17 1344.43 1343.56 50.28 1789.21 10.79 75.75 29.18 93.84 124.74 50.94 227.08 121.62 167.72 85.42 99.71 242.33 107.23 ... 98.90 95.10 142.90 57.37 192.75 149.17 61.28 220.56 42.02 187.57 65.28 142.65 68.42 77.63 119.51 58.35 145.29 112.60 233.09 91.62 53.82 79.93 150.73 62.23 27.07 42.76 29.85 145.18 23.84 201.99 139.44 62.86 37.37 98.49 78.64 100.33 254.33 150.87 51.53 132.92
2019-12-26 176.59 87.28 89.83 171.01 212.05 58.89 331.20 46.63 159.27 19.93 52.62 85.45 236.08 86.34 69.07 72.49 160.03 108.93 278.26 124.64 110.33 54.22 111.60 1362.47 1360.40 50.40 1868.77 10.85 75.64 29.67 93.88 125.41 50.62 227.87 121.88 167.68 85.05 100.07 241.90 107.99 ... 99.40 94.42 141.66 57.61 193.71 148.83 61.29 220.25 42.66 189.16 65.96 143.65 68.47 78.18 119.52 58.90 145.70 113.18 233.26 91.75 54.15 80.48 150.77 62.80 27.29 42.97 29.99 145.19 23.81 202.02 140.94 62.79 37.48 98.68 78.60 101.79 254.42 150.30 51.66 133.03
2019-12-27 177.26 87.40 89.20 169.27 212.22 59.19 330.79 46.18 158.35 19.98 52.62 85.42 235.49 86.50 68.29 72.01 160.40 108.55 277.64 124.59 110.91 54.34 111.87 1354.64 1351.89 50.40 1869.80 10.95 76.30 28.44 94.19 125.19 51.07 229.08 122.96 166.71 84.91 99.95 241.53 108.06 ... 99.96 93.20 142.24 57.83 194.05 148.58 61.53 220.97 41.76 189.39 65.89 143.89 68.57 78.55 119.59 59.02 145.75 113.89 233.76 92.31 53.92 80.96 150.81 62.13 27.37 42.64 29.98 145.92 23.59 201.76 139.81 63.12 37.31 98.42 78.84 101.90 256.00 150.01 51.50 133.25
2019-12-30 175.83 86.80 88.52 167.29 210.64 58.88 328.34 45.52 159.12 20.04 52.74 84.90 235.33 85.88 67.68 71.93 159.67 107.34 275.63 124.12 110.69 54.38 111.77 1339.71 1336.14 49.89 1846.89 10.85 76.56 28.30 94.03 124.30 51.00 228.85 123.05 166.07 83.64 99.93 240.27 108.28 ... 99.56 92.88 141.45 57.98 192.33 147.55 61.21 218.97 41.48 187.83 65.64 142.53 68.63 77.60 119.40 58.91 143.77 113.77 233.19 92.17 53.60 81.01 150.81 62.38 26.78 42.60 29.93 146.22 23.20 201.58 139.38 63.29 36.90 97.68 78.74 100.64 254.11 148.75 51.77 132.21
2019-12-31 176.42 86.86 88.54 170.59 210.57 59.42 329.81 45.86 160.16 19.90 52.90 85.31 234.99 86.38 67.75 73.04 161.58 108.15 279.04 124.54 112.20 54.72 112.45 1339.39 1337.02 49.91 1847.84 10.84 76.80 28.68 94.51 124.49 51.33 229.82 122.85 166.58 85.02 99.74 241.07 108.23 ... 99.66 93.65 142.01 57.74 192.68 149.34 61.40 218.95 41.97 187.90 66.50 143.99 69.10 77.80 118.84 58.96 144.63 113.96 233.65 92.23 53.80 81.78 150.33 63.47 26.78 42.91 30.20 147.53 23.72 201.94 138.87 63.49 36.87 97.77 78.79 100.73 255.44 149.68 51.92 132.35

10 rows × 503 columns

In [17]:
df_price.columns
Out[17]:
Index(['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI', 'ADBE', 'AMD', 'AAP',
       'AES',
       ...
       'WYNN', 'XEL', 'XRX', 'XLNX', 'XYL', 'YUM', 'ZBRA', 'ZBH', 'ZION',
       'ZTS'],
      dtype='object', length=503)
In [18]:
len(df_price.columns)
Out[18]:
503

2019 수익률 Best, Worst 20

  • df.pct_change()
  • (df_price['2019'].pct_change() + 1).product() - 1
In [19]:
returns = (df_price['2019'].pct_change() + 1).product() - 1
returns = returns.sort_values(ascending=False)

# Best 20
best20 = returns.head(20)
best20
Out[19]:
AMD     1.435475
PAYC    1.211863
LRCX    1.114244
KLAC    0.959850
TGT     0.929711
CPRT    0.907299
QRVO    0.899804
DXCM    0.899114
CMG     0.888104
LDOS    0.880692
AAPL    0.859486
XRX     0.831595
AMAT    0.823178
MKTX    0.812103
CDW     0.804219
ANSS    0.799944
GPN     0.789278
SWKS    0.778693
MSCI    0.757044
IR      0.746667
dtype: float64
In [20]:
# Worst 20

worst20 = returns.tail(20)[::-1]
worst20
Out[20]:
ABMD   -0.449639
OXY    -0.335323
GPS    -0.313398
LB     -0.311288
DXC    -0.310149
MYL    -0.280086
ADS    -0.277899
COG    -0.260093
KHC    -0.258653
MOS    -0.258396
KSS    -0.248525
TPR    -0.216216
CXO    -0.179211
DD     -0.172467
CTL    -0.144430
JWN    -0.137953
NLSN   -0.137272
FFIV   -0.133577
WBA    -0.133451
BEN    -0.129648
dtype: float64

2019 수익률 상위 10

  1. AMD(에이엠디) - 그래픽카드, 집적회로 제조사
  2. PAYC 페이컴 소프트웨어 - 클라우드 기반 인적 자본 관리 애플리케이션
  3. LRCX 램리서치 - 글로벌 웨이퍼 제조 장비 제조사로, 집적회로(IC) 제작에 쓰이는 반도체 가공 시스템
  4. KLAC KLA 텐코 - 반도체 제조 공정 관련 결함 검사 및 측정 시스템
  5. TGT 타깃 - 일반상품판매점과 온라인 사이트를 운영하는 유통업체

CPRT 코파트 온라인 경매 및 차량 리마케팅 서비스

QRVO 코보 무선 주파수(RF) 기술을 전문으로 하는 반도체 제품 제조

CMG 치폴레 멕시칸 그릴 멕시코 음식 전문의 패스트푸드 프랜차이즈

LDOS 레이도스 홀딩스

AAPL 애플 전자제품 제조사로, 소프트웨어, 서비스, 주변 기기, 네트워킹 솔루션

In [21]:
# Best 10
best10 = returns.head(10)
best10
Out[21]:
AMD     1.435475
PAYC    1.211863
LRCX    1.114244
KLAC    0.959850
TGT     0.929711
CPRT    0.907299
QRVO    0.899804
DXCM    0.899114
CMG     0.888104
LDOS    0.880692
dtype: float64
In [22]:
df_price.loc['2019', best10.keys()].head(20)
Out[22]:
AMD PAYC LRCX KLAC TGT CPRT QRVO DXCM CMG LDOS
Date
2019-01-02 18.83 119.70 138.30 90.91 66.44 47.68 61.18 115.18 443.36 52.05
2019-01-03 17.05 116.50 131.63 85.81 65.53 46.89 55.64 112.26 439.45 51.33
2019-01-04 19.00 121.02 138.06 91.30 66.43 48.56 59.37 116.24 455.00 53.38
2019-01-07 20.57 124.11 139.29 92.60 69.68 48.83 59.78 129.95 485.15 53.50
2019-01-08 20.75 127.14 136.93 91.13 69.61 49.63 59.37 131.52 497.37 53.91
2019-01-09 20.19 128.47 144.00 93.48 70.29 50.30 60.45 138.75 498.48 54.57
2019-01-10 19.74 128.61 144.57 93.71 68.29 50.60 61.86 141.90 509.62 54.91
2019-01-11 20.27 127.30 144.11 94.03 69.61 50.66 63.69 141.10 506.95 55.42
2019-01-14 20.23 124.57 140.43 92.43 68.84 49.92 61.18 142.74 501.99 55.33
2019-01-15 20.38 130.77 140.00 92.52 68.21 49.36 61.56 147.05 514.49 55.39
2019-01-16 19.73 131.38 138.75 90.96 67.85 49.32 61.41 147.05 514.82 55.28
2019-01-17 20.25 132.22 141.76 92.92 69.37 49.74 62.25 149.50 517.66 56.08
2019-01-18 20.77 136.24 147.55 95.73 70.68 50.06 63.28 149.52 513.24 56.88
2019-01-22 19.76 136.54 139.89 93.21 70.28 49.64 61.94 150.82 520.08 56.12
2019-01-23 19.80 137.02 139.33 91.85 70.32 49.27 61.91 146.54 521.37 56.15
2019-01-24 20.85 138.49 161.20 98.29 70.61 49.51 63.38 144.08 533.39 56.37
2019-01-25 21.93 143.05 165.49 101.39 72.39 50.08 65.76 145.01 543.37 57.05
2019-01-28 20.18 144.00 166.06 101.68 72.12 49.84 65.09 141.36 532.00 57.09
2019-01-29 19.25 141.67 164.99 100.99 71.70 49.58 63.66 140.05 530.06 57.51
2019-01-30 23.09 145.56 170.93 105.98 72.33 50.14 65.53 143.61 532.98 58.19
In [23]:
import matplotlib as mpl
import numpy as np

def make_colors(n, colormap=plt.cm.Spectral):
    return colormap(np.linspace(0.1, 1.0, n))
In [24]:
df_plot = pd.DataFrame()

df_plot['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수
df_plot[best10.keys()] = df_price.loc['2019', best10.keys()] # 10개 종목

df_plot = df_plot / df_plot.iloc[0] - 1
df_plot.plot(color=make_colors(11))
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f2952058240>
In [25]:
worst10 = returns.tail(10)[::-1]
In [26]:
df_plot = pd.DataFrame()

df_plot['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수
df_plot[worst10.keys()] = df_price.loc['2019', worst10.keys()] # 10개 종목

df_plot = df_plot / df_plot.iloc[0] - 1
df_plot.plot(color=make_colors(11))
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f29508452e8>

수익률

In [27]:
df = pd.DataFrame()

df['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수
df[best20.keys()] = df_price.loc['2019', best20.keys()]
In [28]:
df.head(20)
Out[28]:
S&P500 AMD PAYC LRCX KLAC TGT CPRT QRVO DXCM CMG LDOS AAPL XRX AMAT MKTX CDW ANSS GPN SWKS MSCI IR
Date
2019-01-02 2510.03 18.83 119.70 138.30 90.91 66.44 47.68 61.18 115.18 443.36 52.05 157.92 20.13 33.48 209.21 79.17 143.01 102.03 67.96 146.94 21.00
2019-01-03 2447.89 17.05 116.50 131.63 85.81 65.53 46.89 55.64 112.26 439.45 51.33 142.19 19.69 31.54 207.76 77.13 137.64 98.72 60.72 141.34 20.58
2019-01-04 2531.94 19.00 121.02 138.06 91.30 66.43 48.56 59.37 116.24 455.00 53.38 148.26 20.45 33.70 214.30 77.88 143.66 102.75 63.63 146.60 20.94
2019-01-07 2549.69 20.57 124.11 139.29 92.60 69.68 48.83 59.78 129.95 485.15 53.50 147.93 20.89 34.30 216.86 77.57 147.32 104.90 64.49 147.28 21.61
2019-01-08 2574.41 20.75 127.14 136.93 91.13 69.61 49.63 59.37 131.52 497.37 53.91 150.75 21.58 32.91 216.69 80.27 148.05 107.67 65.19 150.34 21.86
2019-01-09 2584.96 20.19 128.47 144.00 93.48 70.29 50.30 60.45 138.75 498.48 54.57 153.31 21.89 34.30 213.14 81.33 150.64 109.25 67.69 151.81 22.38
2019-01-10 2596.64 19.74 128.61 144.57 93.71 68.29 50.60 61.86 141.90 509.62 54.91 153.80 22.15 34.76 212.67 81.94 152.60 110.25 68.59 153.94 22.49
2019-01-11 2596.26 20.27 127.30 144.11 94.03 69.61 50.66 63.69 141.10 506.95 55.42 152.29 22.14 34.78 212.59 80.59 154.75 110.79 70.02 154.83 22.71
2019-01-14 2582.61 20.23 124.57 140.43 92.43 68.84 49.92 61.18 142.74 501.99 55.33 150.00 22.30 33.86 207.58 79.24 153.60 110.04 68.35 153.71 22.00
2019-01-15 2610.30 20.38 130.77 140.00 92.52 68.21 49.36 61.56 147.05 514.49 55.39 153.07 22.81 33.75 210.88 80.49 156.16 112.01 68.45 157.06 21.92
2019-01-16 2616.10 19.73 131.38 138.75 90.96 67.85 49.32 61.41 147.05 514.82 55.28 154.94 23.11 33.64 216.25 80.53 157.13 112.91 68.80 160.25 21.84
2019-01-17 2635.96 20.25 132.22 141.76 92.92 69.37 49.74 62.25 149.50 517.66 56.08 155.86 23.12 34.54 215.99 80.76 158.31 113.43 69.03 160.73 22.23
2019-01-18 2670.71 20.77 136.24 147.55 95.73 70.68 50.06 63.28 149.52 513.24 56.88 156.82 23.43 35.71 219.18 81.69 161.71 114.59 70.07 165.97 23.22
2019-01-22 2632.90 19.76 136.54 139.89 93.21 70.28 49.64 61.94 150.82 520.08 56.12 153.30 23.38 34.44 218.30 80.33 161.03 112.69 68.90 162.26 22.78
2019-01-23 2638.70 19.80 137.02 139.33 91.85 70.32 49.27 61.91 146.54 521.37 56.15 153.92 23.57 34.31 219.81 81.39 158.63 111.28 68.27 162.78 22.43
2019-01-24 2642.33 20.85 138.49 161.20 98.29 70.61 49.51 63.38 144.08 533.39 56.37 152.70 23.92 37.82 218.75 81.16 158.10 112.23 70.03 161.74 22.65
2019-01-25 2664.76 21.93 143.05 165.49 101.39 72.39 50.08 65.76 145.01 543.37 57.05 157.76 24.54 39.19 219.45 81.96 161.65 113.23 72.47 163.83 24.18
2019-01-28 2643.85 20.18 144.00 166.06 101.68 72.12 49.84 65.09 141.36 532.00 57.09 156.30 24.30 38.70 218.30 82.07 160.70 111.65 72.69 162.44 23.97
2019-01-29 2640.00 19.25 141.67 164.99 100.99 71.70 49.58 63.66 140.05 530.06 57.51 154.68 27.07 38.20 213.50 81.92 158.86 110.70 71.54 160.85 24.44
2019-01-30 2681.05 23.09 145.56 170.93 105.98 72.33 50.14 65.53 143.61 532.98 58.19 165.25 28.16 39.60 210.22 82.98 162.99 113.68 73.92 162.77 24.63
In [29]:
# 종목간 상관관계
changes = df.pct_change()
corr = changes.corr()
corr.head(10)
Out[29]:
S&P500 AMD PAYC LRCX KLAC TGT CPRT QRVO DXCM CMG LDOS AAPL XRX AMAT MKTX CDW ANSS GPN SWKS MSCI IR
S&P500 1.000000 0.610437 0.510378 0.573241 0.659310 0.287969 0.492393 0.553678 0.291508 0.302902 0.517872 0.742273 0.527770 0.600518 0.226362 0.585452 0.751633 0.646109 0.619364 0.677740 0.455418
AMD 0.610437 1.000000 0.327791 0.454464 0.548725 0.204122 0.314434 0.445470 0.248204 0.271885 0.303984 0.493731 0.325530 0.499653 0.074996 0.333528 0.491899 0.372128 0.445481 0.437443 0.274731
PAYC 0.510378 0.327791 1.000000 0.342322 0.324485 0.216424 0.302606 0.257256 0.281175 0.312506 0.399857 0.387340 0.199495 0.292915 0.437965 0.444345 0.609793 0.577442 0.320867 0.562790 0.109671
LRCX 0.573241 0.454464 0.342322 1.000000 0.779465 0.130958 0.308146 0.511473 0.100905 0.166910 0.302005 0.481127 0.279081 0.872015 0.091235 0.352777 0.472235 0.411833 0.565487 0.381490 0.271845
KLAC 0.659310 0.548725 0.324485 0.779465 1.000000 0.171246 0.313695 0.539211 0.206961 0.240637 0.364401 0.571946 0.324578 0.808078 0.173323 0.395231 0.532250 0.447443 0.597925 0.428796 0.306690
TGT 0.287969 0.204122 0.216424 0.130958 0.171246 1.000000 0.148721 0.194496 0.140692 0.124946 0.128571 0.166440 0.150220 0.122040 0.127591 0.133044 0.246601 0.160135 0.191611 0.152761 0.143732
CPRT 0.492393 0.314434 0.302606 0.308146 0.313695 0.148721 1.000000 0.327883 0.103755 0.128791 0.305929 0.326658 0.320583 0.304192 0.177971 0.269213 0.440785 0.427613 0.363579 0.368780 0.260459
QRVO 0.553678 0.445470 0.257256 0.511473 0.539211 0.194496 0.327883 1.000000 0.139119 0.055690 0.238955 0.557627 0.265410 0.520509 0.042831 0.297773 0.396601 0.372271 0.804908 0.414121 0.369953
DXCM 0.291508 0.248204 0.281175 0.100905 0.206961 0.140692 0.103755 0.139119 1.000000 0.221246 0.195887 0.222664 0.100036 0.180592 0.220295 0.183933 0.416770 0.312937 0.197225 0.325803 0.192268
CMG 0.302902 0.271885 0.312506 0.166910 0.240637 0.124946 0.128791 0.055690 0.221246 1.000000 0.277243 0.205593 0.087576 0.233175 0.212962 0.345914 0.349593 0.293355 0.123536 0.291325 0.057731
In [30]:
# 상관관계 히트맵

plt.figure(figsize=(16,8))
plt.grid(False)
plt.imshow(corr, cmap='hot', interpolation='none')
plt.colorbar()
plt.xticks(range(len(corr)), corr.columns, rotation=90)
plt.yticks(range(len(corr)), corr.columns)
plt.show()
In [31]:
# 상관계수 순위 만들기 (수익률 상위 20종목)
import pandas as pd

idx, vals = [], []
for ix, i in enumerate(corr.columns.values):
    for j in corr.columns.values[ix + 1:]:
        idx.append((i, j))
        vals.append(corr[i][j])

ser = pd.Series(data=vals, index=idx)
ser_ord = ser.sort_values(ascending=False)
ser_ord[:20]
Out[31]:
(LRCX, AMAT)      0.872015
(KLAC, AMAT)      0.808078
(QRVO, SWKS)      0.804908
(LRCX, KLAC)      0.779465
(S&P500, ANSS)    0.751633
(S&P500, AAPL)    0.742273
(S&P500, MSCI)    0.677740
(S&P500, KLAC)    0.659310
(S&P500, GPN)     0.646109
(AAPL, SWKS)      0.637391
(GPN, MSCI)       0.622021
(ANSS, MSCI)      0.620718
(S&P500, SWKS)    0.619364
(AMAT, SWKS)      0.616893
(S&P500, AMD)     0.610437
(PAYC, ANSS)      0.609793
(S&P500, AMAT)    0.600518
(ANSS, GPN)       0.598392
(KLAC, SWKS)      0.597925
(S&P500, CDW)     0.585452
dtype: float64
In [32]:
# 지수와 상관관계
changes = df.pct_change()
corr = changes.corr()
corr['S&P500'].sort_values(ascending=False)[1:]
Out[32]:
ANSS    0.751633
AAPL    0.742273
MSCI    0.677740
KLAC    0.659310
GPN     0.646109
SWKS    0.619364
AMD     0.610437
AMAT    0.600518
CDW     0.585452
LRCX    0.573241
QRVO    0.553678
XRX     0.527770
LDOS    0.517872
PAYC    0.510378
CPRT    0.492393
IR      0.455418
CMG     0.302902
DXCM    0.291508
TGT     0.287969
MKTX    0.226362
Name: S&P500, dtype: float64

스캐터 차트, 수익과 위험 분석

  • 수익 (returns): 수익률 평균 (mean)
  • 위험 (risk): 표준편차 (std)
In [33]:
plt.figure(figsize=(16,8))
plt.scatter(changes.mean(), changes.std())
plt.xlabel('returns')
plt.ylabel('risk')
plt.xlim(0.0005, 0.003)
plt.ylim(0.005, 0.030)
for label, x, y in zip(changes.columns, changes.mean(), changes.std()):
    plt.annotate( label, xy=(x, y), xytext=(30, -30),
    textcoords = 'offset points',
    ha = 'right', va = 'bottom',
    bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
    arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))   

전체 종목에서 2019년 수익률 상관관계 상위 종목

In [34]:
df = pd.DataFrame()

df['S&P500'] = us500.loc['2019', 'Close'] # S&P500 지수
df[df_price.columns] = df_price.loc['2019', df_price.columns]

# 종목간 상관관계
changes = df.pct_change()
corr = changes.corr()

idx, vals = [], []
for ix, i in enumerate(corr.columns.values):
    for j in corr.columns.values[ix + 1:]:
        idx.append((i, j))
        vals.append(corr[i][j])

ser = pd.Series(data=vals, index=idx)
ser_ord = ser.sort_values(ascending=False)
ser_ord[:40]
Out[34]:
(GOOGL, GOOG)     0.997356
(FOXA, FOX)       0.987175
(UAA, UA)         0.980852
(DISCA, DISCK)    0.980606
(NWSA, NWS)       0.978020
(CMS, WEC)        0.892382
(HBAN, KEY)       0.891619
(KEY, RF)         0.885981
(CMS, XEL)        0.884278
(CFG, KEY)        0.881697
(WEC, XEL)        0.879908
(HBAN, RF)        0.875948
(LNT, CMS)        0.873435
(FITB, KEY)       0.872738
(CFG, HBAN)       0.872727
(CMA, HBAN)       0.872630
(AMAT, LRCX)      0.872015
(AIV, UDR)        0.871271
(LNC, PRU)        0.870651
(CFG, CMA)        0.869331
(FITB, RF)        0.869273
(EQR, UDR)        0.868437
(MA, V)           0.867344
(LNT, XEL)        0.866470
(HBAN, TFC)       0.862957
(CFG, RF)         0.862820
(BAC, GS)         0.859066
(C, MS)           0.857629
(C, JPM)          0.857281
(FITB, HBAN)      0.856761
(AVB, EQR)        0.854082
(LNT, WEC)        0.853289
(ESS, UDR)        0.853266
(CMA, KEY)        0.852449
(CMA, RF)         0.851086
(BAC, JPM)        0.850873
(AIV, EQR)        0.850095
(AEP, XEL)        0.849707
(JPM, MS)         0.845753
(CFG, FITB)       0.845562
dtype: float64

상관계수가 높은 페어 종목

In [35]:
df_price[['CMS', 'WEC']].plot()
Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f294ee851d0>
In [36]:
df_plot = df_price[['CMS', 'WEC']]
df_plot = df_plot / (df_plot.max() - df_plot.min())
df_plot.plot()
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f294b275dd8>
In [37]:
df_plot = df_price[['CMS', 'WEC']]
df_plot = df_plot / df_plot.iloc[0]
df_plot.plot()
Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f294b1f3dd8>

데이터 다운로드 (구글 Colab)

In [ ]:
!zip -r sp500_price.zip sp500_price
In [42]:
from google.colab import files
files.download('sp500_price.zip')

2020 FinanceData.KR