Markdown Wiki | Markdown Editor
$$ x = \dfrac{-b \pm \sqrt{b^2 - 4ac}}{2a} $$

from FinanceDataReader import DataReader as fdr
# Fetch price data for ticker '005930' (labelled "Samsung Electronics" in the later plot).
# The second argument is presumably the start date (1998-01-01) — TODO confirm.
samsung = fdr('005930', '19980101')
# NOTE(review): `data_dict` is not defined in the visible code and `samsung_dpc`
# is only assigned on the next line — confirm an earlier cell provides both,
# otherwise this line raises NameError on a fresh run.
data_dict['samsung'] = samsung_dpc
# Percent change of each close relative to the previous row:
# (close - previous close) / previous close * 100.
samsung_dpc = (samsung['Close']-samsung['Close'].shift(1)) / samsung['Close'].shift(1) * 100
# The first row has no previous close (shift(1) leaves NaN there), so set it to 0.
samsung_dpc.iloc[0] = 0
# Same calculation for `msft`.
# NOTE(review): `msft` is not loaded anywhere in the visible code — confirm it is defined earlier.
msft_dpc = (msft['Close']-msft['Close'].shift(1)) / msft['Close'].shift(1) * 100
msft_dpc.iloc[0] = 0
CPU times: user 1.65 s, sys: 851 ms, total: 2.5 s Wall time: 4.77 s
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
from data.finance import Initialized
Initialized().ploting()
bin_number = 30
plt.rcParams['figure.dpi'] = 300
plt.rcParams['figure.figsize'] = (20.0, 5.0)
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
ax1.hist(samsung_dpc, bins=bin_number)
ax2.hist(msft_dpc, bins=bin_number)
plt.grid(True); plt.show()
found : D2Coding
일간 변동률의 누적 변화를 비교하려면 누적합(Cumulative Sum)을 계산해야 한다.
%%time
samsung_dpc = (samsung['Close']-samsung['Close'].shift(1)) / samsung['Close'].shift(1) * 100
samsung_dpc.iloc[0] = 0
samsung_dpc_cs = samsung_dpc.cumsum() # 누적합 Series 생성하기
# msft_dpc = (msft['Close'] / msft['Close'].shift(1)-1) * 100
msft_dpc = (msft['Close']-msft['Close'].shift(1)) / msft['Close'].shift(1) * 100
msft_dpc.iloc[0] = 0
msft_dpc_cs = msft_dpc.cumsum()
CPU times: user 3.44 ms, sys: 1.03 ms, total: 4.48 ms Wall time: 3.07 ms
# Draw both cumulative percent-change series on a single chart.
for cumulative, line_fmt, legend_name in (
    (samsung_dpc_cs, 'b', "Samsung Electronics"),
    (msft_dpc_cs, 'r--', "MicroSoft"),
):
    plt.plot(cumulative, line_fmt, label=legend_name)
plt.ylabel("Change %")
plt.grid(True)
plt.legend(loc='best')
plt.show()
# Local pickle cache for the downloaded 'KS11' (KOSPI) index data.
DATA_FILENAME = 'data/stock-ks11.pkl'
# https://aroussi.com/post/python-yahoo-finance
import pandas as pd
from FinanceDataReader import DataReader as fdr

# Reuse the cached frame when it exists; otherwise download once and cache it.
# NOTE: only unpickle files you created yourself — pickle is unsafe on untrusted data.
try:
    kospi = pd.read_pickle(DATA_FILENAME)
except FileNotFoundError:
    kospi = fdr('KS11', "19820101")
    kospi.to_pickle(DATA_FILENAME)
kospi.head(3)
Close | Open | High | Low | Volume | Change | |
---|---|---|---|---|---|---|
Date | ||||||
1982-01-04 | 123.60 | 123.60 | 123.60 | 123.60 | 413000.0 | -0.0586 |
1982-01-05 | 121.66 | 121.66 | 121.66 | 121.66 | 697000.0 | -0.0157 |
1982-01-06 | 125.80 | 125.80 | 125.80 | 125.80 | 1180000.0 | 0.0340 |
# Rolling drawdown and maximum drawdown (MDD) of the KOSPI close.
window = 252  # rolling window length in rows: trading days in one year
kospi_close = kospi['Close']
# Highest close within the trailing window.
peak = kospi_close.rolling(window, min_periods=1).max()
# Fractional drop of each close below that trailing high (0 at a new high).
drawdown = kospi_close.div(peak).sub(1.)
# Deepest drawdown within the trailing window.
drawdown_max = drawdown.rolling(window, min_periods=1).min()

# Inspect the rows where the rolling MDD equals its overall minimum:
# the value itself, how many rows share it, and their dates.
worst_mdd = drawdown_max.min()
drawdown_max_series = drawdown_max.loc[drawdown_max.eq(worst_mdd)]
(drawdown_max_series.unique()[0], len(drawdown_max_series), drawdown_max_series.index)
(-0.6340204164324833, 252, DatetimeIndex(['1998-06-16', '1998-06-17', '1998-06-18', '1998-06-19', '1998-06-20', '1998-06-22', '1998-06-23', '1998-06-24', '1998-06-25', '1998-06-26', ... '1999-05-07', '1999-05-10', '1999-05-11', '1999-05-12', '1999-05-13', '1999-05-14', '1999-05-17', '1999-05-18', '1999-05-19', '1999-05-20'], dtype='datetime64[ns]', name='Date', length=252, freq=None))
# Two stacked panels: the KOSPI close on top, the drawdown curves underneath.
plt.rcParams['figure.figsize'] = (20.0, 6.0)

upper_ax = plt.subplot(2, 1, 1)  # first row of a 2-row, 1-column grid
kospi['Close'].plot(ax=upper_ax, label="KOSPI", title="KOSPI MDD", grid=True, legend=True)

lower_ax = plt.subplot(2, 1, 2)  # second row of the same grid
for curve, colour, legend_name in (
    (drawdown, "blue", "KOSPI DD"),
    (drawdown_max, "red", "KOSPI MDD"),
):
    curve.plot(ax=lower_ax, c=colour, label=legend_name, grid=True, legend=True)
plt.show()
# Local pickle cache for the downloaded Dow Jones index data.
DATA_FILENAME = 'data/stock-dji.pkl'
# Loading the dataset
import pandas as pd
import yfinance as yf

# Reuse the cached frame when it exists; otherwise download once and cache it.
# NOTE: only unpickle files you created yourself — pickle is unsafe on untrusted data.
# NOTE(review): Yahoo Finance usually lists the Dow Jones Industrial Average as
# "^DJI" — confirm that "DJI" still resolves to the intended index.
try:
    dow = pd.read_pickle(DATA_FILENAME)
except FileNotFoundError:
    dow = yf.Ticker("DJI")
    dow = dow.history(period="max", start='1982-01-04')
    dow.to_pickle(DATA_FILENAME)
dow.head(3)
Open | High | Low | Close | Volume | Dividends | Stock Splits | |
---|---|---|---|---|---|---|---|
Date | |||||||
1982-01-04 | 882.520020 | 882.520020 | 882.520020 | 882.520020 | 0 | 0 | 0 |
1982-01-05 | 865.299988 | 865.299988 | 865.299988 | 865.299988 | 0 | 0 | 0 |
1982-01-06 | 861.020020 | 861.020020 | 861.020020 | 861.020020 | 0 | 0 | 0 |
# Rolling drawdown and maximum drawdown (MDD) of the Dow Jones close,
# using the same `window` length as the KOSPI calculation.
dow_close = dow['Close']
peak = dow_close.rolling(window, min_periods=1).max()  # highest close in the trailing window
drawdown = dow_close.div(peak).sub(1.)  # fractional drop below that trailing high
drawdown_max = drawdown.rolling(window, min_periods=1).min()  # deepest drawdown in the trailing window
# Two stacked panels: the Dow Jones close on top, the drawdown curves underneath.
plt.rcParams['figure.figsize'] = (20.0, 6.0)

upper_ax = plt.subplot(2, 1, 1)  # first row of a 2-row, 1-column grid
dow['Close'].plot(ax=upper_ax, label="DOW JONES", title="DOW JONES MDD", grid=True, legend=True)

lower_ax = plt.subplot(2, 1, 2)  # second row of the same grid
for curve, colour, legend_name in (
    (drawdown, "blue", "DOW JONES DD"),
    (drawdown_max, "red", "DOW JONES MDD"),
):
    curve.plot(ax=lower_ax, c=colour, label=legend_name, grid=True, legend=True)
plt.show()
# Rebase both indices to 100 at the first date of the Dow data so that their
# relative growth can be compared on one chart.
date_init = dow.index[0].strftime('%Y-%m-%d')  # base date as a 'YYYY-MM-DD' string
dow_close, kospi_close = dow['Close'], kospi['Close']
dow_exp = dow_close / dow_close[date_init] * 100
# This lookup needs a KOSPI row on the same base date.
kospi_exp = kospi_close / kospi_close[date_init] * 100

plt.plot(dow_exp, 'r--', label='Dow Jones Industrials')
plt.plot(kospi_exp, 'b', label="KOSPI")
plt.legend(loc='best')
plt.grid(True)
plt.show()
import pandas as pd

# Put both closes in one table. The two Series are aligned on the union of
# their dates, so a date present in only one of them leaves NaN in the other column.
table_stock = pd.DataFrame({'DOW': dow.Close, 'KOSPI': kospi.Close})
# Fill gaps from the next available value first, then forward-fill whatever
# is still missing at the end of the table.
# `fillna(method=...)` is deprecated in recent pandas; the dedicated
# bfill()/ffill() methods perform the same fills.
table_stock = table_stock.bfill()
table_stock = table_stock.ffill()
# Scatter plot of the paired closes: Dow Jones on the x-axis, KOSPI on the y-axis.
plt.rcParams.update({
    'figure.dpi': 80,
    'figure.figsize': (20.0, 5.0),
})
dow_values, kospi_values = table_stock['DOW'], table_stock['KOSPI']
plt.scatter(dow_values, kospi_values, marker='.')
plt.xlabel('Dow Jones Industrial Average')
plt.ylabel('KOSPI')
plt.show()
결과 변수 $Y$ 와 원인이 되는 변수 $X$ 간의 관계를 추정하는 선형 회귀 모델(Linear Regression Model)을 분석한다.
$$ Y_{i} = \beta_{0} + \beta_{1} X_{i} + \varepsilon_{i} \quad (i = 1, 2, \ldots, n) $$

# Correlation analysis with pandas
# Correlation coefficient between the two close columns, and its square
# (the coefficient of determination).
# Author's note: NaN values mixed into the table can keep a result from being
# produced; check with:
#   print(table_stock.isna().sum())
dow_col = table_stock['DOW']
kospi_col = table_stock['KOSPI']
r_value = dow_col.corr(kospi_col)
r_squared = r_value ** 2
print(f"r_value(상관계수): {r_value}\nr_squared(결정계수): {r_squared}")
r_value(상관계수): 0.8834179122883052 r_squared(결정계수): 0.7804272077518277
# Fit a straight line KOSPI = slope * DOW + intercept with scipy.stats.linregress
# and build a printable form of the equation.
from scipy import stats

regression = stats.linregress(x=table_stock['DOW'], y=table_stock['KOSPI'])
regression_line = f'Y = {regression.slope:.2f} * X + {regression.intercept:.2f}'
regression_line
'Y = 0.08 * X + 299.36'
# Plot the observations as dots and draw the fitted regression line over them.
dow_values = table_stock['DOW']
fitted_kospi = regression.slope * dow_values + regression.intercept
plt.plot(dow_values, table_stock['KOSPI'], '.')
plt.plot(dow_values, fitted_kospi, 'r')
plt.legend(['DOW x KOSPI', regression_line])
plt.title(f'DOW x KOSPI (R = {regression.rvalue:.2f})')
plt.xlabel('Dow Jones Industrial Average')
plt.ylabel('KOSPI')
plt.show()