mark down math functions \ Finance-data
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Loading ... Visualization Modules
import matplotlib.pyplot as plt
import warnings
# The bundled seaborn style sheets were renamed to "seaborn-v0_8*" in
# Matplotlib 3.6, so use the legacy name only where it is still registered.
_style = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
plt.style.use(_style)
# Silence FutureWarning noise from the plotting/data libraries in this notebook.
warnings.simplefilter(action='ignore', category=FutureWarning)
from matplotlib.font_manager import fontManager as fm # 한글폰트 확인 및 추가
# Preferred Korean-capable fonts, in priority order.
font_list_check = ['D2Coding', 'NanumGothicCoding', 'NanumGothic']
# Collect the registered font names once instead of once per candidate.
installed_fonts = {font.name for font in fm.ttflist}
for font_name in font_list_check:
    if font_name in installed_fonts:
        # Use the first preferred font that matplotlib knows about.
        print(f"found : {font_name}")
        plt.rc('font', family=font_name)
        break
found : D2Coding
# Download the S&P 500 and VIX index histories.
import yfinance as yf

# auto_adjust=False is passed explicitly so that the 'Adj Close' column
# selected below is present whatever default the installed yfinance uses.
df = yf.download(
    ['^GSPC', '^VIX'],
    start='1999-01-01',
    end='2022-12-31',
    progress=False,
    auto_adjust=False,
)
# Keep only the adjusted close and drop the outer column level so the
# remaining column labels are the ticker symbols.
df = df[['Adj Close']].copy()
df.columns = df.columns.droplevel(0)
df = df.rename(columns={'^GSPC': 'sp500', '^VIX': 'vix'})
# Collect stock prices with FinanceDataReader
# loading ... the Financial DataSet
import os
import pandas as pd
def fd_csv(file_name, code='', date_start='', date_end=''):
    """Load price data from a CSV cache, downloading it on a cache miss.

    Args:
        file_name: Path of the CSV cache file.
        code: Symbol passed to FinanceDataReader when a download is needed.
        date_start: Start of the requested date range.
        date_end: End of the requested date range.

    Returns:
        A DataFrame indexed by its ``Date`` column.
    """
    if os.path.isfile(file_name):
        # Cache hit: CSV stores dates as text, so convert before indexing.
        data = pd.read_csv(file_name)
        data['Date'] = pd.to_datetime(data['Date'])
        return data.set_index('Date')

    # Cache miss: download, persist for the next call, then index by date.
    from FinanceDataReader import DataReader as fdr
    data = fdr(code, date_start, date_end).reset_index()
    cache_dir = os.path.dirname(file_name)
    if cache_dir:
        # The cache folder may not exist yet on a first run.
        os.makedirs(cache_dir, exist_ok=True)
    data.to_csv(file_name, index=None)
    return data.set_index('Date')
# Build the MSFT dataset (read from the CSV cache when the file exists).
file_name = './data/stock-msft.csv'
data_csv = fd_csv(
    file_name,
    code='MSFT',
    date_start='2020-1-1',
    date_end='2021-3-31',
)
data_csv.tail(3)
Close | Open | High | Low | Volume | Change | |
---|---|---|---|---|---|---|
Date | ||||||
2021-03-25 | 232.34 | 235.34 | 236.90 | 231.60 | 34060000.0 | -0.0133 |
2021-03-26 | 236.48 | 231.55 | 236.71 | 231.55 | 25480000.0 | 0.0178 |
2021-03-29 | 232.24 | 236.69 | 236.69 | 231.91 | 5490000.0 | -0.0179 |
# Collect stock prices with Yahoo Finance (yfinance)
# loading ... the Financial DataSet
import os
import pandas as pd
def yf_csv(file_name, code='', date_start='', date_end=''):
    """Load price data from a CSV cache, downloading it with yfinance on a miss.

    Args:
        file_name: Path of the CSV cache file.
        code: Ticker symbol passed to ``yf.download`` when a download is needed.
        date_start: Start of the requested date range.
        date_end: End of the requested date range.

    Returns:
        A DataFrame indexed by its ``Date`` column.
    """
    if os.path.isfile(file_name):
        # Cache hit: CSV stores dates as text, so convert before indexing.
        data = pd.read_csv(file_name)
        data['Date'] = pd.to_datetime(data['Date'])
        return data.set_index('Date')

    # Cache miss: download, persist for the next call, then index by date.
    import yfinance as yf
    data = yf.download(
        code,
        start=date_start,
        end=date_end,
        progress=False,
        auto_adjust=True,
    ).reset_index()
    cache_dir = os.path.dirname(file_name)
    if cache_dir:
        # The cache folder may not exist yet on a first run.
        os.makedirs(cache_dir, exist_ok=True)
    data.to_csv(file_name, index=None)
    return data.set_index('Date')
# Build the Samsung Electronics dataset.
# Yahoo Finance identifies KRX listings with a ".KS" suffix; the bare code
# '005930' only appeared to work when the CSV cache already existed.
file_name = './data/stock-samsung.csv'
data_csv = yf_csv(file_name, '005930.KS', date_start='2020-1-1', date_end='2021-3-31')
data_csv.tail(3)
Open | High | Low | Close | Volume | Change | |
---|---|---|---|---|---|---|
Date | ||||||
2021-03-25 | 81000 | 82100 | 80800 | 81200 | 14758826 | 0.002469 |
2021-03-26 | 81400 | 81600 | 81000 | 81500 | 12845778 | 0.003695 |
2021-03-29 | 81700 | 81700 | 81000 | 81600 | 14835336 | 0.001227 |
# Label-based slice of the date index, then show the last rows of the range.
data_csv.loc['2021-MAR-1':'2021-MAY-1'].tail()
Open | High | Low | Close | Volume | Change | |
---|---|---|---|---|---|---|
Date | ||||||
2021-03-23 | 82600 | 82900 | 81800 | 81800 | 13299907 | -0.002439 |
2021-03-24 | 81000 | 81600 | 80700 | 81000 | 17926638 | -0.009780 |
2021-03-25 | 81000 | 82100 | 80800 | 81200 | 14758826 | 0.002469 |
2021-03-26 | 81400 | 81600 | 81000 | 81500 | 12845778 | 0.003695 |
2021-03-29 | 81700 | 81700 | 81000 | 81600 | 14835336 | 0.001227 |
# Display the frame loaded above.
data_csv
import os
import pandas as pd
class StockCSV:
    """Download daily price data with an optional CSV cache.

    Both readers follow the same rule. When ``file_name`` is given and the
    file exists, it is loaded instead of downloading. When it is given but
    missing, the download is written to it. When it is omitted, the download
    is returned directly and nothing is written.
    """

    @staticmethod
    def _read_cache(file_name):
        """Load a cached CSV and index it by its parsed ``Date`` column."""
        data = pd.read_csv(file_name)
        data['Date'] = pd.to_datetime(data['Date'])
        return data.set_index('Date')

    @staticmethod
    def _write_cache(data, file_name):
        """Write ``data`` to ``file_name`` and return it indexed by ``Date``."""
        data = data.reset_index()
        cache_dir = os.path.dirname(file_name)
        if cache_dir:
            # The cache folder may not exist yet on a first run.
            os.makedirs(cache_dir, exist_ok=True)
        data.to_csv(file_name, index=None)
        return data.set_index('Date')

    def finance(self, code='', date_start='', date_end='', file_name=None):
        """Read prices through FinanceDataReader, with optional CSV caching.

        https://github.com/FinanceData/FinanceDataReader/blob/master/README.md

        Args:
            code: Symbol passed to FinanceDataReader's ``DataReader``.
            date_start: Start of the requested date range.
            date_end: End of the requested date range.
            file_name: Optional CSV cache path; ``None`` disables caching.

        Returns:
            The cached or downloaded DataFrame.
        """
        if file_name and os.path.isfile(file_name):
            return self._read_cache(file_name)

        from FinanceDataReader import DataReader as fdr
        data = fdr(code, date_start, date_end)
        return self._write_cache(data, file_name) if file_name else data

    def yahoo(self, code='', date_start='', date_end='', file_name=None):
        """Read prices through yfinance, with optional CSV caching.

        Args:
            code: Ticker symbol passed to ``yf.download``.
            date_start: Start of the requested date range.
            date_end: End of the requested date range.
            file_name: Optional CSV cache path; ``None`` disables caching.

        Returns:
            The cached or downloaded DataFrame.
        """
        if file_name and os.path.isfile(file_name):
            return self._read_cache(file_name)

        import yfinance as yf
        data = yf.download(
            code,
            start=date_start,
            end=date_end,
            progress=False,
            auto_adjust=True,
        )
        return self._write_cache(data, file_name) if file_name else data
# Fetch prices for code 005930 without a CSV cache and show the last rows.
# Pass a path as file_name (e.g. './data/test.csv') to cache the download.
reader = StockCSV()
reader.finance(
    code='005930',
    date_start='2020-01-01',
    date_end='2021-01-01',
).tail()
Open | High | Low | Close | Volume | Change | |
---|---|---|---|---|---|---|
Date | ||||||
2020-01-02 | 55500 | 56000 | 55000 | 55200 | 12993228 | -0.010753 |
2020-01-03 | 56000 | 56600 | 54900 | 55500 | 15422255 | 0.005435 |
2020-01-06 | 54900 | 55600 | 54600 | 55500 | 10278951 | 0.000000 |
2020-01-07 | 55700 | 56400 | 55600 | 55800 | 10009778 | 0.005405 |
2020-01-08 | 56200 | 57400 | 55900 | 56800 | 23501171 | 0.017921 |
... | ... | ... | ... | ... | ... | ... |
2020-12-23 | 72400 | 74000 | 72300 | 73900 | 19411326 | 0.022130 |
2020-12-24 | 74100 | 78800 | 74000 | 77800 | 32502870 | 0.052774 |
2020-12-28 | 79000 | 80100 | 78200 | 78700 | 40085044 | 0.011568 |
2020-12-29 | 78800 | 78900 | 77300 | 78300 | 30339449 | -0.005083 |
2020-12-30 | 77400 | 81300 | 77300 | 81000 | 29417421 | 0.034483 |
248 rows × 6 columns
# https://bjecondata.blogspot.com/2019/08/open-api-ecos-ii.html
# Load the local CSV of example Bank of Korea ECOS statistic/item codes
# and preview its first rows.
import pandas as pd
code_table = pd.read_csv('./data/BOK_ECOS_EXAMPLE.csv')
code_table.head(3)
통계명 | 항목명1 | 통계코드 | 항목코드1 | 항목코드2 | 항목명2 | 항목코드3 | 항목명3 | 단위 | URL | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1.1.주요 통화금융지표 | 예금은행요구불예금회전율 | 010Y002 | AAAA5 | NaN | NaN | NaN | NaN | 회/월 | http://ecos.bok.or.kr/api/StatisticSearch/APIC... |
1 | 1.1.주요 통화금융지표 | 화폐발행잔액(말잔) | 010Y002 | AAAA11 | NaN | NaN | NaN | NaN | 십억원 | http://ecos.bok.or.kr/api/StatisticSearch/APIC... |
2 | 1.1.주요 통화금융지표 | 화폐발행잔액(평잔) | 010Y002 | AAAA12 | NaN | NaN | NaN | NaN | 십억원 | http://ecos.bok.or.kr/api/StatisticSearch/APIC... |
# Extract the rows whose statistic name contains the search keyword.
search_text = '인플레'
# Literal (non-regex) substring match; missing or non-string names count as
# "no match" instead of raising.
name_matches = code_table['통계명'].str.contains(search_text, regex=False, na=False)
# Positional row numbers of the matching rows.
code_index = [no for no, hit in enumerate(name_matches) if hit]
code_example = code_table.iloc[code_index, :].reset_index(drop=True)
code_example.head(3)
# To inspect the request URLs instead: code_example['URL'][:3].to_list()
통계명 | 항목명1 | 통계코드 | 항목코드1 | 항목코드2 | 항목명2 | 항목코드3 | 항목명3 | 단위 | URL | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 9.2.3 소비자동향조사(기대 인플레이션율)(전국) | 물가인식 | 040Y003 | FMA | NaN | NaN | NaN | NaN | NaN | http://ecos.bok.or.kr/api/StatisticSearch/APIC... |
1 | 9.2.3 소비자동향조사(기대 인플레이션율)(전국) | <0% | 040Y003 | FMAAA | NaN | NaN | NaN | NaN | NaN | http://ecos.bok.or.kr/api/StatisticSearch/APIC... |
2 | 9.2.3 소비자동향조사(기대 인플레이션율)(전국) | 0~1% | 040Y003 | FMAAB | NaN | NaN | NaN | NaN | NaN | http://ecos.bok.or.kr/api/StatisticSearch/APIC... |
from data.finance import bok_eos_api
# Original author's note: data can only be extracted in 7-year units.
eos_table = bok_eos_api('040Y003', date_end='20210101', code2='FMB')
eos_table = eos_table.rename(columns={'DATA_VALUE': 'cpi', 'TIME': 'Date'})
# Convert the value column to float in one vectorized step.
eos_table['cpi'] = eos_table['cpi'].astype(float)
# TIME values are parsed as year+month ("%Y%m").
eos_table['Date'] = pd.to_datetime(eos_table['Date'], format='%Y%m')
# Index by date and keep only the value column.
eos_table = eos_table.set_index('Date')[['cpi']]
# Preview the resulting dataset.
eos_table.head(3)
cpi | |
---|---|
Date | |
2002-02-01 | 4.4 |
2002-03-01 | 4.3 |
2002-04-01 | 4.3 |