#!/usr/bin/env python # coding: utf-8 # # Part 0: Import 套件 # In[ ]: # 下載資料套件 import requests as r #資料處理套件 import pandas as pd import json from datetime import datetime, date # # Part 1: 下載單日三大法人買賣超日報 # - 資料來源:[證交所 三大法人買賣超日報](https://www.twse.com.tw/zh/page/trading/fund/T86.html) # In[ ]: url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210827&selectType=ALL' # In[ ]: res = r.get(url) res # In[ ]: inv_json = res.json() inv_json # In[ ]: inv_json['data'] # In[ ]: df = pd.DataFrame.from_dict(inv_json['data']) df.head() # In[ ]: inv_json['fields'] # In[ ]: df.columns = inv_json['fields'] # In[ ]: df.head() # # Part 2: 下載多天以上的三大法人買賣超日報 # In[ ]: url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210827&selectType=ALL' # In[ ]: pd.date_range('2021-08-23', '2021-08-27', freq='D').strftime("%Y%m%d").tolist() # In[ ]: date_list = pd.date_range('2021-08-23', '2021-08-27', freq='D').strftime("%Y%m%d").tolist() for day in date_list: print(day) # In[ ]: df = pd.DataFrame() df # In[ ]: for day in date_list: url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=24' res = r.get(url) inv_json = res.json() df_inv = pd.DataFrame.from_dict(inv_json['data']) df = df.append(df_inv, ignore_index = True) # In[ ]: df.head() # In[ ]: df.columns = ['證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數'] # In[ ]: df.head() # ## Part 2.1: 沒日期怎麼分辦是哪一天的資料?! # In[ ]: df[df['證券代號'] == '2330'] # In[ ]: url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210827&selectType=ALL' res = r.get(url) inv_json = res.json() df = pd.DataFrame.from_dict(inv_json['data']) df.head() # In[ ]: df.insert(0, '日期', datetime(2021, 8, 27)) # In[ ]: df.head() # In[ ]: for day in date_list: print(day, int(day[:4]), int(day[4:6]), int(day[6:]), datetime(int(day[:4]), int(day[4:6]), int(day[6:]))) # In[ ]: df = pd.DataFrame() for day in date_list: url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=ALL' res = r.get(url) inv_json = res.json() df_inv = pd.DataFrame.from_dict(inv_json['data']) df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:]))) df = df.append(df_inv, ignore_index = True) # In[ ]: df.head(100) # In[ ]: df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數'] # In[ ]: df.head(100) # In[ ]: df[df['證券代號'] == '2330'] # In[ ]: def get_tw_investor_data(start_year, start_month, start_day, end_year, end_month, end_day): start_date = str(date(start_year, start_month, start_day)) end_date = str(date(end_year, end_month, end_day)) date_list = pd.date_range(start_date, end_date, freq='D').strftime("%Y%m%d").tolist() df = pd.DataFrame() for day in date_list: url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=24' res = r.get(url) inv_json = res.json() df_inv = pd.DataFrame.from_dict(inv_json['data']) #加入日期 df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:]))) df = df.append(df_inv, ignore_index = True) df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數'] return df # In[ ]: df = get_tw_investor_data(start_year = 2021, start_month = 8, start_day = 23, end_year = 2021, end_month = 8, end_day = 27) df # ## Part 2.2: 怎麼下載不到資料?! # In[ ]: df = get_tw_investor_data(start_year = 2021, start_month = 8, start_day = 22, end_year = 2021, end_month = 8, end_day = 27) df # In[ ]: url = 'https://www.twse.com.tw/fund/T86?response=json&date=20210822&selectType=ALL' res = r.get(url) res # In[ ]: res.json() # In[ ]: def get_tw_investor_data(start_year, start_month, start_day, end_year, end_month, end_day, stock_code): start_date = str(date(start_year, start_month, start_day)) end_date = str(date(end_year, end_month, end_day)) date_list = pd.date_range(start_date, end_date, freq='D').strftime("%Y%m%d").tolist() df = pd.DataFrame() for day in date_list: url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=ALL' res = r.get(url) inv_json = res.json() # 加入非開盤日就跳過的下載的判斷式 if res.json()['stat'] == '很抱歉,沒有符合條件的資料!': continue else: df_inv = pd.DataFrame.from_dict(inv_json['data']) df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:]))) df = df.append(df_inv, ignore_index = True) df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數'] return df # In[ ]: df = get_tw_investor_data(start_year = 2021, start_month = 8, start_day = 22, end_year = 2021, end_month = 8, end_day = 27, stock_code = None) df # ## Part 2.3: 想篩選特定股票怎麼做? # In[ ]: df[df['證券代號'] == '2330'] # In[ ]: def get_tw_investor_data(start_year, start_month, start_day, end_year, end_month, end_day, stock_code): start_date = str(date(start_year, start_month, start_day)) end_date = str(date(end_year, end_month, end_day)) date_list = pd.date_range(start_date, end_date, freq='D').strftime("%Y%m%d").tolist() df = pd.DataFrame() for day in date_list: url = 'https://www.twse.com.tw/fund/T86?response=json&date=' + day + '&selectType=ALL' res = r.get(url) inv_json = res.json() if res.json()['stat'] == '很抱歉,沒有符合條件的資料!': continue else: df_inv = pd.DataFrame.from_dict(inv_json['data']) df_inv.insert(0, '日期', datetime(int(day[:4]), int(day[4:6]), int(day[6:]))) df = df.append(df_inv, ignore_index = True) df.columns = ['日期', '證券代號', '證券名稱', '外陸資買進股數(不含外資自營商)', '外陸資賣出股數(不含外資自營商)', '外陸資買賣超股數(不含外資自營商)', '外資自營商買進股數', '外資自營商賣出股數', '外資自營商買賣超股數', '投信買進股數', '投信賣出股數', '投信買賣超股數', '自營商買賣超股數', '自營商買進股數(自行買賣)', '自營商賣出股數(自行買賣)', '自營商買賣超股數(自行買賣)', '自營商買進股數(避險)', '自營商賣出股數(避險)', '自營商買賣超股數(避險)', '三大法人買賣超股數'] # 加入股票代碼篩選 if stock_code == None: pass else: df = df[df['證券代號'] == str(stock_code)] return df # In[ ]: df = get_tw_investor_data(start_year = 2021, start_month = 8, start_day = 22, end_year = 2021, end_month = 8, end_day = 27, stock_code = 2330) df # ## Part 2.4: 資料轉型 - 把數字資料從字串(string) 轉成浮點數 (float) # In[ ]: for col in range(3, 20): for row in range(df.shape[0]): df.iloc[row, col] = float(df.iloc[row,col].replace(',', '')) # In[ ]: df # # Part 3: 另存成csv檔 # In[ ]: df.to_csv('investor.csv')