#!/usr/bin/env python
# coding: utf-8
#
# # Universe Analysis-using get_universe
#
# 分析股票池的產業分布與成交金額
# ## 選單
#
# 1. [分析臺灣50指數成份股公司的產業分佈](#臺灣50)
# 2. [分析臺灣中型100指數成份股公司的產業分佈](#臺灣100)
# 3. [分析臺灣高股息指數成份股公司的產業分佈](#臺灣高股息)
# 4. [分析電子工業公司的產業分佈](#電子工業)
# 5. [分析上市ETF成交金額](#上市ETF)
# In[1]:
import tejapi
import os
import numpy as np
import pandas as pd
# set tej_key and base
os.environ['TEJAPI_KEY'] = "your key"
os.environ['TEJAPI_BASE'] = "https://api.tej.com.tw"
# set date
start = end = '2024-05-31'
from matplotlib import pyplot as plt
plt.rc("font",family='MicroSoft YaHei',weight="bold")
import TejToolAPI
from zipline.sources.TEJ_Api_Data import get_universe
from zipline.utils.calendar_utils import get_calendar
# 利用`get_universe`取得台灣50指數成份股
# In[2]:
tw50_ = get_universe(start, end, idx_id='IX0002')
# In[3]:
tw50_
# `getUniverseSector`:繪製股票池產業分佈柱狀圖與圓餅圖
# In[4]:
def plot_sector_counts(sector_counts):
# create bar chart of number of companies in each sector
from matplotlib import pyplot as plt
plt.rc("font",family='MicroSoft YaHei',weight="normal")
from matplotlib.ticker import MaxNLocator
import matplotlib.ticker as ticker
plt.figure(figsize=(12, 15), dpi=100)
bar = plt.subplot2grid((5,5), (0,0), rowspan=2, colspan=5)
pie = plt.subplot2grid((5,5), (2,0), rowspan=3, colspan=5)
# Bar chart
sector_counts.plot(
kind='barh',
color='b',
# rot=90,
grid=True,
fontsize=12,
ax=bar,
)
plt.gca().yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
bar.set_title('股票池產業分布家數')
bar.set_xlabel('家數')
num = list(sector_counts.values)
explode = [0.1 if x == max(num) else 0 for x in num]
# Pie chart
sector_counts.plot(
kind='pie',
colormap='Set3',
autopct='%.1f %%', # '%.2f %%'
fontsize=12,
ax=pie,
labeldistance=1.1,
pctdistance=0.9,
explode = explode
)
pie.set_ylabel('')
pie.set_title('股票池產業分布占比 - %')
plt.tight_layout(pad=5);
def getUniverseSector(start_date,
end_date,
trading_calendar=get_calendar('TEJ_XTAI'),
**kwargs):
tickers = get_universe(start_date,
end_date,
trading_calendar = trading_calendar,
**kwargs)
df_sector = TejToolAPI.get_history_data(ticker=tickers,
columns=['Industry'], transfer_to_chinese=True,
start = start_date,
end = end_date)
counts = (df_sector.groupby('主產業別_中文').size())
_c =[]
counts.index = [ x.split(' ')[1] if len(x)>0 else ' ' for x in counts.index]
plot_sector_counts(counts[counts>0].sort_values(ascending=False))
#
# # 分析臺灣50指數成份股公司的產業分佈
# [Return to Menu](#menu)
# In[5]:
getUniverseSector(start, end, idx_id='IX0002')
#
# # 分析臺灣中型100指數成份股公司的產業分佈
# [Return to Menu](#menu)
# In[6]:
getUniverseSector(start, end, idx_id='IX0003')
#
# # 分析臺灣高股息指數成份股公司的產業分佈
# [Return to Menu](#menu)
# In[7]:
getUniverseSector(start, end, idx_id='IX0006')
#
# # 分析電子工業公司的產業分佈
# [Return to Menu](#menu)
# In[8]:
getUniverseSector(start, end, main_ind_c='M2300 電子工業')
#
# # 分析上市ETF成交金額
# [Return to Menu](#menu)
# In[9]:
etf = get_universe(start, end, stktp_c=['ETF', '國外ETF'], mkt=['TWSE'])
# In[10]:
df_amount = TejToolAPI.get_history_data(ticker=etf,
columns=['Value_Dollars'],
transfer_to_chinese=False,
start = '2023-01-01',
end = end
)
# In[11]:
df_top = (df_amount.
set_index(['coid','mdate']).
unstack('coid').
rolling(30).
mean().
iloc[-1].
sort_values(ascending=False)['Value_Dollars'] #['成交金額_元']
)
# In[12]:
df_top = (df_top.to_frame().
join(tejapi.get('TWN/APISTOCK')[['coid','stk_name']].
set_index('coid')).
set_index('stk_name').iloc[:,0]
)
# In[13]:
plt.figure(figsize=(8, 12), dpi=150)
bar = plt.subplot2grid((5,5), (0,0), rowspan=2, colspan=5)
df_top.nlargest(20).plot(
kind='barh',
color='b',
# rot=90,
grid=True,
ax=bar
)
bar.set_xlabel('TWD')
bar.set_ylabel('')
bar.set_title('上市ETF 過去30日的平均成交金額 Top20({})'.format(df_amount.mdate.max().strftime('%Y-%m-%d')))
# [Return to Menu](#menu)