import requests
from bs4 import BeautifulSoup
import pandas as pd
import zipfile
import requests
import io
from datetime import datetime,timedelta, date
import datetime
def getLinkDownload(text):
if text.find('/Download/BalanceposEfek') != -1:
return 1
else:
return 0
Kepemilikan saham ada yang dimiliki lokal (Local) atau asing (Foreign). Keduanya dibagi menjadi beberapa kelompok yaitu
Kita akan menggunakan fasilitas download via file zip untuk mendapatkan data kepemilikan saham di website KSEI.
URL = "https://www.ksei.co.id/archive_download/holding_composition"
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
ahref = soup.find_all("a",href=True) #mendapatkan semua tag a href
ahref_extract = [k.get("href") for k in ahref ] # extract url di dalam href
link_download = [k for k in ahref_extract if getLinkDownload(k)==1] #mengambil tag a href yang mengandung link donwload
link_download
['/Download/BalanceposEfek20210930.zip', '/Download/BalanceposEfek20210831.zip', '/Download/BalanceposEfek20210730.zip', '/Download/BalanceposEfek20210630.zip', '/Download/BalanceposEfek20210531.zip', '/Download/BalanceposEfek20210430.zip', '/Download/BalanceposEfek20210331.zip', '/Download/BalanceposEfek20210226.zip', '/Download/BalanceposEfek20210129.zip']
main_url = 'https://www.ksei.co.id'
full_url = [main_url + k for k in link_download ]
full_url
['https://www.ksei.co.id/Download/BalanceposEfek20210930.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210831.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210730.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210630.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210531.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210430.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210331.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210226.zip', 'https://www.ksei.co.id/Download/BalanceposEfek20210129.zip']
Setelah pattern berhasil didapatkan, maka selanjutnya melakukan download dan import data
ls_kepemilikan = []
for k in full_url:
r = requests.get(k) #request
z = zipfile.ZipFile(io.BytesIO(r.content)) #download dalam bentuk zip
filename = z.filelist[0].filename #mendapatkan nama file utk digunakan sebagai parameter input data
z.extractall() #extract file zip
df = pd.read_csv(filename,sep='|') #read data csv
ls_kepemilikan.append(df)
df_kepemilikan = pd.concat(ls_kepemilikan)
# kolom Total menunjukkan total local, kolom Total.1 menunjukkan total foreign
df_kepemilikan.rename(columns={"Total":"Total Local",
"Total.1":"Total Foreign"},inplace=True)
df_kepemilikan.head()
Date | Code | Type | Sec. Num | Price | Local IS | Local CP | Local PF | Local IB | Local ID | ... | Foreign IS | Foreign CP | Foreign PF | Foreign IB | Foreign ID | Foreign MF | Foreign SC | Foreign FD | Foreign OT | Total Foreign | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 30-SEP-2021 | AALI | EQUITY | 1924688333 | 9775 | 98046648.0 | 1.282314e+07 | 24024315 | 49700 | 112262713 | ... | 1722373 | 4502836 | 7364706 | 17082815 | 1966864 | 32411811 | 26161535 | 208822 | 5760536 | 97182298 |
1 | 30-SEP-2021 | ABBA | EQUITY | 2755125000 | 535 | 42084400.0 | 1.724088e+09 | 1390500 | 0 | 485142947 | ... | 0 | 20400 | 0 | 37158500 | 3684300 | 0 | 1900 | 0 | 0 | 40865100 |
2 | 30-SEP-2021 | ABDA | EQUITY | 620806680 | 6950 | 21885.0 | 4.754088e+07 | 0 | 98 | 47918272 | ... | 0 | 426136555 | 0 | 97403500 | 400 | 0 | 0 | 0 | 62 | 523540517 |
3 | 30-SEP-2021 | ABMM | EQUITY | 2753165000 | 1200 | 2277000.0 | 7.030500e+06 | 1600 | 0 | 66518400 | ... | 0 | 57406700 | 0 | 7628100 | 411600 | 10351700 | 0 | 0 | 452514400 | 528312500 |
4 | 30-SEP-2021 | ACES | EQUITY | 17150000000 | 1270 | 278775522.0 | 1.039961e+08 | 19573511 | 0 | 318630511 | ... | 20220800 | 602545467 | 1243693406 | 464664725 | 955700 | 2218346946 | 176680654 | 917900 | 1136360191 | 5864385789 |
5 rows × 25 columns
datelist = list(set(df_kepemilikan['Date']))
fixed_date = []
for k in datelist:
day = int(k.split('-')[0])
month = int(datetime.datetime.strptime(str(k.split('-')[1]), "%b").month)
year = int(k.split('-')[2])
date = datetime.date(year, month, day)
fixed_date.append([k,date])
df_fixed_date = pd.DataFrame(fixed_date)
df_fixed_date.columns = ['Date','Date_Format']
df = pd.merge(df_kepemilikan,df_fixed_date,on='Date',how='inner')
df['Date'] = df['Date_Format']
df.drop('Date_Format',axis=1,inplace=True)
df.head()
Date | Code | Type | Sec. Num | Price | Local IS | Local CP | Local PF | Local IB | Local ID | ... | Foreign IS | Foreign CP | Foreign PF | Foreign IB | Foreign ID | Foreign MF | Foreign SC | Foreign FD | Foreign OT | Total Foreign | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2021-09-30 | AALI | EQUITY | 1924688333 | 9775 | 98046648.0 | 1.282314e+07 | 24024315 | 49700 | 112262713 | ... | 1722373 | 4502836 | 7364706 | 17082815 | 1966864 | 32411811 | 26161535 | 208822 | 5760536 | 97182298 |
1 | 2021-09-30 | ABBA | EQUITY | 2755125000 | 535 | 42084400.0 | 1.724088e+09 | 1390500 | 0 | 485142947 | ... | 0 | 20400 | 0 | 37158500 | 3684300 | 0 | 1900 | 0 | 0 | 40865100 |
2 | 2021-09-30 | ABDA | EQUITY | 620806680 | 6950 | 21885.0 | 4.754088e+07 | 0 | 98 | 47918272 | ... | 0 | 426136555 | 0 | 97403500 | 400 | 0 | 0 | 0 | 62 | 523540517 |
3 | 2021-09-30 | ABMM | EQUITY | 2753165000 | 1200 | 2277000.0 | 7.030500e+06 | 1600 | 0 | 66518400 | ... | 0 | 57406700 | 0 | 7628100 | 411600 | 10351700 | 0 | 0 | 452514400 | 528312500 |
4 | 2021-09-30 | ACES | EQUITY | 17150000000 | 1270 | 278775522.0 | 1.039961e+08 | 19573511 | 0 | 318630511 | ... | 20220800 | 602545467 | 1243693406 | 464664725 | 955700 | 2218346946 | 176680654 | 917900 | 1136360191 | 5864385789 |
5 rows × 25 columns
Pada tahapan ini kita sudah mulai melakukan ekslorasi data sesuai yang kita inginkan
Kita akan menacari berapa persentase asing dan lokal untuk masing masing emiten selama beberapa bulan
df['Total All'] = df['Total Local'] + df['Total Foreign']
df['persentase_lokal'] = 100*df['Total Local']/df['Total All']
df['persentase_asing'] = 100*df['Total Foreign']/df['Total All']
df1 = df[['Date','Type','Code','Price','persentase_lokal','persentase_asing']]
df1 = df1[df1.Type=='EQUITY']
desired_emiten = ['ACES','ADRO','ANTM','ASII','BRIS','CPIN','ICBP',
'INDF','JSMR','KAEF','KLBF','TLKM','UNVR','WIKA',
'BBCA','BBNI','BMRI','BUKA','EMTK']
df_filter1 = df1[df1.Code.isin(desired_emiten)].sort_values('persentase_asing',ascending=False)
df_filter1.sort_values('Date',ascending=True,inplace=True)
df_filter1[df_filter1.Code=='TLKM']
Date | Type | Code | Price | persentase_lokal | persentase_asing | |
---|---|---|---|---|---|---|
18117 | 2021-01-29 | EQUITY | TLKM | 3110 | 29.663063 | 70.336937 |
15933 | 2021-02-26 | EQUITY | TLKM | 3490 | 28.820007 | 71.179993 |
13747 | 2021-03-31 | EQUITY | TLKM | 3420 | 28.128730 | 71.871270 |
11565 | 2021-04-30 | EQUITY | TLKM | 3200 | 28.176963 | 71.823037 |
9402 | 2021-05-31 | EQUITY | TLKM | 3440 | 27.915379 | 72.084621 |
7246 | 2021-06-30 | EQUITY | TLKM | 3150 | 27.244481 | 72.755519 |
5076 | 2021-07-30 | EQUITY | TLKM | 3240 | 27.140362 | 72.859638 |
2911 | 2021-08-31 | EQUITY | TLKM | 3400 | 26.457086 | 73.542914 |
719 | 2021-09-30 | EQUITY | TLKM | 3690 | 25.585615 | 74.414385 |
df_filter1.head()
Date | Type | Code | Price | persentase_lokal | persentase_asing | |
---|---|---|---|---|---|---|
4372 | 2021-07-30 | EQUITY | ACES | 1320 | 12.912900 | 87.087100 |
2205 | 2021-08-31 | EQUITY | ACES | 1390 | 13.281219 | 86.718781 |
17608 | 2021-01-29 | EQUITY | CPIN | 5750 | 13.359545 | 86.640455 |
15423 | 2021-02-26 | EQUITY | CPIN | 6150 | 13.748108 | 86.251892 |
187 | 2021-09-30 | EQUITY | CPIN | 6425 | 14.298519 | 85.701481 |
df.to_csv('DataKepemilikanSaham.csv',index=False)