!pip install wrds
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Collecting wrds Downloading wrds-3.1.1-py3-none-any.whl (12 kB) Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.7/dist-packages (from wrds) (1.4.37) Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from wrds) (1.21.6) Collecting mock Downloading mock-4.0.3-py3-none-any.whl (28 kB) Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from wrds) (1.3.5) Collecting psycopg2-binary Downloading psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB) |████████████████████████████████| 3.0 MB 6.6 MB/s Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->wrds) (2.8.2) Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->wrds) (2022.1) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->wrds) (1.15.0) Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.7/dist-packages (from sqlalchemy->wrds) (1.1.2) Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from sqlalchemy->wrds) (4.11.4) Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->sqlalchemy->wrds) (4.1.1) Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->sqlalchemy->wrds) (3.8.0) Installing collected packages: psycopg2-binary, mock, wrds Successfully installed mock-4.0.3 psycopg2-binary-2.9.3 wrds-3.1.1
import wrds
# Now I am connecting to WRDS site - you must ahve a WRDS account user id and password
conn=wrds.Connection()
Enter your WRDS username [root]:hy11 Enter your password:·········· WRDS recommends setting up a .pgpass file. Create .pgpass file now [y/n]?: y Created .pgpass file successfully. Loading library list... Done
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
help(wrds)
Help on package wrds: NAME wrds DESCRIPTION WRDS Python Data Access Library ============================== WRDS-Py is a library for extracting data from WRDS data sources and getting it into Pandas. >>> import wrds >>> db = wrds.Connection() >>> db.list_libraries() ['aha', 'aha_sample', 'ahasamp', 'audit', 'audit_audit_comp', ...] >>> db.list_tables(library='crsp') ['acti', 'asia', 'asib', 'asic', 'asio', 'asix', 'bmdebt', 'bmheader', ...] >>> data = db.raw_sql('SELECT * FROM crsp.stocknames', index_col='permno') >>> data.head() permco namedt nameenddt cusip ncusip ticker permno 10000.0 7952.0 1986-01-07 1987-06-11 68391610 68391610 OMFGA 10001.0 7953.0 1986-01-09 1993-11-21 36720410 39040610 GFGC 10001.0 7953.0 1993-11-22 2008-02-04 36720410 29274A10 EWST 10001.0 7953.0 2008-02-05 2009-08-03 36720410 29274A20 EWST 10001.0 7953.0 2009-08-04 2009-12-17 36720410 29269V10 EGAS ... PACKAGE CONTENTS sql test DATA __copyright__ = '2017-2021 Wharton Research Data Services' __title__ = 'wrds-py' VERSION 3.1.1 AUTHOR Wharton Research Data Services FILE /usr/local/lib/python3.7/dist-packages/wrds/__init__.py
#listing all dtabases or libraries in WRDS
conn.list_libraries()
['compbd', 'compgd', 'compnad', 'compsegd', 'crspa', 'evts', 'issm', 'nastraq', 'tass', 'wrds_lib_internal', 'aha', 'estimize', 'levin', 'ciqsamp_capstrct', 'toyo', 'ahasamp', 'ciqsamp_pplintel', 'crspm', 'tresgsmp', 'trsdcgs', 'columnar', 'trws', 'lspd', 'twoiq', 'wcai', 'tresg', 'wind', 'wqa', 'msrb_all', 'custom_jl', 'risk', 'centris', 'wrdsapps_link_supplychain', 'block_all', 'zacks', 'crsp', 'contrib', 'audit', 'boardex_trial', 'emdb', 'calcbnch', 'comp_na_monthly_all', 'auditsmp', 'boardsmp', 'comp_segments_hist', 'wrdsrpts_rep_usage', 'public_all', 'pwt_all', 'msfanly', 'etfg', 'snlsamp_fig', 'wrdsapps_backtest_plus', 'pacap', 'ciqsamp_transcripts', 'ciq', 'tr_ds', 'ppublica', 'crsp_a_stock', 'preqin', 'etfg_samp', 'fisdsamp', 'windsmp', 'rent', 'sustainalyticssamp_all', 'wrdsapps_evtstudy_int_ginsight', 'compmcur', 'tr_dealscan', 'markit', 'secsamp_all', 'rpna', 'ibes', 'rpa', 'sdcsamp', 'wrdssec_midas', 'cisdmsmp', 'cboe_all', 'snl', 'cboe', 'sprat', 'ciqsamp', 'fjc_litigation', 'trucost', 'sustain', 'trdssamp', 'risksamp', 'midas', 'snlsamp', 'public', 'pwt', 'repsamp', 'snapsamp', 'taqsamp', 'totalq', 'optionm', 'comp_execucomp', 'fjc_linking', 'compa', 'bank', 'blab', 'compb', 'compg', 'block', 'taqmsec', 'trsamp_ds_eq', 'contrib_ceo_turnover', 'cisdm', 'clrvt', 'clrvtsmp', 'compm', 'compseg', 'compsamp', 'comph', 'compsnap', 'comscore', 'pitchbk', 'crspsamp', 'dmef', 'eursamp', 'contrib_kpss', 'auditsmp_all', 'crspq', 'execcomp', 'taqmsamp_all', 'ff', 'ftse', 'cusipm', 'boardex', 'eureka', 'eventus', 'ftsesamp', 'ginsight', 'ktsamp', 'factset', 'ifgrsamp', 'imssamp', 'gmi', 'fssamp', 'govpxsmp', 'hbsamp', 'hbase', 'hfr', 'ibescorp', 'mrktsamp_msf', 'iri', 'govpx', 'kld', 'ifgr', 'ibeskpi', 'fisd', 'lspdsamp', 'ims', 'ktmine', 'taq', 'aha_sample', 'compdcur', 'calcbench_trial', 'ciqsamp_common', 'ppubsamp_d4d', 'mrktsamp', 'otc', 'msrbsamp', 'contrib_general', 'preqsamp_all', 'twoiq_samp', 'msrb', 'mrktsamp_cdx', 'phlx_all', 'zacksamp_all', 'mfl', 'sustsamp', 'etfgsamp', 'phlx', 'ppubsamp', 'preqsamp', 'ciqsamp_keydev', 'tfn', 'toyosamp', 'wappsamp', 'wrdsapps_evtstudy_lr', 'crsp_a_ccm', 'zacksamp', 'revere', 'eurekahedge_sample', 'comp_na_daily_all', 'ims_obp_trial', 'wrdsrpts', 'compsamp_snapshot', 'comp_na_annual_all', 'rq_all', 'infogroupsamp_business', 'infogroupsamp_residential', 'ktmine_patents_samp', 'djones_all', 'doe_all', 'factsamp_all', 'toyosamp_all', 'wrdsapps_eushort', 'wrdsapps_subsidiary', 'wrdsappssamp_all', 'wrdsapps_link_datastream_wscope', 'wrdsapps_patents', 'dmef_all', 'macrofin_comm_trade', 'mrktsamp_cds', 'pitchsmp', 'wrdsapps_link_comp_eushort', 'totalq_all', 'trsamp_dscom', 'trsamp_dsecon', 'wrdsapps_finratio', 'reprisk', 'comp', 'trcstsmp', 'rpnasamp', 'audit_corp_legal', 'csmar', 'trsamp_dsfut', 'wrdsapps_backtest_basic', 'wrdsapps_link_crsp_bond', 'ff_all', 'wrdsapps_link_crsp_taq', 'contrib_intangible_value', 'wrdssec', 'wrdsapps', 'audit_common', 'contrib_char_returns', 'crsp_q_indexhist', 'bvd', 'secsamp', 'factsamp_revere', 'frb_all', 'hbase_sample', 'hfrsamp_hfrdb', 'trown', 'lvnsamp_all', 'risksamp_all', 'optionmsamp_europe', 'optionmsamp_us', 'reprisk_sample', 'wrdsapps_finratio_ccm', 'trace_standard', 'trdstrm', 'trace_enhanced', 'fjc', 'lvnsamp', 'omtrial', 'trace', 'bvdsamp', 'djones', 'doe', 'frb', 'hfrsamp', 'macrofin', 'comp_bank', 'trsamp', 'wrdsapps_link_crsp_factset', 'twoiqsmp', 'comp_bank_daily', 'audit_audit_comp', 'comp_segments_hist_daily', 'otc_endofday', 'dealscan', 'ravenpack_trial', 'msfinst', 'crsp_a_indexes', 'taqmsamp', 'iss', 'sdc']
#listing CRSP files
conn.list_tables(library="crspa")
['acti', 'asia', 'asib', 'asic', 'asio', 'asix', 'bmdebt', 'bmheader', 'bmpaymts', 'bmquotes', 'bmyield', 'bndprt06', 'bndprt12', 'bxcalind', 'bxdlyind', 'bxmthind', 'bxquotes', 'bxyield', 'ccm_lookup', 'ccm_qvards', 'ccmxpf_linktable', 'ccmxpf_lnkhist', 'ccmxpf_lnkrng', 'ccmxpf_lnkused', 'comphead', 'comphist', 'compmaster', 'crsp_daily_data', 'crsp_header', 'crsp_monthly_data', 'crsp_names', 'crsp_ziman_daily_index', 'crsp_ziman_monthly_index', 'cs20yr', 'cs5yr', 'cs90d', 'cst_hist', 'dport1', 'dport2', 'dport3', 'dport4', 'dport5', 'dport6', 'dport7', 'dport8', 'dport9', 'dsbc', 'dsbo', 'dse', 'dse62', 'dse62delist', 'dse62dist', 'dse62exchdates', 'dse62names', 'dse62nasdin', 'dse62shares', 'dseall', 'dseall62', 'dsedelist', 'dsedist', 'dseexchdates', 'dsenames', 'dsenasdin', 'dseshares', 'dsf', 'dsf62', 'dsfhdr', 'dsfhdr62', 'dsi', 'dsi62', 'dsia', 'dsib', 'dsic', 'dsio', 'dsir', 'dsix', 'dsiy', 'dsp500', 'dsp500list', 'dsp500p', 'dssc', 'dsso', 'erdport1', 'erdport2', 'erdport3', 'erdport4', 'erdport5', 'erdport6', 'erdport7', 'erdport8', 'erdport9', 'ermport1', 'ermport2', 'ermport3', 'ermport4', 'ermport5', 'fbpri', 'fbyld', 'fwdask06', 'fwdask12', 'fwdave06', 'fwdave12', 'fwdbid06', 'fwdbid12', 'hldask06', 'hldask12', 'hldave06', 'hldave12', 'hldbid06', 'hldbid12', 'index_type_map', 'mbi', 'mbmdat', 'mbmhdr', 'mbx', 'mbxid', 'mcti', 'mhista', 'mhistn', 'mhistq', 'mport1', 'mport2', 'mport3', 'mport4', 'mport5', 'mse', 'mse62', 'mse62delist', 'mse62dist', 'mse62exchdates', 'mse62names', 'mse62nasdin', 'mse62shares', 'mseall', 'mseall62', 'msedelist', 'msedist', 'mseexchdates', 'msenames', 'msenasdin', 'mseshares', 'msf', 'msf62', 'msfhdr', 'msfhdr62', 'msi', 'msi62', 'msia', 'msib', 'msic', 'msio', 'msir', 'msix', 'msiy', 'msp500', 'msp500list', 'msp500p', 'priask06', 'priask12', 'priave06', 'priave12', 'pribid06', 'pribid12', 'price_type', 'property_type', 'qcti', 'qsia', 'qsib', 'qsic', 'qsio', 'qsix', 'rebala', 'rebaln', 'rebalq', 'reit_type', 'riskfree', 's6z_del', 's6z_dind', 's6z_dis', 's6z_dp_dly', 's6z_ds_dly', 's6z_hdr', 's6z_indhdr', 's6z_mdel', 's6z_mind', 's6z_mth', 's6z_nam', 's6z_ndi', 's6z_shr', 'saz_del', 'saz_dind', 'saz_dis', 'saz_dp_dly', 'saz_ds_dly', 'saz_hdr', 'saz_indhdr', 'saz_mdel', 'saz_mind', 'saz_mth', 'saz_nam', 'saz_ndi', 'saz_shr', 'sechead', 'sechist', 'sfz_dind', 'sfz_indhdr', 'sfz_mbr', 'sfz_mind', 'sfz_portd', 'sfz_portm', 'sfz_rb', 'stock_qvards', 'stocknames', 'stocknames62', 'sub_property_type', 'tfz_dly', 'tfz_dly_cd', 'tfz_dly_cpi', 'tfz_dly_ft', 'tfz_dly_rf2', 'tfz_dly_ts2', 'tfz_idx', 'tfz_iss', 'tfz_mast', 'tfz_mth', 'tfz_mth_bp', 'tfz_mth_cd', 'tfz_mth_cpi', 'tfz_mth_fb', 'tfz_mth_ft', 'tfz_mth_rf', 'tfz_mth_rf2', 'tfz_mth_ts', 'tfz_mth_ts2', 'tfz_pay', 'yldask06', 'yldask12', 'yldave06', 'yldave12', 'yldbid06', 'yldbid12', 'ziman_reit_info', 'zr_hdrnames']
#listing CRSP dsf file or table
conn.describe_table('crsp', 'dsf')
Approximately 101070400 rows in crsp.dsf.
name | nullable | type | |
---|---|---|---|
0 | cusip | True | VARCHAR(8) |
1 | permno | True | DOUBLE_PRECISION |
2 | permco | True | DOUBLE_PRECISION |
3 | issuno | True | DOUBLE_PRECISION |
4 | hexcd | True | DOUBLE_PRECISION |
5 | hsiccd | True | DOUBLE_PRECISION |
6 | date | True | DATE |
7 | bidlo | True | DOUBLE_PRECISION |
8 | askhi | True | DOUBLE_PRECISION |
9 | prc | True | DOUBLE_PRECISION |
10 | vol | True | DOUBLE_PRECISION |
11 | ret | True | DOUBLE_PRECISION |
12 | bid | True | DOUBLE_PRECISION |
13 | ask | True | DOUBLE_PRECISION |
14 | shrout | True | DOUBLE_PRECISION |
15 | cfacpr | True | DOUBLE_PRECISION |
16 | cfacshr | True | DOUBLE_PRECISION |
17 | openprc | True | DOUBLE_PRECISION |
18 | numtrd | True | DOUBLE_PRECISION |
19 | retx | True | DOUBLE_PRECISION |
#listing CRSP dsf file or table
conn.describe_table('crspm', 'dsfhdr')
Approximately 36564 rows in crspm.dsfhdr.
name | nullable | type | |
---|---|---|---|
0 | permno | True | DOUBLE_PRECISION |
1 | permco | True | DOUBLE_PRECISION |
2 | hshrcd | True | DOUBLE_PRECISION |
3 | dlstcd | True | DOUBLE_PRECISION |
4 | hcusip | True | VARCHAR(8) |
5 | htick | True | VARCHAR(8) |
6 | hcomnam | True | VARCHAR(35) |
7 | htsymbol | True | VARCHAR(10) |
8 | hnaics | True | VARCHAR(7) |
9 | hprimexc | True | VARCHAR(1) |
10 | htrdstat | True | VARCHAR(1) |
11 | hsecstat | True | VARCHAR(1) |
12 | cusip | True | VARCHAR(8) |
13 | compno | True | DOUBLE_PRECISION |
14 | issuno | True | DOUBLE_PRECISION |
15 | hexcd | True | DOUBLE_PRECISION |
16 | hsiccd | True | DOUBLE_PRECISION |
17 | numnam | True | DOUBLE_PRECISION |
18 | numdis | True | DOUBLE_PRECISION |
19 | numshr | True | DOUBLE_PRECISION |
20 | numdel | True | DOUBLE_PRECISION |
21 | numndi | True | DOUBLE_PRECISION |
22 | begdat | True | DATE |
23 | enddat | True | DATE |
24 | begprc | True | DATE |
25 | endprc | True | DATE |
26 | begret | True | DATE |
27 | endret | True | DATE |
28 | begrtx | True | DATE |
29 | endrtx | True | DATE |
30 | begbidlo | True | DATE |
31 | endbidlo | True | DATE |
32 | begaskhi | True | DATE |
33 | endaskhi | True | DATE |
34 | begvol | True | DATE |
35 | endvol | True | DATE |
36 | begbid | True | DATE |
37 | endbid | True | DATE |
38 | begask | True | DATE |
39 | endask | True | DATE |
40 | begopr | True | DATE |
41 | endopr | True | DATE |
42 | hsicmg | True | DOUBLE_PRECISION |
43 | hsicig | True | DOUBLE_PRECISION |
#listing CRSP dsf file or table
conn.describe_table('crsp', 'dsfhdr')
Approximately 36437 rows in crsp.dsfhdr.
name | nullable | type | |
---|---|---|---|
0 | permno | True | DOUBLE_PRECISION |
1 | permco | True | DOUBLE_PRECISION |
2 | hshrcd | True | DOUBLE_PRECISION |
3 | dlstcd | True | DOUBLE_PRECISION |
4 | hcusip | True | VARCHAR(8) |
5 | htick | True | VARCHAR(8) |
6 | hcomnam | True | VARCHAR(35) |
7 | htsymbol | True | VARCHAR(10) |
8 | hnaics | True | VARCHAR(7) |
9 | hprimexc | True | VARCHAR(1) |
10 | htrdstat | True | VARCHAR(1) |
11 | hsecstat | True | VARCHAR(1) |
12 | cusip | True | VARCHAR(8) |
13 | compno | True | DOUBLE_PRECISION |
14 | issuno | True | DOUBLE_PRECISION |
15 | hexcd | True | DOUBLE_PRECISION |
16 | hsiccd | True | DOUBLE_PRECISION |
17 | numnam | True | DOUBLE_PRECISION |
18 | numdis | True | DOUBLE_PRECISION |
19 | numshr | True | DOUBLE_PRECISION |
20 | numdel | True | DOUBLE_PRECISION |
21 | numndi | True | DOUBLE_PRECISION |
22 | begdat | True | DATE |
23 | enddat | True | DATE |
24 | begprc | True | DATE |
25 | endprc | True | DATE |
26 | begret | True | DATE |
27 | endret | True | DATE |
28 | begrtx | True | DATE |
29 | endrtx | True | DATE |
30 | begbidlo | True | DATE |
31 | endbidlo | True | DATE |
32 | begaskhi | True | DATE |
33 | endaskhi | True | DATE |
34 | begvol | True | DATE |
35 | endvol | True | DATE |
36 | begbid | True | DATE |
37 | endbid | True | DATE |
38 | begask | True | DATE |
39 | endask | True | DATE |
40 | begopr | True | DATE |
41 | endopr | True | DATE |
42 | hsicmg | True | DOUBLE_PRECISION |
43 | hsicig | True | DOUBLE_PRECISION |
conn.describe_table('crsp', 'dse')
Approximately 12127940 rows in crsp.dse.
name | nullable | type | |
---|---|---|---|
0 | event | True | VARCHAR(8) |
1 | date | True | DATE |
2 | hsicmg | True | DOUBLE_PRECISION |
3 | hsicig | True | DOUBLE_PRECISION |
4 | comnam | True | VARCHAR(32) |
5 | cusip | True | VARCHAR(8) |
6 | dclrdt | True | DATE |
7 | dlamt | True | DOUBLE_PRECISION |
8 | dlpdt | True | DATE |
9 | dlstcd | True | DOUBLE_PRECISION |
10 | hsiccd | True | DOUBLE_PRECISION |
11 | issuno | True | DOUBLE_PRECISION |
12 | ncusip | True | VARCHAR(8) |
13 | nextdt | True | DATE |
14 | paydt | True | DATE |
15 | rcrddt | True | DATE |
16 | shrcls | True | VARCHAR(1) |
17 | shrflg | True | DOUBLE_PRECISION |
18 | ticker | True | VARCHAR(5) |
19 | permno | True | DOUBLE_PRECISION |
20 | nameendt | True | DATE |
21 | shrcd | True | DOUBLE_PRECISION |
22 | exchcd | True | DOUBLE_PRECISION |
23 | siccd | True | DOUBLE_PRECISION |
24 | tsymbol | True | VARCHAR(10) |
25 | naics | True | VARCHAR(7) |
26 | primexch | True | VARCHAR(1) |
27 | trdstat | True | VARCHAR(1) |
28 | secstat | True | VARCHAR(1) |
29 | permco | True | DOUBLE_PRECISION |
30 | compno | True | DOUBLE_PRECISION |
31 | hexcd | True | DOUBLE_PRECISION |
32 | distcd | True | DOUBLE_PRECISION |
33 | divamt | True | DOUBLE_PRECISION |
34 | facpr | True | DOUBLE_PRECISION |
35 | facshr | True | DOUBLE_PRECISION |
36 | acperm | True | DOUBLE_PRECISION |
37 | accomp | True | DOUBLE_PRECISION |
38 | nwperm | True | DOUBLE_PRECISION |
39 | nwcomp | True | DOUBLE_PRECISION |
40 | dlretx | True | DOUBLE_PRECISION |
41 | dlprc | True | DOUBLE_PRECISION |
42 | dlret | True | DOUBLE_PRECISION |
43 | shrout | True | DOUBLE_PRECISION |
44 | shrenddt | True | DATE |
45 | trtscd | True | DOUBLE_PRECISION |
46 | trtsendt | True | DATE |
47 | nmsind | True | DOUBLE_PRECISION |
48 | mmcnt | True | DOUBLE_PRECISION |
49 | nsdinx | True | DOUBLE_PRECISION |
conn.describe_table('crspm', 'dsf')
Approximately 101263896 rows in crspm.dsf.
name | nullable | type | |
---|---|---|---|
0 | cusip | True | VARCHAR(8) |
1 | permno | True | DOUBLE_PRECISION |
2 | permco | True | DOUBLE_PRECISION |
3 | issuno | True | DOUBLE_PRECISION |
4 | hexcd | True | DOUBLE_PRECISION |
5 | hsiccd | True | DOUBLE_PRECISION |
6 | date | True | DATE |
7 | bidlo | True | DOUBLE_PRECISION |
8 | askhi | True | DOUBLE_PRECISION |
9 | prc | True | DOUBLE_PRECISION |
10 | vol | True | DOUBLE_PRECISION |
11 | ret | True | DOUBLE_PRECISION |
12 | bid | True | DOUBLE_PRECISION |
13 | ask | True | DOUBLE_PRECISION |
14 | shrout | True | DOUBLE_PRECISION |
15 | cfacpr | True | DOUBLE_PRECISION |
16 | cfacshr | True | DOUBLE_PRECISION |
17 | openprc | True | DOUBLE_PRECISION |
18 | numtrd | True | DOUBLE_PRECISION |
19 | retx | True | DOUBLE_PRECISION |
conn.describe_table('crsp', 'stocknames')
Approximately 77779 rows in crsp.stocknames.
name | nullable | type | |
---|---|---|---|
0 | permno | True | DOUBLE_PRECISION |
1 | namedt | True | DATE |
2 | nameenddt | True | DATE |
3 | shrcd | True | DOUBLE_PRECISION |
4 | exchcd | True | DOUBLE_PRECISION |
5 | siccd | True | DOUBLE_PRECISION |
6 | ncusip | True | VARCHAR(8) |
7 | ticker | True | VARCHAR(8) |
8 | comnam | True | VARCHAR(35) |
9 | shrcls | True | VARCHAR(4) |
10 | permco | True | DOUBLE_PRECISION |
11 | hexcd | True | DOUBLE_PRECISION |
12 | cusip | True | VARCHAR(8) |
13 | st_date | True | DATE |
14 | end_date | True | DATE |
15 | namedum | True | DOUBLE_PRECISION |
conn.describe_table('crsp', 'dsenames')
Approximately 110198 rows in crsp.dsenames.
name | nullable | type | |
---|---|---|---|
0 | permno | True | DOUBLE_PRECISION |
1 | namedt | True | DATE |
2 | nameendt | True | DATE |
3 | shrcd | True | DOUBLE_PRECISION |
4 | exchcd | True | DOUBLE_PRECISION |
5 | siccd | True | DOUBLE_PRECISION |
6 | ncusip | True | VARCHAR(8) |
7 | ticker | True | VARCHAR(8) |
8 | comnam | True | VARCHAR(35) |
9 | shrcls | True | VARCHAR(4) |
10 | tsymbol | True | VARCHAR(10) |
11 | naics | True | VARCHAR(7) |
12 | primexch | True | VARCHAR(1) |
13 | trdstat | True | VARCHAR(1) |
14 | secstat | True | VARCHAR(1) |
15 | permco | True | DOUBLE_PRECISION |
16 | compno | True | DOUBLE_PRECISION |
17 | issuno | True | DOUBLE_PRECISION |
18 | hexcd | True | DOUBLE_PRECISION |
19 | hsiccd | True | DOUBLE_PRECISION |
20 | cusip | True | VARCHAR(8) |
conn.describe_table('crsp', 'crsp_header')
Approximately 673 rows in crsp.crsp_header.
name | nullable | type | |
---|---|---|---|
0 | permno | True | DOUBLE_PRECISION |
1 | permco | True | DOUBLE_PRECISION |
2 | begdt | True | DATE |
3 | enddt | True | DATE |
4 | comnam | True | VARCHAR(64) |
5 | hdlstcd | True | DOUBLE_PRECISION |
CRSP is a library, and the monthly stock file CRSP.MSF is a table.
stock_names = conn.get_table(library = 'crsp', table ='stocknames',
columns=['permno', 'comnam','ticker', 'siccd','shrcd', 'namedt','nameenddt'])
stock_names.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 77779 entries, 0 to 77778 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 permno 77779 non-null float64 1 comnam 77779 non-null object 2 ticker 72881 non-null object 3 siccd 77779 non-null float64 4 shrcd 77779 non-null float64 5 namedt 77779 non-null object 6 nameenddt 77779 non-null object dtypes: float64(3), object(4) memory usage: 4.2+ MB
conn.list_tables(library="comp")
['aco_amda', 'aco_imda', 'aco_indfnta', 'aco_indfntq', 'aco_indfntytd', 'aco_indsta', 'aco_indstq', 'aco_indstytd', 'aco_notesa', 'aco_notesq', 'aco_notessa', 'aco_notesytd', 'aco_pnfnda', 'aco_pnfndq', 'aco_pnfndytd', 'aco_pnfnta', 'aco_pnfntq', 'aco_pnfntytd', 'aco_transa', 'aco_transq', 'aco_transsa', 'aco_transytd', 'adsprate', 'asec_amda', 'asec_imda', 'asec_notesa', 'asec_notesq', 'asec_transa', 'asec_transq', 'bank_aacctchg', 'bank_adesind', 'bank_afnd1', 'bank_afnd2', 'bank_afnddc1', 'bank_afnddc2', 'bank_afntind', 'bank_funda', 'bank_funda_fncd', 'bank_fundq', 'bank_fundq_fncd', 'bank_iacctchg', 'bank_idesind', 'bank_ifndq', 'bank_ifndytd', 'bank_ifntq', 'bank_ifntytd', 'bank_names', 'bank_namesq', 'chars', 'co_aacctchg', 'co_aaudit', 'co_adesind', 'co_adjfact', 'co_afnd1', 'co_afnd2', 'co_afnddc1', 'co_afnddc2', 'co_afntind1', 'co_afntind2', 'co_ainvval', 'co_amkt', 'co_busdescl', 'co_cotype', 'co_filedate', 'co_fortune', 'co_hgic', 'co_iacctchg', 'co_iaudit', 'co_idesind', 'co_ifndq', 'co_ifndsa', 'co_ifndytd', 'co_ifntq', 'co_ifntsa', 'co_ifntytd', 'co_imkt', 'co_industry', 'co_ipcd', 'co_mthly', 'co_offtitl', 'company', 'currency', 'dd_group', 'dd_group_xref', 'dd_item', 'dd_package', 'ecind_desc', 'ecind_mth', 'exrt_dly', 'exrt_mth', 'filings', 'funda', 'funda_fncd', 'fundq', 'fundq_fncd', 'g_chars', 'g_co_aaudit', 'g_co_adesind', 'g_co_afnd1', 'g_co_afnd2', 'g_co_afnddc1', 'g_co_afnddc2', 'g_co_afntind1', 'g_co_afntind2', 'g_co_ainvval', 'g_co_gsuppl', 'g_co_hgic', 'g_co_iaudit', 'g_co_idesind', 'g_co_ifndq', 'g_co_ifndsa', 'g_co_ifndytd', 'g_co_ifntq', 'g_co_ifntsa', 'g_co_ifntytd', 'g_co_industry', 'g_co_ipcd', 'g_co_offtitl', 'g_company', 'g_currency', 'g_ecind_desc', 'g_ecind_mth', 'g_exrt_dly', 'g_exrt_mth', 'g_funda', 'g_funda_fncd', 'g_fundq', 'g_fundq_fncd', 'g_idx_daily', 'g_idx_index', 'g_idx_mth', 'g_idxcst_his', 'g_names', 'g_names_ix', 'g_names_ix_cst', 'g_namesq', 'g_sec_adesind', 'g_sec_adjfact', 'g_sec_afnd', 'g_sec_afnddc', 'g_sec_afnt', 'g_sec_divid', 'g_sec_dprc', 'g_sec_dtrt', 'g_sec_history', 'g_sec_idesind', 'g_sec_ifnd', 'g_sec_ifnt', 'g_sec_split', 'g_secd', 'g_secnamesd', 'g_security', 'g_sedolgvkey', 'g_tmptable_pkg6153_tbl4023', 'idx_ann', 'idx_anndes', 'idx_daily', 'idx_index', 'idx_mth', 'idx_qrt', 'idx_qrtdes', 'idxcst_his', 'io_qaggregate', 'io_qbuysell', 'io_qchanges', 'io_qfloatadj', 'io_qholders', 'it_mbuysell', 'it_msummary', 'it_r_rltn', 'names', 'names_aco_indsta', 'names_aco_indstq', 'names_aco_pnfnda', 'names_aco_pnfndq', 'names_adsprate', 'names_ix', 'names_ix_cst', 'names_seg', 'namesd', 'namesm', 'namesq', 'r_accstd', 'r_acqmeth', 'r_auditors', 'r_auopic', 'r_balpres', 'r_cf_formt', 'r_co_status', 'r_coindpre', 'r_compstat', 'r_consol', 'r_country', 'r_cstclscd', 'r_datacode', 'r_datafmt', 'r_divtaxmarker', 'r_docsrce', 'r_ex_codes', 'r_exchgtier', 'r_exrt_typ', 'r_fndfntcd', 'r_footnts', 'r_foricd', 'r_giccd', 'r_hcalendr', 'r_idxclscd', 'r_inactvcd', 'r_incstats', 'r_indfmt', 'r_indsec', 'r_invval', 'r_issuetyp', 'r_majidxcl', 'r_mic_codes', 'r_naiccd', 'r_notetype', 'r_ntsubtype', 'r_offcrso', 'r_ogmethod', 'r_opinions', 'r_prc_stat', 'r_qsrcdoc', 'r_sec_stat', 'r_secannfn', 'r_sectors', 'r_siccd', 'r_spiicd', 'r_spmicd', 'r_statalrt', 'r_states', 'r_stko', 'r_titles', 'r_updates', 'sec_adesind', 'sec_adjfact', 'sec_afnd', 'sec_afnddc', 'sec_afnt', 'sec_divid', 'sec_dprc', 'sec_dtrt', 'sec_history', 'sec_idesind', 'sec_ifnd', 'sec_ifnt', 'sec_mdivfn', 'sec_mshare', 'sec_msptfn', 'sec_mth', 'sec_mthdiv', 'sec_mthprc', 'sec_mthspt', 'sec_mthtrt', 'sec_shortint', 'sec_spind', 'sec_split', 'secd', 'secm', 'security', 'sedolgvkey', 'seg_ann', 'seg_annfund', 'seg_customer', 'seg_geo', 'seg_naics', 'seg_product', 'seg_type', 'spidx_cst', 'spind', 'spind_dly', 'spind_mth', 'tmptable_pkg6153_tbl4023', 'wrds_seg_customer', 'wrds_seg_geo', 'wrds_seg_product', 'wrds_segmerged', 'xfl_column', 'xfl_table']
conn.describe_table('comp', 'funda')
Approximately 858064 rows in comp.funda.
name | nullable | type | |
---|---|---|---|
0 | gvkey | True | VARCHAR(6) |
1 | datadate | True | DATE |
2 | fyear | True | DOUBLE_PRECISION |
3 | indfmt | True | VARCHAR(12) |
4 | consol | True | VARCHAR(2) |
... | ... | ... | ... |
943 | au | True | VARCHAR(8) |
944 | auop | True | VARCHAR(8) |
945 | auopic | True | VARCHAR(1) |
946 | ceoso | True | VARCHAR(1) |
947 | cfoso | True | VARCHAR(1) |
948 rows × 3 columns
firm_names = conn.get_table(library = 'comp', table ='funda',
columns=['gvkey', 'fyear','datadate', 'tic','conm'])
firm_names.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 858064 entries, 0 to 358063 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 gvkey 858064 non-null object 1 fyear 857868 non-null float64 2 datadate 858064 non-null object 3 tic 857836 non-null object 4 conm 858064 non-null object dtypes: float64(1), object(4) memory usage: 39.3+ MB
help(conn.raw_sql)
Help on method raw_sql in module wrds.sql: raw_sql(sql, coerce_float=True, date_cols=None, index_col=None, params=None, chunksize=500000, return_iter=False) method of wrds.sql.Connection instance Queries the database using a raw SQL string. :param sql: SQL code in string object. :param coerce_float: (optional) boolean, default: True Attempt to convert values to non-string, non-numeric objects to floating point. Can result in loss of precision. :param date_cols: (optional) list or dict, default: None - List of column names to parse as date - Dict of ``{column_name: format string}`` where format string is: strftime compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps - Dict of ``{column_name: arg dict}``, where the arg dict corresponds to the keyword arguments of :func:`pandas.to_datetime` :param index_col: (optional) string or list of strings, default: None Column(s) to set as index(MultiIndex) :param params: parameters to SQL query, if parameterized. :param chunksize: (optional) integer or None default: 500000 Process query in chunks of this size. Smaller chunksizes can save a considerable amount of memory while query is being processed. Set to None run query w/o chunking. :param return_iter: (optional) boolean, default:False When chunksize is not None, return an iterator where chunksize number of rows is included in each chunk. :rtype: pandas.DataFrame or or Iterator[pandas.DataFrame] Usage :: # Basic Usage >>> data = db.raw_sql('select cik, fdate, coname from wrdssec_all.dforms;', date_cols=['fdate'], index_col='cik') >>> data.head() cik fdate coname 0000000003 1995-02-15 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y... 0000000003 1996-02-14 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y... 0000000003 1997-02-19 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y... 0000000003 1998-03-02 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y... 0000000003 1998-03-10 DEFINED ASSET FUNDS MUNICIPAL INVT TR FD NEW Y.. ... # Parameterized SQL query >>> parm = {'syms': ('A', 'AA', 'AAPL'), 'num_shares': 50000} >>> data = db.raw_sql('select * from taqmsec.ctm_20030910 where sym_root in %(syms)s and size > %(num_shares)s', params=parm) >>> data.head() date time_m ex sym_root sym_suffix tr_scond size price tr_stopind tr_corr tr_seqnum tr_source tr_rf 2003-09-10 11:02:09.485000 T A None None 211400.0 25.350 N 00 1.929952e+15 C None 2003-09-10 11:04:29.508000 N A None None 55500.0 25.180 N 00 1.929952e+15 C None 2003-09-10 15:08:21.155000 N A None None 50500.0 24.470 N 00 1.929967e+15 C None 2003-09-10 16:10:35.522000 T A None B 71900.0 24.918 N 00 1.929970e+15 C None 2003-09-10 09:35:20.709000 N AA None None 108100.0 28.200 N 00 1.929947e+15 C None
crsp = conn.raw_sql("""
SELECT permno, date, cusip, permco, ret, vol, shrout, prc, cfacpr, cfacshr
FROM crsp.msf
WHERE date BETWEEN '01/01/2020' AND '12/31/2021'
""", date_cols = ['date'])
print(crsp.head(10))
print(crsp.info())
permno date cusip permco ret vol shrout \ 0 10026.0 2020-01-31 46603210 7976.0 -0.100016 22433.0 18919.0 1 10028.0 2020-01-31 29402E10 7978.0 0.607407 27373.0 26924.0 2 10032.0 2020-01-31 72913210 7980.0 -0.075643 41351.0 29222.0 3 10044.0 2020-01-31 77467X10 7992.0 -0.098592 4414.0 6000.0 4 10051.0 2020-01-31 41043F20 7999.0 -0.115176 51275.0 37338.0 5 10065.0 2020-01-31 00621210 20023.0 0.005707 58175.0 105413.0 6 10066.0 2020-01-31 35518410 6331.0 NaN NaN 43809.0 7 10104.0 2020-01-31 68389X10 8045.0 -0.005474 2005627.0 3207649.0 8 10107.0 2020-01-31 59491810 8048.0 0.079455 5588346.0 7606047.0 9 10113.0 2020-01-31 00768Y20 53202.0 -0.001206 5322.0 2250.0 prc cfacpr cfacshr 0 165.839996 1.0 1.0 1 2.170000 1.0 1.0 2 71.120003 1.0 1.0 3 8.320000 1.0 1.0 4 24.430000 1.0 1.0 5 15.860000 1.0 1.0 6 NaN 1.0 1.0 7 52.450001 1.0 1.0 8 170.229996 1.0 1.0 9 53.825001 1.0 1.0 <class 'pandas.core.frame.DataFrame'> RangeIndex: 200262 entries, 0 to 200261 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 permno 200262 non-null float64 1 date 200262 non-null datetime64[ns] 2 cusip 200262 non-null object 3 permco 200262 non-null float64 4 ret 193846 non-null float64 5 vol 197238 non-null float64 6 shrout 197797 non-null float64 7 prc 196431 non-null float64 8 cfacpr 197797 non-null float64 9 cfacshr 197797 non-null float64 dtypes: datetime64[ns](1), float64(8), object(1) memory usage: 15.3+ MB None
#checking data intergrity
check = {"tickers": ("MSFT", "TSLA", "COP", "AAPL", "WMT")}
checking = conn.raw_sql('select * from crsp.dse WHERE ticker in %(tickers)s', params=check)
checking
event | date | hsicmg | hsicig | comnam | cusip | dclrdt | dlamt | dlpdt | dlstcd | ... | dlretx | dlprc | dlret | shrout | shrenddt | trtscd | trtsendt | nmsind | mmcnt | nsdinx | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NAMES | 1980-12-12 | 35.0 | 357.0 | APPLE COMPUTER INC | 03783310 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
1 | NAMES | 1982-11-01 | 35.0 | 357.0 | APPLE COMPUTER INC | 03783310 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
2 | NAMES | 2004-06-10 | 35.0 | 357.0 | APPLE COMPUTER INC | 03783310 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
3 | NAMES | 2007-01-11 | 35.0 | 357.0 | APPLE INC | 03783310 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
4 | NAMES | 2017-12-28 | 35.0 | 357.0 | APPLE INC | 03783310 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
5 | NAMES | 2002-09-03 | 13.0 | 138.0 | CONOCOPHILLIPS | 20825C10 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
6 | NAMES | 2004-02-18 | 13.0 | 138.0 | CONOCOPHILLIPS | 20825C10 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
7 | NAMES | 2004-06-10 | 13.0 | 138.0 | CONOCOPHILLIPS | 20825C10 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
8 | NAMES | 2014-01-31 | 13.0 | 138.0 | CONOCOPHILLIPS | 20825C10 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
9 | NAMES | 1996-11-19 | 20.0 | 209.0 | CONSOLIDATED PRODUCTS INC | 08986R30 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
10 | NAMES | 1985-07-19 | 67.0 | 679.0 | COPLEY PROPERTY INC | 21745410 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
11 | NAMES | 1986-03-13 | 73.0 | 737.0 | MICROSOFT CORP | 59491810 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
12 | NAMES | 2004-06-10 | 73.0 | 737.0 | MICROSOFT CORP | 59491810 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
13 | NAMES | 2010-06-29 | 99.0 | 999.0 | TESLA MOTORS INC | 88160R10 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
14 | NAMES | 2017-02-02 | 99.0 | 999.0 | TESLA INC | 88160R10 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
15 | NAMES | 1972-11-20 | 53.0 | 531.0 | WAL MART STORES INC | 93114210 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
16 | NAMES | 2002-01-02 | 53.0 | 531.0 | WAL MART STORES INC | 93114210 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
17 | NAMES | 2004-06-10 | 53.0 | 531.0 | WAL MART STORES INC | 93114210 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
18 | NAMES | 2012-03-01 | 53.0 | 531.0 | WAL MART STORES INC | 93114210 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
19 | NAMES | 2014-01-07 | 53.0 | 531.0 | WAL MART STORES INC | 93114210 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
20 | NAMES | 2018-02-01 | 53.0 | 531.0 | WALMART INC | 93114210 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
21 | NAMES | 2020-11-18 | 53.0 | 531.0 | WALMART INC | 93114210 | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
22 rows × 50 columns
checking = conn.raw_sql('select * from crsp.stocknames WHERE ticker in %(tickers)s', params=check)
checking
permno | namedt | nameenddt | shrcd | exchcd | siccd | ncusip | ticker | comnam | shrcls | permco | hexcd | cusip | st_date | end_date | namedum | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10107.0 | 1986-03-13 | 2022-03-31 | 11.0 | 3.0 | 7370.0 | 59491810 | MSFT | MICROSOFT CORP | None | 8048.0 | 3.0 | 59491810 | 1986-03-13 | 2022-03-31 | 2.0 |
1 | 13928.0 | 2002-09-03 | 2004-02-17 | 11.0 | 1.0 | 1311.0 | 20825C10 | COP | CONOCOPHILLIPS | None | 21401.0 | 1.0 | 20825C10 | 1925-12-31 | 2022-03-31 | 2.0 |
2 | 13928.0 | 2004-02-18 | 2014-01-30 | 11.0 | 1.0 | 2911.0 | 20825C10 | COP | CONOCOPHILLIPS | None | 21401.0 | 1.0 | 20825C10 | 1925-12-31 | 2022-03-31 | 2.0 |
3 | 13928.0 | 2014-01-31 | 2022-03-31 | 11.0 | 1.0 | 1382.0 | 20825C10 | COP | CONOCOPHILLIPS | None | 21401.0 | 1.0 | 20825C10 | 1925-12-31 | 2022-03-31 | 2.0 |
4 | 14593.0 | 1980-12-12 | 2007-01-10 | 11.0 | 3.0 | 3573.0 | 03783310 | AAPL | APPLE COMPUTER INC | None | 7.0 | 3.0 | 03783310 | 1980-12-12 | 2022-03-31 | 2.0 |
5 | 14593.0 | 2007-01-11 | 2022-03-31 | 11.0 | 3.0 | 3571.0 | 03783310 | AAPL | APPLE INC | None | 7.0 | 3.0 | 03783310 | 1980-12-12 | 2022-03-31 | 2.0 |
6 | 26607.0 | 1996-11-19 | 2001-02-11 | 11.0 | 1.0 | 5812.0 | 20979810 | COP | CONSOLIDATED PRODUCTS INC | None | 4255.0 | 1.0 | 08986R30 | 1972-12-14 | 2022-03-31 | 2.0 |
7 | 55976.0 | 1972-11-20 | 2002-01-01 | 11.0 | 1.0 | 5311.0 | 93114210 | WMT | WAL MART STORES INC | None | 21880.0 | 1.0 | 93114210 | 1972-11-20 | 2022-03-31 | 2.0 |
8 | 55976.0 | 2002-01-02 | 2012-02-29 | 11.0 | 1.0 | 5331.0 | 93114210 | WMT | WAL MART STORES INC | None | 21880.0 | 1.0 | 93114210 | 1972-11-20 | 2022-03-31 | 2.0 |
9 | 55976.0 | 2012-03-01 | 2014-01-06 | 11.0 | 1.0 | 5311.0 | 93114210 | WMT | WAL MART STORES INC | None | 21880.0 | 1.0 | 93114210 | 1972-11-20 | 2022-03-31 | 2.0 |
10 | 55976.0 | 2014-01-07 | 2018-01-31 | 11.0 | 1.0 | 5331.0 | 93114210 | WMT | WAL MART STORES INC | None | 21880.0 | 1.0 | 93114210 | 1972-11-20 | 2022-03-31 | 2.0 |
11 | 55976.0 | 2018-02-01 | 2020-11-17 | 11.0 | 1.0 | 5331.0 | 93114210 | WMT | WALMART INC | None | 21880.0 | 1.0 | 93114210 | 1972-11-20 | 2022-03-31 | 2.0 |
12 | 55976.0 | 2020-11-18 | 2022-03-31 | 11.0 | 1.0 | 5311.0 | 93114210 | WMT | WALMART INC | None | 21880.0 | 1.0 | 93114210 | 1972-11-20 | 2022-03-31 | 2.0 |
13 | 67854.0 | 1985-07-19 | 1996-06-19 | 18.0 | 2.0 | 6799.0 | 21745410 | COP | COPLEY PROPERTY INC | None | 20519.0 | 2.0 | 21745410 | 1985-07-19 | 1996-06-19 | 2.0 |
14 | 93436.0 | 2010-06-29 | 2017-02-01 | 11.0 | 3.0 | 9999.0 | 88160R10 | TSLA | TESLA MOTORS INC | None | 53453.0 | 3.0 | 88160R10 | 2010-06-29 | 2022-03-31 | 2.0 |
15 | 93436.0 | 2017-02-02 | 2022-03-31 | 11.0 | 3.0 | 9999.0 | 88160R10 | TSLA | TESLA INC | None | 53453.0 | 3.0 | 88160R10 | 2010-06-29 | 2022-03-31 | 2.0 |
checking = conn.raw_sql('select * from crsp.dsenames WHERE ticker in %(tickers)s', params=check)
checking
permno | namedt | nameendt | shrcd | exchcd | siccd | ncusip | ticker | comnam | shrcls | ... | naics | primexch | trdstat | secstat | permco | compno | issuno | hexcd | hsiccd | cusip | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 14593.0 | 1980-12-12 | 1982-10-31 | 11.0 | 3.0 | 3573.0 | 03783310 | AAPL | APPLE COMPUTER INC | None | ... | None | Q | A | R | 7.0 | 60000006.0 | 8.0 | 3.0 | 3571.0 | 03783310 |
1 | 14593.0 | 1982-11-01 | 2004-06-09 | 11.0 | 3.0 | 3573.0 | 03783310 | AAPL | APPLE COMPUTER INC | None | ... | None | Q | A | R | 7.0 | 60000006.0 | 8.0 | 3.0 | 3571.0 | 03783310 |
2 | 14593.0 | 2004-06-10 | 2007-01-10 | 11.0 | 3.0 | 3573.0 | 03783310 | AAPL | APPLE COMPUTER INC | None | ... | 334111 | Q | A | R | 7.0 | 60000006.0 | 8.0 | 3.0 | 3571.0 | 03783310 |
3 | 14593.0 | 2007-01-11 | 2017-12-27 | 11.0 | 3.0 | 3571.0 | 03783310 | AAPL | APPLE INC | None | ... | 334111 | Q | A | R | 7.0 | 60000006.0 | 8.0 | 3.0 | 3571.0 | 03783310 |
4 | 14593.0 | 2017-12-28 | 2022-03-31 | 11.0 | 3.0 | 3571.0 | 03783310 | AAPL | APPLE INC | None | ... | 334220 | Q | A | R | 7.0 | 60000006.0 | 8.0 | 3.0 | 3571.0 | 03783310 |
5 | 13928.0 | 2002-09-03 | 2004-02-17 | 11.0 | 1.0 | 1311.0 | 20825C10 | COP | CONOCOPHILLIPS | None | ... | None | N | A | R | 21401.0 | 0.0 | 0.0 | 1.0 | 1382.0 | 20825C10 |
6 | 13928.0 | 2004-02-18 | 2004-06-09 | 11.0 | 1.0 | 2911.0 | 20825C10 | COP | CONOCOPHILLIPS | None | ... | None | N | A | R | 21401.0 | 0.0 | 0.0 | 1.0 | 1382.0 | 20825C10 |
7 | 13928.0 | 2004-06-10 | 2014-01-30 | 11.0 | 1.0 | 2911.0 | 20825C10 | COP | CONOCOPHILLIPS | None | ... | 324110 | N | A | R | 21401.0 | 0.0 | 0.0 | 1.0 | 1382.0 | 20825C10 |
8 | 13928.0 | 2014-01-31 | 2022-03-31 | 11.0 | 1.0 | 1382.0 | 20825C10 | COP | CONOCOPHILLIPS | None | ... | 213112 | N | A | R | 21401.0 | 0.0 | 0.0 | 1.0 | 1382.0 | 20825C10 |
9 | 26607.0 | 1996-11-19 | 2001-02-11 | 11.0 | 1.0 | 5812.0 | 20979810 | COP | CONSOLIDATED PRODUCTS INC | None | ... | None | N | A | R | 4255.0 | 60004254.0 | 5399.0 | 1.0 | 2099.0 | 08986R30 |
10 | 67854.0 | 1985-07-19 | 1996-06-19 | 18.0 | 2.0 | 6799.0 | 21745410 | COP | COPLEY PROPERTY INC | None | ... | None | A | A | R | 20519.0 | 0.0 | 0.0 | 2.0 | 6799.0 | 21745410 |
11 | 10107.0 | 1986-03-13 | 2004-06-09 | 11.0 | 3.0 | 7370.0 | 59491810 | MSFT | MICROSOFT CORP | None | ... | None | Q | A | R | 8048.0 | 60008001.0 | 10539.0 | 3.0 | 7370.0 | 59491810 |
12 | 10107.0 | 2004-06-10 | 2022-03-31 | 11.0 | 3.0 | 7370.0 | 59491810 | MSFT | MICROSOFT CORP | None | ... | 511210 | Q | A | R | 8048.0 | 60008001.0 | 10539.0 | 3.0 | 7370.0 | 59491810 |
13 | 93436.0 | 2010-06-29 | 2017-02-01 | 11.0 | 3.0 | 9999.0 | 88160R10 | TSLA | TESLA MOTORS INC | None | ... | 336111 | Q | A | R | 53453.0 | 60069832.0 | 66252.0 | 3.0 | 9999.0 | 88160R10 |
14 | 93436.0 | 2017-02-02 | 2022-03-31 | 11.0 | 3.0 | 9999.0 | 88160R10 | TSLA | TESLA INC | None | ... | 336111 | Q | A | R | 53453.0 | 60069832.0 | 66252.0 | 3.0 | 9999.0 | 88160R10 |
15 | 55976.0 | 1972-11-20 | 2002-01-01 | 11.0 | 1.0 | 5311.0 | 93114210 | WMT | WAL MART STORES INC | None | ... | None | N | A | R | 21880.0 | 0.0 | 0.0 | 1.0 | 5311.0 | 93114210 |
16 | 55976.0 | 2002-01-02 | 2004-06-09 | 11.0 | 1.0 | 5331.0 | 93114210 | WMT | WAL MART STORES INC | None | ... | None | N | A | R | 21880.0 | 0.0 | 0.0 | 1.0 | 5311.0 | 93114210 |
17 | 55976.0 | 2004-06-10 | 2012-02-29 | 11.0 | 1.0 | 5331.0 | 93114210 | WMT | WAL MART STORES INC | None | ... | 452990 | N | A | R | 21880.0 | 0.0 | 0.0 | 1.0 | 5311.0 | 93114210 |
18 | 55976.0 | 2012-03-01 | 2014-01-06 | 11.0 | 1.0 | 5311.0 | 93114210 | WMT | WAL MART STORES INC | None | ... | 452990 | N | A | R | 21880.0 | 0.0 | 0.0 | 1.0 | 5311.0 | 93114210 |
19 | 55976.0 | 2014-01-07 | 2018-01-31 | 11.0 | 1.0 | 5331.0 | 93114210 | WMT | WAL MART STORES INC | None | ... | 452990 | N | A | R | 21880.0 | 0.0 | 0.0 | 1.0 | 5311.0 | 93114210 |
20 | 55976.0 | 2018-02-01 | 2020-11-17 | 11.0 | 1.0 | 5331.0 | 93114210 | WMT | WALMART INC | None | ... | 452990 | N | A | R | 21880.0 | 0.0 | 0.0 | 1.0 | 5311.0 | 93114210 |
21 | 55976.0 | 2020-11-18 | 2022-03-31 | 11.0 | 1.0 | 5311.0 | 93114210 | WMT | WALMART INC | None | ... | 452210 | N | A | R | 21880.0 | 0.0 | 0.0 | 1.0 | 5311.0 | 93114210 |
22 rows × 21 columns
check = {'tickers': ('JNJ','COP')}
check
{'tickers': ('JNJ', 'COP')}
crsp = conn.raw_sql("""
select a.permno, a.date, a.ret, b.rf, b.mktrf, b.smb, b.hml
from crsp.dsf as a
left join ff.factors_daily as b
on a.date=b.date
where a.date > '01/01/2020'
""")
# this example shows that tickers are not a good screening id
checking = conn.raw_sql('select a.permno, a.ticker, a.comnam, b.date, b.prc, b.ret, b.retx, b.cfacpr from crsp.stocknames a join crsp.dsf b on a.permno = b.permno WHERE a.ticker in %(tickers)s and a.st_date <= b.date and b.date <= a.end_date', params=check)
checking
permno | ticker | comnam | date | prc | ret | retx | cfacpr | |
---|---|---|---|---|---|---|---|---|
0 | 13928.0 | COP | CONOCOPHILLIPS | 1925-12-31 | 46.500 | NaN | NaN | 161.302414 |
1 | 13928.0 | COP | CONOCOPHILLIPS | 1926-01-02 | 46.625 | 0.002688 | 0.002688 | 161.302414 |
2 | 13928.0 | COP | CONOCOPHILLIPS | 1926-01-04 | 46.500 | -0.002681 | -0.002681 | 161.302414 |
3 | 13928.0 | COP | CONOCOPHILLIPS | 1926-01-05 | 45.750 | -0.016129 | -0.016129 | 161.302414 |
4 | 13928.0 | COP | CONOCOPHILLIPS | 1926-01-06 | 45.750 | 0.000000 | 0.000000 | 161.302414 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
209834 | 67854.0 | COP | COPLEY PROPERTY INC | 1996-06-13 | 15.625 | 0.000000 | 0.000000 | 1.000000 |
209835 | 67854.0 | COP | COPLEY PROPERTY INC | 1996-06-14 | 15.250 | -0.006720 | -0.024000 | 1.000000 |
209836 | 67854.0 | COP | COPLEY PROPERTY INC | 1996-06-17 | 15.375 | 0.008197 | 0.008197 | 1.000000 |
209837 | 67854.0 | COP | COPLEY PROPERTY INC | 1996-06-18 | 15.375 | 0.000000 | 0.000000 | 1.000000 |
209838 | 67854.0 | COP | COPLEY PROPERTY INC | 1996-06-19 | 15.375 | 0.000000 | 0.000000 | 1.000000 |
209839 rows × 8 columns
dsenames
file to find unique ticker using "tsymbol", instead of "ticker".¶Now let's extract price data from CRSP using sql
statement
stocks = ['AAPL','TSLA','AMZN','FB', 'MSFT', 'WMT', 'HD', 'JNJ', 'JPM', 't']
# CRSP command takes tuples, not lists, so let's create tuples.
stocks = {'tickers': ('AAPL','TSLA','AMZN','FB', 'MSFT', 'WMT', 'HD', 'JNJ', 'JPM', 'COP')}
print(stocks)
type(stocks)
{'tickers': ('AAPL', 'TSLA', 'AMZN', 'FB', 'MSFT', 'WMT', 'HD', 'JNJ', 'JPM', 'COP')}
dict
raw_data_from_crsp = conn.raw_sql('select a.permno, a.ticker, a.comnam, a.tsymbol, b.date, b.prc, b.ret, b.retx, b.cfacpr from crsp.dsenames a join crsp.dsf b on a.permno = b.permno WHERE a.tsymbol in %(tickers)s and a.namedt <= b.date and b.date <= a.nameendt', params=stocks)
pd_data = pd.DataFrame(raw_data_from_crsp)
pd_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 56367 entries, 0 to 56366 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 permno 56367 non-null float64 1 ticker 56367 non-null object 2 comnam 56367 non-null object 3 tsymbol 56367 non-null object 4 date 56367 non-null object 5 prc 56367 non-null float64 6 ret 56363 non-null float64 7 retx 56363 non-null float64 8 cfacpr 56367 non-null float64 dtypes: float64(5), object(4) memory usage: 3.9+ MB
# note that the most recent date for which data is available is the March 31 2022
pd_data.tail()
permno | ticker | comnam | tsymbol | date | prc | ret | retx | cfacpr | |
---|---|---|---|---|---|---|---|---|---|
56362 | 93436.0 | TSLA | TESLA INC | TSLA | 2022-03-25 | 1010.640015 | -0.003235 | -0.003235 | 1.0 |
56363 | 93436.0 | TSLA | TESLA INC | TSLA | 2022-03-28 | 1091.839966 | 0.080345 | 0.080345 | 1.0 |
56364 | 93436.0 | TSLA | TESLA INC | TSLA | 2022-03-29 | 1099.569946 | 0.007080 | 0.007080 | 1.0 |
56365 | 93436.0 | TSLA | TESLA INC | TSLA | 2022-03-30 | 1093.989990 | -0.005075 | -0.005075 | 1.0 |
56366 | 93436.0 | TSLA | TESLA INC | TSLA | 2022-03-31 | 1077.599976 | -0.014982 | -0.014982 | 1.0 |
# convert date to datetime
pd_data['date'] = pd.to_datetime(pd_data['date'], format='%Y-%m-%d')
pd_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 56367 entries, 0 to 56366 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 permno 56367 non-null float64 1 ticker 56367 non-null object 2 comnam 56367 non-null object 3 tsymbol 56367 non-null object 4 date 56367 non-null datetime64[ns] 5 prc 56367 non-null float64 6 ret 56363 non-null float64 7 retx 56363 non-null float64 8 cfacpr 56367 non-null float64 dtypes: datetime64[ns](1), float64(5), object(3) memory usage: 3.9+ MB
# setting index
pd_data_index= pd_data.set_index('date')
pd_data_index.head()
permno | ticker | comnam | tsymbol | prc | ret | retx | cfacpr | |
---|---|---|---|---|---|---|---|---|
date | ||||||||
1986-03-13 | 10107.0 | MSFT | MICROSOFT CORP | MSFT | 28.00 | NaN | NaN | 288.0 |
1986-03-14 | 10107.0 | MSFT | MICROSOFT CORP | MSFT | 29.00 | 0.035714 | 0.035714 | 288.0 |
1986-03-17 | 10107.0 | MSFT | MICROSOFT CORP | MSFT | 29.50 | 0.017241 | 0.017241 | 288.0 |
1986-03-18 | 10107.0 | MSFT | MICROSOFT CORP | MSFT | 28.75 | -0.025424 | -0.025424 | 288.0 |
1986-03-19 | 10107.0 | MSFT | MICROSOFT CORP | MSFT | 28.25 | -0.017391 | -0.017391 | 288.0 |
# important: you need to adjust prices for splits
pd_data_index['adj_price'] = pd_data_index['prc'] / pd_data_index['cfacpr']
pd_data_index.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 56367 entries, 1986-03-13 to 2022-03-31 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 permno 56367 non-null float64 1 ticker 56367 non-null object 2 comnam 56367 non-null object 3 tsymbol 56367 non-null object 4 prc 56367 non-null float64 5 ret 56363 non-null float64 6 retx 56363 non-null float64 7 cfacpr 56367 non-null float64 8 adj_price 56367 non-null float64 dtypes: float64(6), object(3) memory usage: 4.3+ MB
#slicing ticker and price column
pd_data_index_tic_prc = pd_data_index[['ticker', 'prc']]
pd_data_index_tic_prc.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 56367 entries, 1986-03-13 to 2022-03-31 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ticker 56367 non-null object 1 prc 56367 non-null float64 dtypes: float64(1), object(1) memory usage: 1.3+ MB
b = pd_data_index_tic_prc['2016-01-01' <= pd_data_index_tic_prc.index]
b.head()
ticker | prc | |
---|---|---|
date | ||
2016-01-04 | MSFT | 54.799999 |
2016-01-05 | MSFT | 55.049999 |
2016-01-06 | MSFT | 54.049999 |
2016-01-07 | MSFT | 52.169998 |
2016-01-08 | MSFT | 52.330002 |
c = b[b.index <= '2022-03-30']
c.tail()
ticker | prc | |
---|---|---|
date | ||
2022-03-24 | TSLA | 1013.919983 |
2022-03-25 | TSLA | 1010.640015 |
2022-03-28 | TSLA | 1091.839966 |
2022-03-29 | TSLA | 1099.569946 |
2022-03-30 | TSLA | 1093.989990 |
# 564 obs for each stock times 10 = 5640
c.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 15720 entries, 2016-01-04 to 2022-03-30 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ticker 15720 non-null object 1 prc 15720 non-null float64 dtypes: float64(1), object(1) memory usage: 368.4+ KB
# I am transposing the table
table = c.pivot(columns='ticker')
# By specifying col[1] in below list comprehension
# You can select the stock names under multi-level column
table.columns = [col[1] for col in table.columns]
table.head()
AAPL | AMZN | COP | FB | HD | JNJ | JPM | MSFT | TSLA | WMT | |
---|---|---|---|---|---|---|---|---|---|---|
date | ||||||||||
2016-01-04 | 105.349998 | 636.989990 | 46.880001 | 102.220001 | 131.070007 | 100.480003 | 63.619999 | 54.799999 | 223.410004 | 61.459999 |
2016-01-05 | 102.709999 | 633.789978 | 47.400002 | 102.730003 | 130.429993 | 100.900002 | 63.730000 | 55.049999 | 223.429993 | 62.919998 |
2016-01-06 | 100.699997 | 632.650024 | 45.349998 | 102.970001 | 129.080002 | 100.389999 | 62.810001 | 54.049999 | 219.039993 | 63.549999 |
2016-01-07 | 96.449997 | 607.940002 | 44.060001 | 97.919998 | 125.400002 | 99.220001 | 60.270000 | 52.169998 | 215.649994 | 65.029999 |
2016-01-08 | 96.959999 | 607.049988 | 43.290001 | 97.330002 | 123.900002 | 98.160004 | 58.919998 | 52.330002 | 211.000000 | 63.540001 |
table.describe()
AAPL | AMZN | COP | FB | HD | JNJ | JPM | MSFT | TSLA | WMT | |
---|---|---|---|---|---|---|---|---|---|---|
count | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 |
mean | 175.330286 | 1902.983279 | 54.482258 | 199.213537 | 214.836648 | 138.392242 | 108.955929 | 144.355251 | 466.159653 | 104.908893 |
std | 69.720936 | 966.461703 | 13.797170 | 70.315908 | 72.500571 | 18.057629 | 29.268680 | 83.147982 | 313.004977 | 27.596488 |
min | 90.339996 | 482.070007 | 22.670000 | 94.160004 | 111.849998 | 95.750000 | 53.070000 | 48.430000 | 143.669998 | 60.840000 |
25% | 126.887501 | 978.232498 | 43.677500 | 148.205006 | 154.894997 | 126.040003 | 90.672501 | 72.514997 | 253.690002 | 78.907503 |
50% | 159.724998 | 1768.514954 | 53.010000 | 180.589996 | 197.290001 | 136.875000 | 107.010002 | 112.660000 | 333.779999 | 100.004997 |
75% | 198.482498 | 3042.352539 | 63.842500 | 239.764996 | 266.589989 | 148.975002 | 126.087500 | 210.292500 | 652.897491 | 132.330002 |
max | 506.089996 | 3731.409912 | 107.500000 | 382.179993 | 416.179993 | 179.589996 | 171.779999 | 343.109985 | 2238.750000 | 152.789993 |
table.describe([.01,.1, .9, .99])
AAPL | AMZN | COP | FB | HD | JNJ | JPM | MSFT | TSLA | WMT | |
---|---|---|---|---|---|---|---|---|---|---|
count | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 | 1572.000000 |
mean | 175.330286 | 1902.983279 | 54.482258 | 199.213537 | 214.836648 | 138.392242 | 108.955929 | 144.355251 | 466.159653 | 104.908893 |
std | 69.720936 | 966.461703 | 13.797170 | 70.315908 | 72.500571 | 18.057629 | 29.268680 | 83.147982 | 313.004977 | 27.596488 |
min | 90.339996 | 482.070007 | 22.670000 | 94.160004 | 111.849998 | 95.750000 | 53.070000 | 48.430000 | 143.669998 | 60.840000 |
1% | 93.958098 | 553.678387 | 30.198100 | 100.637500 | 120.730203 | 101.098801 | 57.068402 | 50.070000 | 180.739498 | 64.085202 |
10% | 108.184000 | 757.186993 | 39.472001 | 119.535997 | 132.811993 | 115.341997 | 65.810998 | 57.189999 | 212.926004 | 69.706997 |
50% | 159.724998 | 1768.514954 | 53.010000 | 180.589996 | 197.290001 | 136.875000 | 107.010002 | 112.660000 | 333.779999 | 100.004997 |
90% | 271.390991 | 3320.144116 | 71.685997 | 323.749991 | 325.509991 | 165.528999 | 155.119995 | 286.537009 | 879.536011 | 143.327997 |
99% | 441.468004 | 3589.468743 | 97.592703 | 370.564304 | 407.781591 | 175.211599 | 168.477704 | 336.495103 | 1548.547510 | 150.232899 |
max | 506.089996 | 3731.409912 | 107.500000 | 382.179993 | 416.179993 | 179.589996 | 171.779999 | 343.109985 | 2238.750000 | 152.789993 |
Let's first look at how the price of each stock has evolved within give time frame.
plt.figure(figsize=(14, 7))
for c in table.columns.values:
plt.plot(table.index, table[c], lw=3, alpha=0.8,label=c)
plt.legend(loc='upper left', fontsize=12)
plt.ylabel('price in $')
Text(0, 0.5, 'price in $')
Another way to plot this is plotting daily returns (percent change compared to the day before). By plotting daily returns instead of actual prices, we can see the stocks' volatility.
returns = table.pct_change()
plt.figure(figsize=(14, 7))
for c in returns.columns.values:
plt.plot(returns.index, returns[c], lw=3, alpha=0.8,label=c)
plt.legend(loc='upper right', fontsize=12)
plt.ylabel('daily returns')
Text(0, 0.5, 'daily returns')
returns.describe()
AAPL | AMZN | COP | FB | HD | JNJ | JPM | MSFT | TSLA | WMT | |
---|---|---|---|---|---|---|---|---|---|---|
count | 1571.000000 | 1571.000000 | 1571.000000 | 1571.000000 | 1571.000000 | 1571.000000 | 1571.000000 | 1571.000000 | 1571.000000 | 1571.000000 |
mean | 0.000895 | 0.001234 | 0.000838 | 0.000753 | 0.000672 | 0.000442 | 0.000675 | 0.001256 | 0.002125 | 0.000658 |
std | 0.026354 | 0.019121 | 0.026353 | 0.021755 | 0.015812 | 0.012041 | 0.018497 | 0.017024 | 0.041273 | 0.013474 |
min | -0.741522 | -0.079221 | -0.248401 | -0.263901 | -0.197938 | -0.100379 | -0.149649 | -0.147390 | -0.774862 | -0.101832 |
25% | -0.006677 | -0.007519 | -0.011754 | -0.008322 | -0.005609 | -0.004644 | -0.007231 | -0.005906 | -0.014911 | -0.005556 |
50% | 0.000988 | 0.001390 | 0.000000 | 0.001050 | 0.001029 | 0.000371 | 0.000259 | 0.001036 | 0.001367 | 0.000526 |
75% | 0.010258 | 0.010611 | 0.013214 | 0.011577 | 0.007966 | 0.005927 | 0.008627 | 0.009669 | 0.019257 | 0.006585 |
max | 0.119808 | 0.135359 | 0.252139 | 0.155214 | 0.137508 | 0.079977 | 0.180125 | 0.142169 | 0.198949 | 0.117085 |
Reference: Drechsler, Qingyi (Freda) S., 2022, Python Programs for Empirical Finance, https://www.fredasongdrechsler.com
sp500 = conn.raw_sql("""
select a.*, b.date, b.ret
from crsp.msp500list as a,
crsp.msf as b
where a.permno=b.permno
and b.date >= a.start and b.date<= a.ending
and b.date>='01/01/2000'
order by date;
""", date_cols=['start', 'ending', 'date'])
# Add Other Descriptive Variables
mse = conn.raw_sql("""
select comnam, ncusip, namedt, nameendt,
permno, shrcd, exchcd, hsiccd, ticker
from crsp.msenames
""", date_cols=['namedt', 'nameendt'])
# if nameendt is missing then set to today date
mse['nameendt']=mse['nameendt'].fillna(pd.to_datetime('today'))
# Merge with SP500 data
sp500_full = pd.merge(sp500, mse, how = 'left', on = 'permno')
# Impose the date range restrictions
sp500_full = sp500_full.loc[(sp500_full.date>=sp500_full.namedt) \
& (sp500_full.date<=sp500_full.nameendt)]
# Linking with Compustat through CCM
ccm=conn.raw_sql("""
select gvkey, liid as iid, lpermno as permno, linktype, linkprim,
linkdt, linkenddt
from crsp.ccmxpf_linktable
where substr(linktype,1,1)='L'
and (linkprim ='C' or linkprim='P')
""", date_cols=['linkdt', 'linkenddt'])
# if linkenddt is missing then set to today date
ccm['linkenddt']=ccm['linkenddt'].fillna(pd.to_datetime('today'))
# Merge the CCM data with S&P500 data
# First just link by matching PERMNO
sp500ccm = pd.merge(sp500_full, ccm, how='left', on=['permno'])
# Then set link date bounds
sp500ccm = sp500ccm.loc[(sp500ccm['date']>=sp500ccm['linkdt'])\
&(sp500ccm['date']<=sp500ccm['linkenddt'])]
# Rearrange columns for final output
sp500ccm = sp500ccm.drop(columns=['namedt', 'nameendt', \
'linktype', 'linkprim', 'linkdt', 'linkenddt'])
sp500ccm = sp500ccm[['date', 'permno', 'comnam', 'ncusip', 'shrcd', 'exchcd', 'hsiccd', 'ticker', \
'gvkey', 'iid', 'start', 'ending', 'ret']]
sp500ccm
date | permno | comnam | ncusip | shrcd | exchcd | hsiccd | ticker | gvkey | iid | start | ending | ret | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2000-01-31 | 40416.0 | AVON PRODUCTS INC | 05430310 | 11.0 | 1.0 | 2844.0 | AVP | 001920 | 01 | 1967-05-18 | 2015-03-20 | -0.035985 |
1 | 2000-01-31 | 44062.0 | SPRINGS INDUSTRIES INC | 85178310 | 11.0 | 1.0 | 2221.0 | SMI | 009963 | 01 | 1967-06-29 | 2000-12-11 | -0.089202 |
2 | 2000-01-31 | 26403.0 | DISNEY WALT CO | 25468710 | 11.0 | 1.0 | 4833.0 | DIS | 003980 | 01 | 1976-07-01 | 2022-03-31 | 0.241453 |
3 | 2000-01-31 | 60628.0 | FEDEX CORP | 31428X10 | 11.0 | 1.0 | 4513.0 | FDX | 004598 | 01 | 1980-11-06 | 2022-03-31 | -0.033588 |
4 | 2000-01-31 | 69032.0 | MORGAN STANLEY DEAN WITTER & CO | 61744644 | 11.0 | 1.0 | 6282.0 | MWD | 012124 | 01 | 1995-09-22 | 2022-03-31 | -0.069002 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
169638 | 2022-03-31 | 17478.0 | S & P GLOBAL INC | 78409V10 | 11.0 | 1.0 | 6282.0 | SPGI | 007163 | 01 | 1957-03-01 | 2022-03-31 | 0.091775 |
169639 | 2022-03-31 | 91152.0 | TRANSDIGM GROUP INC | 89364110 | 11.0 | 1.0 | 3728.0 | TDG | 148349 | 01 | 2016-06-03 | 2022-03-31 | -0.022578 |
169641 | 2022-03-31 | 24985.0 | AMEREN CORP | 02360810 | 11.0 | 1.0 | 4911.0 | AEE | 010860 | 01 | 1991-09-20 | 2022-03-31 | 0.097731 |
169642 | 2022-03-31 | 29946.0 | BROWN FORMAN CORP | 11563720 | 11.0 | 1.0 | 2085.0 | BF | 002435 | 01 | 1982-10-14 | 2022-03-31 | 0.030331 |
169643 | 2022-03-31 | 17700.0 | CERIDIAN H C M HOLDING INC | 15677J10 | 11.0 | 1.0 | 7372.0 | CDAY | 023546 | 01 | 2021-09-20 | 2022-03-31 | -0.062406 |
133640 rows × 13 columns
conn.describe_table('crsp','msi')
Approximately 1156 rows in crsp.msi.
name | nullable | type | |
---|---|---|---|
0 | date | True | DATE |
1 | vwretd | True | DOUBLE_PRECISION |
2 | vwretx | True | DOUBLE_PRECISION |
3 | ewretd | True | DOUBLE_PRECISION |
4 | ewretx | True | DOUBLE_PRECISION |
5 | sprtrn | True | DOUBLE_PRECISION |
6 | spindx | True | DOUBLE_PRECISION |
7 | totval | True | DOUBLE_PRECISION |
8 | totcnt | True | DOUBLE_PRECISION |
9 | usdval | True | DOUBLE_PRECISION |
10 | usdcnt | True | DOUBLE_PRECISION |
crsp_msi = conn.raw_sql("""
SELECT *
FROM crsp.msi
WHERE date BETWEEN '01/01/2020' AND '12/31/2021'
""", date_cols = ['date'])
crsp_msi
date | vwretd | vwretx | ewretd | ewretx | sprtrn | spindx | totval | totcnt | usdval | usdcnt | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-01-31 | -0.001728 | -0.002849 | -0.013334 | -0.014200 | -0.001628 | 3225.52 | 4.105976e+10 | 7267.0 | 4.111595e+10 | 7225.0 |
1 | 2020-02-28 | -0.077918 | -0.079868 | -0.069814 | -0.071321 | -0.084110 | 2954.22 | 3.777752e+10 | 7253.0 | 4.104252e+10 | 7213.0 |
2 | 2020-03-31 | -0.141733 | -0.143685 | -0.207501 | -0.209773 | -0.125119 | 2584.59 | 3.233352e+10 | 7226.0 | 3.774818e+10 | 7209.0 |
3 | 2020-04-30 | 0.129674 | 0.128408 | 0.153867 | 0.152519 | 0.126844 | 2912.43 | 3.650329e+10 | 7224.0 | 3.223454e+10 | 7195.0 |
4 | 2020-05-29 | 0.053739 | 0.051688 | 0.064070 | 0.062277 | 0.045282 | 3044.31 | 3.838482e+10 | 7217.0 | 3.642426e+10 | 7187.0 |
5 | 2020-06-30 | 0.025299 | 0.023522 | 0.057391 | 0.054855 | 0.018388 | 3100.29 | 3.943612e+10 | 7244.0 | 3.836352e+10 | 7177.0 |
6 | 2020-07-31 | 0.055529 | 0.054408 | 0.040737 | 0.039671 | 0.055101 | 3271.12 | 4.171822e+10 | 7273.0 | 3.941430e+10 | 7212.0 |
7 | 2020-08-31 | 0.068442 | 0.066798 | 0.038884 | 0.037315 | 0.070065 | 3500.31 | 4.460940e+10 | 7286.0 | 4.170614e+10 | 7230.0 |
8 | 2020-09-30 | -0.035056 | -0.036515 | -0.027356 | -0.029264 | -0.039228 | 3363.00 | 4.312178e+10 | 7336.0 | 4.460255e+10 | 7249.0 |
9 | 2020-10-30 | -0.020178 | -0.021218 | 0.000584 | -0.000483 | -0.027666 | 3269.96 | 4.229818e+10 | 7397.0 | 4.302700e+10 | 7289.0 |
10 | 2020-11-30 | 0.123707 | 0.122019 | 0.174412 | 0.172530 | 0.107546 | 3621.63 | 4.754201e+10 | 7457.0 | 4.219016e+10 | 7363.0 |
11 | 2020-12-31 | 0.045048 | 0.043244 | 0.072853 | 0.069761 | 0.037121 | 3756.07 | 4.983956e+10 | 7525.0 | 4.750443e+10 | 7418.0 |
12 | 2021-01-29 | -0.000631 | -0.001611 | 0.073192 | 0.072453 | -0.011137 | 3714.24 | 4.993087e+10 | 7606.0 | 4.977916e+10 | 7505.0 |
13 | 2021-02-26 | 0.029196 | 0.027888 | 0.067528 | 0.066370 | 0.026091 | 3811.15 | 5.154680e+10 | 7692.0 | 4.991748e+10 | 7587.0 |
14 | 2021-03-31 | 0.030573 | 0.029086 | 0.012449 | 0.010736 | 0.042439 | 3972.89 | 5.347375e+10 | 7838.0 | 5.151550e+10 | 7643.0 |
15 | 2021-04-30 | 0.048190 | 0.047301 | 0.016612 | 0.015805 | 0.052425 | 4181.17 | 5.625838e+10 | 7978.0 | 5.340985e+10 | 7798.0 |
16 | 2021-05-28 | 0.007092 | 0.005786 | 0.013179 | 0.011995 | 0.005487 | 4204.11 | 5.674877e+10 | 8085.0 | 5.622996e+10 | 7957.0 |
17 | 2021-06-30 | 0.023422 | 0.022021 | 0.021482 | 0.019631 | 0.022214 | 4297.50 | 5.826003e+10 | 8186.0 | 5.670801e+10 | 8053.0 |
18 | 2021-07-30 | 0.011828 | 0.010952 | -0.033644 | -0.034465 | 0.022748 | 4395.26 | 5.904011e+10 | 8310.0 | 5.816022e+10 | 8161.0 |
19 | 2021-08-31 | 0.027147 | 0.025842 | 0.018892 | 0.017615 | 0.028990 | 4522.68 | 6.067961e+10 | 8367.0 | 5.899042e+10 | 8280.0 |
20 | 2021-09-30 | -0.042243 | -0.043531 | -0.028472 | -0.030084 | -0.047569 | 4307.54 | 5.823152e+10 | 8473.0 | 6.065855e+10 | 8337.0 |
21 | 2021-10-29 | 0.064657 | 0.063766 | 0.026557 | 0.025517 | 0.069144 | 4605.38 | 6.209768e+10 | 8548.0 | 5.818295e+10 | 8419.0 |
22 | 2021-11-30 | -0.018347 | -0.019703 | -0.046540 | -0.047915 | -0.008334 | 4567.00 | 6.104353e+10 | 8644.0 | 6.204864e+10 | 8513.0 |
23 | 2021-12-31 | 0.033345 | 0.031577 | -0.001760 | -0.004799 | 0.043613 | 4766.18 | 6.314345e+10 | 8750.0 | 6.095814e+10 | 8598.0 |