!pip install wrds
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Collecting wrds Downloading wrds-3.1.1-py3-none-any.whl (12 kB) Collecting mock Downloading mock-4.0.3-py3-none-any.whl (28 kB) Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.7/dist-packages (from wrds) (1.4.37) Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from wrds) (1.21.6) Collecting psycopg2-binary Downloading psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB) |████████████████████████████████| 3.0 MB 8.6 MB/s Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from wrds) (1.3.5) Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->wrds) (2022.1) Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->wrds) (2.8.2) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->wrds) (1.15.0) Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from sqlalchemy->wrds) (4.11.4) Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.7/dist-packages (from sqlalchemy->wrds) (1.1.2) Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->sqlalchemy->wrds) (3.8.0) Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->sqlalchemy->wrds) (4.1.1) Installing collected packages: psycopg2-binary, mock, wrds Successfully installed mock-4.0.3 psycopg2-binary-2.9.3 wrds-3.1.1
import wrds
# Now I am connecting to WRDS site - you must ahve a WRDS account user id and password
conn=wrds.Connection()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
help(wrds)
Help on package wrds: NAME wrds DESCRIPTION WRDS Python Data Access Library ============================== WRDS-Py is a library for extracting data from WRDS data sources and getting it into Pandas. >>> import wrds >>> db = wrds.Connection() >>> db.list_libraries() ['aha', 'aha_sample', 'ahasamp', 'audit', 'audit_audit_comp', ...] >>> db.list_tables(library='crsp') ['acti', 'asia', 'asib', 'asic', 'asio', 'asix', 'bmdebt', 'bmheader', ...] >>> data = db.raw_sql('SELECT * FROM crsp.stocknames', index_col='permno') >>> data.head() permco namedt nameenddt cusip ncusip ticker permno 10000.0 7952.0 1986-01-07 1987-06-11 68391610 68391610 OMFGA 10001.0 7953.0 1986-01-09 1993-11-21 36720410 39040610 GFGC 10001.0 7953.0 1993-11-22 2008-02-04 36720410 29274A10 EWST 10001.0 7953.0 2008-02-05 2009-08-03 36720410 29274A20 EWST 10001.0 7953.0 2009-08-04 2009-12-17 36720410 29269V10 EGAS ... PACKAGE CONTENTS sql test DATA __copyright__ = '2017-2021 Wharton Research Data Services' __title__ = 'wrds-py' VERSION 3.1.1 AUTHOR Wharton Research Data Services FILE /usr/local/lib/python3.7/dist-packages/wrds/__init__.py
#listing all dtabases or libraries in WRDS
conn.list_libraries()
['compbd', 'compgd', 'compnad', 'compsegd', 'crspa', 'evts', 'issm', 'nastraq', 'tass', 'wrds_lib_internal', 'aha', 'estimize', 'levin', 'ciqsamp_capstrct', 'toyo', 'ahasamp', 'ciqsamp_pplintel', 'crspm', 'tresgsmp', 'trsdcgs', 'columnar', 'trws', 'lspd', 'twoiq', 'wcai', 'tresg', 'wind', 'wqa', 'msrb_all', 'custom_jl', 'risk', 'centris', 'wrdsapps_link_supplychain', 'block_all', 'zacks', 'crsp', 'contrib', 'audit', 'boardex_trial', 'emdb', 'calcbnch', 'comp_na_monthly_all', 'auditsmp', 'boardsmp', 'comp_segments_hist', 'wrdsrpts_rep_usage', 'public_all', 'pwt_all', 'msfanly', 'etfg', 'snlsamp_fig', 'wrdsapps_backtest_plus', 'pacap', 'ciqsamp_transcripts', 'ciq', 'tr_ds', 'ppublica', 'crsp_a_stock', 'preqin', 'etfg_samp', 'fisdsamp', 'windsmp', 'rent', 'sustainalyticssamp_all', 'wrdsapps_evtstudy_int_ginsight', 'compmcur', 'tr_dealscan', 'markit', 'secsamp_all', 'rpna', 'ibes', 'rpa', 'sdcsamp', 'wrdssec_midas', 'cisdmsmp', 'cboe_all', 'snl', 'cboe', 'sprat', 'ciqsamp', 'fjc_litigation', 'trucost', 'sustain', 'trdssamp', 'risksamp', 'midas', 'snlsamp', 'public', 'pwt', 'repsamp', 'snapsamp', 'taqsamp', 'totalq', 'optionm', 'comp_execucomp', 'fjc_linking', 'compa', 'bank', 'blab', 'compb', 'compg', 'block', 'taqmsec', 'trsamp_ds_eq', 'contrib_ceo_turnover', 'cisdm', 'clrvt', 'clrvtsmp', 'compm', 'compseg', 'compsamp', 'comph', 'compsnap', 'comscore', 'pitchbk', 'crspsamp', 'dmef', 'eursamp', 'contrib_kpss', 'auditsmp_all', 'crspq', 'execcomp', 'taqmsamp_all', 'ff', 'ftse', 'cusipm', 'boardex', 'eureka', 'eventus', 'ftsesamp', 'ginsight', 'ktsamp', 'factset', 'ifgrsamp', 'imssamp', 'gmi', 'fssamp', 'govpxsmp', 'hbsamp', 'hbase', 'hfr', 'ibescorp', 'mrktsamp_msf', 'iri', 'govpx', 'kld', 'ifgr', 'ibeskpi', 'fisd', 'lspdsamp', 'ims', 'ktmine', 'taq', 'aha_sample', 'compdcur', 'calcbench_trial', 'ciqsamp_common', 'ppubsamp_d4d', 'mrktsamp', 'otc', 'msrbsamp', 'contrib_general', 'preqsamp_all', 'twoiq_samp', 'msrb', 'mrktsamp_cdx', 'phlx_all', 'zacksamp_all', 'mfl', 'sustsamp', 'etfgsamp', 'phlx', 'ppubsamp', 'preqsamp', 'ciqsamp_keydev', 'tfn', 'toyosamp', 'wappsamp', 'wrdsapps_evtstudy_lr', 'crsp_a_ccm', 'zacksamp', 'revere', 'eurekahedge_sample', 'comp_na_daily_all', 'ims_obp_trial', 'wrdsrpts', 'compsamp_snapshot', 'comp_na_annual_all', 'rq_all', 'infogroupsamp_business', 'infogroupsamp_residential', 'ktmine_patents_samp', 'djones_all', 'doe_all', 'factsamp_all', 'toyosamp_all', 'wrdsapps_eushort', 'wrdsapps_subsidiary', 'wrdsappssamp_all', 'wrdsapps_link_datastream_wscope', 'wrdsapps_patents', 'dmef_all', 'macrofin_comm_trade', 'mrktsamp_cds', 'pitchsmp', 'wrdsapps_link_comp_eushort', 'totalq_all', 'trsamp_dscom', 'trsamp_dsecon', 'wrdsapps_finratio', 'reprisk', 'comp', 'trcstsmp', 'rpnasamp', 'audit_corp_legal', 'csmar', 'trsamp_dsfut', 'wrdsapps_backtest_basic', 'wrdsapps_link_crsp_bond', 'ff_all', 'wrdsapps_link_crsp_taq', 'contrib_intangible_value', 'wrdssec', 'wrdsapps', 'audit_common', 'contrib_char_returns', 'crsp_q_indexhist', 'bvd', 'secsamp', 'factsamp_revere', 'frb_all', 'hbase_sample', 'hfrsamp_hfrdb', 'trown', 'lvnsamp_all', 'risksamp_all', 'optionmsamp_europe', 'optionmsamp_us', 'reprisk_sample', 'wrdsapps_finratio_ccm', 'trace_standard', 'trdstrm', 'trace_enhanced', 'fjc', 'lvnsamp', 'omtrial', 'trace', 'bvdsamp', 'djones', 'doe', 'frb', 'hfrsamp', 'macrofin', 'comp_bank', 'trsamp', 'wrdsapps_link_crsp_factset', 'twoiqsmp', 'comp_bank_daily', 'audit_audit_comp', 'comp_segments_hist_daily', 'otc_endofday', 'dealscan', 'ravenpack_trial', 'msfinst', 'crsp_a_indexes', 'taqmsamp', 'iss', 'sdc']
conn.list_tables(library='comp')
['aco_amda', 'aco_imda', 'aco_indfnta', 'aco_indfntq', 'aco_indfntytd', 'aco_indsta', 'aco_indstq', 'aco_indstytd', 'aco_notesa', 'aco_notesq', 'aco_notessa', 'aco_notesytd', 'aco_pnfnda', 'aco_pnfndq', 'aco_pnfndytd', 'aco_pnfnta', 'aco_pnfntq', 'aco_pnfntytd', 'aco_transa', 'aco_transq', 'aco_transsa', 'aco_transytd', 'adsprate', 'asec_amda', 'asec_imda', 'asec_notesa', 'asec_notesq', 'asec_transa', 'asec_transq', 'bank_aacctchg', 'bank_adesind', 'bank_afnd1', 'bank_afnd2', 'bank_afnddc1', 'bank_afnddc2', 'bank_afntind', 'bank_funda', 'bank_funda_fncd', 'bank_fundq', 'bank_fundq_fncd', 'bank_iacctchg', 'bank_idesind', 'bank_ifndq', 'bank_ifndytd', 'bank_ifntq', 'bank_ifntytd', 'bank_names', 'bank_namesq', 'chars', 'co_aacctchg', 'co_aaudit', 'co_adesind', 'co_adjfact', 'co_afnd1', 'co_afnd2', 'co_afnddc1', 'co_afnddc2', 'co_afntind1', 'co_afntind2', 'co_ainvval', 'co_amkt', 'co_busdescl', 'co_cotype', 'co_filedate', 'co_fortune', 'co_hgic', 'co_iacctchg', 'co_iaudit', 'co_idesind', 'co_ifndq', 'co_ifndsa', 'co_ifndytd', 'co_ifntq', 'co_ifntsa', 'co_ifntytd', 'co_imkt', 'co_industry', 'co_ipcd', 'co_mthly', 'co_offtitl', 'company', 'currency', 'dd_group', 'dd_group_xref', 'dd_item', 'dd_package', 'ecind_desc', 'ecind_mth', 'exrt_dly', 'exrt_mth', 'filings', 'funda', 'funda_fncd', 'fundq', 'fundq_fncd', 'g_chars', 'g_co_aaudit', 'g_co_adesind', 'g_co_afnd1', 'g_co_afnd2', 'g_co_afnddc1', 'g_co_afnddc2', 'g_co_afntind1', 'g_co_afntind2', 'g_co_ainvval', 'g_co_gsuppl', 'g_co_hgic', 'g_co_iaudit', 'g_co_idesind', 'g_co_ifndq', 'g_co_ifndsa', 'g_co_ifndytd', 'g_co_ifntq', 'g_co_ifntsa', 'g_co_ifntytd', 'g_co_industry', 'g_co_ipcd', 'g_co_offtitl', 'g_company', 'g_currency', 'g_ecind_desc', 'g_ecind_mth', 'g_exrt_dly', 'g_exrt_mth', 'g_funda', 'g_funda_fncd', 'g_fundq', 'g_fundq_fncd', 'g_idx_daily', 'g_idx_index', 'g_idx_mth', 'g_idxcst_his', 'g_names', 'g_names_ix', 'g_names_ix_cst', 'g_namesq', 'g_sec_adesind', 'g_sec_adjfact', 'g_sec_afnd', 'g_sec_afnddc', 'g_sec_afnt', 'g_sec_divid', 'g_sec_dprc', 'g_sec_dtrt', 'g_sec_history', 'g_sec_idesind', 'g_sec_ifnd', 'g_sec_ifnt', 'g_sec_split', 'g_secd', 'g_secnamesd', 'g_security', 'g_sedolgvkey', 'g_tmptable_pkg6153_tbl4023', 'idx_ann', 'idx_anndes', 'idx_daily', 'idx_index', 'idx_mth', 'idx_qrt', 'idx_qrtdes', 'idxcst_his', 'io_qaggregate', 'io_qbuysell', 'io_qchanges', 'io_qfloatadj', 'io_qholders', 'it_mbuysell', 'it_msummary', 'it_r_rltn', 'names', 'names_aco_indsta', 'names_aco_indstq', 'names_aco_pnfnda', 'names_aco_pnfndq', 'names_adsprate', 'names_ix', 'names_ix_cst', 'names_seg', 'namesd', 'namesm', 'namesq', 'r_accstd', 'r_acqmeth', 'r_auditors', 'r_auopic', 'r_balpres', 'r_cf_formt', 'r_co_status', 'r_coindpre', 'r_compstat', 'r_consol', 'r_country', 'r_cstclscd', 'r_datacode', 'r_datafmt', 'r_divtaxmarker', 'r_docsrce', 'r_ex_codes', 'r_exchgtier', 'r_exrt_typ', 'r_fndfntcd', 'r_footnts', 'r_foricd', 'r_giccd', 'r_hcalendr', 'r_idxclscd', 'r_inactvcd', 'r_incstats', 'r_indfmt', 'r_indsec', 'r_invval', 'r_issuetyp', 'r_majidxcl', 'r_mic_codes', 'r_naiccd', 'r_notetype', 'r_ntsubtype', 'r_offcrso', 'r_ogmethod', 'r_opinions', 'r_prc_stat', 'r_qsrcdoc', 'r_sec_stat', 'r_secannfn', 'r_sectors', 'r_siccd', 'r_spiicd', 'r_spmicd', 'r_statalrt', 'r_states', 'r_stko', 'r_titles', 'r_updates', 'sec_adesind', 'sec_adjfact', 'sec_afnd', 'sec_afnddc', 'sec_afnt', 'sec_divid', 'sec_dprc', 'sec_dtrt', 'sec_history', 'sec_idesind', 'sec_ifnd', 'sec_ifnt', 'sec_mdivfn', 'sec_mshare', 'sec_msptfn', 'sec_mth', 'sec_mthdiv', 'sec_mthprc', 'sec_mthspt', 'sec_mthtrt', 'sec_shortint', 'sec_spind', 'sec_split', 'secd', 'secm', 'security', 'sedolgvkey', 'seg_ann', 'seg_annfund', 'seg_customer', 'seg_geo', 'seg_naics', 'seg_product', 'seg_type', 'spidx_cst', 'spind', 'spind_dly', 'spind_mth', 'tmptable_pkg6153_tbl4023', 'wrds_seg_customer', 'wrds_seg_geo', 'wrds_seg_product', 'wrds_segmerged', 'xfl_column', 'xfl_table']
sp500 = conn.raw_sql("""
select a.*, b.date, b.ret
from crsp.msp500list as a,
crsp.msf as b
where a.permno=b.permno
and b.date >= a.start and b.date<= a.ending
and b.date>='01/01/2000'
order by date;
""", date_cols=['start', 'ending', 'date'])
sp500
permno | start | ending | date | ret | |
---|---|---|---|---|---|
0 | 40416.0 | 1967-05-18 | 2015-03-20 | 2000-01-31 | -0.035985 |
1 | 44062.0 | 1967-06-29 | 2000-12-11 | 2000-01-31 | -0.089202 |
2 | 26403.0 | 1976-07-01 | 2022-03-31 | 2000-01-31 | 0.241453 |
3 | 60628.0 | 1980-11-06 | 2022-03-31 | 2000-01-31 | -0.033588 |
4 | 69032.0 | 1995-09-22 | 2022-03-31 | 2000-01-31 | -0.069002 |
... | ... | ... | ... | ... | ... |
133917 | 17478.0 | 1957-03-01 | 2022-03-31 | 2022-03-31 | 0.091775 |
133918 | 91152.0 | 2016-06-03 | 2022-03-31 | 2022-03-31 | -0.022578 |
133919 | 24985.0 | 1991-09-20 | 2022-03-31 | 2022-03-31 | 0.097731 |
133920 | 29946.0 | 1982-10-14 | 2022-03-31 | 2022-03-31 | 0.030331 |
133921 | 17700.0 | 2021-09-20 | 2022-03-31 | 2022-03-31 | -0.062406 |
133922 rows × 5 columns
# Add Other Descriptive Variables
mse = conn.raw_sql("""
select comnam, ncusip, namedt, nameendt,
permno, shrcd, exchcd, hsiccd, ticker
from crsp.msenames
""", date_cols=['namedt', 'nameendt'])
# if nameendt is missing then set to today date
mse['nameendt']=mse['nameendt'].fillna(pd.to_datetime('today'))
# Merge with SP500 data
sp500_full = pd.merge(sp500, mse, how = 'left', on = 'permno')
# Impose the date range restrictions
sp500_full = sp500_full.loc[(sp500_full.date>=sp500_full.namedt) \
& (sp500_full.date<=sp500_full.nameendt)]
# Linking with Compustat through CCM
ccm=conn.raw_sql("""
select gvkey, liid as iid, lpermno as permno, linktype, linkprim,
linkdt, linkenddt
from crsp.ccmxpf_linktable
where substr(linktype,1,1)='L'
and (linkprim ='C' or linkprim='P')
""", date_cols=['linkdt', 'linkenddt'])
# if linkenddt is missing then set to today date
ccm['linkenddt']=ccm['linkenddt'].fillna(pd.to_datetime('today'))
# Merge the CCM data with S&P500 data
# First just link by matching PERMNO
sp500ccm = pd.merge(sp500_full, ccm, how='left', on=['permno'])
# Then set link date bounds
sp500ccm = sp500ccm.loc[(sp500ccm['date']>=sp500ccm['linkdt'])\
&(sp500ccm['date']<=sp500ccm['linkenddt'])]
# Rearrange columns for final output
sp500ccm = sp500ccm.drop(columns=['namedt', 'nameendt', \
'linktype', 'linkprim', 'linkdt', 'linkenddt'])
sp500ccm = sp500ccm[['date', 'permno', 'comnam', 'ncusip', 'shrcd', 'exchcd', 'hsiccd', 'ticker', \
'gvkey', 'iid', 'start', 'ending', 'ret']]
sp500ccm
date | permno | comnam | ncusip | shrcd | exchcd | hsiccd | ticker | gvkey | iid | start | ending | ret | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2000-01-31 | 40416.0 | AVON PRODUCTS INC | 05430310 | 11.0 | 1.0 | 2844.0 | AVP | 001920 | 01 | 1967-05-18 | 2015-03-20 | -0.035985 |
1 | 2000-01-31 | 44062.0 | SPRINGS INDUSTRIES INC | 85178310 | 11.0 | 1.0 | 2221.0 | SMI | 009963 | 01 | 1967-06-29 | 2000-12-11 | -0.089202 |
2 | 2000-01-31 | 26403.0 | DISNEY WALT CO | 25468710 | 11.0 | 1.0 | 4833.0 | DIS | 003980 | 01 | 1976-07-01 | 2022-03-31 | 0.241453 |
3 | 2000-01-31 | 60628.0 | FEDEX CORP | 31428X10 | 11.0 | 1.0 | 4513.0 | FDX | 004598 | 01 | 1980-11-06 | 2022-03-31 | -0.033588 |
4 | 2000-01-31 | 69032.0 | MORGAN STANLEY DEAN WITTER & CO | 61744644 | 11.0 | 1.0 | 6282.0 | MWD | 012124 | 01 | 1995-09-22 | 2022-03-31 | -0.069002 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
169638 | 2022-03-31 | 17478.0 | S & P GLOBAL INC | 78409V10 | 11.0 | 1.0 | 6282.0 | SPGI | 007163 | 01 | 1957-03-01 | 2022-03-31 | 0.091775 |
169639 | 2022-03-31 | 91152.0 | TRANSDIGM GROUP INC | 89364110 | 11.0 | 1.0 | 3728.0 | TDG | 148349 | 01 | 2016-06-03 | 2022-03-31 | -0.022578 |
169641 | 2022-03-31 | 24985.0 | AMEREN CORP | 02360810 | 11.0 | 1.0 | 4911.0 | AEE | 010860 | 01 | 1991-09-20 | 2022-03-31 | 0.097731 |
169642 | 2022-03-31 | 29946.0 | BROWN FORMAN CORP | 11563720 | 11.0 | 1.0 | 2085.0 | BF | 002435 | 01 | 1982-10-14 | 2022-03-31 | 0.030331 |
169643 | 2022-03-31 | 17700.0 | CERIDIAN H C M HOLDING INC | 15677J10 | 11.0 | 1.0 | 7372.0 | CDAY | 023546 | 01 | 2021-09-20 | 2022-03-31 | -0.062406 |
133640 rows × 13 columns
sp500ccm.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 133640 entries, 0 to 169643 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 133640 non-null datetime64[ns] 1 permno 133640 non-null float64 2 comnam 133640 non-null object 3 ncusip 133640 non-null object 4 shrcd 133640 non-null float64 5 exchcd 133640 non-null float64 6 hsiccd 133640 non-null float64 7 ticker 133640 non-null object 8 gvkey 133640 non-null object 9 iid 133640 non-null object 10 start 133640 non-null datetime64[ns] 11 ending 133640 non-null datetime64[ns] 12 ret 133576 non-null float64 dtypes: datetime64[ns](3), float64(5), object(5) memory usage: 14.3+ MB
import pandas as pd
table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
df = table[0]
df
Symbol | Security | SEC filings | GICS Sector | GICS Sub-Industry | Headquarters Location | Date first added | CIK | Founded | |
---|---|---|---|---|---|---|---|---|---|
0 | MMM | 3M | reports | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1976-08-09 | 66740 | 1902 |
1 | AOS | A. O. Smith | reports | Industrials | Building Products | Milwaukee, Wisconsin | 2017-07-26 | 91142 | 1916 |
2 | ABT | Abbott | reports | Health Care | Health Care Equipment | North Chicago, Illinois | 1964-03-31 | 1800 | 1888 |
3 | ABBV | AbbVie | reports | Health Care | Pharmaceuticals | North Chicago, Illinois | 2012-12-31 | 1551152 | 2013 (1888) |
4 | ABMD | Abiomed | reports | Health Care | Health Care Equipment | Danvers, Massachusetts | 2018-05-31 | 815094 | 1981 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
498 | YUM | Yum! Brands | reports | Consumer Discretionary | Restaurants | Louisville, Kentucky | 1997-10-06 | 1041061 | 1997 |
499 | ZBRA | Zebra Technologies | reports | Information Technology | Electronic Equipment & Instruments | Lincolnshire, Illinois | 2019-12-23 | 877212 | 1969 |
500 | ZBH | Zimmer Biomet | reports | Health Care | Health Care Equipment | Warsaw, Indiana | 2001-08-07 | 1136869 | 1927 |
501 | ZION | Zions Bancorporation | reports | Financials | Regional Banks | Salt Lake City, Utah | 2001-06-22 | 109380 | 1873 |
502 | ZTS | Zoetis | reports | Health Care | Pharmaceuticals | Parsippany, New Jersey | 2013-06-21 | 1555280 | 1952 |
503 rows × 9 columns
import bs4 as bs
import pickle
import requests
def save_sp500_tickers():
resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = bs.BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})
tickers = []
for row in table.findAll('tr')[1:]:
ticker = row.findAll('td')[0].text
tickers.append(ticker)
with open("sp500tickers.pickle","wb") as f:
pickle.dump(tickers,f)
return tickers
save_sp500_tickers()
['MMM\n', 'AOS\n', 'ABT\n', 'ABBV\n', 'ABMD\n', 'ACN\n', 'ATVI\n', 'ADM\n', 'ADBE\n', 'ADP\n', 'AAP\n', 'AES\n', 'AFL\n', 'A\n', 'APD\n', 'AKAM\n', 'ALK\n', 'ALB\n', 'ARE\n', 'ALGN\n', 'ALLE\n', 'LNT\n', 'ALL\n', 'GOOGL\n', 'GOOG\n', 'MO\n', 'AMZN\n', 'AMCR\n', 'AMD\n', 'AEE\n', 'AAL\n', 'AEP\n', 'AXP\n', 'AIG\n', 'AMT\n', 'AWK\n', 'AMP\n', 'ABC\n', 'AME\n', 'AMGN\n', 'APH\n', 'ADI\n', 'ANSS\n', 'ANTM\n', 'AON\n', 'APA\n', 'AAPL\n', 'AMAT\n', 'APTV\n', 'ANET\n', 'AJG\n', 'AIZ\n', 'T\n', 'ATO\n', 'ADSK\n', 'AZO\n', 'AVB\n', 'AVY\n', 'BKR\n', 'BALL\n', 'BAC\n', 'BBWI\n', 'BAX\n', 'BDX\n', 'WRB\n', 'BRK.B\n', 'BBY\n', 'BIO\n', 'TECH\n', 'BIIB\n', 'BLK\n', 'BK\n', 'BA\n', 'BKNG\n', 'BWA\n', 'BXP\n', 'BSX\n', 'BMY\n', 'AVGO\n', 'BR\n', 'BRO\n', 'BF.B\n', 'CHRW\n', 'CDNS\n', 'CZR\n', 'CPT\n', 'CPB\n', 'COF\n', 'CAH\n', 'KMX\n', 'CCL\n', 'CARR\n', 'CTLT\n', 'CAT\n', 'CBOE\n', 'CBRE\n', 'CDW\n', 'CE\n', 'CNC\n', 'CNP\n', 'CDAY\n', 'CF\n', 'CRL\n', 'SCHW\n', 'CHTR\n', 'CVX\n', 'CMG\n', 'CB\n', 'CHD\n', 'CI\n', 'CINF\n', 'CTAS\n', 'CSCO\n', 'C\n', 'CFG\n', 'CTXS\n', 'CLX\n', 'CME\n', 'CMS\n', 'KO\n', 'CTSH\n', 'CL\n', 'CMCSA\n', 'CMA\n', 'CAG\n', 'COP\n', 'ED\n', 'STZ\n', 'CEG\n', 'COO\n', 'CPRT\n', 'GLW\n', 'CTVA\n', 'COST\n', 'CTRA\n', 'CCI\n', 'CSX\n', 'CMI\n', 'CVS\n', 'DHI\n', 'DHR\n', 'DRI\n', 'DVA\n', 'DE\n', 'DAL\n', 'XRAY\n', 'DVN\n', 'DXCM\n', 'FANG\n', 'DLR\n', 'DFS\n', 'DISH\n', 'DIS\n', 'DG\n', 'DLTR\n', 'D\n', 'DPZ\n', 'DOV\n', 'DOW\n', 'DTE\n', 'DUK\n', 'DRE\n', 'DD\n', 'DXC\n', 'EMN\n', 'ETN\n', 'EBAY\n', 'ECL\n', 'EIX\n', 'EW\n', 'EA\n', 'LLY\n', 'EMR\n', 'ENPH\n', 'ETR\n', 'EOG\n', 'EPAM\n', 'EFX\n', 'EQIX\n', 'EQR\n', 'ESS\n', 'EL\n', 'ETSY\n', 'RE\n', 'EVRG\n', 'ES\n', 'EXC\n', 'EXPE\n', 'EXPD\n', 'EXR\n', 'XOM\n', 'FFIV\n', 'FDS\n', 'FAST\n', 'FRT\n', 'FDX\n', 'FITB\n', 'FRC\n', 'FE\n', 'FIS\n', 'FISV\n', 'FLT\n', 'FMC\n', 'F\n', 'FTNT\n', 'FTV\n', 'FBHS\n', 'FOXA\n', 'FOX\n', 'BEN\n', 'FCX\n', 'GRMN\n', 'IT\n', 'GNRC\n', 'GD\n', 'GE\n', 'GIS\n', 'GM\n', 'GPC\n', 'GILD\n', 'GL\n', 'GPN\n', 'GS\n', 'HAL\n', 'HIG\n', 'HAS\n', 'HCA\n', 'PEAK\n', 'HSIC\n', 'HSY\n', 'HES\n', 'HPE\n', 'HLT\n', 'HOLX\n', 'HD\n', 'HON\n', 'HRL\n', 'HST\n', 'HWM\n', 'HPQ\n', 'HUM\n', 'HBAN\n', 'HII\n', 'IBM\n', 'IEX\n', 'IDXX\n', 'ITW\n', 'ILMN\n', 'INCY\n', 'IR\n', 'INTC\n', 'ICE\n', 'IP\n', 'IPG\n', 'IFF\n', 'INTU\n', 'ISRG\n', 'IVZ\n', 'IQV\n', 'IRM\n', 'JBHT\n', 'JKHY\n', 'J\n', 'JNJ\n', 'JCI\n', 'JPM\n', 'JNPR\n', 'K\n', 'KDP\n', 'KEY\n', 'KEYS\n', 'KMB\n', 'KIM\n', 'KMI\n', 'KLAC\n', 'KHC\n', 'KR\n', 'LHX\n', 'LH\n', 'LRCX\n', 'LW\n', 'LVS\n', 'LDOS\n', 'LEN\n', 'LNC\n', 'LIN\n', 'LYV\n', 'LKQ\n', 'LMT\n', 'L\n', 'LOW\n', 'LUMN\n', 'LYB\n', 'MTB\n', 'MRO\n', 'MPC\n', 'MKTX\n', 'MAR\n', 'MMC\n', 'MLM\n', 'MAS\n', 'MA\n', 'MTCH\n', 'MKC\n', 'MCD\n', 'MCK\n', 'MDT\n', 'MRK\n', 'META\n', 'MET\n', 'MTD\n', 'MGM\n', 'MCHP\n', 'MU\n', 'MSFT\n', 'MAA\n', 'MRNA\n', 'MHK\n', 'MOH\n', 'TAP\n', 'MDLZ\n', 'MPWR\n', 'MNST\n', 'MCO\n', 'MS\n', 'MOS\n', 'MSI\n', 'MSCI\n', 'NDAQ\n', 'NTAP\n', 'NFLX\n', 'NWL\n', 'NEM\n', 'NWSA\n', 'NWS\n', 'NEE\n', 'NLSN\n', 'NKE\n', 'NI\n', 'NDSN\n', 'NSC\n', 'NTRS\n', 'NOC\n', 'NLOK\n', 'NCLH\n', 'NRG\n', 'NUE\n', 'NVDA\n', 'NVR\n', 'NXPI\n', 'ORLY\n', 'OXY\n', 'ODFL\n', 'OMC\n', 'ON\n', 'OKE\n', 'ORCL\n', 'OGN\n', 'OTIS\n', 'PCAR\n', 'PKG\n', 'PARA\n', 'PH\n', 'PAYX\n', 'PAYC\n', 'PYPL\n', 'PENN\n', 'PNR\n', 'PEP\n', 'PKI\n', 'PFE\n', 'PM\n', 'PSX\n', 'PNW\n', 'PXD\n', 'PNC\n', 'POOL\n', 'PPG\n', 'PPL\n', 'PFG\n', 'PG\n', 'PGR\n', 'PLD\n', 'PRU\n', 'PEG\n', 'PTC\n', 'PSA\n', 'PHM\n', 'PVH\n', 'QRVO\n', 'PWR\n', 'QCOM\n', 'DGX\n', 'RL\n', 'RJF\n', 'RTX\n', 'O\n', 'REG\n', 'REGN\n', 'RF\n', 'RSG\n', 'RMD\n', 'RHI\n', 'ROK\n', 'ROL\n', 'ROP\n', 'ROST\n', 'RCL\n', 'SPGI\n', 'CRM\n', 'SBAC\n', 'SLB\n', 'STX\n', 'SEE\n', 'SRE\n', 'NOW\n', 'SHW\n', 'SBNY\n', 'SPG\n', 'SWKS\n', 'SJM\n', 'SNA\n', 'SEDG\n', 'SO\n', 'LUV\n', 'SWK\n', 'SBUX\n', 'STT\n', 'STE\n', 'SYK\n', 'SIVB\n', 'SYF\n', 'SNPS\n', 'SYY\n', 'TMUS\n', 'TROW\n', 'TTWO\n', 'TPR\n', 'TGT\n', 'TEL\n', 'TDY\n', 'TFX\n', 'TER\n', 'TSLA\n', 'TXN\n', 'TXT\n', 'TMO\n', 'TJX\n', 'TSCO\n', 'TT\n', 'TDG\n', 'TRV\n', 'TRMB\n', 'TFC\n', 'TWTR\n', 'TYL\n', 'TSN\n', 'USB\n', 'UDR\n', 'ULTA\n', 'UNP\n', 'UAL\n', 'UPS\n', 'URI\n', 'UNH\n', 'UHS\n', 'VLO\n', 'VTR\n', 'VRSN\n', 'VRSK\n', 'VZ\n', 'VRTX\n', 'VFC\n', 'VTRS\n', 'VICI\n', 'V\n', 'VNO\n', 'VMC\n', 'WAB\n', 'WBA\n', 'WMT\n', 'WBD\n', 'WM\n', 'WAT\n', 'WEC\n', 'WFC\n', 'WELL\n', 'WST\n', 'WDC\n', 'WRK\n', 'WY\n', 'WHR\n', 'WMB\n', 'WTW\n', 'GWW\n', 'WYNN\n', 'XEL\n', 'XYL\n', 'YUM\n', 'ZBRA\n', 'ZBH\n', 'ZION\n', 'ZTS\n']