!pip install wrds import wrds # Now I am connecting to WRDS site - you must ahve a WRDS account user id and password conn=wrds.Connection() import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns plt.style.use('fivethirtyeight') %matplotlib inline %config InlineBackend.figure_format = 'retina' help(wrds) #listing all dtabases or libraries in WRDS conn.list_libraries() conn.list_tables(library='comp') sp500 = conn.raw_sql(""" select a.*, b.date, b.ret from crsp.msp500list as a, crsp.msf as b where a.permno=b.permno and b.date >= a.start and b.date<= a.ending and b.date>='01/01/2000' order by date; """, date_cols=['start', 'ending', 'date']) sp500 # Add Other Descriptive Variables mse = conn.raw_sql(""" select comnam, ncusip, namedt, nameendt, permno, shrcd, exchcd, hsiccd, ticker from crsp.msenames """, date_cols=['namedt', 'nameendt']) # if nameendt is missing then set to today date mse['nameendt']=mse['nameendt'].fillna(pd.to_datetime('today')) # Merge with SP500 data sp500_full = pd.merge(sp500, mse, how = 'left', on = 'permno') # Impose the date range restrictions sp500_full = sp500_full.loc[(sp500_full.date>=sp500_full.namedt) \ & (sp500_full.date<=sp500_full.nameendt)] # Linking with Compustat through CCM ccm=conn.raw_sql(""" select gvkey, liid as iid, lpermno as permno, linktype, linkprim, linkdt, linkenddt from crsp.ccmxpf_linktable where substr(linktype,1,1)='L' and (linkprim ='C' or linkprim='P') """, date_cols=['linkdt', 'linkenddt']) # if linkenddt is missing then set to today date ccm['linkenddt']=ccm['linkenddt'].fillna(pd.to_datetime('today')) # Merge the CCM data with S&P500 data # First just link by matching PERMNO sp500ccm = pd.merge(sp500_full, ccm, how='left', on=['permno']) # Then set link date bounds sp500ccm = sp500ccm.loc[(sp500ccm['date']>=sp500ccm['linkdt'])\ &(sp500ccm['date']<=sp500ccm['linkenddt'])] # Rearrange columns for final output sp500ccm = sp500ccm.drop(columns=['namedt', 'nameendt', \ 'linktype', 'linkprim', 'linkdt', 'linkenddt']) sp500ccm = sp500ccm[['date', 'permno', 'comnam', 'ncusip', 'shrcd', 'exchcd', 'hsiccd', 'ticker', \ 'gvkey', 'iid', 'start', 'ending', 'ret']] sp500ccm sp500ccm.info() import pandas as pd table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies') df = table[0] df import bs4 as bs import pickle import requests def save_sp500_tickers(): resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies') soup = bs.BeautifulSoup(resp.text, 'lxml') table = soup.find('table', {'class': 'wikitable sortable'}) tickers = [] for row in table.findAll('tr')[1:]: ticker = row.findAll('td')[0].text tickers.append(ticker) with open("sp500tickers.pickle","wb") as f: pickle.dump(tickers,f) return tickers save_sp500_tickers()