#!/usr/bin/env python
# coding: utf-8

"""Exploratory OLS regression of S&P 500 P/E ratios on firm characteristics.

Data source: FactSet, searching for all S&P 500 companies' P/E ratios.
There is no theoretical justification for the model specification; the set
of regressors is purely exploratory.
"""

import os

import numpy as np
import pandas as pd

# Location of the FactSet extract used by the analysis.
DATA_URL = (
    "https://raw.githubusercontent.com/cyrus723/my-first-binder/"
    "main/data/sp500.csv"
)

# Patsy formula for the regression; `log_mv` is derived in
# `build_regression_sample`, every other term is read from the CSV as-is.
FORMULA = (
    "pe ~ beta + roe + de + altman + log_mv + gm + gsales + pvol + cap_s"
    " + dy + cr + cur"
)


def load_data(url: str = DATA_URL) -> pd.DataFrame:
    """Read the CSV at *url* (first row is the header) into a DataFrame."""
    return pd.read_csv(url, header=0)


def trim_outliers(
    frame: pd.DataFrame, max_pe: float = 200, max_gm: float = 10000
) -> pd.DataFrame:
    """Return the rows of *frame* with ``pe < max_pe`` and ``gm < max_gm``.

    Rows where either column is missing are dropped as well, because a
    comparison against NaN is false. Only the scatter plot uses this view;
    the regression sample is built separately from the untrimmed data.
    """
    keep = (frame["pe"] < max_pe) & (frame["gm"] < max_gm)
    return frame.loc[keep]


def build_regression_sample(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* restricted to usable ``mv`` plus ``log_mv``.

    Rows with a missing ``mv`` are dropped. Rows with a non-positive ``mv``
    are dropped too: their logarithm is ``-inf``/NaN and an infinite
    regressor would make the OLS fit fail. The input frame is never
    modified; the new column is written to an explicit copy, which also
    avoids pandas' SettingWithCopyWarning.
    """
    sample = frame.dropna(subset=["mv"], axis=0)
    sample = sample.loc[sample["mv"] > 0].copy()
    sample["log_mv"] = np.log(sample["mv"])
    return sample


def numeric_correlations(frame: pd.DataFrame) -> pd.DataFrame:
    """Return the pairwise correlation matrix of the numeric columns.

    Non-numeric columns are excluded up front because ``DataFrame.corr()``
    raises on them from pandas 2.0 onwards.
    """
    return frame.select_dtypes(include="number").corr()


def main():
    """Run the full analysis and return ``(data, data2, results)``.

    ``data`` is the raw extract, ``data2`` the regression sample and
    ``results`` the fitted statsmodels OLS results object.
    """
    # Plotting/modelling libraries are imported here rather than at module
    # level so the pandas-only helpers above can be imported without them.
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import statsmodels.api as sm  # noqa: F401  (kept from the notebook's import cell)
    import statsmodels.formula.api as smf

    # The bare "seaborn" style was renamed "seaborn-v0_8" in Matplotlib 3.6
    # and the old name was later removed; use whichever one is installed.
    style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
    plt.style.use(style)
    mpl.rcParams["font.family"] = "serif"

    # `get_ipython` only exists inside IPython/Jupyter; skip the inline
    # backend magic when running as a plain script.
    try:
        shell = get_ipython()
    except NameError:
        shell = None
    if shell is not None:
        shell.run_line_magic("matplotlib", "inline")

    # Bare expressions only display inside a notebook, so values the
    # notebook showed are printed explicitly here.
    print(os.getcwd())

    data = load_data()
    data.info()

    data["cr"].plot.hist()
    trim_outliers(data).plot.scatter(x="pe", y="gm")

    # NOTE(review): as in the original notebook, the regression uses every
    # row with a valid `mv`, not the outlier-trimmed frame plotted above.
    data2 = build_regression_sample(data)
    data2.info()
    print(data2.describe())

    results = smf.ols(FORMULA, data2).fit()
    print(results.summary())

    print(numeric_correlations(data2))
    return data, data2, results


if __name__ == "__main__":
    # Rebind the notebook's variable names at module level so they remain
    # available for interactive use (e.g. after `%run`).
    data, data2, results = main()