#!/usr/bin/env python
# coding: utf-8

# Exported from a Jupyter notebook (the notebook carried an "Open In Colab" badge).
#
# ## Data source: FactSet
# The CSV holds P/E ratios and related fundamentals for public companies.
# Rows with any missing observation are deleted before the regression below;
# that listwise deletion could introduce significant selection bias.

# In[1]:

import os

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm

# The bundled seaborn style sheets were renamed from 'seaborn' to
# 'seaborn-v0_8' in newer matplotlib releases; use whichever name this
# installation provides so the script runs on both old and new versions.
for _style in ('seaborn-v0_8', 'seaborn'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break
mpl.rcParams['font.family'] = 'serif'

# `get_ipython` only exists inside IPython/Jupyter; skip the inline-plot
# magic when the file is executed as a plain Python script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# In[2]:

print(os.getcwd())

# In[10]:

data = pd.read_csv(
    'https://raw.githubusercontent.com/cyrus723/my-first-binder/main/data/Factset_corp.csv',
    header=0,
)
print(data)

# In[11]:

data.info()

# In[12]:

# Number of missing values per column in the raw data.
count_nan = data.isnull().sum()
print(count_nan)

# In[13]:

# Listwise deletion: drop every row that has a missing value in ANY column.
data2 = data.dropna()
print(data2)

# In[14]:

data2.info()
print(data2.shape)

# In[15]:

# Sanity check: every per-column count should now be zero.
count_nan = data2.isnull().sum()
print(count_nan)

# In[16]:

# OLS regression of PE on the remaining valuation / profitability /
# leverage columns named in the formula.
formula = 'PE ~ EV_EBITDA + DY + ROE + D_EBITDA + SalesGrowth + EV_Sales + IntCoverage'
results = smf.ols(formula, data2).fit()
print(results.summary())

# In[17]:

# Pairwise correlations. Restrict to numeric columns explicitly: pandas >= 2.0
# no longer drops non-numeric columns silently in DataFrame.corr() and raises
# instead.
print(data2.select_dtypes(include='number').corr())