import numpy as np
import pandas as pd
from pylab import mpl, plt
import statsmodels.formula.api as smf
import statsmodels.api as sm
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline
/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead. import pandas.util.testing as tm
import os
os.getcwd()
'/content'
data = pd.read_csv('https://raw.githubusercontent.com/cyrus723/my-first-binder/main/data/Factset_corp.csv', header=0)
data
Identifier | Name | Revenue | Industry | Country | EV_EBITDA | PE | DY | ROE | D_EBITDA | SalesGrowth | EV_Sales | IntCoverage | Sales_Growth | NAICS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | III-GB | 3i Group plc | 310.80 | Investment Managers | United Kingdom | 5.71 | 6.00 | 3.34 | 21.93 | 0.48 | 35.23 | 50.11 | 44.26 | -15.75 | Finance and Insurance |
1 | MMM-US | 3M Company | 35,355.00 | Industrial Conglomerates | United States | 12.36 | 17.56 | 3.33 | 42.42 | 1.85 | 9.85 | 3.33 | 16.47 | 2.57 | Manufacturing |
2 | MAERSK.B-DK | A.P. Moller - Maersk A/S | 61,802.00 | Marine Shipping | Denmark | 3.18 | 3.96 | 10.66 | 47.77 | 0.67 | 49.62 | 1.23 | 23.30 | 16.15 | Transportation and Warehousing |
3 | SAGA.B-SE | AB Sagax | 376.95 | Real Estate Development | Sweden | 61.96 | 13.80 | 0.70 | 36.64 | 10.53 | 9.81 | 48.63 | 5.68 | 11.86 | Real Estate and Rental and Leasing |
4 | ABBN-CH | ABB Ltd. | 28,949.00 | Electrical Products | Switzerland | 18.57 | 16.95 | 2.35 | 30.14 | 1.33 | 8.15 | 2.71 | 22.22 | -0.83 | Manufacturing |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1595 | ZM-US | Zoom Video Communications, Inc. | 4,099.86 | Packaged Software | United States | 34.88 | 34.31 | 0.00 | 28.53 | 0.00 | 54.63 | 10.21 | NaN | 131.49 | Information |
1596 | ZI-US | ZoomInfo Technologies, Inc. | 747.20 | Internet Software/Services | United States | 117.64 | 216.53 | 0.00 | 9.34 | 5.54 | 56.91 | 35.16 | 3.49 | 73.00 | Information |
1597 | ZS-US | Zscaler, Inc. | 673.10 | Packaged Software | United States | NaN | NaN | 0.00 | -51.70 | NaN | 56.07 | 46.75 | -3.89 | 52.40 | Information |
1598 | 2057-HK | ZTO Express (Cayman), Inc. | 4,714.55 | Air Freight/Couriers | China | 20.29 | 30.96 | 0.00 | 9.75 | 0.54 | 29.34 | 4.58 | 37.26 | 20.68 | Professional, Scientific, and Technical Services |
1599 | ZURN-CH | Zurich Insurance Group AG | 69,591.00 | Multi-Line Insurance | Switzerland | NaN | 12.52 | 5.49 | 13.92 | NaN | 15.45 | 1.23 | NaN | 11.39 | Finance and Insurance |
1600 rows × 15 columns
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1600 entries, 0 to 1599 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Identifier 1600 non-null object 1 Name 1600 non-null object 2 Revenue 1600 non-null object 3 Industry 1600 non-null object 4 Country 1600 non-null object 5 EV_EBITDA 1271 non-null float64 6 PE 1482 non-null float64 7 DY 1516 non-null float64 8 ROE 1559 non-null float64 9 D_EBITDA 1303 non-null float64 10 SalesGrowth 1594 non-null float64 11 EV_Sales 1539 non-null float64 12 IntCoverage 1328 non-null float64 13 Sales_Growth 1579 non-null float64 14 NAICS 1600 non-null object dtypes: float64(9), object(6) memory usage: 187.6+ KB
count_nan = data.isnull().sum()
print(count_nan)
Identifier 0 Name 0 Revenue 0 Industry 0 Country 0 EV_EBITDA 329 PE 118 DY 84 ROE 41 D_EBITDA 297 SalesGrowth 6 EV_Sales 61 IntCoverage 272 Sales_Growth 21 NAICS 0 dtype: int64
data2 = data.dropna()
data2
Identifier | Name | Revenue | Industry | Country | EV_EBITDA | PE | DY | ROE | D_EBITDA | SalesGrowth | EV_Sales | IntCoverage | Sales_Growth | NAICS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | III-GB | 3i Group plc | 310.80 | Investment Managers | United Kingdom | 5.71 | 6.00 | 3.34 | 21.93 | 0.48 | 35.23 | 50.11 | 44.26 | -15.75 | Finance and Insurance |
1 | MMM-US | 3M Company | 35,355.00 | Industrial Conglomerates | United States | 12.36 | 17.56 | 3.33 | 42.42 | 1.85 | 9.85 | 3.33 | 16.47 | 2.57 | Manufacturing |
2 | MAERSK.B-DK | A.P. Moller - Maersk A/S | 61,802.00 | Marine Shipping | Denmark | 3.18 | 3.96 | 10.66 | 47.77 | 0.67 | 49.62 | 1.23 | 23.30 | 16.15 | Transportation and Warehousing |
3 | SAGA.B-SE | AB Sagax | 376.95 | Real Estate Development | Sweden | 61.96 | 13.80 | 0.70 | 36.64 | 10.53 | 9.81 | 48.63 | 5.68 | 11.86 | Real Estate and Rental and Leasing |
4 | ABBN-CH | ABB Ltd. | 28,949.00 | Electrical Products | Switzerland | 18.57 | 16.95 | 2.35 | 30.14 | 1.33 | 8.15 | 2.71 | 22.22 | -0.83 | Manufacturing |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1591 | 2899-HK | Zijin Mining Group Co., Ltd. | 34,366.65 | Precious Metals | China | 8.59 | 12.88 | 2.49 | 25.03 | 2.42 | 40.66 | 1.28 | 9.11 | 29.27 | Mining, Quarrying, and Oil and Gas Extraction |
1593 | ZBH-US | Zimmer Biomet Holdings, Inc. | 7,836.20 | Medical Specialties | United States | 14.70 | 66.55 | 0.76 | 3.23 | 3.12 | 11.56 | 4.29 | 5.77 | -0.41 | Manufacturing |
1594 | ZTS-US | Zoetis Inc. | 7,776.00 | Pharmaceuticals: Major | United States | 36.74 | 57.11 | 0.44 | 49.01 | 2.03 | 16.49 | 15.38 | 10.95 | 10.11 | Manufacturing |
1596 | ZI-US | ZoomInfo Technologies, Inc. | 747.20 | Internet Software/Services | United States | 117.64 | 216.53 | 0.00 | 9.34 | 5.54 | 56.91 | 35.16 | 3.49 | 73.00 | Information |
1598 | 2057-HK | ZTO Express (Cayman), Inc. | 4,714.55 | Air Freight/Couriers | China | 20.29 | 30.96 | 0.00 | 9.75 | 0.54 | 29.34 | 4.58 | 37.26 | 20.68 | Professional, Scientific, and Technical Services |
1106 rows × 15 columns
data2.info()
data2.shape
<class 'pandas.core.frame.DataFrame'> Int64Index: 1106 entries, 0 to 1598 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Identifier 1106 non-null object 1 Name 1106 non-null object 2 Revenue 1106 non-null object 3 Industry 1106 non-null object 4 Country 1106 non-null object 5 EV_EBITDA 1106 non-null float64 6 PE 1106 non-null float64 7 DY 1106 non-null float64 8 ROE 1106 non-null float64 9 D_EBITDA 1106 non-null float64 10 SalesGrowth 1106 non-null float64 11 EV_Sales 1106 non-null float64 12 IntCoverage 1106 non-null float64 13 Sales_Growth 1106 non-null float64 14 NAICS 1106 non-null object dtypes: float64(9), object(6) memory usage: 138.2+ KB
(1106, 15)
count_nan = data2.isnull().sum()
print(count_nan)
Identifier 0 Name 0 Revenue 0 Industry 0 Country 0 EV_EBITDA 0 PE 0 DY 0 ROE 0 D_EBITDA 0 SalesGrowth 0 EV_Sales 0 IntCoverage 0 Sales_Growth 0 NAICS 0 dtype: int64
formula = 'PE ~ EV_EBITDA + DY + ROE + D_EBITDA + SalesGrowth + EV_Sales + IntCoverage'
results = smf.ols(formula, data2).fit()
print(results.summary())
OLS Regression Results ============================================================================== Dep. Variable: PE R-squared: 0.140 Model: OLS Adj. R-squared: 0.135 Method: Least Squares F-statistic: 25.61 Date: Thu, 19 May 2022 Prob (F-statistic): 1.40e-32 Time: 06:59:28 Log-Likelihood: -6991.6 No. Observations: 1106 AIC: 1.400e+04 Df Residuals: 1098 BIC: 1.404e+04 Df Model: 7 Covariance Type: nonrobust =============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------- Intercept 17.3022 7.352 2.353 0.019 2.876 31.729 EV_EBITDA 2.2773 0.191 11.910 0.000 1.902 2.652 DY 1.4979 1.589 0.943 0.346 -1.619 4.615 ROE -0.0073 0.015 -0.484 0.629 -0.037 0.022 D_EBITDA -3.5974 0.575 -6.259 0.000 -4.725 -2.470 SalesGrowth -0.0539 0.044 -1.223 0.222 -0.140 0.033 EV_Sales -1.7662 0.729 -2.423 0.016 -3.197 -0.336 IntCoverage -0.0075 0.011 -0.670 0.503 -0.029 0.014 ============================================================================== Omnibus: 2200.053 Durbin-Watson: 2.028 Prob(Omnibus): 0.000 Jarque-Bera (JB): 4265213.315 Skew: 15.095 Prob(JB): 0.00 Kurtosis: 305.726 Cond. No. 679. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
data2.corr()
EV_EBITDA | PE | DY | ROE | D_EBITDA | SalesGrowth | EV_Sales | IntCoverage | Sales_Growth | |
---|---|---|---|---|---|---|---|---|---|
EV_EBITDA | 1.000000 | 0.328878 | -0.260691 | -0.022547 | 0.576225 | 0.076867 | 0.479353 | 0.017639 | 0.189467 |
PE | 0.328878 | 1.000000 | -0.079341 | -0.019495 | 0.062110 | -0.008015 | 0.149428 | -0.010633 | 0.049153 |
DY | -0.260691 | -0.079341 | 1.000000 | -0.023072 | -0.021939 | -0.017622 | -0.237462 | -0.038418 | -0.100704 |
ROE | -0.022547 | -0.019495 | -0.023072 | 1.000000 | -0.019213 | 0.009836 | -0.024447 | -0.004273 | -0.002051 |
D_EBITDA | 0.576225 | 0.062110 | -0.021939 | -0.019213 | 1.000000 | 0.014906 | 0.005065 | -0.041356 | -0.016838 |
SalesGrowth | 0.076867 | -0.008015 | -0.017622 | 0.009836 | 0.014906 | 1.000000 | 0.097530 | 0.040734 | 0.734134 |
EV_Sales | 0.479353 | 0.149428 | -0.237462 | -0.024447 | 0.005065 | 0.097530 | 1.000000 | 0.095952 | 0.221300 |
IntCoverage | 0.017639 | -0.010633 | -0.038418 | -0.004273 | -0.041356 | 0.040734 | 0.095952 | 1.000000 | 0.060372 |
Sales_Growth | 0.189467 | 0.049153 | -0.100704 | -0.002051 | -0.016838 | 0.734134 | 0.221300 | 0.060372 | 1.000000 |