#!/usr/bin/env python
# coding: utf-8
"""Simple explanation of the statsmodels summary (implementation in Python).

Reads ``salary.csv``, draws one scatter plot of ``Salary`` against each
predictor column, fits an ordinary-least-squares model through the
statsmodels formula API and prints the resulting summary table.

The module-level names ``df``, ``fig`` and ``model`` are kept so the script
behaves like the notebook it was exported from when run interactively.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.api as sm
import statsmodels.formula.api as smf
import pylab as py


def plot_against_salary(data, column):
    """Scatter-plot ``data[column]`` against ``data['Salary']`` on a new figure.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both ``column`` and a ``'Salary'`` column.
    column : str
        Name of the column drawn on the x-axis; also used as the x-axis label.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created 15x10 figure.
    """
    figure = plt.figure(figsize=(15, 10))
    plt.plot(data[column], data['Salary'], 'o')
    # Label every plot the same way so the three figures can be told apart.
    plt.xlabel(column)
    plt.ylabel('Salary')
    plt.grid()
    return figure


df = pd.read_csv("salary.csv")
# A bare ``df`` expression only renders inside a notebook; print it instead.
print(df)

# One scatter plot per predictor, in the same order as the notebook cells.
for predictor in ('People_managing', 'Projects', 'YearsExperience'):
    fig = plot_against_salary(df, predictor)

# Alternatives that were tried in the notebook and left commented out:
#   model = smf.ols(formula = 'Salary ~ YearsExperience', data = df)
#   model = smf.ols(formula = 'Salary ~ Projects', data = df)
#   model = model.fit(cov_type="hc0")
#   y = df['YearsExperience']
#   model.predict(y)
#
# ``model`` holds the *fitted results* (the return value of ``.fit()``),
# which is what ``summary()`` is called on below.
model = smf.ols(
    formula='Salary ~ Projects + People_managing + YearsExperience',
    data=df,
).fit()

print(model.summary())

# The figures above are only created, never displayed; show them once the
# summary has been printed so a blocking window does not delay the output.
plt.show()