#!/usr/bin/env python
# coding: utf-8

# # Simple Explanation of the Statsmodels Summary
# > Implementation in Python

# In[8]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# In[40]:


import statsmodels.api as sm
import statsmodels.formula.api as smf
import pylab as py


# In[97]:


# Load the dataset from "salary.csv" (path is relative to the current working
# directory) into the module-level DataFrame `df` used by every later cell.
df = pd.read_csv("salary.csv")
# Bare expression left over from the notebook, where it renders the DataFrame
# as cell output. NOTE(review): when this file is run as a plain script the
# line has no visible effect.
df


# In[118]:


# Scatter plot of Salary against People_managing: one marker per row of `df`,
# no connecting line. Axis labels are set so the figure is self-describing,
# matching the YearsExperience plot further down.
fig = plt.figure(figsize=(15,10))
plt.plot(df['People_managing'], df['Salary'], 'o')
plt.xlabel('People_managing')
plt.ylabel('Salary')
plt.grid()


# In[119]:


# Scatter plot of Salary against Projects: one marker per row of `df`,
# no connecting line. Axis labels are set so the figure is self-describing,
# matching the YearsExperience plot further down.
fig = plt.figure(figsize=(15,10))
plt.plot(df['Projects'], df['Salary'], 'o')
plt.xlabel('Projects')
plt.ylabel('Salary')
plt.grid()


# In[117]:


# Scatter plot of Salary against YearsExperience on a new 15x10 figure.
# Markers only: circle marker with the connecting line switched off.
fig = plt.figure(figsize=(15, 10))
plt.plot(
    df['YearsExperience'],
    df['Salary'],
    marker='o',
    linestyle='None',
)
plt.ylabel('Salary')
plt.xlabel('YearsExperience')
plt.grid()


# In[ ]:


# In[144]:


# Specify an OLS regression of Salary on Projects, People_managing and
# YearsExperience from `df`, and fit it in one step. `model` is bound to the
# object returned by `.fit()` (the fitted result), not the unfitted spec.
#
# Alternative specifications tried in the notebook (kept for reference):
#   model = smf.ols(formula = 'Salary ~ YearsExperience', data = df)
#   model = smf.ols(formula = 'Salary ~ Projects', data = df)
# Alternative fit call tried in the notebook:
#   model = model.fit(cov_type="hc0")
model = smf.ols(
    formula='Salary ~ Projects + People_managing + YearsExperience',
    data=df,
).fit()


# In[145]:


# y = df['YearsExperience']
# model.predict(y)


# In[146]:


# Print the summary produced by the fitted `model` to standard output.
print(model.summary())


# In[ ]:


# In[ ]:


# In[ ]: