%pylab inline
import pylab as pl
import numpy as np
#from sklearn import datasets, linear_model
import pandas as pd
import statsmodels.api as sm
# Fit an ordinary-least-squares model of interest rate on FICO score and
# loan amount, then print the fitted parameters and summary statistics.

# import the cleaned up dataset
df = pd.read_csv('../datasets/loanf.csv')
intrate = df['Interest.Rate']
loanamt = df['Loan.Amount']
fico = df['FICO.Score']

# reshape the data from a pandas Series to columns
# the dependent variable
y = np.matrix(intrate).transpose()
# the independent variables shaped as columns
x1 = np.matrix(fico).transpose()
x2 = np.matrix(loanamt).transpose()
# put the two columns together to create an input matrix
# if we had n independent variables we would have n columns here
x = np.column_stack([x1, x2])

# create a linear model and fit it to the data
# prepend=True is spelled out so the constant is always column 0 of X,
# whatever the installed statsmodels version uses as its default.
X = sm.add_constant(x, prepend=True)
model = sm.OLS(y, X)
f = model.fit()

# f.params follows the column order of X: [const, FICO.Score, Loan.Amount].
# The intercept is therefore params[0] and the slopes are params[1:];
# slicing [0:2] / [2] would mislabel the intercept as a coefficient.
# Single-argument print(...) works on both Python 2 and Python 3.
print('Coefficients: {0}'.format(f.params[1:]))
print('Intercept: {0}'.format(f.params[0]))
# p-values are in the same order as f.params (constant first)
print('P-Values: {0}'.format(f.pvalues))
print('R-Squared: {0}'.format(f.rsquared))
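# Recorded output of the cell above (from the original run):
#   Populating the interactive namespace from numpy and matplotlib
#   Coefficients: [ 72.88279832 -0.08844242]
#   Intercept: 0.000210747768548
#   P-Values: [ 0.00000000e+000 0.00000000e+000 5.96972978e-203]
#   R-Squared: 0.656632624649
# NOTE(review): the magnitudes suggest 72.88 is the constant term, -0.088 the
# FICO.Score slope and 0.00021 the Loan.Amount slope, i.e. the labels in this
# recorded output appear shifted by one position.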
from IPython.core.display import HTML
def css_styling():
    """Return the contents of ../styles/custom.css wrapped in an HTML object.

    The path is relative to the current working directory. The file is read
    inside a ``with`` block so the handle is closed even if reading fails.

    Returns:
        IPython ``HTML`` object built from the stylesheet text.

    Raises:
        IOError/OSError: if the stylesheet cannot be opened or read.
    """
    with open("../styles/custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)
# Load the custom stylesheet via the helper defined above.
# NOTE(review): as the last expression of a notebook cell the returned HTML
# object is presumably rendered as the cell output — confirm when running
# outside a notebook, where the return value is simply discarded.
css_styling()