In [1]:

%pylab inline
import pylab as pl
import numpy as np
#from sklearn import datasets, linear_model
import pandas as pd
import statsmodels.api as sm

# import the cleaned up dataset
df = pd.read_csv('../datasets/loanf.csv')

intrate = df['Interest.Rate']
loanamt = df['Loan.Amount']
fico = df['FICO.Score']

# reshape the data from a pandas Series to columns 
# the dependent variable
y = np.matrix(intrate).transpose()
# the independent variables shaped as columns
x1 = np.matrix(fico).transpose()
x2 = np.matrix(loanamt).transpose()

# put the two columns together to create an input matrix 
# if we had n independent variables we would have n columns here
x = np.column_stack([x1,x2])

# create a linear model and fit it to the data
X = sm.add_constant(x)
model = sm.OLS(y,X)
f = model.fit()

print 'Coefficients: ', f.params[0:2]
print 'Intercept: ', f.params[2]
print 'P-Values: ', f.pvalues
print 'R-Squared: ', f.rsquared

Populating the interactive namespace from numpy and matplotlib
Coefficients:  [ 72.88279832  -0.08844242]
Intercept:  0.000210747768548
P-Values:  [  0.00000000e+000   0.00000000e+000   5.96972978e-203]
R-Squared:  0.656632624649

In [1]:

from IPython.core.display import HTML
def css_styling():
    styles = open("../styles/custom.css", "r").read()
    return HTML(styles)
css_styling()

Out[1]:

In [ ]: