使用 sklearn 模块

In [1]:

#-*- coding: utf-8 -*-
import numpy as np
from sklearn import linear_model
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler    #引入归一化的包
% matplotlib inline

In [2]:

# Load the data set, standardize its features and fit a linear regression.
# The names data, X, y, scaler, x_train, x_test and model are left in the
# notebook namespace for the cells that follow.

# Parenthesized single-argument print behaves the same on Python 2 and 3.
print(u"加载数据...\n")

# Comma-separated numeric file; loaded directly as float64.
data = np.loadtxt("../data/1-linear_regression/data.txt", delimiter=",", dtype=np.float64)
X = data[:, :-1]   # feature matrix: every column except the last
y = data[:, -1]    # target vector: the last column

# Quick look at the first two feature columns plotted against each other.
# NOTE(review): generic labels used because the file does not name its
# columns -- replace with the real feature names if known.
plt.scatter(X[:, 0], X[:, 1])
plt.xlabel("feature 0 (column 0)")
plt.ylabel("feature 1 (column 1)")

# Standardize each feature column (StandardScaler: zero mean, unit variance).
# The scaler is fitted on the training features only and then reused, so the
# query point below is scaled with the same statistics.
scaler = StandardScaler()
x_train = scaler.fit_transform(X)

# Single query sample with two feature values, shaped (1, 2) as sklearn expects.
x_test = scaler.transform(np.array([[1650.0, 3.0]]))

# Fit ordinary least-squares linear regression on the standardized features.
# Kept as the last expression so the fitted estimator is shown as cell output.
model = linear_model.LinearRegression()
model.fit(x_train, y)

加载数据...

Out[2]:

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

预测结果

In [3]:

# Predict the target for the standardized query sample and report the fitted
# parameters. Parenthesized single-argument print works on Python 2 and 3.
result = model.predict(x_test)
print(model.coef_)       # coefficients of the features in the decision function
print(model.intercept_)  # intercept (bias) term; 0 when fit_intercept=False
print(result)            # predicted value for x_test

[ 109447.79646964   -6578.35485416]
340412.659574
[ 293081.4643349]