# Model validation walkthrough with scikit-learn.
#
# The script fits estimators on the digits and Boston housing datasets and
# contrasts scores measured on the training data with scores measured on a
# held-out split.  It appears to be a flattened notebook (it contained the
# IPython magics ``%pylab inline`` and ``%load``); those are kept below as
# comments because they are not valid in a plain Python file.

# ``%pylab inline`` was only used for ``plt``; import it explicitly instead.
import matplotlib.pyplot as plt

from sklearn import metrics
# NOTE(review): ``load_boston`` was removed in scikit-learn 1.2.  It is kept
# so the tutorial's numbers and axis labels stay meaningful; pin an older
# scikit-learn or substitute another regression dataset to run this today.
from sklearn.datasets import load_boston
from sklearn.datasets import load_digits
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.naive_bayes import GaussianNB  # offered for the exercise below
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC  # offered for the exercise below
from sklearn.tree import DecisionTreeRegressor

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the since-removed module.
    from sklearn.cross_validation import train_test_split


# ---------------------------------------------------------------------------
# Digits: train and evaluate on the very same samples.
# ---------------------------------------------------------------------------

# Get the data
digits = load_digits()
X = digits.data
y = digits.target

# Instantiate and train the classifier
clf = KNeighborsClassifier(n_neighbors=1)
clf.fit(X, y)

# Check the results using metrics
y_pred = clf.predict(X)
# confusion_matrix expects (y_true, y_pred); the arguments used to be
# swapped, which transposes the matrix.
print(metrics.confusion_matrix(y, y_pred))


# ---------------------------------------------------------------------------
# Boston housing: the same mistake with a regressor, shown as a plot.
# ---------------------------------------------------------------------------

data = load_boston()
clf = DecisionTreeRegressor().fit(data.data, data.target)

predicted = clf.predict(data.data)
expected = data.target

plt.scatter(expected, predicted)
plt.plot([0, 50], [0, 50], '--k')  # dashed black diagonal: predicted == true
plt.axis('tight')
plt.xlabel('True price ($1000s)')
plt.ylabel('Predicted price ($1000s)')
# Without the inline notebook backend the figure must be shown explicitly.
plt.show()


# ---------------------------------------------------------------------------
# Digits again, this time scored on a held-out test split.
# ---------------------------------------------------------------------------

X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

print("{} {} {}".format(X.shape, X_train.shape, X_test.shape))

clf = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))

# The targets are multiclass, so an averaging strategy has to be named:
# the default ``average='binary'`` raises ValueError for such labels.
# The scores are printed because a bare expression shows nothing in a script.
print(metrics.f1_score(y_test, y_pred, average="weighted"))
print(metrics.f1_score(y_train, clf.predict(X_train), average="weighted"))


# ---------------------------------------------------------------------------
# Boston housing with a held-out split: validation vs. training score.
# ---------------------------------------------------------------------------

data = load_boston()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

print("{} {} {}".format(X.shape, X_train.shape, X_test.shape))

est = DecisionTreeRegressor().fit(X_train, y_train)

print("validation: {}".format(
    metrics.explained_variance_score(y_test, est.predict(X_test))))
print("training: {}".format(
    metrics.explained_variance_score(y_train, est.predict(X_train))))

est = GradientBoostingRegressor().fit(X_train, y_train)

print("validation: {}".format(
    metrics.explained_variance_score(y_test, est.predict(X_test))))
print("training: {}".format(
    metrics.explained_variance_score(y_train, est.predict(X_train))))


# ---------------------------------------------------------------------------
# Exercise: LinearSVC, GaussianNB and KNeighborsClassifier are imported at the
# top of the file for this step.  In the notebook the reference solution was
# pulled in with the IPython magic below (not valid outside IPython):
# %load solutions/04C_validation_exercise.py
# ---------------------------------------------------------------------------