#!/usr/bin/env python
# coding: utf-8
"""MNIST digits: softmax (multinomial logistic) regression.

Downloads MNIST from OpenML, trains a multinomial logistic-regression
classifier on a random 80/20 train/test split, prints evaluation metrics,
and finally classifies a single hand-made 28x28 greyscale PNG digit.
"""

# In[2]:

import matplotlib.pyplot as plt
import numpy as np
import png  # pypng; imported up front so a missing package fails before training
from sklearn.datasets import fetch_openml  # MNIST data
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split


# In[3]:

# Load MNIST data from https://www.openml.org/d/554.
# as_frame=False forces plain NumPy arrays: with the library default the data
# may come back as pandas objects, which have no .reshape and do not support
# the positional indexing (X_train[index]) used below.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
# Flatten every sample to a 1-D feature vector (one row per image).
X = X.reshape((X.shape[0], -1))


# In[4]:

# Generate training and test datasets (80% / 20%).
# No random_state is given, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


# In[5]:

# Show six consecutive training samples together with their labels.
FIRST_SAMPLE = 9000  # index of the first image to display
fig, axes = plt.subplots(1, 6, figsize=(15, 5))
for offset, ax in enumerate(axes):
    index = FIRST_SAMPLE + offset  # image number
    pixels = np.array(X_train[index], dtype='uint8').reshape((28, 28))
    ax.set_title('Label is {label}'.format(label=y_train[index]))
    ax.imshow(pixels, cmap='gray')
    ax.set_xticks(())
    ax.set_yticks(())


# In[6]:

# Multinomial (softmax) logistic regression with the 'sag' solver and no
# regularization penalty; tol=0.1 is a loose stopping tolerance.
# NOTE(review): the accepted spelling of `penalty` / `multi_class` depends on
# the installed scikit-learn release (newer releases use `penalty=None` and
# deprecate `multi_class`) -- confirm against the version in use.
clf = LogisticRegression(multi_class='multinomial', penalty='none',
                         solver='sag', tol=0.1)


# In[7]:

# Fit the classifier on the training data.
clf.fit(X_train, y_train)


# In[8]:

# Test the model on the held-out data.
predictions = clf.predict(X_test)

# Precision, recall, f1-score per class, followed by the overall accuracy.
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))


# In[9]:

filename = "mnist_my_digit_3.png"
image = np.zeros((1, 28, 28, 1), dtype=np.uint8)

# The row iterator returned by asDirect() reads lazily from the file, so it
# has to be consumed while the file is still open; the `with` block also
# guarantees that the handle is closed afterwards.
with open(filename, 'rb') as png_file:
    width, height, rows, info = png.Reader(file=png_file).asDirect()
    if (width, height) != (28, 28) or info.get('planes', 1) != 1:
        raise ValueError(
            f"{filename}: expected a 28x28 single-channel image, got "
            f"{width}x{height} with {info.get('planes')} plane(s)"
        )
    for i_row, row in enumerate(rows):
        image[0, i_row, :, 0] = row

# Start a new figure so the digit is not drawn into the last subplot of the
# training-sample figure when this file is run as a plain script.
plt.figure()
plt.imshow(np.squeeze(image), cmap="gray")
plt.xticks(())
plt.yticks(())
plt.show()

# One sample; -1 lets NumPy work out the number of features.
my_X = image.reshape((1, -1))


# In[11]:

probabilities = clf.predict_proba(my_X)
# Columns of predict_proba follow clf.classes_, so translate the winning
# column index back into the corresponding class label.
prediction = clf.classes_[np.argmax(probabilities, axis=1)[0]]
print(probabilities)
print(f"prediction = {prediction}")