import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml # MNIST data
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
# load MNIST data from https://www.openml.org/d/554
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
X = X.reshape((X.shape[0], -1))
# generate training and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
plt.subplots(1, 6, figsize=(15,5))
for i in range(6):
index = 9000 + i # image number
pixels = np.array(X_train[index], dtype='uint8')
pixels = pixels.reshape((28, 28))
plt.subplot(1, 6, i+1)
plt.title('Label is {label}'.format(label=y_train[index]))
plt.imshow(pixels, cmap='gray')
plt.xticks(())
plt.yticks(())
# apply logistic regressor with 'sag' solver, C is the inverse regularization strength
clf = LogisticRegression(multi_class='multinomial',
penalty='none', solver='sag', tol=0.1)
# fit data
clf.fit(X_train, y_train)
LogisticRegression(multi_class='multinomial', penalty='none', solver='sag', tol=0.1)
#Test the model
predictions = clf.predict(X_test)
#Precision, recall, f1-score
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))
precision recall f1-score support 0 0.95 0.97 0.96 1330 1 0.95 0.98 0.96 1588 2 0.90 0.91 0.91 1401 3 0.93 0.88 0.90 1432 4 0.92 0.92 0.92 1365 5 0.87 0.87 0.87 1225 6 0.93 0.95 0.94 1367 7 0.95 0.92 0.94 1484 8 0.90 0.87 0.88 1409 9 0.90 0.91 0.90 1399 accuracy 0.92 14000 macro avg 0.92 0.92 0.92 14000 weighted avg 0.92 0.92 0.92 14000 0.9205714285714286
import png
filename = "mnist_my_digit_3.png"
image = np.zeros((1, 28, 28, 1), dtype=np.uint8)
pngdata = png.Reader(open(filename, 'rb')).asDirect()
for i_row, row in enumerate(pngdata[2]):
image[0, i_row, :, 0] = row
plt.imshow(np.squeeze(image), cmap="gray")
plt.xticks(())
plt.yticks(())
plt.show()
# one digit, -1: unspecified number determined by numpy
my_X = image.reshape((1,-1))
probabilities = clf.predict_proba(my_X)
prediction = np.argmax(probabilities)
print(probabilities)
print(f"prediction = {prediction}")
[[1.56738219e-06 1.92166516e-04 6.93915018e-04 9.94419559e-01 2.07413296e-07 1.79084390e-03 8.59282575e-09 1.45219903e-07 2.90016962e-03 1.41756864e-06]] prediction = 3