import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Data are from the Wikipedia article on logistic regression.
# Data columns: 1. hours studied, 2. passed (0/1)
filename = "https://www.physi.uni-heidelberg.de/~reygers/lectures/2020/smipp/exam.txt"
# Whitespace-separated table. The separator is a regular expression, so it is
# written as a raw string: in a normal string literal "\s" is an invalid escape
# sequence and triggers a warning on recent Python versions.
df = pd.read_csv(filename, engine='python', sep=r'\s+')
# The estimator is fitted on a 2-D feature matrix, so the 1-D column of study
# hours is reshaped to a single-column array of shape (n_samples, 1).
x_tmp = df['hours_studied'].values
x = np.reshape(x_tmp, (-1, 1))
# Target vector: the 'passed' column as a 1-D array.
y = df['passed'].values
from sklearn.linear_model import LogisticRegression

# Logistic regression without any regularisation term, with a fitted intercept.
# `penalty=None` is the supported way to switch the penalty off: the string
# 'none' was deprecated in scikit-learn 1.2 and is rejected from 1.4 on.
# NOTE: `penalty=None` requires scikit-learn >= 1.2.
clf = LogisticRegression(penalty=None, fit_intercept=True)
# Fit on the study-hours feature matrix `x` and the pass/fail labels `y`.
clf.fit(x, y)
# Dense grid of 1000 study times from 0 to 6 hours, as a single-column matrix
# so it can be passed to the fitted classifier.
hours_studied_tmp = np.linspace(0.0, 6.0, num=1000)
hours_studied = hours_studied_tmp.reshape(-1, 1)

# predict_proba returns one column per class; column 1 is used below as the
# probability of the second class (passed == 1 for 0/1 labels).
y_pred = clf.predict_proba(hours_studied)

# Scatter the observed outcomes, then draw the fitted probability curve on top.
df.plot.scatter(x="hours_studied", y="passed")
plt.plot(hours_studied, y_pred[:, 1])
plt.xlabel("preparation time in hours", fontsize=14)
plt.ylabel("probability of passing exam", fontsize=14)

# Write the finished figure to disk.
plt.savefig("logistic_regression.pdf")
# Query the estimator's current parameter settings. As a bare expression the
# returned value is discarded when this runs as a script; it is only echoed
# in an interactive session or as the last statement of a notebook cell.
clf.get_params()
{'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'none', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
# Report the fitted slope (coefficient) and intercept of the logistic model,
# one labelled line each.
for _label, _value in (
    ('Coefficient: ', clf.coef_),
    ('Intercept: ', clf.intercept_),
):
    print(_label, _value)
Coefficient: [[1.50464522]]
Intercept: [-4.07771764]