#!/usr/bin/env python
# coding: utf-8
"""Simple example of logistic regression with scikit-learn.

Reads a small exam data set (hours studied vs. passed/failed), fits an
unregularized logistic regression to it, plots the fitted probability
curve on top of the data, and prints the fitted coefficient and intercept.

Side effects: downloads the data file over HTTP and writes
``logistic_regression.pdf`` to the current working directory.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

# ### Read data
# Data are from the wikipedia article on logistic regression:
# https://en.wikipedia.org/wiki/Logistic_regression

# Columns: 1. hours studied, 2. passed (0/1)
filename = "https://www.physi.uni-heidelberg.de/~reygers/lectures/2020/smipp/exam.txt"
# Raw string: '\s' is not a valid Python string escape, so a plain literal
# triggers an invalid-escape warning on recent Python versions.
df = pd.read_csv(filename, engine='python', sep=r'\s+')

# scikit-learn expects a 2-D feature matrix of shape (n_samples, n_features),
# so the single feature column is reshaped to (n_samples, 1).
x_tmp = df['hours_studied'].values
x = np.reshape(x_tmp, (-1, 1))
y = df['passed'].values

# ### Fit the model
# penalty=None disables regularization. The string spelling 'none' was
# deprecated in scikit-learn 1.2 and removed in 1.4.
clf = LogisticRegression(penalty=None, fit_intercept=True)
clf.fit(x, y)

# ### Calculate predictions
# Evaluate the fitted model on a dense grid between 0 and 6 hours.
hours_studied_tmp = np.linspace(0., 6., 1000)
hours_studied = np.reshape(hours_studied_tmp, (-1, 1))
y_pred = clf.predict_proba(hours_studied)

# ### Plot result
df.plot.scatter(x='hours_studied', y='passed')
# Column 1 of predict_proba corresponds to the second entry of clf.classes_
# (presumably the "passed" label 1 -- confirm against the data file).
plt.plot(hours_studied, y_pred[:, 1])
plt.xlabel("preparation time in hours", fontsize=14)
plt.ylabel("probability of passing exam", fontsize=14)
plt.savefig("logistic_regression.pdf")

# In a notebook this cell displays the estimator's parameters; as a plain
# script the return value is discarded (wrap it in print() to see it).
clf.get_params()

print('Coefficient: ', clf.coef_)
print('Intercept: ', clf.intercept_)