#!/usr/bin/env python
# coding: utf-8

# # Logistic Regression

# sklearn documentation for the [logistic regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) method

# In[1]:


import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# note: this also silences the ConvergenceWarning raised by the
# deliberately low max_iter values used further below
warnings.filterwarnings('ignore')


# In[2]:


import mlutils


# Example based on [this link](https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html).

# We will use the Iris dataset to explore logistic regression:

# In[3]:


# import some data to play with
iris = datasets.load_iris()


# In[4]:


dir(iris)


# In[5]:


print(iris.DESCR)


# In[6]:


df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
sns.pairplot(df, hue='target')
plt.show()


# Let's reduce the dataset to make it easier to understand what we are doing.

# In[7]:


X = iris.data[:, :2]  # we only take the first two features: sepal length, sepal width
y = iris.target
X = X[y < 2]  # we only take two classes: setosa and versicolor
y = y[y < 2]


# In[8]:


def plot_decision_boundary(X, y, regression):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = .02  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = regression.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1, figsize=(4, 3))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.Paired)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())

    plt.show()


# Let's try different arguments of the *LogisticRegression* method to see what happens...

# In[9]:


get_ipython().run_line_magic('pinfo', 'LogisticRegression')


# In[10]:


# create an instance of the Logistic Regression classifier;
# max_iter=2 is deliberately too small, so the solver stops before converging
logreg = LogisticRegression(C=1e+3, solver='liblinear', max_iter=2).fit(X, y)
plot_decision_boundary(X, y, logreg)


# Some information about the [solvers](https://stackoverflow.com/questions/38640109/logistic-regression-python-solvers-definitions), the optimization methods scikit-learn uses to find a minimum.

# In[11]:


# increase the number of iterations so the solver can get closer to the minimum
logreg = LogisticRegression(C=1e+3, solver='liblinear', max_iter=20).fit(X, y)
plot_decision_boundary(X, y, logreg)


# In[12]:


# same setup, but with an L1 penalty (supported by the liblinear solver)
logreg = LogisticRegression(C=1e+3, solver='liblinear', max_iter=20, penalty='l1').fit(X, y)
plot_decision_boundary(X, y, logreg)


# In[13]:


# elastic-net mixes L1 and L2 (here 70% L1) and requires the saga solver;
# C=1e+8 makes the regularization essentially negligible
logreg = LogisticRegression(C=1e+8, solver='saga', max_iter=20, penalty='elasticnet', l1_ratio=0.7).fit(X, y)
plot_decision_boundary(X, y, logreg)
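
# As a quick sanity check, we can reproduce *predict_proba* by hand. This is a minimal sketch, assuming `logreg` is the model fitted in the previous cell: for a binary problem, sklearn stores one weight vector in `coef_` and a bias in `intercept_`, and the predicted probability of class 1 is the sigmoid of the linear score w·x + b.

# In[14]:


from scipy.special import expit  # the logistic sigmoid, 1 / (1 + exp(-t))

w = logreg.coef_[0]       # weights for sepal length and sepal width
b = logreg.intercept_[0]  # bias term
print('w =', w, 'b =', b)

# P(class 1 | x) = sigmoid(w . x + b) should match sklearn's own output
p_manual = expit(X @ w + b)
p_sklearn = logreg.predict_proba(X)[:, 1]
print('max difference:', np.abs(p_manual - p_sklearn).max())

# The boundary drawn by plot_decision_boundary is exactly the line where
# this probability equals 0.5, i.e. where w . x + b = 0.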
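
# We can also check convergence directly instead of eyeballing the plots. The fitted model exposes `n_iter_`, the number of iterations the solver actually ran; a minimal sketch refitting the liblinear model at several iteration budgets:

# In[15]:


# if n_iter_ hits the max_iter ceiling, the solver stopped before converging
for max_iter in [2, 20, 200]:
    model = LogisticRegression(C=1e+3, solver='liblinear', max_iter=max_iter).fit(X, y)
    print('max_iter =', max_iter, '-> n_iter_ =', model.n_iter_)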
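
# Finally, *C* is the inverse of the regularization strength, so smaller values shrink the weights harder. A minimal sketch sweeping C on the same two-feature problem (the exact numbers may vary with the sklearn version):

# In[16]:


# smaller C -> stronger regularization -> coefficients pulled toward zero
for C in [1e-3, 1e-1, 1e+1, 1e+3]:
    model = LogisticRegression(C=C, solver='liblinear', max_iter=1000).fit(X, y)
    print('C = %8g: coef = %s, intercept = %.3f' % (C, model.coef_[0], model.intercept_[0]))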