#!/usr/bin/env python
# coding: utf-8

# # XGBoost - Classification

# In[7]:

# Mount Google Drive and make it the working directory (Colab-only cell).
from google.colab import drive
import os

drive.mount('/content/drive')

# Set the working path on Drive.
print(os.getcwd())
os.chdir("/content/drive/My Drive")
print(os.getcwd())


# In[2]:

# Install xgboost inside the notebook runtime (no-op if already installed).
get_ipython().system('pip install xgboost')


# In[8]:

import xgboost as xgb
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the red-wine quality dataset (expects the CSV in the current directory).
data = pd.read_csv('winequality-red.csv')
data


# In[9]:

# Inspect the distinct quality scores before binarizing.
data.quality.unique()


# We treat this as a binary classification problem: predict whether the wine
# is good quality or not, using a score of 6 as the threshold.

# In[10]:

data.loc[data['quality'] < 6, 'quality'] = 0   # low quality
data.loc[data['quality'] >= 6, 'quality'] = 1  # high quality


# In[11]:

# Check the result of the binarization.
data


# In[12]:

X = data.drop("quality", axis=1)  # features: everything except the target
y = data.quality                  # binary target


# In[13]:

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123)


# In[14]:

clf_xgb = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=10,
    seed=42,
    max_depth=6,
    learning_rate=0.01,
)


# In[15]:

clf_xgb.fit(X_train, y_train)  # train the model


# In[16]:

y_train_pred = clf_xgb.predict(X_train)  # predictions on train
y_test_pred = clf_xgb.predict(X_test)    # predictions on test


# In[17]:

from sklearn.metrics import accuracy_score

# Accuracy on the held-out test set.
test_accuracy = accuracy_score(y_test, y_test_pred)
print('% de aciertos sobre el set de evaluación:', test_accuracy)


# # XGBoost - Regression

# In[18]:

# FIX: in the original, this import statement was broken across two lines
# ("from sklearn.model_selection import" / "train_test_split"), a SyntaxError.
# train_test_split is already imported above.
from sklearn.datasets import load_boston  # NOTE(review): removed in scikit-learn >= 1.2; pin <1.2 or migrate dataset
from sklearn.metrics import mean_squared_error


# In[19]:

boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = pd.Series(boston.target)


# In[21]:

# Peek at the feature matrix.
X.head()


# In[22]:

y


# FIX: the original never split the Boston data, so the regressor was fitted
# on the wine-quality X_train/y_train left over from the classification
# section. Split the Boston data before fitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123)


# In[23]:

# Build the XGBoost regressor.
regressor = xgb.XGBRegressor(
    n_estimators=80,
    reg_lambda=1,  # L2 regularization term on weights (original comment wrongly said L1; L1 is reg_alpha)
    gamma=0,       # minimum loss reduction required to make a further partition on a leaf node
    max_depth=3,
)


# In[24]:

# Fit on the Boston training split.
regressor.fit(X_train, y_train)


# In[25]:

# Predict on the test split.
y_pred = regressor.predict(X_test)


# In[26]:

# Test-set error.
mean_squared_error(y_test, y_pred)


# # Created in deepnote.com
# Created in Deepnote