# libraries
from numpy import mean
from numpy import std
from pandas import read_csv
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# --- LOOCV evaluation of a classifier (sonar dataset) ---

# data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = read_csv(url, header=None)
data = dataframe.values
dataframe.head()
dataframe.shape
dataframe.isnull().sum()
sum(dataframe.isna().sum())  # no missing values

# split into X and y
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

# create the LOOCV procedure
cv = LeaveOneOut()
# LeaveOneOut?   # IPython help

# create the model
model = RandomForestClassifier(random_state=1, n_estimators=30,
                               criterion="gini", max_depth=4)
# cross_val_score?   # IPython help

# evaluate the model
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, verbose=1)

# report the performance
print('Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))

# --- LOOCV evaluation of a regressor (housing dataset) ---

# libraries
from pandas import read_csv

# load data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
dataframe = read_csv(url, header=None)

# shape
print(dataframe.shape)
dataframe
dataframe.dtypes
dataframe.isnull().sum()

# split into X and y
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

# create the LOOCV procedure
cv = LeaveOneOut()

# create the model
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=42, n_estimators=10, max_depth=4)
# cross_val_score?   # IPython help

# evaluate the model (comparison metric: MAE)
from sklearn.metrics import mean_squared_error, make_scorer, mean_absolute_error
MAE = make_scorer(mean_absolute_error)
scores = cross_val_score(model, X, y, scoring=MAE, cv=cv, error_score='raise', verbose=1)

# make_scorer(mean_absolute_error) already returns positive scores; abs() is kept as a safeguard
scores = abs(scores)

# report the performance
print('MAE: %.3f (%.3f)' % (mean(scores), std(scores)))

# --- Train/test split evaluation of a classifier (sonar dataset) ---

# data
from sklearn.model_selection import train_test_split
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = read_csv(url, header=None)
dataframe.head()
# train_test_split?   # IPython help

data = dataframe.values
X, y = data[:, :-1], data[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)
print(X_train.shape, X_test.shape)

# model
model = RandomForestClassifier(random_state=1, n_estimators=30,
                               criterion="gini", max_depth=4)

# fit
model.fit(X_train, y_train)
predicciones = model.predict(X_test)
predicciones[0:5]

# simple validation
from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=predicciones))

# --- Train/test split evaluation of a regressor (housing dataset) ---

# libraries
from pandas import read_csv

# load data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
dataframe = read_csv(url, header=None)

# shape
print(dataframe.shape)

data = dataframe.values
X, y = data[:, :-1], data[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)
print(X_train.shape, X_test.shape)

# model
model = RandomForestRegressor(random_state=42, n_estimators=10, max_depth=4)

# fit
model.fit(X_train, y_train)
predicciones = model.predict(X_test)

# simple validation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
print('MSE: ', mean_squared_error(y_true=y_test, y_pred=predicciones))
print('MAE: ', mean_absolute_error(y_true=y_test, y_pred=predicciones))
print('R2: ', r2_score(y_true=y_test, y_pred=predicciones))
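
# --- A minimal sketch (not part of the original code) ---
# The two LOOCV evaluations above repeat the same pattern: build the splitter,
# call cross_val_score, then report the mean and standard deviation of the scores.
# The helper below factors that pattern out. The name evaluate_loocv and its
# signature are illustrative assumptions, not an API used elsewhere in this section.
from numpy import mean, std
from sklearn.model_selection import LeaveOneOut, cross_val_score

def evaluate_loocv(model, X, y, scoring='accuracy'):
    """Run leave-one-out cross-validation and return (mean score, std of scores)."""
    cv = LeaveOneOut()
    scores = abs(cross_val_score(model, X, y, scoring=scoring, cv=cv))
    return mean(scores), std(scores)

# Example usage, assuming X, y and a model defined above are in scope:
# acc_mean, acc_std = evaluate_loocv(model, X, y, scoring='accuracy')
# mae_mean, mae_std = evaluate_loocv(model, X, y, scoring='neg_mean_absolute_error')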