# libraries
from numpy import mean
from numpy import std
from pandas import read_csv
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# --- LOOCV evaluation of a classifier (sonar dataset) ---

# data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = read_csv(url, header=None)
data = dataframe.values
dataframe.head()
dataframe.shape
dataframe.isnull().sum()
sum(dataframe.isna().sum())  # no missing values

# split into X and y
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

# create the LOOCV procedure
cv = LeaveOneOut()
# LeaveOneOut?   # IPython help

# create the model
model = RandomForestClassifier(random_state=1, n_estimators=30,
                               criterion="gini", max_depth=4)
# cross_val_score?   # IPython help

# evaluate the model
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, verbose=1)

# report the performance
print('Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))

# --- LOOCV evaluation of a regressor (housing dataset) ---

# libraries
from pandas import read_csv

# load data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
dataframe = read_csv(url, header=None)

# shape
print(dataframe.shape)
dataframe
dataframe.dtypes
dataframe.isnull().sum()

# split into X and y
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

# create the LOOCV procedure
cv = LeaveOneOut()

# create the model
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=42, n_estimators=10, max_depth=4)
# cross_val_score?   # IPython help

# evaluate the model (comparison metric: MAE)
from sklearn.metrics import mean_squared_error, make_scorer, mean_absolute_error
MAE = make_scorer(mean_absolute_error)
scores = cross_val_score(model, X, y, scoring=MAE, cv=cv, error_score='raise', verbose=1)

# make_scorer(mean_absolute_error) already returns positive scores; abs() is kept as a safeguard
scores = abs(scores)

# report the performance
print('MAE: %.3f (%.3f)' % (mean(scores), std(scores)))

# --- Train/test split evaluation of a classifier (sonar dataset) ---

# data
from sklearn.model_selection import train_test_split
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = read_csv(url, header=None)
dataframe.head()
# train_test_split?   # IPython help

data = dataframe.values
X, y = data[:, :-1], data[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)
print(X_train.shape, X_test.shape)

# model
model = RandomForestClassifier(random_state=1, n_estimators=30,
                               criterion="gini", max_depth=4)

# fit
model.fit(X_train, y_train)
predicciones = model.predict(X_test)
predicciones[0:5]

# simple validation
from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=predicciones))

# --- Train/test split evaluation of a regressor (housing dataset) ---

# libraries
from pandas import read_csv

# load data
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'
dataframe = read_csv(url, header=None)

# shape
print(dataframe.shape)

data = dataframe.values
X, y = data[:, :-1], data[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)
print(X_train.shape, X_test.shape)

# model
model = RandomForestRegressor(random_state=42, n_estimators=10, max_depth=4)

# fit
model.fit(X_train, y_train)
predicciones = model.predict(X_test)

# simple validation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
print('MSE: ', mean_squared_error(y_true=y_test, y_pred=predicciones))
print('MAE: ', mean_absolute_error(y_true=y_test, y_pred=predicciones))
print('R2: ', r2_score(y_true=y_test, y_pred=predicciones))
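
# --- A minimal sketch (not part of the original code) ---
# The two LOOCV evaluations above repeat the same pattern: build the splitter,
# call cross_val_score, then report the mean and standard deviation of the scores.
# The helper below factors that pattern out. The name evaluate_loocv and its
# signature are illustrative assumptions, not an API used elsewhere in this section.
from numpy import mean, std
from sklearn.model_selection import LeaveOneOut, cross_val_score

def evaluate_loocv(model, X, y, scoring='accuracy'):
    """Run leave-one-out cross-validation and return (mean score, std of scores)."""
    cv = LeaveOneOut()
    scores = abs(cross_val_score(model, X, y, scoring=scoring, cv=cv))
    return mean(scores), std(scores)

# Example usage, assuming X, y and a model defined above are in scope:
# acc_mean, acc_std = evaluate_loocv(model, X, y, scoring='accuracy')
# mae_mean, mae_std = evaluate_loocv(model, X, y, scoring='neg_mean_absolute_error')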