"""Train and visualise a decision tree on a synthetic categorical dataset.

The script builds a table of random categorical attributes (sex, height
range, hair length, hair colour), derives a deterministic ``Delineador``
(eyeliner) label from sex and height, one-hot encodes the attributes, fits a
``DecisionTreeClassifier`` and reports how well it recovers the labelling
rule on a held-out split.
"""

import numpy as np
import pandas as pd

# Category vocabularies for each synthetic attribute.
sexo = ['M', 'F', 'Otro']
estatura = ['<150cm', '150-170cm', '170-190cm', '>190cm']
cabello = ['Corto', 'Mediano (hasta orejas)', 'Largo (supera hombros)']
color = ['Rojo', 'Rubio', 'Castaño', 'Negro', 'Otro']
delineador = ['Si', 'No']


def delineador_labels(df):
    """Return the 'Si'/'No' eyeliner label for every row of *df*.

    A row is labelled 'Si' when either
      * ``Sexo`` is 'F' and ``Estatura`` is '<150cm' or '150-170cm', or
      * ``Sexo`` is 'M' and ``Estatura`` is '150-170cm' or '>190cm';
    every other row is labelled 'No'.

    Args:
        df: DataFrame with at least the columns ``Sexo`` and ``Estatura``.

    Returns:
        A NumPy array of strings with one label per row of *df*.
    """
    positive, negative = delineador
    female_rule = (df['Sexo'] == 'F') & df['Estatura'].isin(['<150cm', '150-170cm'])
    male_rule = (df['Sexo'] == 'M') & df['Estatura'].isin(['150-170cm', '>190cm'])
    return np.where(female_rule | male_rule, positive, negative)


def make_dataset(n_rows=1000, seed=42):
    """Create the synthetic dataset, including the ``Delineador`` target.

    Args:
        n_rows: Number of rows to generate.
        seed: Seed for the random generator; pass ``None`` to draw fresh
            entropy on every call.

    Returns:
        DataFrame with columns ``Sexo``, ``Estatura``, ``Cabello``, ``Color``
        (sampled uniformly from the module-level vocabularies) and
        ``Delineador`` (computed by :func:`delineador_labels`).
    """
    # A local Generator keeps runs reproducible without touching NumPy's
    # global random state.
    rng = np.random.default_rng(seed)
    df = pd.DataFrame({
        'Sexo': rng.choice(sexo, n_rows),
        'Estatura': rng.choice(estatura, n_rows),
        'Cabello': rng.choice(cabello, n_rows),
        'Color': rng.choice(color, n_rows),
    })
    df['Delineador'] = delineador_labels(df)
    return df


def main(n_rows=1000, seed=42):
    """Generate the data, fit the tree, plot it and print the test report.

    Args:
        n_rows: Number of synthetic rows to generate.
        seed: Seed used for data generation and for the tree's tie-breaking.
    """
    # Imported here so the data helpers above stay usable when scikit-learn
    # or matplotlib are not installed.
    from matplotlib import pyplot as plt
    from sklearn import tree
    from sklearn.metrics import classification_report
    from sklearn.model_selection import train_test_split

    df = make_dataset(n_rows, seed)
    print(df.head())
    print(df['Delineador'].value_counts())

    # Split into train and test sets on the one-hot encoded attributes.
    X = pd.get_dummies(df.drop(columns='Delineador'))
    y = df['Delineador']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42
    )

    # Train the tree.
    clf = tree.DecisionTreeClassifier(random_state=seed)
    clf.fit(X_train, y_train)

    # Plot the fitted tree. class_names must follow the classifier's own
    # (sorted) class order, otherwise the node labels can be swapped.
    plt.figure(figsize=(18, 10))
    tree.plot_tree(
        clf,
        feature_names=list(X_train.columns),
        class_names=[str(label) for label in clf.classes_],
        filled=True,
    )

    y_pred = clf.predict(X_test)
    print(classification_report(y_true=y_test, y_pred=y_pred))

    plt.show()


if __name__ == "__main__":
    main()