import numpy as np
import pandas as pd
# Category levels for each synthetic attribute
sexo = ['M', 'F', 'Otro']
estatura = ['<150cm', '150-170cm', '170-190cm', '>190cm']
cabello = ['Corto', 'Mediano (hasta orejas)', 'Largo (supera hombros)']
color = ['Rojo', 'Rubio', 'Castaño', 'Negro', 'Otro']
delineador = ['Si', 'No']

# Synthetic data: one independent uniform draw per attribute and row.
# A seeded generator keeps the dataset identical between runs; without it
# the data changes on every execution even though the train/test split
# further down uses a fixed random_state.
N_SAMPLES = 1000
SEED = 42
rng = np.random.default_rng(SEED)
v_sexo = rng.choice(sexo, N_SAMPLES)
v_estatura = rng.choice(estatura, N_SAMPLES)
v_cabello = rng.choice(cabello, N_SAMPLES)
v_color = rng.choice(color, N_SAMPLES)

# Assemble the dataframe (column order matters for the one-hot encoding later)
df = pd.DataFrame({
    'Sexo': v_sexo,
    'Estatura': v_estatura,
    'Cabello': v_cabello,
    'Color': v_color,
})

# The target is a deterministic rule on Sexo and Estatura only:
#   'Si' when Sexo is 'F' and Estatura is '<150cm' or '150-170cm',
#   'Si' when Sexo is 'M' and Estatura is '150-170cm' or '>190cm',
#   'No' otherwise. Cabello and Color never influence it.
is_f_match = (df['Sexo'] == 'F') & df['Estatura'].isin(['<150cm', '150-170cm'])
is_m_match = (df['Sexo'] == 'M') & df['Estatura'].isin(['150-170cm', '>190cm'])
df['Delineador'] = np.where(is_f_match | is_m_match, 'Si', 'No')

# Quick sanity check of the generated data. The results are printed
# explicitly because a bare expression is only displayed inside a notebook
# cell; when the file runs as a script its value is silently discarded.
print(df.head())

# Class balance of the target column
print(df.Delineador.value_counts())

# Separate predictors from the target, encode them, and split train / test
from sklearn.model_selection import train_test_split

y = df['Delineador']
X = df.drop(columns=['Delineador'])

# One-hot encode the categorical predictors (one indicator column per level)
X_new = pd.get_dummies(X)

# Hold out 33% of the rows for testing; the split itself is seeded
X_train, X_test, y_train, y_test = train_test_split(
    X_new,
    y,
    test_size=0.33,
    random_state=42,
)

# Train the decision tree
from sklearn import tree

# A fixed random_state makes the fitted tree reproducible: scikit-learn
# randomly permutes the features at each split, so ties between equally
# good splits could otherwise be resolved differently on each run.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)

# Labels present in the training target, in order of first appearance.
# Printed explicitly so the result is visible when run as a script.
print(y_train.unique())

# Plot the fitted tree
from matplotlib import pyplot as plt

fig = plt.figure(figsize=(18, 10))
# class_names must follow the ascending class order the classifier uses
# internally (clf.classes_). Series.unique() returns labels in order of
# first appearance, which can swap the 'Si' / 'No' labels in the figure.
# Both name sequences are passed as plain lists, which every scikit-learn
# release accepts.
_ = tree.plot_tree(
    clf,
    feature_names=X_train.columns.tolist(),
    class_names=clf.classes_.tolist(),
    filled=True,
)
# NOTE: outside a notebook, call plt.show() or fig.savefig(...) to see the figure.

# Evaluate the fitted tree on the held-out test set
from sklearn.metrics import classification_report

y_pred = clf.predict(X_test)

# Per-class precision / recall / F1 for the test predictions
print(classification_report(y_test, y_pred))