! pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip 

# Mount Google Drive inside the Colab runtime so files stored there can be read.
from google.colab import drive
import os
drive.mount('/content/gdrive')
# Set the working directory to the mounted Drive folder
import os
print(os.getcwd())  # show the directory we start in, before switching
os.chdir("/content/gdrive/My Drive")  # relative paths used afterwards resolve against this folder

#Importamos las librerias

# Operaciones Basicas
import numpy as np
import pandas as pd
import pandas_profiling

# Visualizacion de Datos
import matplotlib.pyplot as plt
import seaborn as sns

# Load the heart dataset (path is relative to the current working directory)
# NOTE(review): the bare expressions below only show output when each one is the
# last statement of a notebook cell; run as a plain script they display nothing.
data = pd.read_csv('heart.csv')
data.head()

# Shape of the dataframe: (rows, columns)
data.shape

# Number of non-null records per column
data.count()

# Data type of each column
data.dtypes

# Python type of the loaded dataset object
type(data)

# Main summary statistics, transposed so each dataframe column becomes a row
data.describe().T

# Data profiling report
profile = pandas_profiling.ProfileReport(data)
profile

# Annotated heatmap of the pairwise correlations between the dataframe columns.
# The figure size is set through rcParams, so it stays in effect for later figures
# until it is overridden again.
plt.rcParams.update({'figure.figsize': (20, 15)})

heart_corr = data.corr()
sns.heatmap(heart_corr, cmap='Wistia', annot=True)
plt.title('Heatmap for the Dataset', fontsize=20)
plt.show()

# trestbps vs target
# Box plot of the 'trestbps' distribution for each value of 'target'.
plt.rcParams['figure.figsize'] = (12, 9)
# x and y are given by keyword: seaborn >= 0.12 accepts only `data` positionally,
# so passing the two Series positionally raises TypeError on current releases.
sns.boxplot(x='target', y='trestbps', data=data, palette='viridis')
plt.title('Relación entre tresbps y target', fontsize = 20)
plt.show()

# cholesterol vs target
# Violin plot of the 'chol' distribution for each value of 'target'.
plt.rcParams['figure.figsize'] = (12, 9)
# x and y are given by keyword: seaborn >= 0.12 accepts only `data` positionally,
# so passing the two Series positionally raises TypeError on current releases.
sns.violinplot(x='target', y='chol', data=data, palette='colorblind')
plt.title('Relación entre Cholestrol y Target', fontsize = 20, fontweight = 30)
plt.show()

# Relationship between sex and target
# Boxen (letter-value) plot of 'sex' for each value of 'target'.
# x and y are given by keyword: seaborn >= 0.12 accepts only `data` positionally,
# so passing the two Series positionally raises TypeError on current releases.
sns.boxenplot(x='target', y='sex', data=data, palette='Set3')
plt.title('Relacion entre Sex and target', fontsize = 20, fontweight = 30)
plt.show()

#https://seaborn.pydata.org/generated/seaborn.boxenplot.html

# Age vs chol
# Scatter plot of 'chol' (y axis) against 'age' (x axis).
sns.scatterplot(x="age", y="chol", data=data)
# The title names the two variables actually plotted here.
plt.title('Relación entre Age y Chol', fontsize = 20, fontweight = 30)
# Render the figure explicitly, as the other plots in this file do.
plt.show()

# Load the video-games file (path is relative to the current working directory)
vg_df = pd.read_csv('Video_Games.csv')

# First rows of the dataframe
vg_df.head()

# Shape of the dataframe: (rows, columns)
vg_df.shape

# Number of non-null records per column
vg_df.count()

# Data type of each column
vg_df.dtypes

# Main summary statistics, transposed so each dataframe column becomes a row
vg_df.describe().T

# Data profiling report (rebinds `profile`, replacing any earlier report object)
profile = pandas_profiling.ProfileReport(vg_df)
profile

# Correlations
# Annotated heatmap of the pairwise correlations between the numeric columns.
plt.figure(figsize=(12, 8))

# Restrict to numeric columns before correlating: since pandas 2.0, corr() no
# longer drops non-numeric columns silently and raises on them instead.
# (Genre and Rating are presumably text; they are used as categories elsewhere.)
vg_corr = vg_df.select_dtypes(include='number').corr()
sns.heatmap(vg_corr,
            xticklabels = vg_corr.columns.values,
            yticklabels = vg_corr.columns.values,
            annot = True)
# Render explicitly instead of relying on a trailing ';' to silence notebook output.
plt.show()

# Example: categorical vs. categorical — contingency table of Genre against Rating
pd.crosstab(vg_df['Genre'], vg_df['Rating'])

# Same table with relative values (normalized over all cells)
pd.crosstab(vg_df['Genre'], vg_df['Rating'], normalize=True)

# Numeric vs. categorical analysis: aggregates of numeric columns per Genre
by_genre = vg_df.groupby('Genre')
by_genre['Global_Sales'].mean()

by_genre['Critic_Score'].mean()

eu_sales_by_genre = by_genre['EU_Sales'].mean()
eu_sales_by_genre

# Same per-genre EU sales means, sorted in descending order
eu_sales_by_genre.sort_values(ascending=False)

by_genre['Critic_Score'].describe()

# Box plot of Critic_Score for each Genre, drawn on a new 20x20 figure
plt.figure(figsize=(20, 20))
sns.boxplot(data=vg_df, x='Genre', y='Critic_Score')
plt.show()

import pandas as pd
# Load the Pokemon file, using the first CSV column as the row index
pokemon = pd.read_csv("Pokemon.csv", index_col=0)

# First rows of the dataframe
pokemon.head()

# Shape of the dataframe: (rows, columns)
pokemon.shape

# Number of non-null records per column
pokemon.count()

# Data type of each column
pokemon.dtypes

# Main summary statistics, transposed so each dataframe column becomes a row
pokemon.describe().T

# Attack vs. Defense, drawn twice: as a scatter plot and as a hexagonal-bin plot
attack_defense_axes = {'x': 'Attack', 'y': 'Defense'}
pokemon.plot.scatter(**attack_defense_axes)

pokemon.plot.hexbin(gridsize=15, **attack_defense_axes)

# Mean Attack and Defense for every (Legendary, Generation) group.
# The two stat columns are selected before aggregating: calling .mean() on the
# whole grouped frame first would also try to average any text columns, which
# raises TypeError on pandas >= 2.0 (and does needless work on older versions).
pokemon_stats_legendary = pokemon.groupby(['Legendary', 'Generation'])[['Attack', 'Defense']].mean()
pokemon_stats_legendary

# Stacked bar chart: one bar per group, Attack and Defense stacked on top of each other
pokemon_stats_legendary.plot.bar(stacked=True)

# Build another dataset: mean of the six base stats for each Generation.
# The stat columns are selected before aggregating: calling .mean() on the whole
# grouped frame first would also try to average any text columns, which raises
# TypeError on pandas >= 2.0 (and does needless work on older versions).
stat_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
pokemon_stats_by_generation = pokemon.groupby('Generation')[stat_columns].mean()
pokemon_stats_by_generation

# Line chart with one line per stat across generations
pokemon_stats_by_generation.plot.line()