from google.colab import drive
import os
# Mount Google Drive inside the Colab runtime at /content/gdrive.
drive.mount('/content/gdrive')
# Set the working directory to the Drive root so relative paths resolve there.
import os
# Print the current working directory before changing it.
print(os.getcwd())
os.chdir("/content/gdrive/My Drive")

import seaborn as sns
import matplotlib.pyplot as plt
# Load seaborn's example "tips" dataset and preview its first rows.
tips = sns.load_dataset('tips')
tips.head()

# Histogram of the bill totals. `sns.distplot` is deprecated in seaborn, so the
# plot uses `sns.histplot` (already used below); kde=False keeps bars only.
# NOTE: histplot picks its bin count with a different default rule than
# distplot did, so bin edges may differ slightly from older output.
sns.histplot(tips['total_bill'], kde=False)
plt.show()

# Same distribution, with one colour per value of the `sex` column.
sns.histplot(data=tips, x='total_bill', hue='sex')
plt.title('Histograma de gasto por sexo')
plt.show()

import plotly.express as px
# Plotly's bundled copy of the tips data, drawn as a histogram of total_bill
# (nbins=10).
df = px.data.tips()
fig = px.histogram(df, x="total_bill", nbins=10)

# Cosmetic layout: white background, a grey x-axis line with outside ticks
# every 10 units, explicit margins and a fixed 800x600 canvas.
fig.update_layout(
    title="Comportamiento de total_bill",
    xaxis={
        'showgrid': False,
        'showline': True,
        'linecolor': 'rgb(102, 102, 102)',
        'tickfont_color': 'rgb(102, 102, 102)',
        'showticklabels': True,
        'dtick': 10,
        'ticks': 'outside',
        'tickcolor': 'rgb(102, 102, 102)',
    },
    margin={'l': 140, 'r': 40, 'b': 50, 't': 80},
    legend={
        'font_size': 10,
        'yanchor': 'middle',
        'xanchor': 'right',
    },
    width=800,
    height=600,
    paper_bgcolor='white',
    plot_bgcolor='white',
    hovermode='closest',
)

import pandas as pd
# Read the semicolon-separated accidents file from the current working directory.
df = pd.read_csv('accidents.csv', sep=";")
type(df)

# Report the (rows, columns) shape, then preview the first records.
print(df.shape)
df.head()

# Task: group the available data by month and draw a line plot of accidents
# over time. Has the number of accidents increased over the last year and a half?
df['DATE'] = pd.to_datetime(df['DATE'])
# Monthly period of each record, used as the grouping key.
accident_month = df['DATE'].dt.to_period('M')
# Number of rows (accidents) recorded in each month.
monthly_accidents = df.groupby(accident_month).size()
monthly_accidents.plot(kind='line')

# Parse the TIME column and keep only the hour component in a new HOUR column.
df['TIME'] = pd.to_datetime(df['TIME'])
df['HOUR'] = df['TIME'].dt.hour

# Row count for every (BOROUGH, HOUR) pair, held in a single 'count' column.
hourly_counts = df.groupby(['BOROUGH', 'HOUR']).size()
df1 = hourly_counts.to_frame(name='count')
df1
# Move the BOROUGH/HOUR index levels back into ordinary columns.
df1 = df1.reset_index()
df1.head(10)

# Count accidents per (borough, hour) and flatten the result into the columns
# BOROUGH, HOUR and count.
df1 = pd.DataFrame({'count': df.groupby(['BOROUGH', 'HOUR']).size()})
df1 = df1.reset_index()

# The grid facets by column, so the ordering belongs in `col_order` and must be
# actual values (the original passed the uncalled `unique` method as
# `row_order`). Boroughs come from df1 so only groups that were counted appear.
borough_order = list(df1['BOROUGH'].unique())
# A fixed bar order keeps every facet's x positions aligned on the same hours,
# even when a borough has no records for some hour.
hour_order = sorted(df1['HOUR'].unique())

chart = sns.FacetGrid(df1, col='BOROUGH', margin_titles=True, col_wrap=3, aspect=2, col_order=borough_order)
chart.map(sns.barplot, 'HOUR', 'count', order=hour_order)

# Keep only the two columns needed for the per-borough tally.
df_prueba = df.loc[:, ['DATE', 'BOROUGH']]
# Non-null DATE entries per borough, exposed under the column name 'Frecuencia'.
pie_borough = (
    df_prueba.groupby('BOROUGH')
    .count()
    .rename(columns={'DATE': 'Frecuencia'})
)
pie_borough

# Borough names (the index of pie_borough) label the wedges.
labels = pie_borough.index
print(labels)
pie, ax = plt.subplots(figsize=[10, 6])
# plt.pie requires 1-D data, so pass the 'Frecuencia' column instead of the
# whole DataFrame. `explode` is sized from the data rather than assuming
# exactly 5 boroughs, which would raise on any other count.
fig = plt.pie(x=pie_borough['Frecuencia'], autopct="%.1f%%", labels=labels,
              explode=[0.05] * len(pie_borough), pctdistance=0.5)
# The trailing semicolon suppresses the echoed return value in a notebook cell.
plt.title("Distribucion de barrios", fontsize=14);


pie_borough

import plotly.express as px
# Interactive pie chart: slice sizes come from 'Frecuencia', slice names from
# the index of pie_borough.
fig = px.pie(
    pie_borough,
    values='Frecuencia',
    names=pie_borough.index,
    title='Piechart Boroughs',
)
fig.show()

import plotly.express as px
# Same pie chart, with percentage and label text drawn inside each slice.
fig = px.pie(
    data_frame=pie_borough,
    names=pie_borough.index,
    values='Frecuencia',
    title='Piechart Boroughs',
)
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()

# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is loaded on every SciPy version. This still binds the
# top-level `scipy` name used below.
import scipy.stats

# Descriptive statistics of the monthly accident counts.
scipy.stats.describe(monthly_accidents)  # Summary statistics

scipy.stats.gmean(monthly_accidents)  # Geometric mean

scipy.stats.hmean(monthly_accidents)  # Harmonic mean

scipy.stats.trim_mean(monthly_accidents, 0.1)  # Trimmed mean (10% removed from each tail)

scipy.stats.mode(monthly_accidents)  # Mode

monthly_accidents

scipy.stats.describe(monthly_accidents)  # Summary statistics again (includes the variance)

scipy.stats.variation(monthly_accidents)  # Coefficient of variation

scipy.stats.iqr(monthly_accidents)  # Interquartile range (IQR)

scipy.stats.sem(monthly_accidents)  # Standard error of the mean

scipy.stats.skew(monthly_accidents)  # Skewness coefficient

scipy.stats.kurtosis(monthly_accidents)  # Kurtosis coefficient

# Histogram of the monthly accident counts.
plt.hist(monthly_accidents)

import seaborn as sns
# Box plot of the same counts along the x axis, using the whitegrid theme.
sns.set_theme(style="whitegrid")
ax = sns.boxplot(x=monthly_accidents)
# Title and axis label are set on the axes that boxplot returned.
ax.set_title('Boxplot de accidentes mensuales')
ax.set_xlabel('Accidentes')

# Reload seaborn's example "tips" dataset and preview its first rows.
tips = sns.load_dataset('tips')
tips.head()

# Box plot of total_bill for each value of `sex`.
sns.set_theme(style="whitegrid")
ax = sns.boxplot(data=tips, x='sex', y='total_bill')
# The title and x label previously described the monthly-accidents plot
# (copy-paste leftover); they now describe what this figure shows.
plt.title('Boxplot de total_bill por sexo')
plt.xlabel('Sexo')

import plotly.express as px
# Interactive box plot of total_bill for each value of `sex`.
fig = px.box(data_frame=tips, x="sex", y="total_bill")
fig.show()

# total_bill per day, with separate boxes coloured by the `smoker` column.
fig = px.box(data_frame=tips, x="day", y="total_bill", color="smoker")
fig.show()