#!/usr/bin/env python
# coding: utf-8

# # Student Alcohol Consumption
# ## by Nuttaphat Arunoprayoch

# Student life is messy and stressful, and eventually it might lead to
# depression. However, we all know that cracking open a cold one with your
# lads always helps. In this analysis, I, for no particular reason, would
# like to welcome you all to dive into alcohol consumption by students.
# Enjoy :)

# ## Dataset
# This dataset is provided and maintained by UCI. The data were obtained in
# a survey of secondary-school students taking math courses.

# In[105]:


import warnings

# Data Manipulation
import pandas as pd
import numpy as np
# NOTE(review): upstream has renamed this package to `ydata-profiling`;
# confirm the installed environment still provides `pandas_profiling`.
import pandas_profiling

# Data Visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from pywaffle import Waffle

try:
    # `get_ipython` only exists inside an IPython/Jupyter session, where this
    # magic switches matplotlib to inline rendering.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Executed as a plain Python script: there is no IPython shell to
    # configure, so keep matplotlib's default backend.
    pass

mpl.style.use(['ggplot'])  # optional: for ggplot-like style
sns.set(rc={'figure.figsize': (11.7, 8.27)})


# In[140]:


# Hide Warning
warnings.filterwarnings('ignore')


# In[5]:


# Dalc - workday alcohol consumption (numeric: from 1 - very low to 5 - very high)
# Walc - weekend alcohol consumption (numeric: from 1 - very low to 5 - very high)
df = pd.read_csv('student-mat.csv')
df.head()


# In[37]:


# Expand to see the whole profile
pandas_profiling.ProfileReport(df)


# ## Visualization

# In[34]:


# Checking normal distribution: one histogram per numeric column.
df.hist()


# Even though some of the data are not normally distributed, this is just
# for fun, why so serious? :)
#
# ### Gender and Alcohol Consumption

# In[92]:


# Gender on Alcohol Consumption
# Average workday (Dalc) and weekend (Walc) scores for each value of `sex`;
# the same table feeds both the bar chart and the displayed summary.
mean_by_sex = df.groupby('sex')[['Dalc', 'Walc']].mean()

ax = mean_by_sex.plot.bar()
ax.set(
    title='Gender on Alcohol Consumption',
    ylabel='Average of Alcohol Consumption',
    xlabel='Gender',
)
mean_by_sex.head()


# As can be observed, male students consumed more alcohol than female
# students on both weekdays and weekends, but the difference does not seem
# significant.
#
# ** Dalc - workday alcohol consumption (numeric: from 1 - very low to 5 - very high)
#
# ** Walc - weekend alcohol consumption (numeric: from 1 - very low to 5 - very high)
#
# ### Parent's cohabitation status and Alcohol Consumption

# In[93]:


# Parent's cohabitation status on Alcohol Consumption
# Average workday (Dalc) and weekend (Walc) scores for each `Pstatus` value,
# computed once and reused for the chart and the displayed table.
mean_by_pstatus = df.groupby('Pstatus')[['Dalc', 'Walc']].mean()

ax = mean_by_pstatus.plot.bar()
ax.set_title("Parent's cohabitation status on Alcohol Consumption")
# The bars show group means, so label the axis as an average (not a sum),
# matching the gender chart.
ax.set_ylabel('Average of Alcohol Consumption')
ax.set_xlabel("Parent's cohabitation status")
mean_by_pstatus.head()


# By exploring parents' cohabitation status, it was found that students
# whose parents were living apart (A) had a slightly higher alcohol
# consumption rate than the others. It should be noted that (A) represents
# only 10% of all students, yet it still shows a higher rate. Therefore, it
# can be concluded that students whose parents were living apart consumed
# more alcohol than the students whose parents were living together.
# NOTE(review): no statistical test is run here, so describing the gap as
# "significant" is not supported by this analysis alone.
#
# ### Guardians and Alcohol Consumption

# In[155]:


# `TotalAlc` is used below but is not created anywhere earlier in this
# script, so build it on demand instead of failing with a KeyError.
# NOTE(review): assumed to be workday + weekend consumption (the later
# chart labels it as a level up to 10) - confirm against the notebook.
if 'TotalAlc' not in df.columns:
    df['TotalAlc'] = df['Dalc'] + df['Walc']

# Number of students per guardian type, as a plain dict for the waffle chart.
temp_data = df.groupby('guardian')['TotalAlc'].count()
df_gu_count = temp_data.to_dict()
fig = plt.figure(
    FigureClass=Waffle,
    rows=10,
    values=df_gu_count,
    legend={'loc': 'upper left', 'bbox_to_anchor': (1.1, 1)}
)
plt.title('Students\' Guardians')
print(df_gu_count)


# According to the data collected, most of the students lived with their
# mothers, followed by fathers and others, respectively.

# In[149]:


# Average only the numeric columns: the frame also holds string columns
# (e.g. 'sex', 'Pstatus'), which recent pandas versions refuse to average.
df_gu = df.groupby('guardian').mean(numeric_only=True)
colors_list = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue', 'lightgreen', 'pink']
# Offset ratio for each guardian wedge: pull out the first wedge only,
# whatever the number of guardian groups.
explode_list = [0.1] + [0] * (len(df_gu) - 1)

ax = df_gu['TotalAlc'].plot(kind='pie',
                            figsize=(15, 10),
                            autopct='%1.1f%%',
                            startangle=90,
                            shadow=True,
                            labels=None,         # turn off labels on pie chart
                            pctdistance=1.12,    # the ratio between the center of each pie slice and the start of the text generated by autopct
                            colors=colors_list,  # add custom colors
                            explode=explode_list
                            )
ax.set_title('Alcohol Consumption based on Students\' Guardians')
ax.legend(df_gu.index.tolist())
df_gu.head()


# As can be observed from the pie chart depicted above, the average alcohol
# consumption is almost identical across guardians. However, the students
# whose guardians are fathers had a slightly higher share of alcohol
# consumption (34.3%).
#
# ### Alcohol Consumption with Various Activities

# In[170]:


# `TotalAlc` is grouped on below but is not created anywhere earlier in this
# script, so build it on demand instead of failing with a KeyError.
# NOTE(review): assumed to be workday + weekend consumption (the x-axis
# label below describes a level up to 10) - confirm against the notebook.
if 'TotalAlc' not in df.columns:
    df['TotalAlc'] = df['Dalc'] + df['Walc']

# Mean of each activity-related column at every total-consumption level.
df_abs = df.groupby('TotalAlc')[['absences', 'failures', 'studytime', 'traveltime']].mean()
ax = df_abs.plot(kind='area',
                 alpha=0.3,  # opacity, between 0 and 1
                 stacked=True,
                 figsize=(20, 10),
                 )
ax.set_title('Alcohol Consumption with Various Activities')
ax.set_ylabel('Level of Activities')
ax.set_xlabel('Alcohol Consumption Levels (1-10)')
#