# Import Pandas to read file
import pandas as pd
# Load the traffic dataset from the working directory and print a summary
# of its columns, dtypes and non-null counts.
traffic_flow = pd.read_csv(
    filepath_or_buffer='Metro_Interstate_Traffic_Volume.csv',
)
traffic_flow.info()
# Import Matplotlib for plotting; '%matplotlib inline' renders the figures inside the Jupyter notebook
import matplotlib.pyplot as plt
%matplotlib inline
# Distribution of traffic volume over the whole dataset: a histogram first,
# then the summary statistics (count, mean, std, quartiles, min/max).
traffic_flow['traffic_volume'].plot(kind='hist')
traffic_flow['traffic_volume'].describe()
# Parse the timestamps so the `.dt` accessors can be used for filtering.
traffic_flow['date_time'] = pd.to_datetime(traffic_flow['date_time'])

# Hour of day (0-23) of every record, computed once and reused for both masks.
hour_of_day = traffic_flow['date_time'].dt.hour

# Day time: 07:00 (inclusive) to 19:00 (exclusive).
# `.copy()` makes the subset an independent frame, so columns added to it
# later do not trigger SettingWithCopyWarning.
day_time = traffic_flow[(hour_of_day >= 7) & (hour_of_day < 19)].copy()

# Night time: 19:00 (inclusive) to 07:00 (exclusive). The interval wraps
# around midnight, so it is the union of the evening hours (19-23) and the
# early-morning hours (0-6); checking only `>= 19` would drop 00:00-06:59.
night_time = traffic_flow[(hour_of_day >= 19) | (hour_of_day < 7)].copy()

day_time.head()
night_time.head()
# Compare the day-time and night-time traffic volume distributions side by
# side. Both panels share the same axis limits so they can be compared
# directly.
plt.figure(figsize=(10, 10))

plt.subplot(1, 2, 1)
plt.hist(day_time['traffic_volume'])
plt.title('Day Time Traffic Volume')
plt.xlabel('Traffic Volume')
plt.ylabel('Frequency')
plt.xlim([0, 7500])
plt.ylim([0, 8000])

plt.subplot(1, 2, 2)
plt.hist(night_time['traffic_volume'])
plt.title('Night Time Traffic Volume')
plt.xlabel('Traffic Volume')
plt.ylabel('Frequency')
plt.xlim([0, 7500])
plt.ylim([0, 8000])

# Finish this figure here. Without it the second subplot stays the current
# axes and the next plt.plot()/plt.scatter() call would draw into it when the
# file is run as a plain script.
plt.show()

# Summary statistics of the full (day + night) traffic volume series.
traffic_flow['traffic_volume'].describe()
# Average day-time traffic per calendar month (1 = January ... 12 = December).
# `assign` returns a new frame, so this never writes into a slice of
# `traffic_flow` (no SettingWithCopyWarning).
day_time = day_time.assign(month=day_time['date_time'].dt.month)

# numeric_only=True: the frame also holds text columns (e.g. weather_main),
# for which a mean is undefined; pandas >= 2.0 raises TypeError without it.
by_month = day_time.groupby('month').mean(numeric_only=True)
by_month['traffic_volume']

plt.plot(by_month['traffic_volume'])
plt.show()
# Average day-time traffic per day of the week (0 is Monday, 6 is Sunday).
# `assign` returns a new frame, so this never writes into a slice of
# `traffic_flow` (no SettingWithCopyWarning).
day_time = day_time.assign(dayofweek=day_time['date_time'].dt.dayofweek)

# numeric_only=True: the frame also holds text columns (e.g. weather_main),
# for which a mean is undefined; pandas >= 2.0 raises TypeError without it.
by_dayofweek = day_time.groupby('dayofweek').mean(numeric_only=True)
by_dayofweek['traffic_volume']  # 0 is Monday, 6 is Sunday

plt.plot(by_dayofweek['traffic_volume'])
plt.show()
# Average day-time traffic per hour, separately for business days and
# weekends. Relies on the `dayofweek` column already present on `day_time`.
# `assign` returns a new frame, so this never writes into a slice of
# `traffic_flow` (no SettingWithCopyWarning).
day_time = day_time.assign(hour=day_time['date_time'].dt.hour)

# Filter first, then copy: only the selected rows are duplicated instead of
# copying the whole frame just to discard part of it.
bussiness_days = day_time[day_time['dayofweek'] <= 4].copy()  # 4 == Friday
weekend = day_time[day_time['dayofweek'] >= 5].copy()  # 5 == Saturday

# numeric_only=True: the frames also hold text columns (e.g. weather_main),
# for which a mean is undefined; pandas >= 2.0 raises TypeError without it.
by_hour_business = bussiness_days.groupby('hour').mean(numeric_only=True)
by_hour_weekend = weekend.groupby('hour').mean(numeric_only=True)

print(by_hour_business['traffic_volume'])
print(by_hour_weekend['traffic_volume'])
# Hourly average traffic volume: business days (left) vs. weekends (right).
# The x-axis spans the day-time hours 7-18; the y-limits differ per panel.
plt.figure(figsize=(8, 3))

plt.subplot(1, 2, 1)
plt.plot(by_hour_business['traffic_volume'])
plt.title('Business Days Traffic Volume')
plt.xlim([7, 18])
plt.ylim([1500, 6300])

plt.subplot(1, 2, 2)
plt.plot(by_hour_weekend['traffic_volume'])
plt.title('WeekEnds Traffic Volume')
plt.xlim([7, 18])
plt.ylim([1500, 4500])

# Keep the tick labels of the two narrow panels from overlapping.
plt.tight_layout()

# Finish this figure here. Without it the second subplot stays the current
# axes and the next plotting call would draw into it when the file is run as
# a plain script.
plt.show()
# Pearson correlation between day-time traffic volume and each numeric
# weather measurement. All coefficients are computed first, then reported.
# NOTE(review): correlations against weather_main / weather_description were
# left disabled in the original -- presumably because those columns are not
# numeric; confirm before re-enabling.
corr_temp = day_time['traffic_volume'].corr(day_time['temp'], method='pearson')
corr_snow = day_time['traffic_volume'].corr(day_time['snow_1h'], method='pearson')
corr_clouds = day_time['traffic_volume'].corr(day_time['clouds_all'], method='pearson')
corr_rain = day_time['traffic_volume'].corr(day_time['rain_1h'], method='pearson')

print("Temp vs Traffic Volume:", corr_temp)
print("Snow vs Traffic Volume:", corr_snow)
print("Clouds vs Traffic Volume:", corr_clouds)
print("Rain vs Traffic Volume:", corr_rain)
# Scatter plot of day-time traffic volume (y) against temperature (x).
plt.scatter(day_time['temp'], day_time['traffic_volume'])
plt.xlabel('Temperature')
plt.ylabel('Traffic Volume')
plt.title('Traffic Flow vs. Temp')
plt.show()
# Average day-time traffic volume per weather category, at two levels of
# detail: the `weather_main` grouping and the `weather_description` grouping.
# numeric_only=True: the column that is not the group key is still text, and
# a mean is undefined for it; pandas >= 2.0 raises TypeError without it.
by_weather_main = day_time.groupby('weather_main').mean(numeric_only=True)
by_weather_description = day_time.groupby('weather_description').mean(numeric_only=True)

# Series.plot reuses the current axes when one exists, so each chart is shown
# (and thereby finished) before the next one is drawn; otherwise the two bar
# charts would be overlaid when the file is run as a plain script.
by_weather_main['traffic_volume'].plot.barh()
plt.show()

# Taller figure for the second chart, which groups by `weather_description`.
by_weather_description['traffic_volume'].plot.barh(figsize=(6, 9))
plt.show()