import datetime

import matplotlib
import matplotlib.style
import numpy as np
import pandas as pd
# display graphs inline
%matplotlib inline
# Let pandas show up to 15 columns and 400 characters per line before it
# truncates or wraps the printed output.
pd.set_option('display.max_columns', 15)
# 'display.line_width' was deprecated and later removed from pandas;
# 'display.width' is the option that replaced it.
pd.set_option('display.width', 400)
# Make graphs prettier. The pandas 'display.mpl_style' option no longer
# exists, so use matplotlib's own 'ggplot' style sheet instead. This must run
# before the rc() calls below, because applying a style sheet overwrites the
# figure and font settings it defines.
matplotlib.style.use('ggplot')
# Make the figures and the fonts bigger.
matplotlib.rc('figure', figsize=(14, 7))
# 'normal' is not a font family name and only produces font-fallback warnings;
# name a real generic family instead.
matplotlib.rc('font', family='sans-serif', weight='bold', size=22)
# Load the 2012 counts, indexed by date (dates in the file are day-first),
# and keep only the counters of interest.
bike_data_2012 = pd.read_csv(
    "./2012.csv",
    encoding='latin1',
    sep=',',
    index_col='Date',
    parse_dates=True,
    dayfirst=True,
)
bike_data_2012 = bike_data_2012[['Berri 1', 'Maisonneuve 1', 'Rachel1']]
# Shift every 2012 date forward by 366 days (2012 is a leap year) so the 2012
# rows land on 2013 dates and both years line up on one index when combined.
bike_data_2012.index = bike_data_2012.index + datetime.timedelta(days=366)

# Load the 2013 counts the same way; note the column names differ from 2012.
bike_data_2013 = pd.read_csv(
    "./2013.csv",
    encoding='latin1',
    sep=',',
    index_col='Date',
    parse_dates=True,
    dayfirst=True,
)
bike_data_2013 = bike_data_2013[['Berri1', 'CSC', 'Mais1', 'Rachel1']]

# One column per year for the 'Rachel1' counter. Building the frame from a
# dict aligns the two series on the union of their dates.
rachel = pd.DataFrame({
    '2012': bike_data_2012['Rachel1'],
    '2013': bike_data_2013['Rachel1'],
})
rachel.plot()
<matplotlib.axes.AxesSubplot at 0x4168190>
# Tag each row with the day of the week of its date index
# (an integer, as shown in the preview).
bike_data['weekday'] = bike_data.index.dayofweek
bike_data.head()
Date | Berri 1 | Côte-Sainte-Catherine | Maisonneuve 1 | weekday |
---|---|---|---|---|
2012-01-01 | 35 | 0 | 38 | 6 |
2012-01-02 | 83 | 1 | 68 | 0 |
2012-01-03 | 135 | 2 | 104 | 1 |
2012-01-04 | 144 | 1 | 116 | 2 |
2012-01-05 | 197 | 2 | 124 | 3 |
5 rows × 4 columns
# Total count per counter for each weekday number. Calling .sum() directly
# gives the same result as passing np.sum to aggregate(), without the
# warning newer pandas versions emit for NumPy reducers.
counts_by_day = bike_data.groupby('weekday').sum()
counts_by_day
weekday | Berri 1 | Côte-Sainte-Catherine | Maisonneuve 1 |
---|---|---|---|
0 | 134298 | 60329 | 90051 |
1 | 135305 | 58708 | 92035 |
2 | 152972 | 67344 | 104891 |
3 | 160131 | 69028 | 111895 |
4 | 141771 | 56446 | 98568 |
5 | 101578 | 34018 | 62067 |
6 | 99310 | 36466 | 55324 |
7 rows × 3 columns
# Replace the numeric weekday index with readable day names (assigned by
# position, so the rows must already be ordered 0..6), then chart the
# 'Berri 1' totals as bars.
weekday_names = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]
counts_by_day.index = weekday_names
berri_by_day = counts_by_day['Berri 1']
berri_by_day.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0x4590f90>
# Line plot of the 'Berri 1' counts against the date index.
berri_counts = bike_data['Berri 1']
berri_counts.plot()
<matplotlib.axes.AxesSubplot at 0x57c2650>
# Load the 2012 weather observations, indexed by their timestamp.
weather_data = pd.read_csv(
    'weather_2012.csv',
    parse_dates=True,
    index_col='Date/Time',
)
# Average the temperature readings within each calendar day. The old
# resample(..., how='mean') keyword was removed from pandas; the aggregation
# is now a method call on the resampler. The assignment aligns the daily
# means with bike_data's date index.
daily_mean_temp = weather_data['Temp (C)'].resample('D').mean()
bike_data['mean temp'] = daily_mean_temp
bike_data.head()
Date | Berri 1 | Côte-Sainte-Catherine | Maisonneuve 1 | weekday | mean temp |
---|---|---|---|---|---|
2012-01-01 | 35 | 0 | 38 | 6 | 0.629167 |
2012-01-02 | 83 | 1 | 68 | 0 | 0.041667 |
2012-01-03 | 135 | 2 | 104 | 1 | -14.416667 |
2012-01-04 | 144 | 1 | 116 | 2 | -13.645833 |
2012-01-05 | 197 | 2 | 124 | 3 | -6.750000 |
5 rows × 5 columns
# Preview the first five weather observations.
weather_data.head(5)
Date/Time | Temp (C) | Dew Point Temp (C) | Rel Hum (%) | Wind Spd (km/h) | Visibility (km) | Stn Press (kPa) | Weather |
---|---|---|---|---|---|---|---|
2012-01-01 00:00:00 | -1.8 | -3.9 | 86 | 4 | 8.0 | 101.24 | Fog |
2012-01-01 01:00:00 | -1.8 | -3.7 | 87 | 4 | 8.0 | 101.24 | Fog |
2012-01-01 02:00:00 | -1.8 | -3.4 | 89 | 7 | 4.0 | 101.26 | Freezing Drizzle,Fog |
2012-01-01 03:00:00 | -1.5 | -3.2 | 88 | 6 | 4.0 | 101.27 | Freezing Drizzle,Fog |
2012-01-01 04:00:00 | -1.5 | -3.3 | 88 | 7 | 4.8 | 101.23 | Fog |
5 rows × 7 columns
# Plot the 'Berri 1' counts and the daily mean temperature, one panel each.
columns_to_compare = ['Berri 1', 'mean temp']
bike_data[columns_to_compare].plot(subplots=True)
array([<matplotlib.axes.AxesSubplot object at 0x5316c50>, <matplotlib.axes.AxesSubplot object at 0x5d43c10>], dtype=object)
# Flag each weather observation whose description mentions 'Rain'. Casting to
# float (True -> 1.0, False -> 0.0, missing stays NaN) keeps the daily mean
# well defined even if some 'Weather' entries are missing.
mentions_rain = weather_data['Weather'].str.contains('Rain').astype(float)
# Daily mean of the flag = fraction of that day's observations with rain.
# The old resample(..., how='mean') keyword was removed from pandas; the
# aggregation is now a method call on the resampler.
bike_data['Rain'] = mentions_rain.resample('D').mean()
# Plot the 'Berri 1' counts and the rain fraction, one panel each.
bike_data[['Berri 1', 'Rain']].plot(subplots=True)
array([<matplotlib.axes.AxesSubplot object at 0x6353ed0>, <matplotlib.axes.AxesSubplot object at 0x682c590>], dtype=object)