# -*- coding: utf-8 -*-
# Notebook export: heatmaps of the 2013 Montreal bike-counter data drawn with
# Plotly.  Uses urllib2 (Python 2) and the plotly.plotly / plotly.graph_objs
# API.

import datetime
import locale
import urllib2
from contextlib import closing

import numpy as np
import pandas as pd

import plotly
import plotly.plotly as py
import plotly.tools as tls
from plotly.graph_objs import Data, Layout, Figure
from plotly.graph_objs import Heatmap, Contour
from plotly.graph_objs import XAxis, YAxis, ColorBar, Margin
from plotly.graph_objs import Font, Contours
from plotly.graph_objs import Annotation  # needed by make_anno() below

plotly.__version__  # notebook cell output: installed plotly version

# Month names produced by strftime('%B') below follow this locale.
locale.setlocale(locale.LC_ALL, 'fr_CA.utf8')

# tls.embed()  (four such calls are disabled in the original notebook)

# Download the 2013 counts; the handle is closed as soon as the CSV is parsed.
url_csv = ('http://donnees.ville.montreal.qc.ca/storage/f/'
           '2014-01-20T20%3A48%3A50.296Z/2013.csv')
with closing(urllib2.urlopen(url_csv)) as fichier_csv:
    df = pd.read_csv(fichier_csv)

df.head()   # show the first 5 rows of the table
df.tail()   # show the last 5 rows of the table
df.shape

# Sites' full names ('<br>' is the line break understood by Plotly labels)
sites = [
    'Berri/<br>de Maisonneuve',
    'Côte-Ste-Catherine<br>(parc Beaubien)',
    'de Maisonneuve/<br>Berri',
    'de Maisonneuve/<br>Peel',
    'du Parc/<br>Duluth',
    'Pierre-Dupuy<br>(Habitat 67)',
    'Rachel/<br>Marquette',
    'Laurier<br>(métro)',
]

# Row-wise sum of the table: one total per date.  numeric_only=True states
# explicitly that non-numeric columns (e.g. the date strings) are skipped.
z = np.array(df.sum(axis=1, numeric_only=True).tolist())
z.shape  # 1d array, one entry per date


def dates_a_jourXmois(X, zz):
    """Reshape a per-date 1d series into a (day-of-month x month) 2d array.

    Parameters:
        X:  dataframe with a 'Date' column of 'dd/mm/YYYY' strings.  The code
            assumes the dates are in chronological order with no gaps inside
            a month (not checked).
        zz: 1d sequence of values, one per row of X.

    Returns:
        (the_months, the_days, Z) where the_months holds the month names in
        order of first appearance, the_days is [1, ..., 31] and Z has shape
        (31, number of months); days that do not exist in a month stay NaN.
    """
    dates = X['Date'].tolist()

    # (1) Month name of every date, then the index where each month starts.
    months = np.array([
        datetime.datetime.strptime(date, '%d/%m/%Y').strftime('%B')
        for date in dates
    ])
    # np.unique sorts alphabetically, so re-sort the first-occurrence indices
    # to get the months back in the order they appear in the data.
    _, ind_tmp = np.unique(months, return_index=True)
    the_months_ind = np.sort(ind_tmp).tolist()
    the_months = months[the_months_ind]
    N_the_months = len(the_months)

    # (2) Days of the month, always 1..31.
    N_the_days = 31
    the_days = np.arange(1, N_the_days + 1)

    # (3) NaN-filled output, filled one month (column) at a time.
    Z = np.empty((N_the_days, N_the_months))
    Z.fill(np.nan)
    fill_i = the_months_ind + [len(months)]   # month start indices + end
    for i in range(N_the_months):
        i0, i1 = fill_i[i], fill_i[i + 1]     # rows of X belonging to month i
        Z[0:i1 - i0, i] = zz[i0:i1]

    return (the_months, the_days, Z)


# Call function, get coordinates for our plot
les_mois, les_jours, Z_jourXmois = dates_a_jourXmois(df, z)
les_mois.shape, les_jours.shape, Z_jourXmois.shape

data = Data([
    Heatmap(
        x=les_mois,          # x-axis labels
        y=les_jours,         # y-axis labels
        z=Z_jourXmois,       # 2D array
        scl='YIGnBu',        # N.B. select colormap
        reversescl=True,     # N.B. reverse color ordering
        colorbar=ColorBar(
            title='Nombre de cyclistes par jour',
            titleside='right',
        )
    )
])

# URL of the data source
# NOTE(review): `url` is not referenced by the annotation text visible here.
url = 'http://donnees.ville.montreal.qc.ca/dataset/velos-comptage'


def make_anno(text='', x=1, y=1):
    """Return an arrow-less Annotation placed at paper coordinates (x, y)."""
    return Annotation(
        text=text,             # annotation text
        x=x,                   # position's x-coord
        y=y,                   # and y-coord
        xref='paper',          # use paper coords
        yref='paper',          # for both coordinates
        font=Font(size=14),    # font size for the annotation
        showarrow=False,       # remove arrow
    )


title = ("Totaux journaliers de l'affluence cyclistes<br>"
         "sur 8 sites de comptage montréalais en 2013")
# English: "Bike traffic daily totals across 8 Montreal sites in 2013"

anno_text = ("Source et info: "
             "Données ouvertes de la Ville de Montréal")

plot_width = 650    # plot width in pixels
plot_height = 800   # plot height in pixels

layout = Layout(
    title=title,                              # plot's title
    font=Font(
        family='PT Sans Narrow, sans-serif',  # global font
        size=13,
        color='#635F5D'
    ),
    xaxis=XAxis(
        title='Mois',
        showgrid=False                        # remove grid
    ),
    yaxis=YAxis(
        title='Jours du mois',                # y-axis title
        autorange='reversed',                 # N.B. reverse tick ordering
        showgrid=False,                       # remove grid
        autotick=False,                       # custom ticks
        dtick=1                               # showing 1 tick per day
    ),
    autosize=False,                           # custom size
    # Same size as the notebook output frame requested from py.iplot below
    # (the original hard-coded the two values swapped).
    height=plot_height,
    width=plot_width,
    annotations=[make_anno(anno_text, 1, 1.03)]
)

figure = Figure(data=data, layout=layout)

py.iplot(figure, filename='velo-comptage-1',
         width=plot_width, height=plot_height)  # set notebook output frame size

# List of booleans, True for the rows to keep: any date whose month is not
# January, February, March or September.
mois_exclus = ('/01/', '/02/', '/03/', '/09/')
i_AA = [not any(mois in date for mois in mois_exclus) for date in df['Date']]

# Keep only those rows
df_AA = df[i_AA]

df_AA.head()   # dataframe now starts on April 1st
df_AA.tail()

# Row sum of dataframe, as a numpy array.  Non-number columns (e.g. the date
# strings) are left out of the row (axis=1) sum.
z_AA = np.array(df_AA.sum(axis=1, numeric_only=True).tolist())
# The resulting array is 1d, 1 entry per date from Apr to Aug
z_AA.shape


def dates_a_joursemXmois(X, zz):
    """Average a per-date 1d series by (day of the week x month).

    Parameters:
        X:  dataframe with a 'Date' column of 'dd/mm/YYYY' strings.
        zz: 1d sequence of values, one per row of X.

    Returns:
        (the_months, the_wdays, Z) where the_months holds the month names in
        order of first appearance, the_wdays the 7 display labels (Monday
        first) and Z has shape (7, number of months); Z[i, j] is the mean of
        zz over the dates of month j falling on week day i.
    """
    parsed = [datetime.datetime.strptime(i, '%d/%m/%Y')
              for i in X['Date'].tolist()]

    # (1) Month name of every date, then the months in order of appearance
    #     (np.unique sorts alphabetically, hence the re-sort of the indices).
    months = np.array([d.strftime('%B') for d in parsed])
    _, ind_tmp = np.unique(months, return_index=True)
    the_months_ind = np.sort(ind_tmp).tolist()
    the_months = months[the_months_ind]
    N_the_months = len(the_months)

    # (2) Week day of every date as an index, 0 = Monday ... 6 = Sunday.
    #     The index is matched instead of the strftime('%A') name: that name
    #     depends on the locale and never equals the display labels below.
    wday_idx = np.array([d.weekday() for d in parsed])
    the_wdays = np.array(['Lundis', 'Mardis', 'Mercredis', 'Jeudis',
                          'Vendredis', 'Samedis', 'Dimanches'])
    N_the_wdays = len(the_wdays)

    # (3) Mean value for every (week day, month) pair.
    zz = np.asarray(zz)
    Z = np.empty((N_the_wdays, N_the_months))
    for i_month, the_month in enumerate(the_months):
        in_month = months == the_month
        for i_wday in range(N_the_wdays):
            Z[i_wday, i_month] = np.mean(zz[in_month & (wday_idx == i_wday)])

    return (the_months, the_wdays, Z)


# Call conversion function, get coordinates for our next plot
les_mois, les_joursem, Z_moisXjoursem = dates_a_joursemXmois(df_AA, z_AA)

les_mois.shape, les_joursem.shape, Z_moisXjoursem.shape

# Normalize 2D array by max value
Z_moisXjoursem_norm = Z_moisXjoursem / Z_moisXjoursem.max()

Z_moisXjoursem

my_cbartitle = ("Mean bike traffic<br>per week day<br>"
                "norm. by max value<br>(so 1=max value)")

# Make instance of data object
data = Data([
    Heatmap(
        x=les_mois,               # x-axis labels
        y=les_joursem,            # y-axis labels
        z=Z_moisXjoursem_norm,    # 2D array for heatmap
        scl='Greys',              # N.B. try other predefined colormap
        reversescl=True,          # reverse its color ordering
        opacity=0.9,              # a slightly transparent color scale
        colorbar=ColorBar(
            title=my_cbartitle,   # color bar title
            titleside='bottom',   # placed below the color bar
            thickness=25,         # color bar thickness in px
            # autotick=False,     # custom colorbar ticks
            ticks='outside',      # tick outside colorbar
            dtick=0.1             # distance between ticks
        )
    )
])

# Update the figure's height (in px)
layout.update(height=500)

# Update y-axis title and set tick angle
layout['yaxis'].update(title='Days of the weeks', tickangle=-20)

# Update annotation's position
layout['annotations'][0].update(y=1.08)

# Update title
# my_title = "Fig 5.1b: Bike traffic across 8 Montreal sites in 2013"
# my_layout.update(title=my_title)

# Make instance of figure object
figure = Figure(data=data, layout=layout)

# Send figure object to Plotly's server, show result in notebook
py.iplot(figure, filename='s5_cyclists-per-wday')

# Get values of every row of columns 2 and up.  The April-August frame is
# used so that the array really holds the 153 sample days quoted in the
# figures; .iloc replaces the positional use of the removed .ix indexer.
Z_dayXsite = df_AA.iloc[:, 2:].values

# 153 sample days (rows) at 8 sites (columns)
Z_dayXsite.shape

# Compute correlation, rows (i.e. days of the year) are our sample space
Corr_site = np.corrcoef(Z_dayXsite, rowvar=0)

# 8 sites correlated with themselves
Corr_site.shape

# (*) Import Margin
from plotly.graph_objs import Margin

help(Margin)  # help!

Corr_site.min(), Corr_site.max()

# Make data object
my_data = Data([
    Heatmap(
        x=sites,            # sites on both
        y=sites,            # axes
        z=Corr_site,        # correlation as color contours
        zauto=False,        # N.B. overwrite Plotly default color levels
        zmin=0.4,           # value of min color level
        zmax=1,             # value of max color level
        scl='YIOrRd',       # N.B. light yellow-orange-red colormap
        reversescl=True     # N.B. inverse colormap order
    )
])

# Make layout object
my_title = "Fig 5.2a: Bike traffic correlations between different sites in 2013"
my_layout = Layout(
    title=my_title,                        # plot title
    font=Font(family='Georgia, serif',     # global font
              color='#635F5D'),
    autosize=False,                        # turn off autosize
    height=500,                            # plot's height in pixels
    width=600,                             # plot's width in pixels
    margin=Margin(t=100, b=120, r=100, l=100)  # N.B. margin between frame
)                                              # and axis in pixels

# Make Figure object, send to Plotly and show in notebook
my_fig = Figure(data=my_data, layout=my_layout)
py.iplot(my_fig, filename='s5_correlations')


def make_seaborn(x_old, y_old, z_old):
    """Convert labels and a 2d array to a Seaborn-like lower-triangle layout.

    Returns (x_sns, y_sns, z_sns): x_old without its last entry, y_old
    without its first entry and reversed, and a float copy of z_old where
    only the entries strictly below the diagonal are kept (NaN elsewhere),
    flipped upside down.
    """
    x_sns = x_old[0:-1]       # remove last entry in x-coord array
    y_sns = y_old[-1:0:-1]    # remove first entry, reverse order
    m, n = z_old.shape
    below_diag = np.tri(m, n, k=-1, dtype=bool)   # the unique entries
    z_sns = np.flipud(np.where(below_diag, z_old, np.nan))
    return (x_sns, y_sns, z_sns)


# Call conversion function, get coordinates for our next plot
sites_x_sns, sites_y_sns, Corr_site_sns = make_seaborn(sites, sites, Corr_site)

# Check position of NaNs in new 2D array
np.isnan(Corr_site_sns)

scl_sns = [[0, "#00008B"],       # color of minimum level (from 'zmin')
           [0.25, "#6A5ACD"],
           [0.5, "#FFE6F8"],
           [0.75, "#C71585"],    # in-between
           [1, "#8B0000"]]       # color of maximum level (from 'zmax')

# Make new instance of data object
my_data = Data([
    Heatmap(
        x=sites_x_sns,     # x-labels
        y=sites_y_sns,     # y-labels
        z=Corr_site_sns,   # 2D array
        scl=scl_sns,       # N.B. custom color scales list of lists
        zauto=False,       # N.B. custom color levels
        zmin=0.3,          # value of min color level
        zmax=1,            # value of max color level
    )
])


def make_axes(tickangle_in):
    """Return axis style options (no grid, no axis line) for a tick angle."""
    return dict(autotick=False,          # custom ticks
                tickangle=tickangle_in,  # rotate tick labels
                showgrid=False,          # remove grid
                showline=False)          # remove axes line


# Tilt the labels of both axes
my_layout.update(xaxis=XAxis(make_axes(90)))
my_layout.update(yaxis=YAxis(make_axes(0)))

# Add an annotation citing the data source
my_anno_text1 = ("Source and info:<br>"
                 "Données ouvertes de la Ville de Montréal")
my_anno1 = make_anno(my_anno_text1, 0.9, 0.875)

# Add an annotation citing start and end of sample
my_anno_text2 = ("Data used:<br>"
                 "April 01, 2013<br>to<br>August 31, 2013")
my_anno2 = make_anno(my_anno_text2, 0.8, 0.55)

# Update 'annotations' in layout object
my_layout.update(annotations=[my_anno1, my_anno2])

# Update title
my_title = "Fig 5.2b: Bike traffic correlations between different sites"
my_layout.update(title=my_title)

# Make new instance of figure object
my_fig = Figure(data=my_data, layout=my_layout)

# Send figure object to Plotly
py.iplot(my_fig, filename='s5_correlations-seaborn')

# (*) Import Contour
from plotly.graph_objs import Contour

help(Contour)

# Column sums of the site columns (columns 2 and up), as a numpy array:
# one total cyclist count per site over the same April-August rows.
z_site = np.array(df_AA.iloc[:, 2:].sum(axis=0).tolist())

# Show list of sites and total cyclist count
zip(sites, z_site)


def convert_in_order(df_in, sites, z_site):
    """Order the sites (and their daily counts) from busiest to quietest.

    Parameters:
        df_in:  dataframe with a 'Date' column of 'dd/mm/YYYY' strings and
                the per-site counts in columns 2 and up.
        sites:  list of site names, one per count column.
        z_site: 1d array of total counts, one per site.

    Returns:
        (dates_in_words, sites_in_order, Z_dayXsite_in_order): the dates
        formatted with '%a, %b %d', the site names sorted by decreasing
        z_site, and the (days x sites) array with its columns in that order.
    """
    dates = df_in['Date'].tolist()

    # Values of every row of columns 2 and up, taken from the dataframe that
    # was passed in (the original read the module-level `df` here).
    Z_dayXsite = df_in.iloc[:, 2:].values

    # (1) Dates in words, formatted with '%a, %b %d'
    dates_in_words = [
        datetime.datetime.strptime(i, '%d/%m/%Y').strftime('%a, %b %d')
        for i in dates
    ]

    # (2) Indices that sort the totals in decreasing order
    ind_in_order = np.argsort(z_site)[::-1]

    # (3) Apply that order to the site names and to the array columns
    sites_in_order = [sites[i] for i in ind_in_order]
    Z_dayXsite_in_order = Z_dayXsite[:, ind_in_order]

    return (dates_in_words, sites_in_order, Z_dayXsite_in_order)


# Get plot coordinates
dates_in_words, sites_in_order, Z_dayXsite_in_order = convert_in_order(
    df_AA, sites, z_site)

# (*) Import Contours
from plotly.graph_objs import Contours

help(Contours)  # run help()

# (*) Import Line
from plotly.graph_objs import Line

help(Line)

my_cbartitle = "Daily cyclist count per site"

# Make instance of the colorbar object
my_colorbar = ColorBar(
    title=my_cbartitle,    # colorbar title
    titleside='right',     # colorbar title at right of colorbar
    thickness=30,          # colorbar thickness in px
    len=0.8,               # colorbar length in plot fraction
    ticks='outside'        # tick outside colorbar
)

# Make instance of the data object
my_data = Data([
    Contour(
        x=sites_in_order,         # sites on the x-axis
        y=dates_in_words,         # dates on the y-axis
        z=Z_dayXsite_in_order,    # 2D array a f(sites,dates)
        scl='Greens',             # choose a color scl from the pre-defined
        reversescl=True,          # and reverse its order
        line=Line(smoothing=1.5,      # N.B. default is 1
                  color='#999999',    # default is black
                  width=1),           # default is 0.5
        colorbar=my_colorbar      # link colorbar object
    )
])

plot_width = 650     # plot width in px
plot_height = 1000   # plot height in px

my_title = ("Fig 5.3: Montral bike traffic daily progression<br>"
            "from the most trafficked site to the least in 2013<br>")

my_xtitle = 'Sites [from the most trafficked to the least in 2013]'

my_layout = Layout(
    title=my_title,                          # plot's title
    font=Font(family='Raleway, sans-serif',  # global font
              color='#635F5D'),
    xaxis=XAxis(title=my_xtitle),            # x-axis title
    yaxis=YAxis(autorange='reversed',        # N.B. start y-axis at top
                autotick=False,              # custom tick
                dtick=7),                    # 1 tick per week
    autosize=False,                          # custom size
    height=plot_height,
    width=plot_width,
    margin=Margin(b=120)                     # increase bottom margin
)

# Package data and layout into a figure object, send it to Plotly
my_fig = Figure(data=my_data, layout=my_layout)
py.iplot(my_fig, filename='s5_cyclist-time-progression',
         width=plot_width, height=plot_height)  # set notebook output frame size

# (*) Import Histogram2d
from plotly.graph_objs import Histogram2d

help(Histogram2d)

# Get 2D index (as a tuple) of the least correlated sites
ind_least_corr = np.unravel_index(Corr_site.argmin(), Corr_site.shape)

# Get 1st site, remove <br> tag from string
# Name and daily counts of the 1st site; the '<br>' line-break tag in the
# name is replaced by a space so the name reads well inside an axis title.
site1 = sites[ind_least_corr[0]].replace('<br>', ' ')
z_day1 = Z_dayXsite[:, ind_least_corr[0]]

# And similarly for the 2nd site
site2 = sites[ind_least_corr[1]].replace('<br>', ' ')
z_day2 = Z_dayXsite[:, ind_least_corr[1]]

site1, site2  # the 2 least-correlated sites

z_day1.max(), z_day2.max()

# (*) Import a few more graph objects
from plotly.graph_objs import Scatter, Marker, XBins, YBins

# Select axis and bin range and bin size
my_range = [0, 5000]
my_bin_size = 500
my_bins = dict(start=my_range[0],   # stored in a dictionary
               end=my_range[1],     # to be placed in XBins and YBins instances
               size=my_bin_size)

my_cbar_title = 'Number of occurences from Apr 1 to Aug 31'  # colorbar title

# Make instance of Histogram2d object
# with no histogram normalization (the default)
my_hist2d = Histogram2d(
    x=z_day1,                  # sample of the x-axis
    y=z_day2,                  # sample of the y-axis
    xbins=XBins(my_bins),      # custom x-axis bins
    ybins=YBins(my_bins),      # custom y-axis bins
    zsmooth='best',            # N.B. apply smoothing to contours
    scl='Portland',            # N.B. choose a pre-defined color scale
    colorbar=ColorBar(title=my_cbar_title,   # colorbar title
                      titleside='right',     # title right of colorbar
                      ticks='outside')       # ticks outside colorbar
)

# Make instance of Scatter
my_scatter = Scatter(
    x=z_day1,                  # x coordinates
    y=z_day2,                  # y coordinates
    mode='markers',            # just marker pts
    name='',                   # no name
    text=dates_in_words,       # text label corresp. to date
    marker=Marker(size=5,             # marker size
                  color='#e0e0e0')    # and color
)

# Package in Data, plot scatter on top of 2d histogram
my_data = Data([my_hist2d, my_scatter])

# Define figure and axis titles
my_title = ('Fig 5.4a: Joint frequency distribution<br>'
            'of daily cyclist counts at two Montreal sites in 2013')
my_xtitle = 'Daily cyclist count at {}'.format(site1)
my_ytitle = 'Daily cyclist count at {}'.format(site2)

# Make instance of Layout
my_layout = Layout(
    title=my_title,                                  # figure title
    font=Font(family='PT Sans Narrow, sans-serif',   # global font
              size=13),
    xaxis=XAxis(title=my_xtitle,     # x-axis title
                range=my_range,      # x-axis range
                zeroline=False),     # remove x=0 line
    yaxis=YAxis(title=my_ytitle,     # y-axis title
                range=my_range,      # y-axis range
                zeroline=False),     # remove y=0 line
    showlegend=False,                # remove legend
    autosize=False,                  # custom size
    width=650,                       # figure width
    height=520                       # and height
)

# Add an annotation citing the data source
my_anno_text = ("Source and info: "
                "Données ouvertes de la Ville de Montréal")
my_layout.update(annotations=[make_anno(my_anno_text, 1, 1.055)])

# Make instance of figure object
my_fig = Figure(data=my_data, layout=my_layout)

# Define filename in relation to the sites chosen, call Plotly
my_filename = 's5_hist2d-sites-{}-{}'.format(ind_least_corr[0],
                                             ind_least_corr[1])
py.iplot(my_fig, filename=my_filename)

# (*) Import Histogram
from plotly.graph_objs import Histogram

# Adjust the existing axes
my_layout['xaxis'].update(domain=[0, 0.7])   # decrease domain of x-axis1 (norm. coord.)
my_layout['yaxis'].update(domain=[0, 0.7],   # decrease domain of y-axis1
                          showgrid=False)    # remove vertical grid lines

# Set up new axes
my_layout.update(xaxis2=XAxis(domain=[0.75, 1],   # domain of x-axis2
                              zeroline=False,     # remove x=0 line
                              showgrid=True))     # show horizontal grid line
my_layout.update(yaxis2=YAxis(domain=[0.75, 1],   # domain of y-axis2
                              zeroline=False,     # remove y=0 line
                              showgrid=True))     # show vertical line

# Change location of the annotation citing the data source
my_anno_text = ("Source and info:<br>"
                "Données ouvertes<br>de la Ville de Montréal")
my_layout['annotations'][0].update(make_anno(my_anno_text, 0.98, 0.95))

# Make instance of Histogram, with vertical bins and no normalization
my_histx = Histogram(
    x=z_day1,                  # x sample, bind bins to x-axis
    name='',                   # no name on hover
    xbins=XBins(my_bins),      # use same bins as in the 2D histogram
    marker=Marker(color="rgb(242,211,56)"),   # choose bin color
    xaxis='x1',                # bind x coords to xaxis1
    yaxis='y2'                 # bind y coords to yaxis2
)

# Make instance of Histogram, with horizontal bins and no normalization
my_histy = Histogram(
    y=z_day2,                  # y sample, bind bins to y-axis
    name='',                   # no name on hover
    ybins=YBins(my_bins),      # use same bins as in the 2D histogram
    marker=Marker(color="rgb(242,211,56)"),   # choose bin color
    xaxis='x2',                # bind x coords to xaxis2
    yaxis='y1'                 # bind y coords to yaxis1
)

# Append data object
my_data += [my_histx, my_histy]

# Update colorbar position, length and thickness
my_data[0]['colorbar'].update(x=0.99, y=0.35, len=0.7, thickness=20)

# Update title
my_title = ('Fig 5.4b: Joint frequency distribution<br>'
            'of daily cyclist counts at two Montreal sites in 2013')
my_layout.update(title=my_title)

# Make new instance of figure object, send it to Plotly
my_fig = Figure(data=my_data, layout=my_layout)
my_filename = 's5_hist2dsplt-sites-{}-{}'.format(ind_least_corr[0],
                                                 ind_least_corr[1])
py.iplot(my_fig, filename=my_filename)

# (*) Import Histogram2dContour
from plotly.graph_objs import Histogram2dContour

help(Histogram2dContour)

import colorbrewer as cb  # import colorbrewer (uses numpy as well)


def convert_cb_to_scl(cb_color, N=5):
    '''Convert a colorbrewer color dictionary to a Plotly color scale.

    cb_color (positional): colorbrewer color dictionary, indexed by the
                           number of colors
    N (keyword): number of colors in color scale

    Returns a list of [level, 'rgb(r,g,b)'] pairs with the N levels evenly
    spaced from 0 to 1.
    '''
    colors = cb_color[N]                      # list of N color tuples
    levels = np.linspace(0, 1, N).tolist()    # list of N levels
    # Pair each level with its color tuple rendered as 'rgb( , , )'
    return [[level, "rgb(" + ','.join(map(str, color)) + ")"]
            for level, color in zip(levels, colors)]


# Convert the Purples colorbrewer scale to Plotly syntax
scl_cb = convert_cb_to_scl(cb.Purples)

my_cbar_title = 'Number of occurences from Apr 1 to Aug 31'  # colorbar title

# Make instance of Histogram2dContour object
# with no histogram normalization (the default)
my_data = Data([
    Histogram2dContour(
        x=z_day1,                  # sample of the x-axis
        y=z_day2,                  # sample of the y-axis
        xbins=XBins(my_bins),      # custom x-axis bins (as in fig 5.4a & 5.4b)
        ybins=YBins(my_bins),      # custom y-axis bins
        scl=scl_cb,                # N.B. colorbrewer color scale
        colorbar=ColorBar(title=my_cbar_title,   # colorbar title
                          titleside='right',     # title right of colorbar
                          autotick=False,        # custom ticks
                          dtick=1,               # 1 tick for every number of occurences
                          ticks=''),             # no colorbar ticks
        contours=Contours(coloring='lines'),     # N.B. color only the lines
        line=Line(width=3)                       # N.B. increase line width
    )
])

# Define figure and axis titles
my_title = ('Fig 5.4c: Joint frequency distribution<br>'
            'of daily cyclist counts at two Montreal sites in 2013')
my_xtitle = 'Daily cyclist count at {}'.format(site1)
my_ytitle = 'Daily cyclist count at {}'.format(site2)

# Make instance of Layout
my_layout = Layout(
    title=my_title,                                  # figure title
    font=Font(family='PT Sans Narrow, sans-serif',   # global font
              size=13),
    xaxis=XAxis(title=my_xtitle,     # x-axis title
                range=my_range,      # x-axis range
                zeroline=False),     # remove x=0 line
    yaxis=YAxis(title=my_ytitle,     # y-axis title
                range=my_range,      # y-axis range
                zeroline=False),     # remove y=0 line
    showlegend=False,                # remove legend
    autosize=False,                  # custom size
    width=650,                       # figure width
    height=520                       # and height
)

# Add an annotation citing the data source
my_anno_text = ("Source and info: "
                "Données ouvertes de la Ville de Montréal")
my_layout.update(annotations=[make_anno(my_anno_text, 1, 1.055)])

my_fig = Figure(data=my_data, layout=my_layout)

my_filename = 's5_hist2contour-sites-{}-{}'.format(ind_least_corr[0],
                                                   ind_least_corr[1])
py.iplot(my_fig, filename=my_filename)

# Apply the user guide's custom CSS to the notebook; the HTTP handle is
# closed once the stylesheet has been read.
from contextlib import closing
from IPython.display import display, HTML

url = 'https://raw.githubusercontent.com/plotly/python-user-guide/master/custom.css'
with closing(urllib2.urlopen(url)) as css_response:
    css_text = css_response.read()
display(HTML(css_text))