#!/usr/bin/env python
# coding: utf-8

# # Built-Up Area Leafiness analysis
# by Robin Wilson (robin@rtwilson.com)

# Import relevant libraries

# In[1]:


import pandas as pd
import matplotlib.pyplot as plt
# NOTE: `get_ipython` is not defined in this file; it is supplied by the
# IPython/Jupyter runtime, so this script must be run from there.
get_ipython().run_line_magic('matplotlib', 'inline')


# Read the CSV data which was exported from the GIS data using QGIS

# In[2]:


df = pd.read_csv('BuiltUpAreas_WithLeaf_WithArea.csv')


# In[3]:


df.columns


# How many rows are there to start with?

# In[4]:


len(df)


# How many rows are left if we exclude BUAs under 10km^2?
# (assumes the `area` column is in square metres, so the threshold below
# corresponds to 10km^2 -- TODO confirm against the GIS export)

# In[5]:


MIN_BUA_AREA = 10000000

is_large = df['area'] > MIN_BUA_AREA
len(df[is_large])


# OK, let's do the rest of our analysis with just these large areas

# In[6]:


# Take an explicit copy: a derived column is added to `large` below, and
# assigning into a filtered view of `df` triggers pandas'
# SettingWithCopyWarning (and may or may not write through to `df`).
large = df[is_large].copy()


# What are the top areas if we sort by *mean* leafiness?

# In[7]:


large.sort_values("Leaf_mean", ascending=False).head()


# What are the top areas if we sort by *median* leafiness?
# (the column really is read as "Leaf_media" -- presumably a truncated
# field name from the GIS export; verify against the CSV header)

# In[8]:


large.sort_values("Leaf_media", ascending=False).head()


# What are the lowest areas if we sort by mean leafiness?

# In[9]:


large.sort_values("Leaf_mean", ascending=True).head()


# What are the areas that have the most variability in their leafiness?
# Each area has a very different mean leafiness value, so we can't just
# compare standard deviation values. Instead, we'll calculate the
# coefficient of variation (standard deviation as a proportion of the mean)
# and look at the variability in that.

# In[10]:


large['Leaf_cv'] = large['Leaf_stdev'] / large['Leaf_mean']


# In[11]:


large.sort_values("Leaf_cv", ascending=False).head()


# and the least variability?

# In[12]:


large.sort_values('Leaf_cv', ascending=True).head()


# Now let's have a look at this on a graph...

# In[13]:


from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool


def scatter_with_hover(df, x, y,
                       fig=None, cols=None, name=None, marker='x',
                       fig_width=500, fig_height=500, **kwargs):
    """
    Plots an interactive scatter plot of `x` vs `y` using bokeh, with
    automatic tooltips showing columns from `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the data to be plotted
    x : str
        Name of the column to use for the x-axis values
    y : str
        Name of the column to use for the y-axis values
    fig : bokeh.plotting.Figure, optional
        Figure on which to plot (if not given then a new figure will be
        created)
    cols : list of str
        Columns to show in the hover tooltip (default is to show all)
    name : str
        Bokeh series name to give to the scattered data (default 'main')
    marker : str
        Name of marker to use for scatter plot
    fig_width : int
        Width of the figure; only used when a new figure is created
    fig_height : int
        Height of the figure; only used when a new figure is created
    **kwargs
        Any further arguments to be passed to fig.scatter

    Returns
    -------
    bokeh.plotting.Figure
        Figure (the same as given, or the newly created figure)

    Example
    -------
    fig = scatter_with_hover(df, 'A', 'B')
    show(fig)

    fig = scatter_with_hover(df, 'A', 'B', cols=['C', 'D', 'E'],
                             marker='x', color='red')
    show(fig)

    Author
    ------
    Robin Wilson
    with thanks to Max Albert for original code example
    """
    # If we haven't been given a Figure obj then create it with default
    # size etc.
    if fig is None:
        fig = figure(width=fig_width, height=fig_height,
                     tools=['box_zoom', 'reset'])

    # We're getting data from the given dataframe
    source = ColumnDataSource(data=df)

    # You can specify a series name (in case it needs to be something
    # specific for other reasons), otherwise we just use 'main'
    if name is None:
        name = 'main'

    # Actually do the scatter plot - the easy bit
    # (other keyword arguments will be passed to this function).
    # Keep hold of the renderer so the hover tool can be tied to it.
    renderer = fig.scatter(x, y, source=source, name=name, marker=marker,
                           **kwargs)

    # Restrict the hover tool to the series we just plotted. This is done
    # via `renderers` rather than `names`, because the `names` property of
    # HoverTool is not available in newer Bokeh releases.
    hover = HoverTool(renderers=[renderer])

    # Display *all* columns in the tooltips unless a subset was requested
    tooltip_cols = df.columns if cols is None else cols

    # Use the `@{column}` form so that column names containing spaces or
    # punctuation still resolve; `str()` avoids a TypeError when a column
    # label is not a string.
    hover.tooltips = [(str(c), '@{' + str(c) + '}') for c in tooltip_cols]

    # Finally add/enable the tool
    fig.add_tools(hover)

    return fig


# In[ ]:





# In[14]:


fig = scatter_with_hover(large, 'Leaf_mean', 'Leaf_cv', cols=['name'])
fig.xaxis.axis_label = "Leafiness Mean"
fig.yaxis.axis_label = "Leafiness CV"


# In[15]:


from bokeh.io import output_notebook
from bokeh.plotting import show


# In[16]:


output_notebook()


# In[17]:


show(fig)


# In[ ]: