#!/usr/bin/env python
# coding: utf-8

# # UK unemployment data uncertainty 2013-2018
# Examples of fan charts and their variations explored by the Winton Centre for Risk and Evidence Communication.
#
# In this example we'll be using UK unemployment data provided by the [Office for National Statistics](https://www.ons.gov.uk/employmentandlabourmarket/peoplenotinwork/unemployment) between 2013 and 2018 to represent statistical uncertainty in data collection.

# ## Part 1 - Cleaning up the data.
# Generating data to find confidence intervals of 30%, 60%, and 95% to represent in visual form.
#
# Read in the data and generate confidence intervals of 30%, 60%, 95%.

# In[1]:


import os

import pandas as pd
import numpy as np
from scipy.stats import norm


def generate_interval_point(p, center, std):
    """Return the ``p``-quantile of a normal distribution.

    Args:
        p: Cumulative probability of the requested point, e.g. ``0.025``
            for the lower bound of a central 95% interval.
        center: Mean of the distribution; a scalar or an array-like.
        std: Standard deviation; a scalar or an array-like that broadcasts
            against ``center``.

    Returns:
        A scalar when ``center`` and ``std`` are scalars, otherwise a NumPy
        array holding one quantile per element.
    """
    # Passing the arguments straight through lets scipy broadcast them, so
    # the helper works for a single point as well as for a whole series.
    return norm.ppf(p, loc=center, scale=std)


def create_data():
    """Load the unemployment CSV and derive the interval bounds to plot.

    Reads ``uk_unemployment_2013-2018.csv`` from the working directory. The
    ``95%CI`` column is treated as the half-width of a 95% interval of a
    normal distribution, from which the standard deviation is recovered.

    Returns:
        A 9-tuple of lists, one entry per CSV row:
        ``(x, x_label, y_median, y_p_95, y_n_95, y_p_30, y_n_30,
        y_p_60, y_n_60)``, where ``*_n_*`` / ``*_p_*`` are the lower / upper
        bounds of the 95%, 60% and 30% central intervals. All y values are
        expressed in thousands.
    """
    df = pd.read_csv('uk_unemployment_2013-2018.csv')

    # Finds std from the given 95% CI. Assumes it follows a normal curve,
    # where the 95% interval is +/- 1.96 standard deviations.
    # Both series are scaled to be in thousands.
    std = (df['95%CI'] / 1000 / 1.96).values
    y = (df['Number of unemployed people'] / 1000).values

    def quantile(p):
        # One vectorized call per quantile instead of one call per row.
        return list(generate_interval_point(p, y, std))

    y_n_95, y_p_95 = quantile(0.025), quantile(0.975)
    y_n_60, y_p_60 = quantile(0.2), quantile(0.8)
    y_n_30, y_p_30 = quantile(0.35), quantile(0.65)
    y_median = quantile(0.5)

    # Improving x-axis label for this chart: rows without a label get an
    # empty string instead of NaN.
    x = list(df['Date'])
    x_label = list(df['DateLabel'].fillna(''))

    return x, x_label, y_median, y_p_95, y_n_95, y_p_30, y_n_30, y_p_60, y_n_60


# In[2]:


# Let's take a quick look at the x, y values typically produced without uncertainty.
x, x_label, y_median, y_p_95, y_n_95, y_p_30, y_n_30, y_p_60, y_n_60 = create_data()
print(x)  # dates
print()
print(y_median)  # y


# ## Part 2 - Using the library

# ### Plot 2.1 - A solid fan chart
# This is an example of a fan chart as used by the Bank of England and Office for National Statistics to represent uncertainty.
#
# Below we'll create a basic fan chart and customise the chart.
#
# Click and drag on the charts to enlarge a section. Double click to zoom back out.

# In[3]:


from fuzzy.core import FanPlotly

solid_ci = FanPlotly(
    x, y_median,
    ci95p=y_p_95, ci95n=y_n_95,
    ci60p=y_p_60, ci60n=y_n_60,
    ci30p=y_p_30, ci30n=y_n_30,
)
solid_ci.plot()


# The scaling and labeling defaults are not optimal.
#
# For example, the y-axis needs to be labelled (2,500 really means 2,500,000). To change the defaults, we can pass in a plotly [layout](https://plot.ly/python/user-guide/#layout) object.
#
# Refer to the [plotly documentation](https://plot.ly/python/reference/#layout) for all the options.
#
# We will be customizing
# - Labels
#   - x-axis
#   - y-axis
# - Font
# - Ticks
# - Margin

# In[4]:


# For ticks, change the step size so ticks are only plotted on every second point.
x_new = x[0::2]
x_label_new = x_label[0::2]

# Add in a new line between month and year (e.g. 'Aug-13' -> 'Aug\n2013').
x_label_new = [label.replace('-', '\n20') for label in x_label_new]

layout = {
    'showlegend': False,
    # The data set is unemployment, not migration.
    'title': 'UK unemployment figures (2013-2018)',
    'xaxis': {
        'title': 'Date',
        'titlefont': {
            'family': 'Arial, sans-serif',
            'size': 18,
            'color': 'black',
        },
        'ticktext': x_label_new,
        'tickvals': x_new,
        'showgrid': False,
        'showline': True,
        'tickmode': 'array',
        'ticks': 'outside',
        'tickangle': 0,
        'showticklabels': True,
        'tickwidth': 2,
        'tickcolor': '#000',
    },
    'yaxis': {
        'title': 'Unemployment (in thousands)',
        'titlefont': {
            'family': 'Arial, sans-serif',
            'size': 18,
            'color': 'black',
        },
        'showgrid': False,
        # Axis range in thousands, matching the scaling done in create_data().
        'range': [1000000/1000, 2600000/1000],
        'showline': True,
        'tickmode': 'array',
        'ticks': 'outside',
        'mirror': False,
        'tickwidth': 2,
    },
    'margin': {
        'pad': 14,
    }
}


# ### Plot 2.2 - Improved solid fan chart
#
# Let's pass in a layout object to improve things.

# In[5]:


from fuzzy.core import FanPlotly

solid_ci = FanPlotly(
    x, y_median,
    ci95p=y_p_95, ci95n=y_n_95,
    ci60p=y_p_60, ci60n=y_n_60,
    ci30p=y_p_30, ci30n=y_n_30,
    layout=layout
)
solid_ci.plot()


# ### Plot 2.3 - Standard error chart
#
# This chart simply plots the 95% confidence interval as a uniform band of colour.
#
# We can pass in a hex value to change the colour of uncertainty.

# In[6]:


from fuzzy.core import StandardErrorPlot

standard_error = StandardErrorPlot(
    x, y_median,
    ci95p=y_p_95, ci95n=y_n_95,
    layout=layout,
    color='#fc8f8f',
)
standard_error.plot()


# ### Plot 2.4 - Density chart
# This example uses a normal distribution of colour opacity to represent the likely true position of the line.
#
# We can pass in a hex value to change the color and width of the central line. Default value is 1.

# In[7]:


from fuzzy.core import DensPlotly

dens_chart = DensPlotly(
    x=x, y=y_median,
    ci95p=y_p_95, ci95n=y_n_95,
    color_levels=30,
    layout=layout,
    median_line_color='#8b4513',
    median_line_width=0.5,
)
dens_chart.plot()


# ### Plot 2.5 - Fuzzy fan chart
#
# This is a fan chart where the boundaries between the bands have been blurred to emphasise the uncertainty. The original bands are still distinguishable in magnification.
#
# You may perceive more bands than are actually drawn when using this representation. Your perception of these visual artefacts will vary with parameters such as colour, fuzz_size (the width of the blurring), color_levels (the number of colour levels used to implement the blur).
#
# Note: Depending on your machine, a chart using a color_levels value above 70 will take a long time to display.

# In[8]:


from fuzzy.core import FuzzyPlotly

fuzzy_fan = FuzzyPlotly(
    x, y_median,
    ci95p=y_p_95, ci95n=y_n_95,
    ci60p=y_p_60, ci60n=y_n_60,
    ci30p=y_p_30, ci30n=y_n_30,
    fuzz_size=1, color_levels=20,
    layout=layout,
)
fuzzy_fan.plot()


# ## Part 3 - Advanced usage
#
# ### Plot 3.1 - Overlaying another graph on the fuzzy plot
#
# Simply pass in your own plotly data structure in a 'figs' parameter when creating a class instance.
#
# 'figs' is a list of additional charts.

# In[9]:


from fuzzy.core import StandardErrorPlot

# Creating my own plotly figure.
my_plot_red = {
    'marker': {'color': 'red', 'size': 10, 'symbol': 104},
    'mode': 'lines',
    'name': 'My custom plot',
    'text': ['one', 'two', 'three'],
    'type': 'scatter',
    'x': ['Aug-Oct 2013', 'Sep-Nov 2013', 'Oct-Dec 2013', 'Nov-Jan 2014', 'Dec-Feb 2014'],
    'y': [1200, 1800, 1400, 1500, 2400]
}

# Creating Standard Error plot.
standard_error = StandardErrorPlot(
    x, y_median,
    ci95p=y_p_95, ci95n=y_n_95,
    layout=layout,
    figs=[my_plot_red]
)
standard_error.plot()


# ### 3.2 - Exporting the plotly data structure
#
# The plotly data structure is a serialised form of the plot which may be useful in a larger system.
#
# Here we export a StandardErrorPlot and then compose it with other line plots.

# In[10]:


from fuzzy.core import StandardErrorPlot

# Creating Standard Error plot.
standard_error = StandardErrorPlot(
    x, y_median,
    ci95p=y_p_95, ci95n=y_n_95,
    layout=layout,
)

# Extract the serialised traces.
standard_error_export = standard_error.export()

# Creating my own plotly figure.
my_plot_green = {
    'marker': {'color': 'green', 'size': 10, 'symbol': 104},
    'mode': 'lines',
    'name': 'My custom plot green',
    'text': ['one', 'two', 'three'],
    'type': 'scatter',
    'x': ['Aug-Oct 2013', 'Sep-Nov 2013', 'Oct-Dec 2013', 'Nov-Jan 2014', 'Dec-Feb 2014'],
    'y': [2421, 1251, 1861, 1500, 1765]
}

# Append additional plot(s) to the list.
all_data = standard_error_export + [my_plot_green]

# Plot using plotly.
# NOTE(review): plotly 4+ moved `plotly.plotly` into the separate
# `chart_studio` package (`chart_studio.plotly`); this import presumably
# relies on plotly < 4 -- confirm against the pinned version.
import plotly.plotly as py
py.iplot(all_data)


# In[11]:


# Add both custom plots
all_data_two = standard_error_export + [my_plot_red, my_plot_green]
py.iplot(all_data_two)