#!/usr/bin/env python
# coding: utf-8

# # Some gun violence analysis with Wikipedia data

# As [requested by John Stokes](https://twitter.com/jonst0kes/status/282330530412888064),
# here are per-capita numbers for gun-related homicides,
# relating to GDP and total homicides,
# so the situation in the United States can be put in context relative to other nations.

# The main data source is UNODC (via Wikipedia [here](http://en.wikipedia.org/wiki/List_of_countries_by_intentional_homicide_rate)
# and [here](http://en.wikipedia.org/wiki/List_of_countries_by_firearm-related_death_rate)).
#
# GDP data from World Bank, again [via Wikipedia](http://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28PPP%29_per_capita).
#
# If the numbers on Wikipedia are inaccurate, or their relationship is not sound
# (e.g. numbers taken from different years, during which significant change occurred)
# then obviously none of this analysis is valid.
#
# To summarize the data,
# every possible way you look at it the US is lousy at preventing gun violence.
# Even when compared to significantly more violent places,
# gun violence in the US is a serious problem,
# and when compared to similarly wealthy places,
# the US is an outstanding disaster.

# **UPDATE:** the relationship of the gun data and totals does not seem to be valid.
# [FBI data](http://www2.fbi.gov/ucr/cius2009/offenses/violent_crime/index.html) suggests that
# the relative contribution of guns to homicides in the US is 47%,
# but relating these two data sources gives 80%.
# Internal comparisons should still be fine, but 'fraction' analysis has been stricken.
#
# **UPDATE:** this is an updated version of [the original notebook](http://nbviewer.ipython.org/gist/minrk/4358066/Gun%20Data.ipynb), run with data provided by Haruo Kamioka.

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, set_matplotlib_formats
set_matplotlib_formats('retina')
import pandas
pandas.set_option('display.precision', 2)


# Some utility functions for display

# In[2]:


def plot_percent(df, limit=10):
    """Plot the 'Gun Percent' column for the first ``limit`` rows of ``df``.

    The y-axis is pinned to 0-100 and the figure is shown immediately.
    """
    df['Gun Percent'][:limit].plot()
    plt.ylim(0, 100)
    plt.title("% Gun Homicide")
    plt.show()


# In[3]:


def plot_percapita(df, limit=10):
    """Show a stacked bar chart of homicide rates for the first ``limit`` rows.

    Two series are stacked per row: 'Gun Homicides', and 'Homicides' minus
    'Gun Homicides' (labelled 'Total Homicides'), so each bar's full height
    equals the row's 'Homicides' value.

    ``df`` itself is never modified.
    """
    # .ix was removed in pandas 1.0; .loc is the label-based equivalent.
    # Copy so the column added below never writes into (a view of) the
    # caller's frame.
    rates = df.loc[:, ['Homicides', 'Gun Homicides']][:limit].copy()
    rates['Total Homicides'] = rates['Homicides'] - rates['Gun Homicides']
    del rates['Homicides']
    # The remaining columns ('Gun Homicides', 'Total Homicides') are already
    # in alphabetical order, so the old ``sort_columns=True`` keyword (removed
    # in pandas 2.0) is not needed.
    rates.plot(kind='bar', stacked=True)
    plt.ylabel("per 100k")
    plt.show()


# In[4]:


def display_relevant(df, limit=10):
    """Display the homicide, gun-homicide and source columns of ``df``.

    Only the first ``limit`` rows are shown; ``limit=None`` shows every row.
    """
    columns = ['Homicides', 'Gun Homicides', 'Sources and notes']
    display(df.loc[:, columns][:limit])


# Load the data

# In[5]:


totals = pandas.read_csv('totals.csv', sep=';', index_col='Country')
guns = pandas.read_csv('guns.csv', sep=';', index_col='Country')
gdp = pandas.read_csv('gdp.csv', sep=';', index_col='Country', thousands=',')
# rename a few columns:
gdp.columns = ['Rank', 'GDP', 'Year']
guns['Gun Homicides'] = guns['Homicides']
del guns['Homicides']
totals['Homicides'] = totals['Rate']


# In[6]:


# Join the three tables on their shared 'Country' index; GDP columns that
# clash with an existing name get a ' (GDP)' suffix.
data = totals.join(guns).join(gdp, rsuffix=' (GDP)')
data['Gun Percent'] = 100 * data['Gun Homicides'] / data['Homicides']
del data['Unintentional'], data['Undetermined'], data['Suicides']
# Keep only rows with a value in every remaining column.
data = data.dropna()


# In[7]:


data


# Of all sampled countries (Found data for 68 countries),
# the US is in the top 15 in Gun Homicides per capita.
#
# Numbers are per 100k.

# In[8]:


# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
data = data.sort_values("Gun Homicides", ascending=False)
display_relevant(data, 15)


# Take top 30 Countries by GDP

# In[9]:


top = data.sort_values('GDP')[-30:]


# and rank them by Gun Homicides per capita:

# In[10]:


top_by_guns = top.sort_values("Gun Homicides", ascending=False)
display_relevant(top_by_guns, 5)
plot_percapita(top_by_guns, 10)


# **NOTE:** these bar graphs should not be interpreted as fractions of a total,
# as the two data sources do not appear to be comparable.
# But the red and blue bar graphs should still be internally comparable.

# The US is easily #1 of 30 wealthiest countries in Gun Homicides per capita,
# by a factor of 4:1

# Adding USA, Canada, and Mexico to all of Europe,
# USA is a strong #2 behind Mexico in total gun homicides per-capita

# In[11]:


# sys is used for the flush below but was never imported in this script.
import sys

index = (
    (data['Region'] == 'Europe')
    | (data.index == 'United States')
    | (data.index == 'Canada')
    | (data.index == 'Mexico')
)
selected = data[index]

print("By Total Gun Homicides")
# Flush so the heading is emitted before the plot/table output that follows.
sys.stdout.flush()

by_guns = selected.sort_values("Gun Homicides", ascending=False)
#by_guns['Gun Homicides'].plot(kind='bar')
plot_percapita(by_guns, limit=25)
display_relevant(selected, limit=None)


# Let's just compare US, Canada, and UK:

# In[12]:


# .ix was removed in pandas 1.0; .loc selects the same rows by label.
# Copy because the cells below overwrite columns of ``select`` in place.
select = data.loc[['United States', 'Canada', 'United Kingdom']].copy()
plot_percapita(select)


# Normalize to the US numbers (inverse)

# In[13]:


# Each value becomes (US rate) / (country rate); the US row becomes 1.
select['Homicides'] = select.loc['United States', 'Homicides'] / select['Homicides']
select['Gun Homicides'] = select.loc['United States', 'Gun Homicides'] / select['Gun Homicides']
display_relevant(select)


# In[14]:


# Same statements again: since the US row is now 1, this inverts the ratios
# computed in the previous cell, giving (country rate) / (US rate).
select['Homicides'] = select.loc['United States', 'Homicides'] / select['Homicides']
select['Gun Homicides'] = select.loc['United States', 'Gun Homicides'] / select['Gun Homicides']
display_relevant(select)


# So, you are 2.6 times more likely to be killed in the US than Canada,
# and 3.5 times more likely than in the UK.
# That's bad, but not extreme.
#
# However, you are 4.9 times more likely to be killed *with a gun* in the US than Canada,
# and almost 100 times more likely than in the UK. That is pretty extreme.
#
# Countries represented:

# In[15]:


# Emit one index label (country name) per line, in the frame's current order.
country_labels = iter(data.index)
for label in country_labels:
    print(label)