#!/usr/bin/env python
# coding: utf-8

# # My First Data Science Project
# ## Helicopter Escapes

# We begin by importing some helper functions.

# In[4]:


# NOTE(review): the star import is kept because later cells use names that
# are not defined in this file (data_from_url, fetch_year, barplot,
# print_pretty_table, df); presumably they all come from `helper` - confirm
# before replacing this with an explicit import list.
from helper import *


# ## Get the Data

# Now, let's get the data from the [List of helicopter prison escapes](https://en.wikipedia.org/wiki/List_of_helicopter_prison_escapes) Wikipedia article.

# In[5]:


url = 'https://en.wikipedia.org/wiki/List_of_helicopter_prison_escapes'
data = data_from_url(url)


# Let's print the first three rows.

# In[6]:


for item in data[0:3]:
    print(item)


# Next, let's remove the column with the long text that provides the background information on the prison breaks.

# In[7]:


# Replace each row in place with a copy that lacks its last column.
for index, row in enumerate(data):
    data[index] = row[:-1]
print(data[0:3])


# Using a provided function, change the date so that we have only the year:

# In[8]:


for row in data:
    row[0] = fetch_year(row[0])
print(data[0:3])


# Next, we are going to create a table that contains the year and a holder position for the number of attempts in that year.

# In[9]:


min_year = min(row[0] for row in data)
max_year = max(row[0] for row in data)

# Every year in the covered span gets an entry, including years with no
# attempts, so the table has no gaps.
years = list(range(min_year, max_year + 1))
attempts_per_year = [[year, 0] for year in years]
print(attempts_per_year)


# Next, we will populate the attempts per year element of the list.

# In[10]:


# Index the [year, count] entries by year so each row is counted with a
# single lookup instead of a scan over every year. The dict values are the
# same list objects held by attempts_per_year, so updating them updates the
# table. Every row's year lies between min_year and max_year, which were
# computed from these same rows, so the lookup always succeeds.
entry_by_year = {entry[0]: entry for entry in attempts_per_year}
for row in data:
    entry_by_year[row[0]][1] += 1
print(attempts_per_year)


# Using provided code, create a bar graph showing the number of attempts per year to better visualize the data.

# In[11]:


try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # get_ipython is only available inside IPython/Jupyter; when this file is
    # run as a plain script there is no inline backend to select.
    pass
barplot(attempts_per_year)


# The years with the most helicopter prison-break attempts were 1986, 2001, 2007, and 2009.

# Next, we will create a frequency table showing the number of prison break attempts by helicopter by country.

# In[12]:


# NOTE(review): `df` is not defined anywhere in this file; presumably it is
# provided by `from helper import *` - confirm.
countries_frequency = df["Country"].value_counts()


# In[13]:


print_pretty_table(countries_frequency)


# Next, let's try to answer the question:
# In which countries do helicopter prison breaks have a higher chance of success?

# In[14]:


# First, get a list of the countries.
# dict.fromkeys drops duplicates while keeping first-seen order.
countries = list(dict.fromkeys(row[2] for row in data))
print(countries)


# In[15]:


# Next, add empty fields for number of attempts and number
# of successes: each entry is [country, attempts, successes].
countries_success = [[country, 0, 0] for country in countries]
print(countries_success)


# In[16]:


# Next, populate number of attempts and number of successes.
# The dict values are the same list objects held by countries_success.
stats_by_country = {entry[0]: entry for entry in countries_success}
for row in data:
    stats = stats_by_country[row[2]]
    stats[1] += 1
    if row[3] == "Yes":
        stats[2] += 1
print(countries_success)


# From looking at the list of lists above, we can see that some countries have a 100% success rate, but these countries also have a low number of attempts. Without the ability to make graphs, a statement for each country might be the most effective way to communicate the results:

# In[20]:


# Every country listed here came from at least one row, so `attempts` is
# never zero and the division is safe.
for name, attempts, successes in countries_success:
    print(
        f"In {name}, there were {attempts} attempts. "
        f"Of these, there was {successes} success(es). "
        f"The success rate is {successes / attempts * 100}%\n"
    )


# In[ ]: