# LIBS used during the project
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import numpy as np
# Imported Libs to work with Dates
from dateutil.relativedelta import relativedelta
from dateutil import parser
from dateutil.parser import parse
from datetime import date, time, datetime, timedelta
# Import from Libs for correlation
import seaborn as sn
import scipy
import statsmodels
# get the response in the form of html
wikiurl="https://en.wikipedia.org/wiki/List_of_helicopter_prison_escapes"
table_class="wikitable sortable"
response=requests.get(wikiurl)
print(response.status_code)
200
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, 'html.parser')
helicopter_escape=soup.find('table',{'class':"wikitable"})
# returns a list
data=pd.read_html(str(helicopter_escape))
data_df=pd.DataFrame(data[0])
data_df.head(5)
Date | Prison name | Country | Succeeded | Escapee(s) | Details | |
---|---|---|---|---|---|---|
0 | August 19, 1971 | Santa Martha Acatitla | Mexico | Yes | Joel David Kaplan Carlos Antonio Contreras Castro | Joel David Kaplan was a New York businessman w... |
1 | October 31, 1973 | Mountjoy Jail | Ireland | Yes | JB O'Hagan Seamus TwomeyKevin Mallon | On October 31, 1973 an IRA member hijacked a h... |
2 | May 24, 1978 | United States Penitentiary, Marion | United States | No | Garrett Brock TrapnellMartin Joseph McNallyJam... | 43-year-old Barbara Ann Oswald hijacked a Sain... |
3 | February 27, 1981 | Fleury-Mérogis, Essonne, Ile de France | France | Yes | Gérard DupréDaniel Beaumont | With the help of Serge Coutel, Gérard Dupré an... |
4 | May 7, 1981 | Orsainville Prison, Quebec City | Canada | No | Marina Paquet (hijacker)Giles Arseneault (pris... | Marina Paquet held a sawed off shotgun against... |
data_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 48 entries, 0 to 47 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 48 non-null object 1 Prison name 48 non-null object 2 Country 48 non-null object 3 Succeeded 48 non-null object 4 Escapee(s) 48 non-null object 5 Details 48 non-null object dtypes: object(6) memory usage: 2.4+ KB
# columns filter
data_df[['Date', 'Succeeded']].head(5)
Date | Succeeded | |
---|---|---|
0 | August 19, 1971 | Yes |
1 | October 31, 1973 | Yes |
2 | May 24, 1978 | No |
3 | February 27, 1981 | Yes |
4 | May 7, 1981 | No |
# Stored the columns needed for answer 1 in a variable (select_1)
select_1 = data_df[['Date', 'Succeeded']]
# Tipos de Dados
select_1.dtypes
Date object Succeeded object dtype: object
select_1.info(5)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 48 entries, 0 to 47 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 48 non-null object 1 Succeeded 48 non-null object dtypes: object(2) memory usage: 896.0+ bytes
# Select from column Date
select_1['Date'].head(5)
0 August 19, 1971 1 October 31, 1973 2 May 24, 1978 3 February 27, 1981 4 May 7, 1981 Name: Date, dtype: object
# Turns the Date column into a DataFrame
column_date_df = pd.DataFrame(select_1['Date'])
column_date_df.head(5)
Date | |
---|---|
0 | August 19, 1971 |
1 | October 31, 1973 |
2 | May 24, 1978 |
3 | February 27, 1981 |
4 | May 7, 1981 |
column_date_df.dtypes
Date object dtype: object
# Used the pandas function "apply", which applies functions to all rows or columns using axis.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html
column_date_df['Date'].apply(parse).head(5)
0 1971-08-19 1 1973-10-31 2 1978-05-24 3 1981-02-27 4 1981-05-07 Name: Date, dtype: datetime64[ns]
# Assigning View result to a new column in DataFrame
column_date_df['Date_treated'] = column_date_df['Date'].apply(parse)
column_date_df.head(5)
Date | Date_treated | |
---|---|---|
0 | August 19, 1971 | 1971-08-19 |
1 | October 31, 1973 | 1973-10-31 |
2 | May 24, 1978 | 1978-05-24 |
3 | February 27, 1981 | 1981-02-27 |
4 | May 7, 1981 | 1981-05-07 |
# Function dt.year used to extract only the year from the column "Data_treated"
column_date_df['Year'] = column_date_df['Date_treated'].dt.year
column_date_df.head(5)
Date | Date_treated | Year | |
---|---|---|---|
0 | August 19, 1971 | 1971-08-19 | 1971 |
1 | October 31, 1973 | 1973-10-31 | 1973 |
2 | May 24, 1978 | 1978-05-24 | 1978 |
3 | February 27, 1981 | 1981-02-27 | 1981 |
4 | May 7, 1981 | 1981-05-07 | 1981 |
column_date_df.groupby('Year').count().columns
Index(['Date', 'Date_treated'], dtype='object')
# Answer to the first question, the years with the most helic escape attempts were "2009, 2001, 1996, 2007"
df_count = column_date_df.groupby('Year').count().reset_index()[['Year','Date']].sort_values('Date', ascending=False)
df_count.columns = ['Year', 'Count']
df_count = df_count.reset_index(drop=True)
df_count
Year | Count | |
---|---|---|
0 | 2009 | 3 |
1 | 2001 | 3 |
2 | 1986 | 3 |
3 | 2007 | 3 |
4 | 2002 | 2 |
5 | 1981 | 2 |
6 | 2000 | 2 |
7 | 1985 | 2 |
8 | 2005 | 2 |
9 | 2013 | 2 |
10 | 1989 | 2 |
11 | 1992 | 2 |
12 | 2006 | 1 |
13 | 1971 | 1 |
14 | 2010 | 1 |
15 | 2003 | 1 |
16 | 2014 | 1 |
17 | 2016 | 1 |
18 | 2018 | 1 |
19 | 2012 | 1 |
20 | 1999 | 1 |
21 | 1973 | 1 |
22 | 1997 | 1 |
23 | 1996 | 1 |
24 | 1993 | 1 |
25 | 1991 | 1 |
26 | 1990 | 1 |
27 | 1988 | 1 |
28 | 1987 | 1 |
29 | 1983 | 1 |
30 | 1978 | 1 |
31 | 2020 | 1 |
dados=pd.read_html(str(helicopter_escape))
# It was possible to verify that the countries with the most attempts to escape by helicopter were France (15 attempts),
# United States (8 attempts) followed by Belgium, Canada and Greece all with 4 attempts.
data_2 = pd.DataFrame(data[0])
data_2 = data_2[['Country', 'Succeeded']].groupby('Country').count()[['Succeeded']].sort_values('Succeeded', ascending=False).reset_index()
data_2
Country | Succeeded | |
---|---|---|
0 | France | 15 |
1 | United States | 8 |
2 | Belgium | 4 |
3 | Canada | 4 |
4 | Greece | 4 |
5 | Australia | 2 |
6 | Brazil | 2 |
7 | United Kingdom | 2 |
8 | Chile | 1 |
9 | Ireland | 1 |
10 | Italy | 1 |
11 | Mexico | 1 |
12 | Netherlands | 1 |
13 | Puerto Rico | 1 |
14 | Russia | 1 |
df_count['Year']=df_count['Year'].astype(str)
fig = px.bar(df_count, x='Year', y='Count', title='Escape attempts per year',
labels={"Year":"Year", "Count":"Escape attempts", "Count":"Escape attempts"})
fig.show()
fig = px.bar(data_2, x='Country', y='Succeeded', title='Countries with the most escape attempts',
labels={"Country":"Country", "Succeeded":"Escape attempts", "Country":"Country"})
fig.show()
data=pd.read_html(str(helicopter_escape))
data_c = pd.DataFrame(dados[0])
data_c.head(5)
Date | Prison name | Country | Succeeded | Escapee(s) | Details | |
---|---|---|---|---|---|---|
0 | August 19, 1971 | Santa Martha Acatitla | Mexico | Yes | Joel David Kaplan Carlos Antonio Contreras Castro | Joel David Kaplan was a New York businessman w... |
1 | October 31, 1973 | Mountjoy Jail | Ireland | Yes | JB O'Hagan Seamus TwomeyKevin Mallon | On October 31, 1973 an IRA member hijacked a h... |
2 | May 24, 1978 | United States Penitentiary, Marion | United States | No | Garrett Brock TrapnellMartin Joseph McNallyJam... | 43-year-old Barbara Ann Oswald hijacked a Sain... |
3 | February 27, 1981 | Fleury-Mérogis, Essonne, Ile de France | France | Yes | Gérard DupréDaniel Beaumont | With the help of Serge Coutel, Gérard Dupré an... |
4 | May 7, 1981 | Orsainville Prison, Quebec City | Canada | No | Marina Paquet (hijacker)Giles Arseneault (pris... | Marina Paquet held a sawed off shotgun against... |
select = data_c[['Country','Succeeded']]
escapes_suc = select[select['Succeeded'] == 'Yes'].groupby('Country').count().sort_values('Succeeded', ascending=False).reset_index()
escapes_fai = select[select['Succeeded'] == 'No'].groupby('Country').count().sort_values('Succeeded', ascending=False).reset_index()
## Separação da coluna "Succeeded" = total de tentativas em duas colunas "Sucesso" e "Falha"
escapes_merge = escapes_suc.merge(escapes_fai, left_on='Country', right_on='Country', how='outer')
escapes_merge.columns=['Country', 'Succeeded', 'Failure']
escapes_merge
Country | Succeeded | Failure | |
---|---|---|---|
0 | France | 11.0 | 4.0 |
1 | United States | 6.0 | 2.0 |
2 | Canada | 3.0 | 1.0 |
3 | Belgium | 2.0 | 2.0 |
4 | Brazil | 2.0 | NaN |
5 | Greece | 2.0 | 2.0 |
6 | Australia | 1.0 | 1.0 |
7 | Chile | 1.0 | NaN |
8 | Ireland | 1.0 | NaN |
9 | Italy | 1.0 | NaN |
10 | Mexico | 1.0 | NaN |
11 | Puerto Rico | 1.0 | NaN |
12 | Russia | 1.0 | NaN |
13 | United Kingdom | 1.0 | 1.0 |
14 | Netherlands | NaN | 1.0 |
escapes_merge['Succeeded'] = escapes_merge['Succeeded'].fillna(0)
escapes_merge
Country | Succeeded | Failure | |
---|---|---|---|
0 | France | 11.0 | 4.0 |
1 | United States | 6.0 | 2.0 |
2 | Canada | 3.0 | 1.0 |
3 | Belgium | 2.0 | 2.0 |
4 | Brazil | 2.0 | NaN |
5 | Greece | 2.0 | 2.0 |
6 | Australia | 1.0 | 1.0 |
7 | Chile | 1.0 | NaN |
8 | Ireland | 1.0 | NaN |
9 | Italy | 1.0 | NaN |
10 | Mexico | 1.0 | NaN |
11 | Puerto Rico | 1.0 | NaN |
12 | Russia | 1.0 | NaN |
13 | United Kingdom | 1.0 | 1.0 |
14 | Netherlands | 0.0 | 1.0 |
escapes_merge['Failure'] = escapes_merge['Failure'].fillna(0)
escapes_merge
Country | Succeeded | Failure | |
---|---|---|---|
0 | France | 11.0 | 4.0 |
1 | United States | 6.0 | 2.0 |
2 | Canada | 3.0 | 1.0 |
3 | Belgium | 2.0 | 2.0 |
4 | Brazil | 2.0 | 0.0 |
5 | Greece | 2.0 | 2.0 |
6 | Australia | 1.0 | 1.0 |
7 | Chile | 1.0 | 0.0 |
8 | Ireland | 1.0 | 0.0 |
9 | Italy | 1.0 | 0.0 |
10 | Mexico | 1.0 | 0.0 |
11 | Puerto Rico | 1.0 | 0.0 |
12 | Russia | 1.0 | 0.0 |
13 | United Kingdom | 1.0 | 1.0 |
14 | Netherlands | 0.0 | 1.0 |
escapes_merge['Total_attempts'] = escapes_merge['Failure']+escapes_merge['Succeeded']
escapes_merge
Country | Succeeded | Failure | Total_attempts | |
---|---|---|---|---|
0 | France | 11.0 | 4.0 | 15.0 |
1 | United States | 6.0 | 2.0 | 8.0 |
2 | Canada | 3.0 | 1.0 | 4.0 |
3 | Belgium | 2.0 | 2.0 | 4.0 |
4 | Brazil | 2.0 | 0.0 | 2.0 |
5 | Greece | 2.0 | 2.0 | 4.0 |
6 | Australia | 1.0 | 1.0 | 2.0 |
7 | Chile | 1.0 | 0.0 | 1.0 |
8 | Ireland | 1.0 | 0.0 | 1.0 |
9 | Italy | 1.0 | 0.0 | 1.0 |
10 | Mexico | 1.0 | 0.0 | 1.0 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 |
12 | Russia | 1.0 | 0.0 | 1.0 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 |
14 | Netherlands | 0.0 | 1.0 | 1.0 |
escapes_merge['Chance_suc'] = (escapes_merge['Succeeded']/escapes_merge['Total_attempts'])*100
escapes_merge
Country | Succeeded | Failure | Total_attempts | Chance_suc | |
---|---|---|---|---|---|
0 | France | 11.0 | 4.0 | 15.0 | 73.333333 |
1 | United States | 6.0 | 2.0 | 8.0 | 75.000000 |
2 | Canada | 3.0 | 1.0 | 4.0 | 75.000000 |
3 | Belgium | 2.0 | 2.0 | 4.0 | 50.000000 |
4 | Brazil | 2.0 | 0.0 | 2.0 | 100.000000 |
5 | Greece | 2.0 | 2.0 | 4.0 | 50.000000 |
6 | Australia | 1.0 | 1.0 | 2.0 | 50.000000 |
7 | Chile | 1.0 | 0.0 | 1.0 | 100.000000 |
8 | Ireland | 1.0 | 0.0 | 1.0 | 100.000000 |
9 | Italy | 1.0 | 0.0 | 1.0 | 100.000000 |
10 | Mexico | 1.0 | 0.0 | 1.0 | 100.000000 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 | 100.000000 |
12 | Russia | 1.0 | 0.0 | 1.0 | 100.000000 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 | 50.000000 |
14 | Netherlands | 0.0 | 1.0 | 1.0 | 0.000000 |
## The countries where prison escapes by helicopter are most likely to succeed are: Brazil,
## Chile, Ireland, Italy, Mexico, Puerto Rico and Russia
escapes_merge['Chance_fai'] = (escapes_merge['Failure']/escapes_merge['Total_attempts'])*100
escapes_merge = escapes_merge.sort_values(['Chance_suc', 'Succeeded'], ascending=False)
escapes_merge
Country | Succeeded | Failure | Total_attempts | Chance_suc | Chance_fai | |
---|---|---|---|---|---|---|
4 | Brazil | 2.0 | 0.0 | 2.0 | 100.000000 | 0.000000 |
7 | Chile | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
8 | Ireland | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
9 | Italy | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
10 | Mexico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
12 | Russia | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
1 | United States | 6.0 | 2.0 | 8.0 | 75.000000 | 25.000000 |
2 | Canada | 3.0 | 1.0 | 4.0 | 75.000000 | 25.000000 |
0 | France | 11.0 | 4.0 | 15.0 | 73.333333 | 26.666667 |
3 | Belgium | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
5 | Greece | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
6 | Australia | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
14 | Netherlands | 0.0 | 1.0 | 1.0 | 0.000000 | 100.000000 |
fig = px.histogram(escapes_merge, x='Country', y='Chance_suc', title = 'Chance of success',
labels={'Country':'Country', 'Chance_suc':'Chance of success', 'Chance_suc':'Chance of success'})
fig.show()
# Percentage of failure in the project
Negative = []
for i in escapes_merge['Chance_fai']:
if i > 0:
Negative.append(-i)
else:
Negative.append(i)
Negative
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -25.0, -25.0, -26.666666666666668, -50.0, -50.0, -50.0, -50.0, -100.0]
escapes_merge
Country | Succeeded | Failure | Total_attempts | Chance_suc | Chance_fai | |
---|---|---|---|---|---|---|
4 | Brazil | 2.0 | 0.0 | 2.0 | 100.000000 | 0.000000 |
7 | Chile | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
8 | Ireland | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
9 | Italy | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
10 | Mexico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
12 | Russia | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
1 | United States | 6.0 | 2.0 | 8.0 | 75.000000 | 25.000000 |
2 | Canada | 3.0 | 1.0 | 4.0 | 75.000000 | 25.000000 |
0 | France | 11.0 | 4.0 | 15.0 | 73.333333 | 26.666667 |
3 | Belgium | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
5 | Greece | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
6 | Australia | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
14 | Netherlands | 0.0 | 1.0 | 1.0 | 0.000000 | 100.000000 |
escapes_merge['Chance_fai']
4 0.000000 7 0.000000 8 0.000000 9 0.000000 10 0.000000 11 0.000000 12 0.000000 1 25.000000 2 25.000000 0 26.666667 3 50.000000 5 50.000000 6 50.000000 13 50.000000 14 100.000000 Name: Chance_fai, dtype: float64
import plotly.graph_objects as go
years = escapes_merge['Country']
fig = go.Figure(
layout=dict(title=dict(text=" Chance of success/failure"))
)
fig.add_trace(go.Bar(x=years, y=escapes_merge['Chance_fai'],
base=Negative,
marker_color='crimson',
name='Chance of failure'))
fig.add_trace(go.Bar(x=years, y=escapes_merge['Chance_suc'],
base=0,
marker_color='lightslategrey',
name='Chance of success'
))
fig.update_traces(texttemplate='%{y:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()