import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings(action='ignore')
import calendar
import datetime as dt
import plotly.io as plio
plio.templates
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from IPython.display import HTML,display
unemp = pd.read_csv('Unemployment in India.csv')
unemp
Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Area | |
---|---|---|---|---|---|---|---|
0 | Andhra Pradesh | 31-05-2019 | Monthly | 3.65 | 11999139.0 | 43.24 | Rural |
1 | Andhra Pradesh | 30-06-2019 | Monthly | 3.05 | 11755881.0 | 42.05 | Rural |
2 | Andhra Pradesh | 31-07-2019 | Monthly | 3.75 | 12086707.0 | 43.50 | Rural |
3 | Andhra Pradesh | 31-08-2019 | Monthly | 3.32 | 12285693.0 | 43.97 | Rural |
4 | Andhra Pradesh | 30-09-2019 | Monthly | 5.17 | 12256762.0 | 44.68 | Rural |
... | ... | ... | ... | ... | ... | ... | ... |
749 | West Bengal | 29-02-2020 | Monthly | 7.55 | 10871168.0 | 44.09 | Urban |
750 | West Bengal | 31-03-2020 | Monthly | 6.67 | 10806105.0 | 43.34 | Urban |
751 | West Bengal | 30-04-2020 | Monthly | 15.63 | 9299466.0 | 41.20 | Urban |
752 | West Bengal | 31-05-2020 | Monthly | 15.22 | 9240903.0 | 40.67 | Urban |
753 | West Bengal | 30-06-2020 | Monthly | 9.86 | 9088931.0 | 37.57 | Urban |
754 rows × 7 columns
unemp.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 754 entries, 0 to 753 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Region 740 non-null object 1 Date 740 non-null object 2 Frequency 740 non-null object 3 Estimated Unemployment Rate (%) 740 non-null float64 4 Estimated Employed 740 non-null float64 5 Estimated Labour Participation Rate (%) 740 non-null float64 6 Area 740 non-null object dtypes: float64(3), object(4) memory usage: 41.4+ KB
unemp.shape
(754, 7)
unemp.isna().sum()
Region 14 Date 14 Frequency 14 Estimated Unemployment Rate (%) 14 Estimated Employed 14 Estimated Labour Participation Rate (%) 14 Area 14 dtype: int64
unemp[unemp.isnull().any(axis=1)]
Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Area | |
---|---|---|---|---|---|---|---|
359 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
360 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
361 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
362 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
363 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
364 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
365 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
366 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
367 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
368 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
369 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
370 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
371 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
372 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
print(unemp[unemp.isnull().any(axis=1)].index.tolist())
[359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372]
unemp_non_null = unemp.iloc[:, :360].dropna()
unemp_non_null
Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Area | |
---|---|---|---|---|---|---|---|
0 | Andhra Pradesh | 31-05-2019 | Monthly | 3.65 | 11999139.0 | 43.24 | Rural |
1 | Andhra Pradesh | 30-06-2019 | Monthly | 3.05 | 11755881.0 | 42.05 | Rural |
2 | Andhra Pradesh | 31-07-2019 | Monthly | 3.75 | 12086707.0 | 43.50 | Rural |
3 | Andhra Pradesh | 31-08-2019 | Monthly | 3.32 | 12285693.0 | 43.97 | Rural |
4 | Andhra Pradesh | 30-09-2019 | Monthly | 5.17 | 12256762.0 | 44.68 | Rural |
... | ... | ... | ... | ... | ... | ... | ... |
749 | West Bengal | 29-02-2020 | Monthly | 7.55 | 10871168.0 | 44.09 | Urban |
750 | West Bengal | 31-03-2020 | Monthly | 6.67 | 10806105.0 | 43.34 | Urban |
751 | West Bengal | 30-04-2020 | Monthly | 15.63 | 9299466.0 | 41.20 | Urban |
752 | West Bengal | 31-05-2020 | Monthly | 15.22 | 9240903.0 | 40.67 | Urban |
753 | West Bengal | 30-06-2020 | Monthly | 9.86 | 9088931.0 | 37.57 | Urban |
740 rows × 7 columns
unemp_non_null.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 740 entries, 0 to 753 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Region 740 non-null object 1 Date 740 non-null object 2 Frequency 740 non-null object 3 Estimated Unemployment Rate (%) 740 non-null float64 4 Estimated Employed 740 non-null float64 5 Estimated Labour Participation Rate (%) 740 non-null float64 6 Area 740 non-null object dtypes: float64(3), object(4) memory usage: 46.2+ KB
unemp_non_null.shape
(740, 7)
unemp_non_null.isna().sum()
Region 0 Date 0 Frequency 0 Estimated Unemployment Rate (%) 0 Estimated Employed 0 Estimated Labour Participation Rate (%) 0 Area 0 dtype: int64
unemp_non_null.Date.min(),unemp_non_null.Date.max()
(' 29-02-2020', ' 31-12-2019')
unemp_non_null['Date'] = pd.to_datetime(unemp_non_null['Date'],dayfirst=True)
unemp_non_null['Frequency']= unemp_non_null['Frequency'].astype('category')
unemp_non_null['Month'] = unemp_non_null['Date'].dt.month
unemp_non_null['Month_num'] = unemp_non_null['Month'].apply(lambda x : int(x))
unemp_non_null['Month_name'] = unemp_non_null['Month_num'].apply(lambda x: calendar.month_abbr[x])
unemp_non_null['Region'] = unemp_non_null['Region'].astype('category')
unemp_non_null['Year'] = unemp_non_null['Date'].dt.year
unemp_non_null['Year_num'] = unemp_non_null['Year'].apply(lambda x : int(x))
unemp_non_null.drop(columns='Year', inplace=True)
##unemp_non_null.to_csv('unmeployment in India non null.csv', index=False)
unemp_non_null
Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Area | Month | Month_num | Month_name | Year_num | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Andhra Pradesh | 2019-05-31 | Monthly | 3.65 | 11999139.0 | 43.24 | Rural | 5 | 5 | May | 2019 |
1 | Andhra Pradesh | 2019-06-30 | Monthly | 3.05 | 11755881.0 | 42.05 | Rural | 6 | 6 | Jun | 2019 |
2 | Andhra Pradesh | 2019-07-31 | Monthly | 3.75 | 12086707.0 | 43.50 | Rural | 7 | 7 | Jul | 2019 |
3 | Andhra Pradesh | 2019-08-31 | Monthly | 3.32 | 12285693.0 | 43.97 | Rural | 8 | 8 | Aug | 2019 |
4 | Andhra Pradesh | 2019-09-30 | Monthly | 5.17 | 12256762.0 | 44.68 | Rural | 9 | 9 | Sep | 2019 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
749 | West Bengal | 2020-02-29 | Monthly | 7.55 | 10871168.0 | 44.09 | Urban | 2 | 2 | Feb | 2020 |
750 | West Bengal | 2020-03-31 | Monthly | 6.67 | 10806105.0 | 43.34 | Urban | 3 | 3 | Mar | 2020 |
751 | West Bengal | 2020-04-30 | Monthly | 15.63 | 9299466.0 | 41.20 | Urban | 4 | 4 | Apr | 2020 |
752 | West Bengal | 2020-05-31 | Monthly | 15.22 | 9240903.0 | 40.67 | Urban | 5 | 5 | May | 2020 |
753 | West Bengal | 2020-06-30 | Monthly | 9.86 | 9088931.0 | 37.57 | Urban | 6 | 6 | Jun | 2020 |
740 rows × 11 columns
Estimated Unemployment Rate (%): This represents the actual unemployment rate you want to calculate. It's the percentage of the labor force that is currently unemployed and seeking employment.
¶Estimated Employed: This is the number of people who are currently employed.
¶Estimated Labour Participation Rate (%): This represents the percentage of the working-age population that is either employed or actively seeking employment. It includes both employed and unemployed individuals.
¶round(unemp_non_null[['Estimated Unemployment Rate (%)',
'Estimated Employed', 'Estimated Labour Participation Rate (%)']].describe(),2)
Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | |
---|---|---|---|
count | 740.00 | 740.00 | 740.00 |
mean | 11.79 | 7204460.03 | 42.63 |
std | 10.72 | 8087988.43 | 8.11 |
min | 0.00 | 49420.00 | 13.33 |
25% | 4.66 | 1190404.50 | 38.06 |
50% | 8.35 | 4744178.50 | 41.16 |
75% | 15.89 | 11275489.50 | 45.50 |
max | 76.74 | 45777509.00 | 72.57 |
feature_vs_region = round(unemp_non_null.groupby(['Region'])[['Estimated Unemployment Rate (%)',
'Estimated Employed', 'Estimated Labour Participation Rate (%)']].mean().reset_index(),2)
feature_vs_region
#feature_vs_region.to_csv('feature_vs_region.csv', index=False)
Region | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | |
---|---|---|---|---|
0 | Andhra Pradesh | 7.48 | 8154093.18 | 39.38 |
1 | Assam | 6.43 | 5354772.15 | 44.87 |
2 | Bihar | 18.92 | 12366189.14 | 38.15 |
3 | Chandigarh | 15.99 | 316831.25 | 39.34 |
4 | Chhattisgarh | 9.24 | 4303498.57 | 42.81 |
5 | Delhi | 16.50 | 2627512.86 | 38.93 |
6 | Goa | 9.27 | 226308.33 | 39.25 |
7 | Gujarat | 6.66 | 11402012.79 | 46.10 |
8 | Haryana | 26.28 | 3557072.46 | 42.74 |
9 | Himachal Pradesh | 18.54 | 1059823.71 | 44.22 |
10 | Jammu & Kashmir | 16.19 | 1799931.67 | 41.03 |
11 | Jharkhand | 20.58 | 4469240.43 | 41.67 |
12 | Karnataka | 6.68 | 10667119.29 | 41.35 |
13 | Kerala | 10.12 | 4425899.50 | 34.87 |
14 | Madhya Pradesh | 7.41 | 11115484.32 | 38.82 |
15 | Maharashtra | 7.56 | 19990195.86 | 42.30 |
16 | Meghalaya | 4.80 | 689736.81 | 57.08 |
17 | Odisha | 5.66 | 6545746.96 | 38.93 |
18 | Puducherry | 10.22 | 212278.08 | 38.99 |
19 | Punjab | 12.03 | 4539362.00 | 41.14 |
20 | Rajasthan | 14.06 | 10041064.75 | 39.97 |
21 | Sikkim | 7.25 | 106880.71 | 46.07 |
22 | Tamil Nadu | 9.28 | 12269546.75 | 40.87 |
23 | Telangana | 7.74 | 7939662.75 | 53.00 |
24 | Tripura | 28.35 | 717002.64 | 61.82 |
25 | Uttar Pradesh | 12.55 | 28094832.18 | 39.43 |
26 | Uttarakhand | 6.58 | 1390228.11 | 33.78 |
27 | West Bengal | 8.12 | 17198538.00 | 45.42 |
fig = px.bar(feature_vs_region, x='Region', y='Estimated Unemployment Rate (%)',
title='Estimated Unemployment Rate (%) vs Region', template='plotly', color_discrete_sequence=['gold'])
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Unemployment Rate (%)')
fig.update_traces(text=feature_vs_region['Estimated Unemployment Rate (%)'], textposition='outside')
# Show the plot
fig.show()
fig = px.bar(feature_vs_region, x='Region', y='Estimated Employed',
title='Estimated Employed Count vs Region', template='plotly', color_discrete_sequence=['gold'])
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Employed Count')
fig.update_traces(text=feature_vs_region['Estimated Employed'])
fig.update_layout(height=1500, width=1000)
# Show the plot
fig.show()
fig = px.bar(feature_vs_region, x='Region', y='Estimated Labour Participation Rate (%)',
title='Estimated Labour Participation Rate (%) vs Region', template='plotly', color_discrete_sequence=['gold'])
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Labour Participation Rate (%)')
fig.update_traces(text=feature_vs_region['Estimated Labour Participation Rate (%)'], textposition='outside')
# Show the plot
fig.show()
feature_vs_area = round(unemp_non_null.groupby(['Area'])[['Estimated Unemployment Rate (%)',
'Estimated Employed', 'Estimated Labour Participation Rate (%)']].mean().reset_index(),2)
feature_vs_area
#feature_vs_area.to_excel('feature_vs_year.xlsx', index=False)
#feature_vs_area.to_csv('feature_vs_area.csv', index=False)
Area | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | |
---|---|---|---|---|
0 | Rural | 10.32 | 10192852.57 | 44.46 |
1 | Urban | 13.17 | 4388625.58 | 40.90 |
fig = px.bar(feature_vs_area, x='Area', y='Estimated Unemployment Rate (%)',
title='Estimated Unemployment Rate (%) vs Area', template='plotly')
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Unemployment Rate (%)')
fig.update_traces(text=feature_vs_area['Estimated Unemployment Rate (%)'], textposition='outside')
# Show the plot
fig.show()
fig = px.bar(feature_vs_area, x='Area', y='Estimated Employed',
title='Estimated Employed Count vs Area', template='plotly')
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Employed')
fig.update_traces(text=feature_vs_area['Estimated Employed'], textposition='outside')
# Show the plot
fig.show()
fig = px.bar(feature_vs_area, x='Area', y='Estimated Labour Participation Rate (%)',
title='Estimated Labour Participation Rate (%) vs Area', template='plotly')
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Labour Participation Rate (%)')
fig.update_traces(text=feature_vs_area['Estimated Labour Participation Rate (%)'], textposition='outside')
# Show the plot
fig.show()
feature_vs_year_month = round(unemp_non_null.groupby(['Year_num','Month_num', 'Month_name'])[['Estimated Unemployment Rate (%)',
'Estimated Employed', 'Estimated Labour Participation Rate (%)']].mean().reset_index().sort_values(by=['Year_num']),2)
feature_vs_year_month
Year_num | Month_num | Month_name | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | |
---|---|---|---|---|---|---|
0 | 2019 | 5 | May | 8.87 | 7410148.44 | 43.90 |
1 | 2019 | 6 | Jun | 9.30 | 7358641.57 | 43.75 |
2 | 2019 | 7 | Jul | 9.03 | 7404425.31 | 43.71 |
3 | 2019 | 8 | Aug | 9.64 | 7539815.19 | 43.65 |
4 | 2019 | 9 | Sep | 9.05 | 7739463.96 | 44.30 |
5 | 2019 | 10 | Oct | 9.90 | 7298382.40 | 44.00 |
6 | 2019 | 11 | Nov | 9.87 | 7273660.64 | 44.11 |
7 | 2019 | 12 | Dec | 9.50 | 7377387.83 | 43.67 |
8 | 2020 | 1 | Jan | 9.95 | 7677344.42 | 44.05 |
9 | 2020 | 2 | Feb | 9.96 | 7603996.28 | 43.72 |
10 | 2020 | 3 | Mar | 10.70 | 7516581.13 | 43.08 |
11 | 2020 | 4 | Apr | 23.64 | 5283319.90 | 35.14 |
12 | 2020 | 5 | May | 24.88 | 5879363.02 | 38.50 |
13 | 2020 | 6 | Jun | 11.90 | 7387008.66 | 40.55 |
feature_vs_year_month['Month_name'] = feature_vs_year_month['Month_name'].astype(str)
feature_vs_year_month['Year_num_Month'] = feature_vs_year_month['Year_num'].astype(str) + ' - ' + feature_vs_year_month['Month_name']
feature_vs_year_month = feature_vs_year_month.sort_values(['Year_num', 'Month_name'])
feature_vs_year_month
#feature_vs_year_month.to_csv('feature_vs_year_month.csv', index=False)
Year_num | Month_num | Month_name | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Year_num_Month | |
---|---|---|---|---|---|---|---|
3 | 2019 | 8 | Aug | 9.64 | 7539815.19 | 43.65 | 2019 - Aug |
7 | 2019 | 12 | Dec | 9.50 | 7377387.83 | 43.67 | 2019 - Dec |
2 | 2019 | 7 | Jul | 9.03 | 7404425.31 | 43.71 | 2019 - Jul |
1 | 2019 | 6 | Jun | 9.30 | 7358641.57 | 43.75 | 2019 - Jun |
0 | 2019 | 5 | May | 8.87 | 7410148.44 | 43.90 | 2019 - May |
6 | 2019 | 11 | Nov | 9.87 | 7273660.64 | 44.11 | 2019 - Nov |
5 | 2019 | 10 | Oct | 9.90 | 7298382.40 | 44.00 | 2019 - Oct |
4 | 2019 | 9 | Sep | 9.05 | 7739463.96 | 44.30 | 2019 - Sep |
11 | 2020 | 4 | Apr | 23.64 | 5283319.90 | 35.14 | 2020 - Apr |
9 | 2020 | 2 | Feb | 9.96 | 7603996.28 | 43.72 | 2020 - Feb |
8 | 2020 | 1 | Jan | 9.95 | 7677344.42 | 44.05 | 2020 - Jan |
13 | 2020 | 6 | Jun | 11.90 | 7387008.66 | 40.55 | 2020 - Jun |
10 | 2020 | 3 | Mar | 10.70 | 7516581.13 | 43.08 | 2020 - Mar |
12 | 2020 | 5 | May | 24.88 | 5879363.02 | 38.50 | 2020 - May |
# Create the bar plot
fig = px.bar(feature_vs_year_month, x='Year_num_Month', y='Estimated Unemployment Rate (%)',
title='Estimated Unemployment Rate (%) vs Year and Month', template='plotly', color_discrete_sequence=['red'])
# Set the labels for the x and y-axes
fig.update_xaxes(title_text='Year and Month')
fig.update_yaxes(title_text='Estimated Unemployment Rate (%)')
# Add values on the bars
fig.update_traces(text=feature_vs_year_month['Estimated Unemployment Rate (%)'], textposition='outside')
# Show the plot
fig.show()
# Create the bar plot
fig = px.bar(feature_vs_year_month, x='Year_num_Month', y='Estimated Employed',
title='Estimated Employed Count vs Year and Month', template='plotly', color_discrete_sequence=['red'])
# Set the labels for the x and y-axes
fig.update_xaxes(title_text='Year and Month')
fig.update_yaxes(title_text='Estimated Employed')
# Add values on the bars
fig.update_traces(text=feature_vs_year_month['Estimated Employed'], textposition='outside')
# Show the plot
fig.show()
# Create the bar plot
fig = px.bar(feature_vs_year_month, x='Year_num_Month', y='Estimated Labour Participation Rate (%)',
title='Estimated Labour Participation Rate (%) vs Year and Month', template='plotly', color_discrete_sequence=['red'])
# Set the labels for the x and y-axes
fig.update_xaxes(title_text='Year and Month')
fig.update_yaxes(title_text='Estimated Labour Participation Rate (%)')
# Add values on the bars
fig.update_traces(text=feature_vs_year_month['Estimated Labour Participation Rate (%)'], textposition='outside')
# Show the plot
fig.show()
feature_vs_year = round(unemp_non_null.groupby(['Year_num'])[['Estimated Unemployment Rate (%)',
'Estimated Employed', 'Estimated Labour Participation Rate (%)']].mean().reset_index(),2)
feature_vs_year
#feature_vs_year.to_csv('feature_vs_year.csv', index=False)
Year_num | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | |
---|---|---|---|---|
0 | 2019 | 9.4 | 7422976.47 | 43.89 |
1 | 2020 | 15.1 | 6901356.57 | 40.89 |
fig = px.bar(feature_vs_year, x='Year_num', y='Estimated Unemployment Rate (%)',
title='Estimated Unemployment Rate (%) vs Year', template='plotly', color_discrete_sequence=['aqua'])
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Unemployment Rate (%)')
fig.update_traces(text=feature_vs_year['Estimated Unemployment Rate (%)'], textposition='outside')
# Show the plot
fig.show()
fig = px.bar(feature_vs_year, x='Year_num', y='Estimated Employed',
title='Estimated Employed Count vs Year', template='plotly', color_discrete_sequence=['aqua'])
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Employed')
fig.update_traces(text=feature_vs_year['Estimated Employed'], textposition='outside')
# Show the plot
fig.show()
fig = px.bar(feature_vs_year, x='Year_num', y='Estimated Labour Participation Rate (%)',
title='Estimated Labour Participation Rate (%) vs Year', template='plotly', color_discrete_sequence=['aqua'])
# Set the labels for the y-axis
fig.update_yaxes(title_text='Estimated Labour Participation Rate (%)')
fig.update_traces(text=feature_vs_year['Estimated Labour Participation Rate (%)'], textposition='outside')
# Show the plot
fig.show()
unemp_non_null.corr()
Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Month | Month_num | Year_num | |
---|---|---|---|---|---|---|
Estimated Unemployment Rate (%) | 1.000000 | -0.222876 | 0.002558 | -0.122938 | -0.122938 | 0.262602 |
Estimated Employed | -0.222876 | 1.000000 | 0.011300 | 0.011285 | 0.011285 | -0.031841 |
Estimated Labour Participation Rate (%) | 0.002558 | 0.011300 | 1.000000 | 0.087257 | 0.087257 | -0.182460 |
Month | -0.122938 | 0.011285 | 0.087257 | 1.000000 | 1.000000 | -0.768484 |
Month_num | -0.122938 | 0.011285 | 0.087257 | 1.000000 | 1.000000 | -0.768484 |
Year_num | 0.262602 | -0.031841 | -0.182460 | -0.768484 | -0.768484 | 1.000000 |
sns.heatmap(unemp_non_null.corr(), annot=True)
<AxesSubplot:>
plt.figure(figsize=(20,6))
sns.pairplot(unemp_non_null)
plt.show()
<Figure size 2000x600 with 0 Axes>
plt.figure(figsize=(20,20))
fig = px.scatter_matrix(unemp_non_null,template='plotly',
dimensions=['Estimated Unemployment Rate (%)','Estimated Employed',
'Estimated Labour Participation Rate (%)'],
color='Region')
fig.show()
<Figure size 2000x2000 with 0 Axes>
plt.figure(figsize=(20,16))
fig = px.scatter_matrix(unemp_non_null,template='plotly',
dimensions=['Estimated Unemployment Rate (%)','Estimated Employed',
'Estimated Labour Participation Rate (%)'],
color='Area')
fig.show()
<Figure size 2000x1600 with 0 Axes>
plt.figure(figsize=(20,15))
fig = px.box(unemp_non_null,x='Region',y='Estimated Unemployment Rate (%)',color='Region',title='Estimated Unemployment rate(%) vs State',template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()
<Figure size 2000x1500 with 0 Axes>
plt.figure(figsize=(20,15))
fig = px.box(unemp_non_null,x='Area',y='Estimated Unemployment Rate (%)',color='Area',title='Estimated Unemployment rate(%) vs Area',template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()
<Figure size 2000x1500 with 0 Axes>
df_est_unemp_rate_vs_region = unemp_non_null[['Estimated Unemployment Rate (%)','Region']]
df_est_unemp_rate_vs_region = df_est_unemp_rate_vs_region.groupby('Region').mean().reset_index()
df_est_unemp_rate_vs_region = df_est_unemp_rate_vs_region.sort_values('Estimated Unemployment Rate (%)')
fig = px.bar(df_est_unemp_rate_vs_region, x='Region',y='Estimated Unemployment Rate (%)',color='Region',
title='Average Estimated Unemployment Rate(%) in each state',template='plotly', text='Estimated Unemployment Rate (%)', height=1000)
fig.show()
unemp_2020 = unemp_non_null[unemp_non_null['Year_num'] == 2020]
unemp_2019 = unemp_non_null[unemp_non_null['Year_num'] == 2019]
plt.figure(figsize=(20,30))
fig = px.bar(unemp_2020, x='Region',y='Estimated Unemployment Rate (%)',animation_frame = 'Month_name' ,color='Area',
title='Estimated Unemployment rate across Region(States) during year 2020', height=900,template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()
<Figure size 2000x3000 with 0 Axes>
plt.figure(figsize=(20,30))
fig = px.bar(unemp_2019, x='Region',y='Estimated Unemployment Rate (%)',animation_frame = 'Month_name' ,color='Area',
title='Estimated Unemployment rate across Region(States) during year 2019', height=700,template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()
<Figure size 2000x3000 with 0 Axes>
fig = px.sunburst(unemp_non_null.groupby(['Region'])['Estimated Unemployment Rate (%)'].mean().reset_index(),
path=['Region'], values='Estimated Unemployment Rate (%)',
color_continuous_scale='Plasma', title='Estimated Unemployment Rate (%) by Region(State)',
height=950, template='ggplot2',custom_data=['Estimated Unemployment Rate (%)'])
fig.update_traces(textinfo='label+value')
fig.show()
From the pieplot, avg. unemployment rate(%) bar plot and box plots we can infer the following:-
¶The top 5 regions(states) in India having the highest unemployement rate (%) during COVID-19 lockdown are:
¶1. Tripura = 28.35%
¶2. Haryana = 26.28%
¶3. Jharkhand = 20.59%
¶4. Bihar = 18.92%
¶5. Himachal Pradesh = 18.54%
¶plt.figure(figsize=(20,15))
fig = px.box(unemp_non_null,x='Region',y='Estimated Employed',color='Region',title='Estimated Employed Count vs State',template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()
<Figure size 2000x1500 with 0 Axes>
plt.figure(figsize=(20,15))
fig = px.box(unemp_non_null,x='Area',y='Estimated Employed',color='Area',title='Estimated Employed Count vs Area',template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()
<Figure size 2000x1500 with 0 Axes>
df_est_emp_vs_region = unemp_non_null[['Estimated Employed','Region']]
df_est_emp_vs_region = df_est_emp_vs_region.groupby('Region').mean().reset_index()
df_est_emp_vs_region = df_est_emp_vs_region.sort_values('Estimated Employed')
fig = px.bar(df_est_emp_vs_region, x='Region',y='Estimated Employed',color='Region',
title='Average Estimated Employed in each state',template='plotly', text='Estimated Employed', height=1000)
fig.show()
plt.figure(figsize=(20,25))
fig = px.bar(unemp_2020, x='Region',y='Estimated Employed',animation_frame = 'Month_name' ,color='Area',
title='Estimated Employed count across Region(States) in India in 2020', height=700,template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()
<Figure size 2000x2500 with 0 Axes>
plt.figure(figsize=(20,25))
fig = px.bar(unemp_2019, x='Region',y='Estimated Employed',animation_frame = 'Month_name' ,color='Area',
title='Estimated Employed count across Region(States) of India in 2019', height=700,template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()
<Figure size 2000x2500 with 0 Axes>
fig = px.sunburst(unemp_non_null.groupby(['Region'])['Estimated Employed'].mean().reset_index(),
path=['Region'], values='Estimated Employed',
color_continuous_scale='Plasma', title='Estimated Employed Count by Region(State)',
height=1050, template='ggplot2',custom_data=['Estimated Employed'])
fig.update_traces(textinfo='label+value')
fig.show()
From the pieplot, avg. employed count bar plot and box plots we can infer the following:-
¶The top 5 regions(states) in India having the highest employed count during COVID-19 lockdown are:
¶1. Uttar Pradesh = 28.09 Million
¶2. Maharashtra = 19.99 Million
¶3. West Bengal = 17.19 Million
¶4. Bihar = 12.37 Million
¶5. Tamil Nadu = 12.27 Million
¶plt.figure(figsize=(20,15))
fig = px.box(unemp_non_null,x='Region',y='Estimated Labour Participation Rate (%)',color='Region',title='Estimated Labour Participation Rate (%) vs State',template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()
<Figure size 2000x1500 with 0 Axes>
plt.figure(figsize=(20,15))
fig = px.box(unemp_non_null,x='Area',y='Estimated Labour Participation Rate (%)',color='Area',title='Estimated Labour Participation rate(%) vs Area',template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()
<Figure size 2000x1500 with 0 Axes>
df_est_lab_par_rate_vs_region = unemp_non_null[['Estimated Labour Participation Rate (%)','Region']]
df_est_lab_par_rate_vs_region = df_est_lab_par_rate_vs_region.groupby('Region').mean().reset_index()
df_est_lab_par_rate_vs_region = df_est_lab_par_rate_vs_region.sort_values('Estimated Labour Participation Rate (%)')
fig = px.bar(df_est_lab_par_rate_vs_region, x='Region',y='Estimated Labour Participation Rate (%)',color='Region',
title='Average Estimated Labour Participation Rate (%) in each state',template='plotly', text='Estimated Labour Participation Rate (%)', height=1000)
fig.show()
plt.figure(figsize=(20,30))
fig = px.bar(unemp_2020, x='Region',y='Estimated Labour Participation Rate (%)',animation_frame = 'Month_name' ,color='Area',
title='Estimated Labour Participation Rate (%) across Region(States) during year 2020', height=900,template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()
<Figure size 2000x3000 with 0 Axes>
plt.figure(figsize=(20,30))
fig = px.bar(unemp_2019, x='Region',y='Estimated Labour Participation Rate (%)',animation_frame = 'Month_name' ,color='Area',
title='Estimated Labour Participation Rate (%) across Region(States) during year 2019', height=700,template='plotly')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()
<Figure size 2000x3000 with 0 Axes>
fig = px.sunburst(unemp_non_null.groupby(['Region'])['Estimated Labour Participation Rate (%)'].mean().reset_index(),
path=['Region'], values='Estimated Labour Participation Rate (%)',
color_continuous_scale='Plasma', title='Estimated Labour Participation Rate (%) by Region(State)',
height=950, template='ggplot2',custom_data=['Estimated Labour Participation Rate (%)'])
fig.update_traces(textinfo='label+value')
fig.show()
From the pieplot, avg. labour participation rate(%) bar plot and box plots we can infer the following:-
¶The top 5 regions(states) in India having the highest labour participation rate (%) during COVID-19 lockdown are:
¶1. Tripura = 61.82%
¶2. Meghalaya = 57.08%
¶3. Telangana = 53.00%
¶4. Gujarat = 46.10%
¶5. Sikkim = 46.07%
¶