#!/usr/bin/env python
# coding: utf-8

# # Personality Disorder Prevalence
# 
# Trying to work out personality disorder prevalence.

# ## Ethnicity Data
# 
# Ethnicity facts and figures report:
# https://www.ethnicity-facts-figures.service.gov.uk/health/mental-health/prevalence-of-personality-disorder-in-adults/latest
# Data: https://www.ethnicity-facts-figures.service.gov.uk/health/mental-health/prevalence-of-personality-disorder-in-adults/latest/downloads/apms-screen-positive-for-personality-disorder.csv

# In[1]:


import pandas as pd

# CSV download that accompanies the Ethnicity facts and figures report
# (see the links in the section header).
eth_prev_url = "https://www.ethnicity-facts-figures.service.gov.uk/health/mental-health/prevalence-of-personality-disorder-in-adults/latest/downloads/apms-screen-positive-for-personality-disorder.csv"

# Read the CSV straight from the URL (needs network access) and preview the
# first five rows.
eth_prev_df = pd.read_csv(filepath_or_buffer=eth_prev_url)
eth_prev_df.head(n=5)


# From the prevalence data, the ethnic groups and sexes are:

# In[2]:


# Distinct ethnicity labels that appear in the prevalence data.
eth_prev_df.Ethnicity.unique()


# In[3]:


# Distinct sex labels that appear in the prevalence data.
eth_prev_df.Sex.unique()


# ## Population Data
# 
# For population data by ethnicity, the Nomis website lets you pull data from Census 2011.
# 
# https://www.nomisweb.co.uk/census/2011/data_finder

# ![image.png](attachment:image.png)

# Ethnic group by sex:
# https://www.nomisweb.co.uk/census/2011/LC2101EW/view/1946157281?rows=c_ethpuk11&cols=c_sex 

# In[4]:


# Nomis API request for the "ethnic group by sex" table as an .xlsx download.
eth_sex_url = "https://www.nomisweb.co.uk/api/v01/dataset/nm_801_1.data.xlsx?time=latest&geography=1946157281&c_age=0&measures=20100&c_ethpuk11=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23&c_sex=0,1,2&rows=c_ethpuk11&cols=c_sex&select=noflags"

# Load the sheet and tidy it up step by step:
#   1. skip the first ten rows of the sheet (presumably Nomis title/metadata
#      lines -- confirm against the downloaded file);
#   2. the column headed "Sex" is relabelled "Ethnicity" and used as the index;
#   3. rows in which every remaining column is empty are discarded.
eth_sex_df = (
    pd.read_excel(eth_sex_url, sheet_name="Sheet 1", skiprows=10)
    .rename(columns={"Sex": "Ethnicity"})
    .set_index("Ethnicity")
    .dropna(how="all")
)
eth_sex_df


# *The 2021 Census results are starting to come out, and the new ethnicity data looks due by the end of the month: https://census.gov.uk/census-2021-results/phase-one-topic-summaries/ethnic-group-national-identity-language-and-religion*

# Prevalence stats - pop group mapping:
# - White - British : English/Welsh/Scottish/Northern Irish/British
# - White - Other : (White: Total minus English/Welsh/Scottish/Northern Irish/British)
# - Black : Black/African/Caribbean/Black British: Total	
# - Asian : Asian/Asian British: Total
# - Mixed/Other: (Mixed/multiple ethnic group: Total + Other ethnic group: Total)

# In[5]:


# Hand check that the five mapped groups add up (the sum is 138265,
# presumably the overall "All persons" figure -- confirm against the table):
# 131099 + (134545 - 131099) + 303 + 1514 + (1709 + 194)

# Keep only the rows whose label contains "Total"; work on a copy so the
# rows added to this frame later do not touch eth_sex_df.
is_total_row = eth_sex_df.index.str.contains("Total")
eth_sex_tot_df = eth_sex_df.loc[is_total_row].copy()
display(eth_sex_tot_df)

# Column sums of the "Total" rows, shown for comparison with the overall figures.
eth_sex_tot_df.sum(axis=0)


# In[6]:


# Rebuild the census rows so they match the groups used by the prevalence
# data (see the mapping listed above this cell).
white_british_counts = eth_sex_df.loc['English/Welsh/Scottish/Northern Irish/British']
white_total_counts = eth_sex_df.loc['White: Total']
mixed_counts = eth_sex_df.loc['Mixed/multiple ethnic group: Total']
other_counts = eth_sex_df.loc['Other ethnic group: Total']

# New rows are appended in this order: White - British, White - Other, Mixed/Other.
eth_sex_tot_df.loc["White - British"] = white_british_counts
eth_sex_tot_df.loc["White - Other"] = white_total_counts - white_british_counts
eth_sex_tot_df.loc["Mixed/Other"] = mixed_counts + other_counts

# Drop the source rows that have now been folded into the derived groups,
# then relabel the remaining rows and the columns to the prevalence naming.
eth_sex_tot_df = eth_sex_tot_df.drop(
    index=["White: Total", 'Mixed/multiple ethnic group: Total', 'Other ethnic group: Total']
).rename(
    index={
        'Black/African/Caribbean/Black British: Total': 'Black',
        'Asian/Asian British: Total': 'Asian',
    },
    columns={"All persons": "All", "Males": "Men", "Females": "Women"},
)
eth_sex_tot_df


# In[7]:


# Reshape to one row per ethnicity and one column per sex. The values are
# percentages (per the "Value Type" column), so divide by 100 to get
# fractions that can be multiplied by population counts.
eth_prev_df_wide = eth_prev_df.pivot(
    index="Ethnicity", columns="Sex", values="Value"
).div(100)
eth_prev_df_wide


# In[8]:


# Multiply each prevalence column by the census column of the same name.
# pandas aligns the two Series on their index labels, so a label present on
# only one side comes out as NaN.
for sex in eth_prev_df_wide:
    display(eth_prev_df_wide[sex].mul(eth_sex_tot_df[sex]))


# In[9]:


# Prevalence fraction times population for the "All" column only
# (index-aligned, so labels missing on either side give NaN).
eth_prev_df_wide["All"].mul(eth_sex_tot_df["All"])