#!/usr/bin/env python
# coding: utf-8

# # Personality Disorder Prevalence
#
# Trying to work out personality disorder prevalence.
#
# NOTE: this file is a notebook export. A bare expression at the end of a
# cell (e.g. `eth_prev_df.head()`) is rendered as the cell output inside
# Jupyter; when run as a plain script it is evaluated and discarded.

# ## Ethnicity Data
#
# Ethnicity facts and figures report:
# https://www.ethnicity-facts-figures.service.gov.uk/health/mental-health/prevalence-of-personality-disorder-in-adults/latest
# Data: https://www.ethnicity-facts-figures.service.gov.uk/health/mental-health/prevalence-of-personality-disorder-in-adults/latest/downloads/apms-screen-positive-for-personality-disorder.csv

# In[1]:


import pandas as pd

# `display` is predefined only inside IPython/Jupyter. Fall back to `print`
# so the exported notebook also runs under a plain Python interpreter instead
# of raising NameError at the first `display(...)` call.
try:
    display
except NameError:
    display = print

eth_prev_url = "https://www.ethnicity-facts-figures.service.gov.uk/health/mental-health/prevalence-of-personality-disorder-in-adults/latest/downloads/apms-screen-positive-for-personality-disorder.csv"
eth_prev_df = pd.read_csv(eth_prev_url)
eth_prev_df.head()


# From the prevalence / ethnicity data, the ethnic groups and sexes are:

# In[2]:


eth_prev_df["Ethnicity"].unique()


# In[3]:


eth_prev_df["Sex"].unique()


# ## Population Data
#
# For population (ethnicity) data, the Nomis website lets you pull data from
# Census 2011.
#
# https://www.nomisweb.co.uk/census/2011/data_finder

# ![image.png](attachment:image.png)

# Ethnic group by sex:
# https://www.nomisweb.co.uk/census/2011/LC2101EW/view/1946157281?rows=c_ethpuk11&cols=c_sex

# In[4]:


# NOTE(review): the prevalence report above is for adults, while this query
# uses `c_age=0` -- presumably "all ages"; confirm the population base is
# comparable before interpreting the products computed further down.
eth_sex_url = "https://www.nomisweb.co.uk/api/v01/dataset/nm_801_1.data.xlsx?time=latest&geography=1946157281&c_age=0&measures=20100&c_ethpuk11=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23&c_sex=0,1,2&rows=c_ethpuk11&cols=c_sex&select=noflags"

# Load the sheet, skipping the first 10 rows (assumed to be workbook preamble
# above the table header -- TODO confirm against the download). The column
# holding the row labels arrives named "Sex", so it is renamed to "Ethnicity"
# and used as the index; rows that are entirely empty are dropped.
eth_sex_df = (
    pd.read_excel(eth_sex_url, sheet_name="Sheet 1", skiprows=10)
    .rename(columns={"Sex": "Ethnicity"})
    .set_index("Ethnicity")
    .dropna(how="all")
)
eth_sex_df


# *It also looks like new 2021 census data is out at the end of the month:
# the 2021 Census results are starting to come out; ethnicity by end of month?
# https://census.gov.uk/census-2021-results/phase-one-topic-summaries/ethnic-group-national-identity-language-and-religion*

# Prevalence stats - pop group mapping:
# - White - British : English/Welsh/Scottish/Northern Irish/British
# - White - Other : (White: Total - English/Welsh/Scottish/Northern Irish/British)
# - Black : Black/African/Caribbean/Black British: Total
# - Asian : Asian/Asian British: Total
# - Mixed/Other: (Mixed/multiple ethnic group: Total + Other ethnic group: Total)

# In[5]:


#Check:
#131099 + (134545-131099) +303+1514+(1709+194)

# Keep only the per-group "... Total" rows; `.copy()` so the rows added below
# do not write into a slice of `eth_sex_df`.
eth_sex_tot_df = eth_sex_df[eth_sex_df.index.str.contains("Total")].copy()
display(eth_sex_tot_df)

# Check the totals to ensure our sum is correct
eth_sex_tot_df.sum()


# In[6]:


# Derive the population groups listed in the mapping above from the census
# rows, then drop the source rows that have been folded into a derived group.
eth_sex_tot_df.loc["White - British"] = eth_sex_df.loc[
    'English/Welsh/Scottish/Northern Irish/British'
]
eth_sex_tot_df.loc["White - Other"] = (
    eth_sex_df.loc['White: Total']
    - eth_sex_df.loc['English/Welsh/Scottish/Northern Irish/British']
)
eth_sex_tot_df.loc["Mixed/Other"] = (
    eth_sex_df.loc['Mixed/multiple ethnic group: Total']
    + eth_sex_df.loc['Other ethnic group: Total']
)
eth_sex_tot_df = eth_sex_tot_df.drop(
    index=[
        "White: Total",
        'Mixed/multiple ethnic group: Total',
        'Other ethnic group: Total',
    ]
)

# Shorten the remaining census labels to the short group names used in the
# mapping above.
eth_sex_tot_df.rename(
    index={
        'Black/African/Caribbean/Black British: Total': 'Black',
        'Asian/Asian British: Total': 'Asian',
    },
    inplace=True,
)
# Rename the sex columns; presumably these are the labels used in the
# prevalence "Sex" column -- verify against the output of In[3].
eth_sex_tot_df.rename(
    columns={"All persons": "All", "Males": "Men", "Females": "Women"},
    inplace=True,
)
eth_sex_tot_df


# In[7]:


#The rates are percentages (Value Type column)
# Reshape to one row per ethnicity and one column per sex, then convert the
# percentages to fractions.
eth_prev_df_wide = eth_prev_df[["Ethnicity", "Sex", "Value"]].pivot(
    index='Ethnicity', columns='Sex', values='Value'
)
eth_prev_df_wide = eth_prev_df_wide / 100
eth_prev_df_wide


# In[8]:


# Prevalence fraction x population, per sex column. pandas aligns the two
# Series on their index labels, so an ethnicity label present in only one of
# the frames yields NaN rather than an error.
for c in eth_prev_df_wide.columns:
    display(eth_prev_df_wide[c] * eth_sex_tot_df[c])


# In[9]:


eth_prev_df_wide["All"] * eth_sex_tot_df["All"]