#!/usr/bin/env python # coding: utf-8 # # Visualizing the Drug Epidemic in San Francisco # ### Introduction #

San Francisco, a city once celebrated for its vibrant and diverse cultures, has faced numerous challenges in recent years, particularly with regards to drug addiction and crime.

# Drug overdose in San Francisco has been driven largely by the proliferation of the synthetic opioid fentanyl, according to the [SF Chronicle](https://www.sfchronicle.com/sf/article/S-F-is-investing-millions-in-overdose-response-16491976.php). On Dec. 17, 2021, Mayor London Breed declared an official state of emergency in the Tenderloin district to address the escalating epidemic. # # In response to these issues, the city has implemented several strategies aimed at addressing drug addiction and its impact on the community. These strategies include increasing law enforcement efforts to combat illegal drug possession and sales, as well as deploying more prescription medications, such as Narcan, to reverse overdose and save lives. # > Narcan, also known as naloxone, is a prescription medication used to reverse opioid overdose. It works by binding to the same receptors in the brain that opioids bind to, reversing their effects and restoring breathing. # ### Quantifiable questions # Using the [San Francisco police response records](https://data.sfgov.org/Public-Safety/Police-Department-Incident-Reports-2018-to-Present/wg3w-h783), we hope to answer the following questions: # # - Where do most drug-related arrests and Narcan deployments occur? # - How do the frequencies change over time? # - When do drug-related arrests happen? Mornings? Evenings? Late at night? # - What's the correlation between drug-related arrests and Narcan deployments? # ## Downloading and Preparing Data # The SFPD data can be downloaded by clicking `Export` - `CSV`: # ![export-csv](data_download.png) # Save the dataset to `data/raw`. # > Notice that since this file is big (over 200MB), before committing and pushing to github, make sure to add the line '/data' in `.gitignore` file so that we don't break the git! # In[1]: # importing the modules import csv import os import pandas as pd import altair as alt from altair import datum # In[9]: # Creating portable path data_dir = os.environ['DATA_DIR'] file_path = os.path.join(data_dir, 'raw/Police_Department_Incident_Reports__2018_to_Present.csv') # In[12]: # Reading the file from local data/raw folder sfpd_data = pd.read_csv(file_path) # In[7]: # Filtering for narcan deployment incidents narcan_deployment = sfpd_data[sfpd_data['Incident Code'] == 51050] # In[4]: # Filtering for drug-related arrests drug_arrest = sfpd_data[~sfpd_data['Incident Category'].isna()] drug_arrest_data = drug_arrest[drug_arrest['Incident Category'].str.contains('Drug')] # In[ ]: # Saving the files to the data/processed directory narcan_deployment.to_csv("/Users/tracy/sfpd-data/data/processed/narcan_deployment.csv") drug_arrest_data.to_csv("/Users/tracy/sfpd-data/data/processed/drug_arrest_data.csv") # Now, load the filterd csv files and be ready to dive in! # In[22]: # Read dataframes drug_arrest = pd.read_csv('/Users/tracy/code/sfpd_data/data/processed/drug_arrest_data.csv') narcan_deploy = pd.read_csv('/Users/tracy/sfpd-data/data/processed/narcan_deployment.csv') # Make the column names snake_case drug_arrest.columns = [col.lower().replace(' ', '_') for col in drug_arrest.columns] narcan_deploy.columns = [col.lower().replace(' ', '_') for col in narcan_deploy.columns] # ## Drug Arrests and Narcan Deployment by Neighborhood # ### Finding #1 # Our analysis found that most drug-related arrests occur in the **Tenderloin, South of Market (SoMa), and Mission** neighborhoods. Located near each other in downtown San Francisco, the two neighborhoods have undergone significant changes following rapid urban development and gentrification in recent years. Both suffer from a high-rate of poverty and crime. # #### Analysis and Visualization #1 # In[359]: da_neighb = drug_arrest.groupby(drug_arrest['analysis_neighborhood']).size() da_df = da_neighb.sort_values(ascending = False).reset_index(name='drug_arrests_count') da_df.index +=1 da_df.head(10) # In[25]: # Group the incidents by time and location # Note that incident_date_2 has no significant meaning other than for the purpose of tooltips in visualization. drug_arrest['incident_date'] = pd.to_datetime(drug_arrest['incident_date']) drug_arrest['incident_time'] = pd.to_datetime(drug_arrest['incident_time']) da_nbh = drug_arrest.groupby([(drug_arrest['incident_date'].dt.strftime('%Y-%m')),drug_arrest['analysis_neighborhood']]).size().reset_index(name='count') # In[362]: # Making a new column so that all the other neighborhoods except for the three mentioned above # will be categorized into "All Other Neighborhoods" da_viz1 = da_nbh for i, row in da_viz1.iterrows(): if row['analysis_neighborhood'] == 'Tenderloin': da_viz1.loc[i, 'viz1'] = 'Tenderloin' elif row['analysis_neighborhood'] == 'South of Market': da_viz1.loc[i, 'viz1'] = 'SoMA' elif row['analysis_neighborhood'] == 'Mission': da_viz1.loc[i, 'viz1'] = 'Mission' else: da_viz1.loc[i, 'viz1'] = 'All Other Neighborhoods' da_viz = da_viz1.groupby(['incident_date','viz1']).sum('count').reset_index() da_viz['incident_date_2'] = da_viz['incident_date'] # Note that incident_date_2 has no significant meaning other than for the purpose of tooltips in visualization. ## Excluding the month we're currently in. end_date = '2023-02' filtered_da_viz = da_viz[da_viz['incident_date']<=end_date] # In[363]: # Visualization alt.Chart(filtered_da_viz).mark_area(size=9,opacity=0.8).encode( x= alt.X('incident_date:T',title='Date',axis=alt.Axis(domain=False, format='%Y.%m')), y= alt.Y('count:Q',stack = 'normalize',title = 'Count'), color=alt.Color("viz1",legend=alt.Legend(title='Neighborhood', values=['All Other Neighborhoods','Mission', 'SoMA','Tenderloin'])), tooltip = [alt.Tooltip('incident_date_2',title = 'Date'),alt.Tooltip('count',title='Drug Arrests'),alt.Tooltip('viz1',title='Neighborhood')] ).properties( width=650, height=350, title='Total Drug Arrests by Neighborhoods (2018-now)' ) # From 2018, the number of drug-arrests in Tenderloin alone is more than that in all the other neighborhoods combined. # In[376]: # Making a new column so that all the neighborhoods except for Tenderloin will be categorized into "All Other Neighborhoods" for i, row in da_nbh.iterrows(): if row['analysis_neighborhood'] != 'Tenderloin': da_nbh.loc[i, 'compare'] = 'All Other Neighborhoods' else: da_nbh.loc[i, 'compare'] = 'Tenderloin' da_nbh_df = da_nbh.groupby('compare').sum('count').reset_index() da_nbh_df = da_nbh_df.rename(columns={'compare': 'Neighborhood', 'count': 'Count'}) da_nbh_df.index += 1 da_nbh_df # In[377]: # Visualization da_plot = da_nbh.groupby(['incident_date','compare']).sum('count').reset_index() da_plot['incident_date_2'] = da_plot['incident_date'] ## Excluding the month we're currently in. end_date = '2023-02' filtered_da_plot = da_plot[da_plot['incident_date']<=end_date] alt.Chart(filtered_da_plot).mark_line(opacity=0.8).encode( alt.X('incident_date:T',title='Date'), alt.Y('count:Q',title = 'Count'), color = alt.Color('compare:N',legend = alt.Legend(title = 'Neighborhoods'),scale=alt.Scale(domain=['Tenderloin','All Other Neighborhoods'], range=['crimson','grey'])), tooltip = [alt.Tooltip('incident_date_2',title='Date'),alt.Tooltip('count',title='Incidents')] ).properties( width=650, height=350, title='Drug-related Arrests in Tenderloin vs. in All Other Neighborhoods' ) # ### Finding #2 # # A similar pattern is found with Narcan deployment: incidents happen significantly more often in **Terderloin** than in other neighborhoods. # #### Analysis and Visualization #2 # In[23]: nd_neighb = narcan_deploy.groupby(narcan_deploy['analysis_neighborhood']).size() nd_df = nd_neighb.sort_values(ascending = False).reset_index(name='narcan_deployment_count') nd_df.index +=1 nd_df.head(10) # In[27]: # Group the incidents by time and location # Note that incident_date_2 has no significant meaning other than for the purpose of tooltips in visualization. narcan_deploy['incident_date'] = pd.to_datetime(narcan_deploy['incident_date']) narcan_deploy['incident_time'] = pd.to_datetime(narcan_deploy['incident_time']) nd_nbh = narcan_deploy.groupby([(narcan_deploy['incident_date'].dt.strftime('%Y-%m')),narcan_deploy['analysis_neighborhood']]).size().reset_index(name='count') nd_nbh['incident_date_2'] = nd_nbh['incident_date'] for i, row in nd_nbh.iterrows(): if row['analysis_neighborhood'] == 'Tenderloin': nd_nbh.loc[i, 'compare'] = 'Tenderloin Incidents' else: nd_nbh.loc[i, 'compare'] = 'All Other Neighborhoods' nd_nbh_df = nd_nbh.groupby('compare').sum('count').reset_index() nd_nbh_df = nd_nbh_df.rename(columns={'compare': 'Neighborhood', 'count': 'Count'}) nd_nbh_df.index += 1 nd_nbh_df # In[28]: # Visualization nd_plot = nd_nbh.groupby(['incident_date','compare']).sum('count').reset_index() nd_plot['incident_date_2'] = nd_plot['incident_date'] ## Excluding the month we're currently in. end_date = '2023-02' filtered_nd_plot = nd_plot[nd_plot['incident_date']<=end_date] alt.Chart(filtered_nd_plot).mark_line(opacity=0.8).encode( alt.X('incident_date:T',title='Date'), alt.Y('sum(count):Q',title = 'Count',scale=alt.Scale(domain=(0,25))), color = alt.Color('compare:N',legend = alt.Legend(title = 'Neighborhoods'),scale=alt.Scale(domain=['Tenderloin Incidents','All Other Neighborhoods'], range=['crimson','grey'])), tooltip = [alt.Tooltip('incident_date_2',title='Date'),alt.Tooltip('count',title='Incidents')] ).properties( width=650, height=350, title='Narcan Deployment in Tenderloin vs. in All Other Neighborhoods' ) # ## Drug Arrests and Narcan Deployment Over Time # ### Finding #3 # As the pandemic began to subside and people started moving around more freely in the city, there has been an increase in drug-related arrests. During July and August of 2022, the city saw **more than 1000** drug-related arrests, almost double the typical monthly average of **271**. # #### Analysis and Visualization #3 # In[379]: # Drug arrests over time, by year-month # Again, note that incident_date_2 has no significant meaning other than for the purpose of tooltips in visualization. drug_arrest['incident_date'] = pd.to_datetime(drug_arrest['incident_date']) drug_arrest['incident_time'] = pd.to_datetime(drug_arrest['incident_time']) counts = drug_arrest.groupby(drug_arrest['incident_date'].dt.strftime('%Y-%m')).size().reset_index(name='count') counts['incident_date_2'] = counts['incident_date'] counts.head() # In[380]: # Excluding the month we're currently in. end_date = '2023-02' filtered_counts = counts[counts['incident_date']<=end_date] # Calculating the monthly average. We chose median to account for extreme values. drug_arrest_median = filtered_counts['count'].median() drug_arrest_median # In[382]: # Visualize base = alt.Chart(filtered_counts).mark_line(size=3).encode( alt.X('incident_date:T',title='Date'), alt.Y('count',title='Count, Median of Counts',scale=alt.Scale(domain=(0,600))), tooltip = [alt.Tooltip('incident_date_2',title = 'Date'),alt.Tooltip('count',title='Drug Arrests')] ).properties( width=600, height=400, title='Drug-related Arrests, Jan. 2018 - Feb.2023' ) median = alt.Chart(filtered_counts).mark_rule(size=2,color='crimson').encode( y='median(count)', tooltip = alt.Tooltip(title = 'Median of counts') ) text = ( alt.Chart(filtered_counts.query("count == count.max()")) .mark_text(dy=-15, color="crimson") .encode(x=alt.X("incident_date:T"), y=alt.Y("count"), text=alt.Text(("count")) )) base+median+text # ### Finding #4 # Narcan deployment spiked as incidents of drug overdose worsened during the COVID-19 pandemic. # # In March 2020, there were **31 recorded deployments** of Narcan in San Francisco, more than doubling the typical monthly amount of **12**. # # #### Analysis and Visualization #4 # In[383]: # Grouping the narcan deployments by Year-month. # Note that incident_date_2 is not necessary, just for the sake of tooltips for visualization narcan_deploy['incident_date'] = pd.to_datetime(narcan_deploy['incident_date']) narcan_deploy['incident_time'] = pd.to_datetime(narcan_deploy['incident_time']) nd_counts = narcan_deploy.groupby(narcan_deploy['incident_date'].dt.strftime('%Y-%m')).size().reset_index(name='count') nd_counts['incident_date_2'] = nd_counts['incident_date'] nd_counts.head() # In[384]: # Excluding the month we're currently in. end_date = '2023-02' filtered_nd_counts = nd_counts[nd_counts['incident_date']<=end_date] # Calculating the monthly average. We chose median to account for extreme values. median_nd = filtered_nd_counts['count'].median() median_nd # In[386]: # Visualize base = alt.Chart(filtered_nd_counts).mark_line(size=3).encode( alt.X('incident_date:T',title='Date'), alt.Y('count',title = 'Count, Median of Counts'), tooltip = [alt.Tooltip('incident_date_2',title = 'Date'),alt.Tooltip('count',title='Narcan Deployment')] ).properties( width=600, height=400, title='Narcan Deployments, Jan 2018 - Feb 2023' ) median = alt.Chart(filtered_nd_counts).mark_rule(size=2,color='crimson').encode( y='median(count)', tooltip = alt.Tooltip(title = 'Median of counts') ) text = ( alt.Chart(filtered_nd_counts.query("count == count.max()")) .mark_text(dy=-25, color="crimson") .encode(x=alt.X("incident_date:T"), y=alt.Y("count"), text=alt.Text(("count")) )) base+median+text # ## Drug Arrests by Day-of-Week and Time-of-Day # ### Finding #5 # # Most drug-related arrests occur on the **afternoons of Tuesday and Wednesday**. This result could be due to routine police patrol around those times. # #### Analysis and Visualization #5 # In[387]: # Grouping the arrests week_time = drug_arrest.groupby([drug_arrest['incident_day_of_week'],drug_arrest['incident_time'].dt.strftime('%H')]).size().reset_index(name='count') week_time['hour']=week_time['incident_time']+':00' week_time.head() # In[388]: weekday_order = ['Sunday','Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday','Saturday'] base = alt.Chart(week_time).mark_rect().encode( alt.Y('incident_day_of_week',title = 'Day of Week',sort= weekday_order), alt.X('incident_time',title = 'Hour of Day'), color='count', tooltip = [alt.Tooltip('incident_day_of_week',title='Day'), alt.Tooltip('hour',title = 'Time'), alt.Tooltip('count',title='Drug Arrests')] ) base # ## The Correlation Between Drug Arrests and Narcan Deployments # ### Finding #6 # Perhaps unsurprisingly, there's a strong correlation between drug arrests and Narcan deployments. # In[391]: # Merging the drug arrests dataframe and the narcan deployment dataframe da_nd = da_df.merge(nd_df, on = 'analysis_neighborhood') # Visualization base = alt.Chart(da_nd).mark_square().encode( alt.X('drug_arrests_count',title = 'Drug Arrests'), alt.Y('narcan_deployment_count',title = 'Narcan Deployment'), color = alt.Color('analysis_neighborhood',legend=alt.Legend(title='Neighborhoods', values=['Tenderloin', 'South of Market','Mission'])), size = alt.Size('drug_arrests_count',legend=None), tooltip=[alt.Tooltip('analysis_neighborhood',title='Neighborhood'),alt.Tooltip('drug_arrests_count',title = 'Drug Arrests'),alt.Tooltip('narcan_deployment_count',title = 'Narcan Deployment')] ).properties( width=650, height=250, title='Drug Arrests vs. Narcan Deployment, by Neighborhood' ) base # Let's ignore the three outliers — *Tenderloin, SoMA and Mission* — and take a closer look at the busy bottom left corner: # In[392]: # Filtering out the three neighborhood with the most drug arrests left_coner = base.encode( color = alt.Color('analysis_neighborhood',legend=alt.Legend(title='Neighborhoods', values=['Financial District/South Beach', 'Bayview Hunters Point'])) ).transform_filter( datum.drug_arrests_count < 1000 ) left_coner # A similar correlation appears — high numbers of drug arrests corresponds with high numbers of Narcan deployment. Note that compared to other neighborhoods, **Financial District/South Beach** has a relatively high number of Narcan deployment. # ## Beyond the Notebook # Explore our dashboard to see how the drug epidemic affects different neighborhoods in San Francisco. # - [Drug Arrests in SF](https://fishdmy-sfpd-data-scriptsstreamlit-drugarrests-183wkh.streamlit.app/) # - [Narcan Deployment in SF](https://fishdmy-sfpd-data-scriptsstreamlit-narcan-xpqd8b.streamlit.app/)