Notebook

Extract Voting Data for County¶

Reads the NC SBE voter registration and voter history datasets and extracts the relevant fields for Wake County.

In [1]:

import pandas as pd

In [2]:

# Explore data: peek at the statewide voter file to learn its column names.
# Only the first 10 rows are parsed, and every field is kept as text so
# codes such as '06' or '063' keep their leading zeros.
dfSample = pd.read_csv(
    './data/NCSBE/ncvoter_Statewide.txt',
    sep='\t',                 # the file is tab-delimited
    nrows=10,
    dtype='str',
    encoding="ISO-8859-1",
)
dfSample.columns

Out[2]:

Index(['county_id', 'county_desc', 'voter_reg_num', 'status_cd',
       'voter_status_desc', 'reason_cd', 'voter_status_reason_desc',
       'absent_ind', 'name_prefx_cd', 'last_name', 'first_name', 'middle_name',
       'name_suffix_lbl', 'res_street_address', 'res_city_desc', 'state_cd',
       'zip_code', 'mail_addr1', 'mail_addr2', 'mail_addr3', 'mail_addr4',
       'mail_city', 'mail_state', 'mail_zipcode', 'full_phone_number',
       'race_code', 'ethnic_code', 'party_cd', 'gender_code', 'birth_age',
       'birth_state', 'drivers_lic', 'registr_dt', 'precinct_abbrv',
       'precinct_desc', 'municipality_abbrv', 'municipality_desc',
       'ward_abbrv', 'ward_desc', 'cong_dist_abbrv', 'super_court_abbrv',
       'judic_dist_abbrv', 'nc_senate_abbrv', 'nc_house_abbrv',
       'county_commiss_abbrv', 'county_commiss_desc', 'township_abbrv',
       'township_desc', 'school_dist_abbrv', 'school_dist_desc',
       'fire_dist_abbrv', 'fire_dist_desc', 'water_dist_abbrv',
       'water_dist_desc', 'sewer_dist_abbrv', 'sewer_dist_desc',
       'sanit_dist_abbrv', 'sanit_dist_desc', 'rescue_dist_abbrv',
       'rescue_dist_desc', 'munic_dist_abbrv', 'munic_dist_desc',
       'dist_1_abbrv', 'dist_1_desc', 'dist_2_abbrv', 'dist_2_desc',
       'confidential_ind', 'birth_year', 'ncid', 'vtd_abbrv', 'vtd_desc'],
      dtype='object')

In [10]:

# Explore data: show example values for one sampled record.
# Take the row at position 1, then keep the fields from position 30 onward
# (the first 30 columns are skipped).
dfSample.iloc[1].iloc[30:]

Out[10]:

birth_state                             DC
drivers_lic                              Y
registr_dt                      02/23/2018
precinct_abbrv                         10N
precinct_desc               NORTH MELVILLE
municipality_abbrv                     MEB
municipality_desc                   MEBANE
ward_abbrv                             NaN
ward_desc                              NaN
cong_dist_abbrv                         06
super_court_abbrv                      15A
judic_dist_abbrv                       15A
nc_senate_abbrv                         24
nc_house_abbrv                         063
county_commiss_abbrv                   NaN
county_commiss_desc                    NaN
township_abbrv                         NaN
township_desc                          NaN
school_dist_abbrv                      NaN
school_dist_desc                       NaN
fire_dist_abbrv                        NaN
fire_dist_desc                         NaN
water_dist_abbrv                       NaN
water_dist_desc                        NaN
sewer_dist_abbrv                       NaN
sewer_dist_desc                        NaN
sanit_dist_abbrv                       NaN
sanit_dist_desc                        NaN
rescue_dist_abbrv                      NaN
rescue_dist_desc                       NaN
munic_dist_abbrv                       MEB
munic_dist_desc                     MEBANE
dist_1_abbrv                            17
dist_1_desc             17TH PROSECUTORIAL
dist_2_abbrv                              
dist_2_desc                               
confidential_ind                         N
birth_year                            1978
ncid                              AA201627
vtd_abbrv                              10N
vtd_desc                               10N
Name: 1, dtype: object

In [11]:

# Load the full statewide file, keeping only the columns needed downstream.
# All retained fields are read as text.
keep_columns = [
    'county_desc', 'voter_reg_num',
    'res_street_address', 'res_city_desc', 'state_cd', 'zip_code',
    'race_code', 'ethnic_code', 'gender_code', 'party_cd',
    'ncid',
]
dfAll = pd.read_csv(
    './data/NCSBE/ncvoter_Statewide.txt',
    sep='\t',
    usecols=keep_columns,
    dtype='str',
    encoding="ISO-8859-1",
)

In [12]:

# Select Wake records.
# The whole transformation is one chain, so re-running this cell always
# rebuilds dfWake from dfAll and never mutates a half-processed frame.
dfWake = (
    dfAll[dfAll['county_desc'] == "WAKE"]
    .copy()                          # independent frame, safe to add columns to later
    .drop('county_desc', axis=1)     # constant ("WAKE") after the filter
    .set_index('voter_reg_num')
    # Discard voters missing any component of the residential address.
    .dropna(how='any',
            subset=['res_street_address', 'res_city_desc', 'state_cd', 'zip_code'])
)

In [13]:

# Add a single-string address column: street, city, state and ZIP joined by
# single spaces, in that order.
dfWake['address'] = dfWake['res_street_address'].str.cat(
    [dfWake['res_city_desc'], dfWake['state_cd'], dfWake['zip_code']],
    sep=" ",
)

In [14]:

# Save the Wake County extract as CSV; the index is written as the first
# column under the header 'voter_reg_num'.
dfWake.to_csv(
    './data/NCSBE/ncvoter_Wake.csv',
    index=True,
    index_label='voter_reg_num',
)