import pandas as pd
#Explore data: reveal columns
# Peek at only the first 10 rows of the tab-delimited statewide voter file,
# reading every field as text, then display the column names.
dfSample = pd.read_csv(
    './data/NCSBE/ncvoter_Statewide.txt',
    encoding="ISO-8859-1",
    dtype='str',
    nrows=10,
    sep='\t',
)
dfSample.columns
# Output:
# Index(['county_id', 'county_desc', 'voter_reg_num', 'status_cd',
#        'voter_status_desc', 'reason_cd', 'voter_status_reason_desc',
#        'absent_ind', 'name_prefx_cd', 'last_name', 'first_name',
#        'middle_name', 'name_suffix_lbl', 'res_street_address',
#        'res_city_desc', 'state_cd', 'zip_code', 'mail_addr1', 'mail_addr2',
#        'mail_addr3', 'mail_addr4', 'mail_city', 'mail_state',
#        'mail_zipcode', 'full_phone_number', 'race_code', 'ethnic_code',
#        'party_cd', 'gender_code', 'birth_age', 'birth_state',
#        'drivers_lic', 'registr_dt', 'precinct_abbrv', 'precinct_desc',
#        'municipality_abbrv', 'municipality_desc', 'ward_abbrv',
#        'ward_desc', 'cong_dist_abbrv', 'super_court_abbrv',
#        'judic_dist_abbrv', 'nc_senate_abbrv', 'nc_house_abbrv',
#        'county_commiss_abbrv', 'county_commiss_desc', 'township_abbrv',
#        'township_desc', 'school_dist_abbrv', 'school_dist_desc',
#        'fire_dist_abbrv', 'fire_dist_desc', 'water_dist_abbrv',
#        'water_dist_desc', 'sewer_dist_abbrv', 'sewer_dist_desc',
#        'sanit_dist_abbrv', 'sanit_dist_desc', 'rescue_dist_abbrv',
#        'rescue_dist_desc', 'munic_dist_abbrv', 'munic_dist_desc',
#        'dist_1_abbrv', 'dist_1_desc', 'dist_2_abbrv', 'dist_2_desc',
#        'confidential_ind', 'birth_year', 'ncid', 'vtd_abbrv', 'vtd_desc'],
#       dtype='object')
#Explore data: reveal values
# Take the second sampled row (position 1) and display its fields from
# column position 30 through the last column.
dfSample.iloc[1].iloc[30:]
# Output:
# birth_state                             DC
# drivers_lic                              Y
# registr_dt                      02/23/2018
# precinct_abbrv                         10N
# precinct_desc               NORTH MELVILLE
# municipality_abbrv                     MEB
# municipality_desc                   MEBANE
# ward_abbrv                             NaN
# ward_desc                              NaN
# cong_dist_abbrv                         06
# super_court_abbrv                      15A
# judic_dist_abbrv                       15A
# nc_senate_abbrv                         24
# nc_house_abbrv                         063
# county_commiss_abbrv                   NaN
# county_commiss_desc                    NaN
# township_abbrv                         NaN
# township_desc                          NaN
# school_dist_abbrv                      NaN
# school_dist_desc                       NaN
# fire_dist_abbrv                        NaN
# fire_dist_desc                         NaN
# water_dist_abbrv                       NaN
# water_dist_desc                        NaN
# sewer_dist_abbrv                       NaN
# sewer_dist_desc                        NaN
# sanit_dist_abbrv                       NaN
# sanit_dist_desc                        NaN
# rescue_dist_abbrv                      NaN
# rescue_dist_desc                       NaN
# munic_dist_abbrv                       MEB
# munic_dist_desc                     MEBANE
# dist_1_abbrv                            17
# dist_1_desc             17TH PROSECUTORIAL
# dist_2_abbrv
# dist_2_desc
# confidential_ind                         N
# birth_year                            1978
# ncid                              AA201627
# vtd_abbrv                              10N
# vtd_desc                               10N
# Name: 1, dtype: object
#Get all the data
# Load the whole statewide voter file, keeping only the columns needed
# downstream. Every field is read as text so that code-like values
# (ZIP codes, registration numbers) are not coerced to numbers.
dfAll = pd.read_csv(
    './data/NCSBE/ncvoter_Statewide.txt',
    sep='\t',
    encoding="ISO-8859-1",
    dtype='str',
    usecols=[
        'county_desc',
        'voter_reg_num',
        'res_street_address',
        'res_city_desc',
        'state_cd',
        'zip_code',
        'race_code',
        'ethnic_code',
        'gender_code',
        'party_cd',
        'ncid',
    ],
)
#Select Wake records
# Build the Wake County working table from the statewide frame `dfAll`:
#   1. keep the rows whose county_desc is exactly "WAKE";
#   2. drop county_desc, which is constant after the filter;
#   3. index the rows by voter_reg_num;
#   4. discard rows missing any of the four residential-address parts.
# The chain ends with an explicit .copy() so that dfWake is an independent
# frame before new columns are added to it (a bare .reindex() relabels
# nothing and only hides that intent).
dfWake = (
    dfAll.loc[dfAll['county_desc'] == "WAKE"]
    .drop(columns='county_desc')
    .set_index('voter_reg_num')
    .dropna(
        how='any',
        subset=['res_street_address', 'res_city_desc', 'state_cd', 'zip_code'],
    )
    .copy()
)
#Add an address
# Join street, city, state and ZIP code into one space-separated string per
# voter. A missing value in any part yields a missing address for that row.
dfWake['address'] = dfWake['res_street_address'].str.cat(
    [dfWake['res_city_desc'], dfWake['state_cd'], dfWake['zip_code']],
    sep=" ",
)
#Write to a file
# Save the Wake County table as CSV; the index is written as the first
# column under the header 'voter_reg_num'.
dfWake.to_csv(
    './data/NCSBE/ncvoter_Wake.csv',
    index_label='voter_reg_num',
    index=True,
)