# The usual preamble
import numpy as np
import pandas as pd

# Make the graphs a bit prettier, and bigger.
# The 'display.mpl_style' option only exists in older pandas releases. Where
# it is gone, set_option raises OptionError (a KeyError subclass), so the
# styling is skipped instead of aborting the whole script.
try:
    pd.set_option('display.mpl_style', 'default')
except KeyError:
    pass
# NOTE(review): figsize is neither defined nor imported in this file --
# presumably it is injected by IPython's pylab mode; confirm how this is run.
figsize(15, 5)

# Always display all the columns.
# 'display.width' is the current name of the option that used to be called
# 'display.line_width'; later pandas releases reject the old name.
pd.set_option('display.width', 5000)
pd.set_option('display.max_columns', 60)

# First pass: read the 311 service-request data with the default parsing.
requests = pd.read_csv('../data/311-service-requests.csv')

# List the distinct values found in the zip code column.
requests['Incident Zip'].unique()

# Second pass: treat the listed placeholder strings as missing values and
# read the zip code column as strings.
na_values = ['NO CLUE', 'N/A', '0']
requests = pd.read_csv(
    '../data/311-service-requests.csv',
    na_values=na_values,
    dtype={'Incident Zip': str},
)

# The distinct zip values again, after the stricter read.
requests['Incident Zip'].unique()

# Flag the rows whose zip code contains a dash. na=False makes a missing zip
# count as "no dash" directly, so the mask is a plain boolean Series without
# a separate fillna step; regex=False matches the dash as a literal character.
rows_with_dashes = requests['Incident Zip'].str.contains('-', regex=False, na=False)
# Number of affected rows
len(requests[rows_with_dashes])

# The affected rows themselves
requests[rows_with_dashes]

# Distinct zip codes that are longer than five characters.
long_zip_codes = requests['Incident Zip'].str.len().gt(5)
requests.loc[long_zip_codes, 'Incident Zip'].unique()

# Keep only the first five characters of every zip code.
requests['Incident Zip'] = requests['Incident Zip'].str.slice(0, 5)

# Show the rows whose zip code is the all-zero placeholder.
zero_zips = requests['Incident Zip'] == '00000'
requests[zero_zips]

# Replace the placeholder with NaN. A single .loc assignment writes into the
# DataFrame itself; the chained form requests[col][mask] = ... may assign to
# a temporary object and leave the DataFrame unchanged.
requests.loc[zero_zips, 'Incident Zip'] = np.nan

# Distinct zip codes in sorted order. Sorting through pandas keeps NaN out of
# the comparisons (it is placed last); an in-place ndarray sort would have to
# compare the float NaN with the zip strings, which Python 3 rejects with a
# TypeError.
unique_zips = requests['Incident Zip'].drop_duplicates().sort_values().values
unique_zips

zips = requests['Incident Zip']
# Let's say the zips starting with '0' and '1' are okay, for now. (This isn't
# actually true -- 13221 is in Syracuse, and why is it in this data?)
# There are a bunch of NaNs, but we're not interested in them right now, so
# na=True counts them as close. Resolving NaN inside startswith keeps both
# operands boolean, so the result does not depend on how `|` treats NaN.
is_close = zips.str.startswith('0', na=True) | zips.str.startswith('1', na=True)
is_far = ~is_close

# The zip codes that are neither missing nor start with '0' or '1'
zips[is_far]

# Zip, descriptor and city of the far-away rows, ordered by zip code.
# sort_values replaces the DataFrame.sort method, which later pandas releases
# no longer provide; .loc selects rows and columns in one step.
requests.loc[is_far, ['Incident Zip', 'Descriptor', 'City']].sort_values('Incident Zip')

# How often each city name occurs, ignoring case
requests['City'].str.upper().value_counts()

# Read the file again from scratch: the listed placeholder strings become
# missing values and the zip code column is read as strings.
na_values = ['NO CLUE', 'N/A', '0']
requests = pd.read_csv(
    '../data/311-service-requests.csv',
    na_values=na_values,
    dtype={'Incident Zip': str},
)

def fix_zip_codes(zips):
    """Return a cleaned copy of a Series of zip code strings.

    Every value is cut down to its first five characters (so '10001-1234'
    becomes '10001'), and the placeholder '00000' is replaced with NaN.
    Missing values stay missing. The Series passed in is not modified.
    """
    # Truncate everything to length 5
    truncated = zips.str.slice(0, 5)

    # Set 00000 zip codes to NaN. Series.mask fills the selected positions
    # with NaN by default, so no numpy name has to be in scope here.
    return truncated.mask(truncated == '00000')

# Clean the zip code column with fix_zip_codes and store the result back.
requests['Incident Zip'] = requests['Incident Zip'].pipe(fix_zip_codes)

# Distinct zip codes after cleaning
requests['Incident Zip'].unique()