# Import quandl to retrieve data
# Pandas and matplotlib standard imports for data analysis
import quandl
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Use Plotly for better graphs
import plotly.offline as plotly
import plotly.graph_objs as go
# Offline Plotting
plotly.init_notebook_mode(connected=True)
# Constants
# Populate your own quandl API Token
# Populate your own mapbox API Token
quandl.ApiConfig.api_key = {YOUR API KEY}
mapbox_access_token = {YOUR API KEY}
# Start date and end date should be the same
start_date = '2018-06-30'
end_date = '2018-06-30'
# Specify Zillow Indicator we want to graph
indicator = 'MRPFAH'
# Specify the county and state we want to generate the map for
county = 'Alameda'
state = 'CA'
# We are going to fetch the data for each zip code in the county and add that to a single dataframe
def get_city_data(cities, indicator):
df_array = []
for city, code in cities.items():
quandl_db_code = f'ZILLOW/Z{code}_{indicator}'
try:
data = quandl.get(quandl_db_code, start_date=start_date, end_date=end_date)
data.rename(index=str, columns={'Value': city}, inplace=True)
data.index = pd.to_datetime(data.index, format='%Y/%m/%d')
df_array.append(data)
except:
print(f'Error fetching for: {quandl_db_code}')
ret = pd.concat(df_array, axis=1)
return ret
# Here we are going to figure out what zip codes are in the specified county
# We will also use the logitude / latitude data for plotting
ZIP_CODE_DTYPE = {'zip_code': str}
ZIP_CODES = pd.read_csv('zip_codes_states.csv', dtype=ZIP_CODE_DTYPE)
county_df = ZIP_CODES['county'] == county
state_df = ZIP_CODES['state'] == state
data = ZIP_CODES[county_df & state_df]
data.index = data['zip_code']
# We will call our function to fetch the data from quandl
# There will be errors for data we cannot retrieve
real_estate_data = get_city_data(data['zip_code'], indicator)
Error fetching for: ZILLOW/Z94502_MRPFAH Error fetching for: ZILLOW/Z94537_MRPFAH Error fetching for: ZILLOW/Z94540_MRPFAH Error fetching for: ZILLOW/Z94542_MRPFAH Error fetching for: ZILLOW/Z94543_MRPFAH Error fetching for: ZILLOW/Z94544_MRPFAH Error fetching for: ZILLOW/Z94545_MRPFAH Error fetching for: ZILLOW/Z94552_MRPFAH Error fetching for: ZILLOW/Z94557_MRPFAH Error fetching for: ZILLOW/Z94577_MRPFAH Error fetching for: ZILLOW/Z94578_MRPFAH Error fetching for: ZILLOW/Z94579_MRPFAH Error fetching for: ZILLOW/Z94580_MRPFAH Error fetching for: ZILLOW/Z94586_MRPFAH Error fetching for: ZILLOW/Z94601_MRPFAH Error fetching for: ZILLOW/Z94602_MRPFAH Error fetching for: ZILLOW/Z94603_MRPFAH Error fetching for: ZILLOW/Z94604_MRPFAH Error fetching for: ZILLOW/Z94606_MRPFAH Error fetching for: ZILLOW/Z94609_MRPFAH Error fetching for: ZILLOW/Z94610_MRPFAH Error fetching for: ZILLOW/Z94612_MRPFAH Error fetching for: ZILLOW/Z94613_MRPFAH Error fetching for: ZILLOW/Z94614_MRPFAH Error fetching for: ZILLOW/Z94615_MRPFAH Error fetching for: ZILLOW/Z94617_MRPFAH Error fetching for: ZILLOW/Z94618_MRPFAH Error fetching for: ZILLOW/Z94619_MRPFAH Error fetching for: ZILLOW/Z94620_MRPFAH Error fetching for: ZILLOW/Z94621_MRPFAH Error fetching for: ZILLOW/Z94622_MRPFAH Error fetching for: ZILLOW/Z94623_MRPFAH Error fetching for: ZILLOW/Z94624_MRPFAH Error fetching for: ZILLOW/Z94625_MRPFAH Error fetching for: ZILLOW/Z94626_MRPFAH Error fetching for: ZILLOW/Z94627_MRPFAH Error fetching for: ZILLOW/Z94643_MRPFAH Error fetching for: ZILLOW/Z94649_MRPFAH Error fetching for: ZILLOW/Z94659_MRPFAH Error fetching for: ZILLOW/Z94660_MRPFAH Error fetching for: ZILLOW/Z94661_MRPFAH Error fetching for: ZILLOW/Z94662_MRPFAH Error fetching for: ZILLOW/Z94666_MRPFAH Error fetching for: ZILLOW/Z94701_MRPFAH Error fetching for: ZILLOW/Z94702_MRPFAH Error fetching for: ZILLOW/Z94703_MRPFAH Error fetching for: ZILLOW/Z94704_MRPFAH Error fetching for: ZILLOW/Z94705_MRPFAH Error fetching for: ZILLOW/Z94706_MRPFAH Error fetching for: ZILLOW/Z94707_MRPFAH Error fetching for: ZILLOW/Z94708_MRPFAH Error fetching for: ZILLOW/Z94709_MRPFAH Error fetching for: ZILLOW/Z94710_MRPFAH Error fetching for: ZILLOW/Z94712_MRPFAH Error fetching for: ZILLOW/Z94720_MRPFAH
# Here we are just doing some transformation and massaging of the data we get back
# If you want more information please run the notebook on your own
transformed_real_estate_data = real_estate_data.transpose()
column_name = f'{indicator} {end_date}'
transformed_real_estate_data[column_name] = transformed_real_estate_data[end_date].astype('float')
concat_data = pd.concat([data, transformed_real_estate_data[column_name]], axis=1, sort=True)
concat_data.dropna(inplace=True)
concat_data['text'] = concat_data['zip_code'] + ' ' + concat_data[column_name].astype(str)
df = concat_data
# Here is our final result dataframe
# We can see for each zip code we have the longitude and latitude data
# As well as the Zillow Indicator and text label
df.head()
zip_code | latitude | longitude | city | state | county | MRPFAH 2018-06-30 | text | |
---|---|---|---|---|---|---|---|---|
94501 | 94501 | 37.770563 | -122.264779 | Alameda | CA | Alameda | 2.698115 | 94501 2.6981150835926 |
94536 | 94536 | 37.565285 | -121.982721 | Fremont | CA | Alameda | 2.397260 | 94536 2.3972602739726 |
94538 | 94538 | 37.509453 | -121.958320 | Fremont | CA | Alameda | 2.460908 | 94538 2.4609079743008 |
94539 | 94539 | 37.520339 | -121.912568 | Fremont | CA | Alameda | 2.297297 | 94539 2.2972972972973 |
94541 | 94541 | 37.675130 | -121.974120 | Hayward | CA | Alameda | 1.966925 | 94541 1.9669247009149 |
# Now it's just a matter of plotting using the Plotly Mapbox graph
plot_data = [
go.Scattermapbox(
lon = df['longitude'],
lat = df['latitude'],
mode='markers',
marker=dict(
reversescale = True,
autocolorscale = False,
size=17,
opacity=0.85,
colorscale = 'YlOrRd',
cmin = df[column_name].min(),
color = df[column_name],
cmax = df[column_name].max(),
colorbar=dict(
title="Price"
)
),
text = df['text'],
hoverinfo= 'text'
)]
layout = go.Layout(
title= f'{indicator} in {county}, {state} {end_date}',
autosize=True,
hovermode='closest',
showlegend=False,
mapbox=dict(
accesstoken=mapbox_access_token,
bearing=0,
center=dict(
lat=df.iloc[0]['latitude'],
lon=df.iloc[0]['longitude']
),
pitch=0,
zoom=8,
style='light'
),
)
# Finally calling our plot function after declaring the layout
fig = dict( data=plot_data, layout=layout )
plotly.iplot( fig, filename=f'{indicator} {county} {state} {end_date}')
# Output to div for blog post
# plotly.plot(fig, filename=f'{indicator} {county} {state} {end_date}', include_plotlyjs=False, output_type='div')