%%ruby --out socrata_data
# with --out, data written to the stdout in this ruby cell
# will be mapped to a Python variable (socrata_data) after execution.
require 'soda/client'
require 'json'
# Connection settings for the SODA client: the Socrata domain to query
# and the application token sent along with each request.
soda_config = {
  domain: "opendata.socrata.com",
  app_token: "eqZC5q2iEmFXdIu2qEbtZkWgP"
}
client = SODA::Client.new(soda_config)
# Fetch the dataset by its identifier
dataset_id = "ed74-c6ni"
response = client.get(dataset_id)
# Serialize the dataset to JSON and write it to stdout, which is what the
# cell's --out option captures
$stdout.puts(response.to_json)
import pandas as pd
# Parse the JSON text captured from the Ruby cell into a dataframe
# (df stands for dataframe)
df = pd.read_json(socrata_data)
df.head()  # first 5 rows of the dataframe
df.shape  # (rows, columns) of the dataframe

# Count rows per artist: group on the 'artist' column, take each group's
# size, and convert the result to a plain {artist: count} dict
artist_groups = df.groupby('artist')
song_by_artist = artist_groups.size().to_dict()
song_by_artist

# Keep only the artists that appear 4 times or more
song_by_artist_4plus = dict(
    (artist, n_songs)
    for artist, n_songs in song_by_artist.items()
    if n_songs >= 4
)
song_by_artist_4plus
import numpy as np
# Materialize keys and values as real lists. On Python 3, dict.keys() and
# dict.values() return views that cannot be indexed with my_keys[i] and that
# np.array() does not turn into a 1-d numeric array; list() is a no-op
# copy on Python 2, so this works on both. The two lists share the dict's
# iteration order, so index i refers to the same artist in each.
my_keys = list(song_by_artist_4plus.keys())
my_vals = list(song_by_artist_4plus.values())
# Indices that order the counts from largest to smallest
# (argsort is ascending, hence the reversal)
i_sorted = np.argsort(np.array(my_vals))[::-1]
# Apply the same permutation to both lists so they stay aligned
my_keys_sorted = [my_keys[i] for i in i_sorted]
my_vals_sorted = [my_vals[i] for i in i_sorted]
import plotly.plotly as py
from plotly.graph_objs import Figure, Data, Layout
from plotly.graph_objs import Bar
from plotly.graph_objs import XAxis, YAxis, Marker, Font, Margin
# Bar trace: one bar per artist, ordered by the sorted lists built earlier
my_bar = Bar(x=my_keys_sorted,                 # labels of the x-axis
             y=my_vals_sorted,                 # values of the y-axis
             marker=Marker(color='#2ca02c'))   # a nice green color
my_data = Data([my_bar])  # make data object (Data accepts only a list)

# Two-line plot title. The original literal was split by a raw newline, which
# is a syntax error in a single-quoted string; '<br>' is the markup Plotly
# uses for a line break, and the adjacent literals are joined by the parser
# into one string.
my_title = ('Number of songs listed in the Guardian\'s<br>'
            'Top 1,000 Songs to Hear Before You Die per artist with 4 or more songs')
my_ytitle = 'Number of songs per artist'

my_layout = Layout(
    title=my_title,                        # set plot title
    showlegend=False,                      # remove legend
    font=Font(family='Georgia, serif',     # set global font family
              color='#635F5D'),            # and color
    plot_bgcolor='#EFECEA',                # set plot color to grey
    xaxis=XAxis(title='',                  # no x-axis title
                tickangle=45,              # tick labels' angle
                ticks='outside',           # draw ticks outside axes
                ticklen=8,                 # tick length
                tickwidth=1.5),            # and width
    yaxis=YAxis(title=my_ytitle,           # y-axis title
                gridcolor='#FFFFFF',       # white grid lines
                ticks='outside',
                ticklen=8,
                tickwidth=1.5),
    autosize=False,                        # manual figure size
    width=700,
    height=500,
    margin=Margin(b=140),                  # increase bottom margin
)                                          # to fit long x-axis tick labels

# Assemble the figure and send it to Plotly under the name 'socrata1'
my_fig = Figure(data=my_data, layout=my_layout)
py.iplot(my_fig, filename='socrata1')
# Boolean mask over df: True for rows whose 'artist' is one of the keys
# of song_by_artist_4plus
i_good = df['artist'].isin(song_by_artist_4plus)
# Select the masked rows into a new dataframe
df_good = df.loc[i_good]
df_good.shape  # a much smaller dataframe than the original
my_text = []  # init. the hover-text list
# Loop through the sorted artist names, so that my_text
# will have the same ordering as the values linked to 'x' and 'y' in my_data
for k in my_keys_sorted:
    # Slice dataframe to artist name and sort songs by year.
    # The mask is built from df_good itself so its index matches the frame
    # being filtered (a mask built from df would have to be re-aligned).
    i_artist = (df_good['artist'] == k)
    # sort_values replaces the removed DataFrame.sort(columns=...)
    df_tmp = df_good[i_artist].sort_values(by='year')
    my_text_tmp = ''  # init. string
    cnt_song = 0  # song counter for given artist
    N_song = len(df_tmp['title'])  # total number of songs for given artist
    # Loop through songs
    for i_song, song in df_tmp.iterrows():
        # Add "title (year)" to the string, one song per line
        # ('<br>' is Plotly's line-break markup), and bump the counter
        my_text_tmp += song['title']+' ('+str(song['year'])+')<br>'
        cnt_song += 1
        # Stop once 13 songs are listed so the hover box fits on the figure
        if cnt_song > 12:
            diff = N_song - cnt_song
            # Only mention the remainder when there is one; an artist with
            # exactly 13 songs would otherwise get " and 0 more ..."
            if diff > 0:
                my_text_tmp += ' and '+str(diff)+' more ...'
            break
    # Append hover-text list
    my_text += [my_text_tmp]
# Update figure object: attach the hover text to the (only) bar trace
my_fig['data'][0].update(text=my_text)
from plotly.graph_objs import Annotation
# Two-line annotation text. The original literal was split by a raw newline,
# which is a syntax error in a single-quoted string; '<br>' is the markup
# Plotly uses for a line break, and the adjacent literals are joined by the
# parser into one string.
my_anno_text = ('Open Data by Socrata<br>'
                'Hover over the bars to see list of songs')
my_anno = Annotation(text=my_anno_text,   # annotation text
                     x=0.95,              # position's x-coord
                     y=0.95,              # and y-coord
                     xref='paper',        # use paper coords
                     yref='paper',        # for both coordinates
                     font=Font(size=14),  # increase font size (default is 12)
                     showarrow=False,     # remove arrow
                     bgcolor='#FFFFFF',   # white background
                     borderpad=4)         # space bt. border and text (in px)
# Update figure object
my_fig['layout'].update(annotations=[my_anno])
# Send the annotated figure to Plotly under the name 'socrata1-hover'
py.iplot(my_fig, filename='socrata1-hover')
# CSS styling within IPython notebook
from IPython.core.display import HTML
import urllib2
def css_styling():
    """Download the stylesheet at the URL below and wrap it for display.

    Returns:
        An IPython ``HTML`` object built from the raw contents read from
        the URL.
    """
    # Imported locally so this block does not depend on a file-level import.
    from contextlib import closing

    url = 'https://raw.githubusercontent.com/plotly/python-user-guide/master/custom.css'
    # The urllib2 response is not a context manager by itself; closing()
    # guarantees it is closed even if read() raises.
    with closing(urllib2.urlopen(url)) as response:
        styles = response.read()
    return HTML(styles)


css_styling()