%%ruby --out socrata_data # with --out, data written to the stdout in this ruby cell # will be mapped to a Python variable (socrata_data) after execution. require 'soda/client' require 'json' # Set up client object with domain and application token client = SODA::Client.new({:domain => "opendata.socrata.com", :app_token => "eqZC5q2iEmFXdIu2qEbtZkWgP"}) # Get data with dataset identifier response = client.get("ed74-c6ni") # Print dataset to stdout as a JSON puts response.to_json import pandas as pd # Read the retrieved JSON dataset (df stands for dataframe) df = pd.read_json(socrata_data) df.head() # print the first 5 lines of the dataframe df.shape # print the dataframe's size song_by_artist = df.groupby('artist').size().to_dict() song_by_artist song_by_artist_4plus = {k:v for k,v in song_by_artist.items() if v>=4} song_by_artist_4plus import numpy as np # Lists of keys and values my_keys = song_by_artist_4plus.keys() my_vals = song_by_artist_4plus.values() # Find indices of sorted values (first converted to a numpy array) i_sorted = np.argsort(np.array(my_vals))[::-1] # Sort both the keys and value list my_keys_sorted = [my_keys[i] for i in i_sorted] my_vals_sorted = [my_vals[i] for i in i_sorted] import plotly.plotly as py from plotly.graph_objs import Figure, Data, Layout from plotly.graph_objs import Bar from plotly.graph_objs import XAxis, YAxis, Marker, Font, Margin my_bar = Bar(x=my_keys_sorted, # labels of the x-axis y=my_vals_sorted, # values of the y-axis marker= Marker(color='#2ca02c')) # a nice green color my_data = Data([my_bar]) # make data object, (Data accepts only list) my_title = 'Number of songs listed in the Guardian\'s
\ Top 1,000 Songs to Hear Before You Die per artist with 4 or more songs' my_ytitle = 'Number of songs per artist' my_layout = Layout(title=my_title, # set plot title showlegend=False, # remove legend font= Font(family='Georgia, serif', # set global font family color='#635F5D'), # and color plot_bgcolor='#EFECEA', # set plot color to grey xaxis= XAxis(title='', # no x-axis title tickangle=45, # tick labels' angle ticks='outside', # draw ticks outside axes ticklen=8, # tick length tickwidth=1.5,), # and width, yaxis= YAxis(title=my_ytitle, # y-axis title gridcolor='#FFFFFF', # white grid lines ticks='outside', ticklen=8, tickwidth=1.5), autosize=False, # manual figure size width=700, height=500, margin= Margin(b=140) # increase bottom margin, ) # to fit long x-axis tick labels my_fig = Figure(data=my_data, layout=my_layout) py.iplot(my_fig, filename='socrata1') # Rows which have 'artist' name in song_by_artist_4plus i_good = (df['artist'].isin(song_by_artist_4plus)) df_good = df[i_good] # a new dataframe df_good.shape # a much smaller dataframe than the original my_text = [] # init. the hover-text list # Loop through the sorted artist names, so that my_text # will have to same ordering as the values linked to 'x' and 'y' in my_data for k in my_keys_sorted: # Slice dataframe to artist name and sort songs by year i_artist = (df['artist']==k) df_tmp = df_good[i_artist].sort(columns='year') my_text_tmp = '' # init. string cnt_song = 0 # song counter for given artist N_song = len(df_tmp['title']) # total number of song for given artist # Loop through songs for i_song, song in df_tmp.iterrows(): # Add to string and counter my_text_tmp += song['title']+' ('+str(song['year'])+')
' cnt_song += 1 # Skip if song list is too long to fit on figure if cnt_song>12: diff = N_song - cnt_song my_text_tmp += ' and '+str(diff)+' more ...' break # Append hover-text list my_text += [my_text_tmp] # Update figure object my_fig['data'][0].update(text=my_text) from plotly.graph_objs import Annotation my_anno_text = 'Open Data by Socrata
\ Hover over the bars to see list of songs' my_anno = Annotation(text=my_anno_text, # annotation text x=0.95, # position's x-coord y=0.95, # and y-coord xref='paper', # use paper coords yref='paper', # for both coordinates font= Font(size=14), # increase font size (default is 12) showarrow=False, # remove arrow bgcolor='#FFFFFF', # white background borderpad=4) # space bt. border and text (in px) # Update figure object my_fig['layout'].update(annotations=[my_anno]) py.iplot(my_fig, filename='socrata1-hover') # CSS styling within IPython notebook from IPython.core.display import HTML import urllib2 def css_styling(): url = 'https://raw.githubusercontent.com/plotly/python-user-guide/master/custom.css' styles = urllib2.urlopen(url).read() return HTML(styles) css_styling()