In [ ]:
flex_title = "Movie explorer"
flex_source_code = "https://github.com/danielfrg/jupyter-flex/blob/master/examples/movie-explorer.ipynb"
In [ ]:
import sqlite3 as sql
from os.path import dirname, join

import numpy as np
import pandas.io.sql as psql

import plotly.graph_objects as go
from bokeh.sampledata.movies_data import movie_path

import ipywidgets as widgets
from IPython.display import display

Sidebar

In [ ]:
axis_map = {
    "Tomato Meter": "Meter",
    "Numeric Rating": "numericRating",
    "Number of Reviews": "Reviews",
    "Box Office (dollars)": "BoxOffice",
    "Length (minutes)": "Runtime",
    "Year": "Year",
}
In [ ]:
genres = """
All
Action
Adventure
Animation
Biography
Comedy
Crime
Documentary
Drama
Family
Fantasy
History
Horror
Music
Musical
Mystery
Romance
Sci-Fi
Short
Sport
Thriller
War
Western
"""

Filter

In [ ]:
components = []

reviews_label = widgets.HTML(value="Minimum number of reviews on Rotten Tomatoes:")
reviews = widgets.IntSlider(value=80, min=10, max=300, step=1)
components.extend([reviews_label, reviews])

released_year_label = widgets.HTML(value="Year released:")
released_year = widgets.IntRangeSlider(value=[1970, 2014], min=1940, max=2014, step=1)
components.extend([released_year_label, released_year])

oscars_label = widgets.HTML(value="Minimum number of Oscar wins (all categories):")
oscars = widgets.IntSlider(value=0, min=0, max=4, step=1)
components.extend([oscars_label, oscars])

boxoffice_label = widgets.HTML(value="Dollars at Box Office (millions):")
boxoffice = widgets.IntSlider(value=0, min=0, max=800, step=1)
components.extend([boxoffice_label, boxoffice])

genre_label = widgets.HTML(value="Genre (a movie can have multiple genres):")
genre = widgets.Dropdown(options=genres.split("\n"), value="All")
components.extend([genre_label, genre])

director_label = widgets.HTML(value="Director name contains (e.g., Miyazaki):")
director = widgets.Text()
components.extend([director_label, director])

cast_label = widgets.HTML(value="Cast names contains (e.g. Tom Hanks):")
cast = widgets.Text()
components.extend([cast_label, cast])

all_widgets = widgets.VBox(components)
all_widgets

Variables

In [ ]:
components = []

x_axis_label = widgets.HTML(value="X-axis variable:")
x_axis = widgets.Dropdown(options=list(axis_map.items()), value="Meter")
components.extend([x_axis_label, x_axis])

y_axis_label = widgets.HTML(value="Y-axis variable:")
y_axis = widgets.Dropdown(options=list(axis_map.items()), value="Reviews")
components.extend([y_axis_label, y_axis])

note = "Note: The Tomato Meter is the proportion of positive reviews (as judged by the Rotten Tomatoes staff), and the Numeric rating is a normalized 1-10 score of those reviews which have star ratings (for example, 3 out of 4 stars)."
note_label = widgets.HTML(value=note)
components.append(note_label)

all_widgets = widgets.VBox(components)
all_widgets

Movies

Column

In [ ]:
conn = sql.connect(movie_path)

query = """
SELECT omdb.ID,
       imdbID,
       Title,
       Year,
       omdb.Rating as mpaaRating,
       Runtime,
       Genre,
       Released,
       Director,
       Writer,
       omdb.Cast,
       imdbRating,
       imdbVotes,
       Language,
       Country,
       Oscars,
       tomatoes.Rating as numericRating,
       Meter,
       Reviews,
       Fresh,
       Rotten,
       userMeter,
       userRating,
       userReviews,
       BoxOffice,
       Production
FROM omdb, tomatoes
WHERE omdb.ID = tomatoes.ID AND Reviews >= 10
"""

movies = psql.read_sql(query, conn)
In [ ]:
movies["color"] = np.where(movies["Oscars"] > 0, "orange", "grey")
movies["alpha"] = np.where(movies["Oscars"] > 0, 0.9, 0.25)
movies.fillna(0, inplace=True)  # just replace missing values with zero
movies["revenue"] = movies.BoxOffice.apply(lambda x: '{:,d}'.format(int(x)))
In [ ]:
fig = go.FigureWidget()
plot = go.Scatter(x=[], y=[], mode="markers", text=[], hoverinfo="text", marker=dict(color=[], opacity=[], size=7))
fig.add_trace(plot)

margin = go.layout.Margin(l=20, r=20, b=20, t=30)
fig = fig.update_layout(margin=margin)
In [ ]:
def select_movies():
    selected = movies[
        (movies.Reviews >= reviews.value) &
        (movies.BoxOffice >= (boxoffice.value * 1e6)) &
        (movies.Year >= released_year.value[0]) &
        (movies.Year <= released_year.value[1]) &
        (movies.Oscars >= oscars.value)
    ]
    if (genre.value != "All"):
        selected = selected[selected.Genre.str.contains(genre.value)==True]
    if (director.value != ""):
        selected = selected[selected.Director.str.contains(director.value)==True]
    if (cast.value != ""):
        selected = selected[selected.Cast.str.contains(cast.value)==True]
    return selected


def on_value_change(change):
    df = select_movies()
    x_name = x_axis.value
    y_name = y_axis.value

    fig.data[0]['x'] = df[x_name]
    fig.data[0]['y'] = df[y_name]
    fig.data[0]['marker']['color'] = df["color"]
    fig.data[0]['marker']['opacity'] = df["alpha"]
    fig.data[0]['text'] = df["Title"] + "<br>" + df["Year"].astype(str) + "<br>" + df["BoxOffice"].astype(str)
    
    fig.update_xaxes(title_text=x_axis.label)
    fig.update_yaxes(title_text=y_axis.label)
    fig.update_layout(title="%d movies selected" % len(df))
In [ ]:
controls = [reviews, boxoffice, released_year, oscars, genre, director, cast, x_axis, y_axis]
for control in controls:
    control.observe(on_value_change, names="value")
In [ ]:
on_value_change(None)
fig
In [ ]: