flex_title = "Movie explorer"
flex_subtitle = "built using jupyter-flex"
flex_include_source = True
flex_source_link = "https://github.com/danielfrg/jupyter-flex/blob/master/examples/movie-explorer.ipynb"
axis_map = {
"Tomato Meter": "Meter",
"Numeric Rating": "numericRating",
"Number of Reviews": "Reviews",
"Box Office (dollars)": "BoxOffice",
"Length (minutes)": "Runtime",
"Year": "Year",
}
genres = """
All
Action
Adventure
Animation
Biography
Comedy
Crime
Documentary
Drama
Family
Fantasy
History
Horror
Music
Musical
Mystery
Romance
Sci-Fi
Short
Sport
Thriller
War
Western
"""
components = []
reviews_label = widgets.HTML(value="Minimum number of reviews on Rotten Tomatoes:")
reviews = widgets.IntSlider(value=80, min=10, max=300, step=1)
components.extend([reviews_label, reviews])
released_year_label = widgets.HTML(value="Year released:")
released_year = widgets.IntRangeSlider(value=[1970, 2014], min=1940, max=2014, step=1)
components.extend([released_year_label, released_year])
oscars_label = widgets.HTML(value="Minimum number of Oscar wins (all categories):")
oscars = widgets.IntSlider(value=0, min=0, max=4, step=1)
components.extend([oscars_label, oscars])
boxoffice_label = widgets.HTML(value="Dollars at Box Office (millions):")
boxoffice = widgets.IntSlider(value=0, min=0, max=800, step=1)
components.extend([boxoffice_label, boxoffice])
genre_label = widgets.HTML(value="Genre (a movie can have multiple genres):")
genre = widgets.Dropdown(options=genres.split("\n"), value="All")
components.extend([genre_label, genre])
director_label = widgets.HTML(value="Director name contains (e.g., Miyazaki):")
director = widgets.Text()
components.extend([director_label, director])
cast_label = widgets.HTML(value="Cast names contains (e.g. Tom Hanks):")
cast = widgets.Text()
components.extend([cast_label, cast])
all_widgets = widgets.VBox(components)
all_widgets
components = []
x_axis_label = widgets.HTML(value="X-axis variable:")
x_axis = widgets.Dropdown(options=list(axis_map.items()), value="Meter")
components.extend([x_axis_label, x_axis])
y_axis_label = widgets.HTML(value="Y-axis variable:")
y_axis = widgets.Dropdown(options=list(axis_map.items()), value="Reviews")
components.extend([y_axis_label, y_axis])
note = "Note: The Tomato Meter is the proportion of positive reviews (as judged by the Rotten Tomatoes staff), and the Numeric rating is a normalized 1-10 score of those reviews which have star ratings (for example, 3 out of 4 stars)."
note_label = widgets.HTML(value=note)
components.append(note_label)
all_widgets = widgets.VBox(components)
all_widgets
conn = sql.connect(movie_path)
query = """
SELECT omdb.ID,
imdbID,
Title,
Year,
omdb.Rating as mpaaRating,
Runtime,
Genre,
Released,
Director,
Writer,
omdb.Cast,
imdbRating,
imdbVotes,
Language,
Country,
Oscars,
tomatoes.Rating as numericRating,
Meter,
Reviews,
Fresh,
Rotten,
userMeter,
userRating,
userReviews,
BoxOffice,
Production
FROM omdb, tomatoes
WHERE omdb.ID = tomatoes.ID AND Reviews >= 10
"""
movies = psql.read_sql(query, conn)
movies["color"] = np.where(movies["Oscars"] > 0, "orange", "grey")
movies["alpha"] = np.where(movies["Oscars"] > 0, 0.9, 0.25)
movies.fillna(0, inplace=True) # just replace missing values with zero
movies["revenue"] = movies.BoxOffice.apply(lambda x: '{:,d}'.format(int(x)))
fig = go.FigureWidget()
plot = go.Scatter(x=[], y=[], mode="markers", text=[], hoverinfo="text", marker=dict(color=[], opacity=[], size=7))
fig.add_trace(plot)
margin = go.layout.Margin(l=20, r=20, b=20, t=30)
fig = fig.update_layout(margin=margin)
def select_movies():
selected = movies[
(movies.Reviews >= reviews.value) &
(movies.BoxOffice >= (boxoffice.value * 1e6)) &
(movies.Year >= released_year.value[0]) &
(movies.Year <= released_year.value[1]) &
(movies.Oscars >= oscars.value)
]
if (genre.value != "All"):
selected = selected[selected.Genre.str.contains(genre.value)==True]
if (director.value != ""):
selected = selected[selected.Director.str.contains(director.value)==True]
if (cast.value != ""):
selected = selected[selected.Cast.str.contains(cast.value)==True]
return selected
def on_value_change(change):
df = select_movies()
x_name = x_axis.value
y_name = y_axis.value
fig.data[0]['x'] = df[x_name]
fig.data[0]['y'] = df[y_name]
fig.data[0]['marker']['color'] = df["color"]
fig.data[0]['marker']['opacity'] = df["alpha"]
fig.data[0]['text'] = df["Title"] + "<br>" + df["Year"].astype(str) + "<br>" + df["BoxOffice"].astype(str)
fig.update_xaxes(title_text=x_axis.label)
fig.update_yaxes(title_text=y_axis.label)
fig.update_layout(title="%d movies selected" % len(df))
controls = [reviews, boxoffice, released_year, oscars, genre, director, cast, x_axis, y_axis]
for control in controls:
control.observe(on_value_change, names="value")
on_value_change(None)
fig