import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
# Load the scraped Fandango table from CSV.
# NOTE(review): this is an absolute path into one user's Desktop, so the
# notebook only runs on that machine — consider a path relative to the notebook.
fandango = pd.read_csv(r"C:\Users\Teni\Desktop\Git-Github\06-Capstone-Project\fandango_scrape.csv")
fandango
FILM | STARS | RATING | VOTES | |
---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 |
... | ... | ... | ... | ... |
499 | Valiyavan (2015) | 0.0 | 0.0 | 0 |
500 | WWE SummerSlam 2015 (2015) | 0.0 | 0.0 | 0 |
501 | Yagavarayinum Naa Kaakka (2015) | 0.0 | 0.0 | 0 |
502 | Yesterday, Today and Tomorrow (1964) | 0.0 | 0.0 | 0 |
503 | Zarafa (2012) | 0.0 | 0.0 | 0 |
504 rows × 4 columns
fandango.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 504 entries, 0 to 503 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 504 non-null object 1 STARS 504 non-null float64 2 RATING 504 non-null float64 3 VOTES 504 non-null int64 dtypes: float64(2), int64(1), object(1) memory usage: 15.9+ KB
fandango.describe()
STARS | RATING | VOTES | |
---|---|---|---|
count | 504.000000 | 504.000000 | 504.000000 |
mean | 3.558532 | 3.375794 | 1147.863095 |
std | 1.563133 | 1.491223 | 3830.583136 |
min | 0.000000 | 0.000000 | 0.000000 |
25% | 3.500000 | 3.100000 | 3.000000 |
50% | 4.000000 | 3.800000 | 18.500000 |
75% | 4.500000 | 4.300000 | 189.750000 |
max | 5.000000 | 5.000000 | 34846.000000 |
# One point per film: true rating on x, number of votes on y.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.scatterplot(x='RATING', y='VOTES', data=fandango, ax=ax);
This helps us understand how closely related the columns are to one another.
# Pairwise correlation of the numeric columns. FILM holds text, so the numeric
# columns are selected explicitly: pandas 2.x raises on non-numeric columns
# instead of silently dropping them as older versions did.
fandango[['STARS', 'RATING', 'VOTES']].corr()
STARS | RATING | VOTES | |
---|---|---|---|
STARS | 1.000000 | 0.994696 | 0.164218 |
RATING | 0.994696 | 1.000000 | 0.163764 |
VOTES | 0.164218 | 0.163764 | 1.000000 |
# Pull the parenthesised four-digit year out of each title, e.g. "(2015)".
# - \d{4} skips other bracketed numbers such as a leading "(500)" in a title.
# - expand=False returns a Series; the default returns a one-column DataFrame.
# The year is kept as a string, exactly as before.
fandango['YEAR'] = fandango['FILM'].str.extract(r'\((\d{4})\)', expand=False)
fandango
FILM | STARS | RATING | VOTES | YEAR | |
---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 |
... | ... | ... | ... | ... | ... |
499 | Valiyavan (2015) | 0.0 | 0.0 | 0 | 2015 |
500 | WWE SummerSlam 2015 (2015) | 0.0 | 0.0 | 0 | 2015 |
501 | Yagavarayinum Naa Kaakka (2015) | 0.0 | 0.0 | 0 | 2015 |
502 | Yesterday, Today and Tomorrow (1964) | 0.0 | 0.0 | 0 | 1964 |
503 | Zarafa (2012) | 0.0 | 0.0 | 0 | 2012 |
504 rows × 5 columns
# fandango.drop('Numbers_in_brackets', axis=1, inplace=True)
fandango
FILM | STARS | RATING | VOTES | YEAR | |
---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 |
... | ... | ... | ... | ... | ... |
499 | Valiyavan (2015) | 0.0 | 0.0 | 0 | 2015 |
500 | WWE SummerSlam 2015 (2015) | 0.0 | 0.0 | 0 | 2015 |
501 | Yagavarayinum Naa Kaakka (2015) | 0.0 | 0.0 | 0 | 2015 |
502 | Yesterday, Today and Tomorrow (1964) | 0.0 | 0.0 | 0 | 1964 |
503 | Zarafa (2012) | 0.0 | 0.0 | 0 | 2012 |
504 rows × 5 columns
fandango['YEAR'].value_counts()
2015 477 2014 24 2016 1 1964 1 2012 1 Name: YEAR, dtype: int64
# A bar plot needs a y variable to aggregate, which we neither have nor need.
# countplot simply tallies the rows in each YEAR category.
fig, ax = plt.subplots(figsize=(10, 6), dpi=200)
sns.countplot(x='YEAR', data=fandango, ax=ax);
# Ten films with the highest vote counts, largest first.
fandango.nlargest(10, 'VOTES')
FILM | STARS | RATING | VOTES | YEAR | |
---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 |
5 | The Hobbit: The Battle of the Five Armies (2014) | 4.5 | 4.3 | 15337 | 2014 |
6 | Kingsman: The Secret Service (2015) | 4.5 | 4.2 | 15205 | 2015 |
7 | Minions (2015) | 4.0 | 4.0 | 14998 | 2015 |
8 | Avengers: Age of Ultron (2015) | 5.0 | 4.5 | 14846 | 2015 |
9 | Into the Woods (2014) | 3.5 | 3.4 | 13055 | 2014 |
# OR
fandango.sort_values(by='VOTES', ascending=False).head(10)
FILM | STARS | RATING | VOTES | YEAR | |
---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 |
5 | The Hobbit: The Battle of the Five Armies (2014) | 4.5 | 4.3 | 15337 | 2014 |
6 | Kingsman: The Secret Service (2015) | 4.5 | 4.2 | 15205 | 2015 |
7 | Minions (2015) | 4.0 | 4.0 | 14998 | 2015 |
8 | Avengers: Age of Ultron (2015) | 5.0 | 4.5 | 14846 | 2015 |
9 | Into the Woods (2014) | 3.5 | 3.4 | 13055 | 2014 |
fandango[fandango['VOTES']==0]
FILM | STARS | RATING | VOTES | YEAR | |
---|---|---|---|---|---|
435 | 6 Years (2015) | 0.0 | 0.0 | 0 | 2015 |
436 | 7 Minutes (2015) | 0.0 | 0.0 | 0 | 2015 |
437 | A Year in Champagne (2015) | 0.0 | 0.0 | 0 | 2015 |
438 | Balls Out (2015) | 0.0 | 0.0 | 0 | 2015 |
439 | Before I Wake (2015) | 0.0 | 0.0 | 0 | 2015 |
... | ... | ... | ... | ... | ... |
499 | Valiyavan (2015) | 0.0 | 0.0 | 0 | 2015 |
500 | WWE SummerSlam 2015 (2015) | 0.0 | 0.0 | 0 | 2015 |
501 | Yagavarayinum Naa Kaakka (2015) | 0.0 | 0.0 | 0 | 2015 |
502 | Yesterday, Today and Tomorrow (1964) | 0.0 | 0.0 | 0 | 1964 |
503 | Zarafa (2012) | 0.0 | 0.0 | 0 | 2012 |
69 rows × 5 columns
(fandango['VOTES']==0).value_counts()
False 435 True 69 Name: VOTES, dtype: int64
(fandango['VOTES']==0).sum()
69
fandango
FILM | STARS | RATING | VOTES | YEAR | |
---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 |
... | ... | ... | ... | ... | ... |
499 | Valiyavan (2015) | 0.0 | 0.0 | 0 | 2015 |
500 | WWE SummerSlam 2015 (2015) | 0.0 | 0.0 | 0 | 2015 |
501 | Yagavarayinum Naa Kaakka (2015) | 0.0 | 0.0 | 0 | 2015 |
502 | Yesterday, Today and Tomorrow (1964) | 0.0 | 0.0 | 0 | 1964 |
503 | Zarafa (2012) | 0.0 | 0.0 | 0 | 2012 |
504 rows × 5 columns
# Keep only films with a non-zero vote count. .copy() makes this an independent
# DataFrame, so adding columns to it later is a plain assignment rather than a
# write into what pandas may treat as a view of `fandango` (SettingWithCopy).
reviewed_films = fandango[fandango['VOTES'] != 0].copy()
reviewed_films
FILM | STARS | RATING | VOTES | YEAR | |
---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 |
... | ... | ... | ... | ... | ... |
430 | That Sugar Film (2015) | 5.0 | 5.0 | 1 | 2015 |
431 | The Intern (2015) | 5.0 | 5.0 | 1 | 2015 |
432 | The Park Bench (2015) | 5.0 | 5.0 | 1 | 2015 |
433 | The Wanted 18 (2015) | 5.0 | 5.0 | 1 | 2015 |
434 | Z For Zachariah (2015) | 5.0 | 5.0 | 1 | 2015 |
435 rows × 5 columns
from matplotlib.ticker import MaxNLocator  # only needed if integer tick locators are re-enabled

# Overlay the density of the true rating and of the stars shown to users.
# clip=[0, 5] keeps both estimates inside the 0-5 range, so no xlim/ylim is set.
# fill=True replaces shade=True, which seaborn deprecated in favour of `fill`.
# NOTE(review): "Comparism" in the title looks like a typo for "Comparison".
plt.figure(figsize=[10, 4], dpi=500)
sns.kdeplot(reviewed_films['RATING'], clip=[0, 5], fill=True, label='True Rating')
sns.kdeplot(reviewed_films['STARS'], clip=[0, 5], fill=True, label='Stars Displayed')
plt.title('KDE Plot for True Rating & the Stars Displayed Comparism')
plt.legend(loc=(1.05, 0.5))  # axes-fraction coordinates: just outside the right edge
plt.show()
# Gap between the stars displayed and the true rating, to two decimals.
stars_gap = reviewed_films['STARS'] - reviewed_films['RATING']
reviewed_films['STARS_DIFF'] = stars_gap.round(2)
reviewed_films
FILM | STARS | RATING | VOTES | YEAR | STARS_DIFF | |
---|---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 | 0.1 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 | 0.0 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 | 0.2 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 | 0.2 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 | 0.0 |
... | ... | ... | ... | ... | ... | ... |
430 | That Sugar Film (2015) | 5.0 | 5.0 | 1 | 2015 | 0.0 |
431 | The Intern (2015) | 5.0 | 5.0 | 1 | 2015 | 0.0 |
432 | The Park Bench (2015) | 5.0 | 5.0 | 1 | 2015 | 0.0 |
433 | The Wanted 18 (2015) | 5.0 | 5.0 | 1 | 2015 | 0.0 |
434 | Z For Zachariah (2015) | 5.0 | 5.0 | 1 | 2015 | 0.0 |
435 rows × 6 columns
plt.figure(figsize=[12, 8], dpi=200)
sns.countplot(data = reviewed_films, x='STARS_DIFF');
reviewed_films[reviewed_films['STARS_DIFF']>=1.0]
FILM | STARS | RATING | VOTES | YEAR | STARS_DIFF | |
---|---|---|---|---|---|---|
381 | Turbo Kid (2015) | 5.0 | 4.0 | 2 | 2015 | 1.0 |
# Load the scores collected from the other review sites.
# NOTE(review): absolute, machine-specific path — see whether a path relative
# to the notebook can be used instead.
all_sites = pd.read_csv(r"C:\Users\Teni\Desktop\Git-Github\06-Capstone-Project\all_sites_scores.csv")
all_sites
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | |
---|---|---|---|---|---|---|---|---|
0 | Avengers: Age of Ultron (2015) | 74 | 86 | 66 | 7.1 | 7.8 | 1330 | 271107 |
1 | Cinderella (2015) | 85 | 80 | 67 | 7.5 | 7.1 | 249 | 65709 |
2 | Ant-Man (2015) | 80 | 90 | 64 | 8.1 | 7.8 | 627 | 103660 |
3 | Do You Believe? (2015) | 18 | 84 | 22 | 4.7 | 5.4 | 31 | 3136 |
4 | Hot Tub Time Machine 2 (2015) | 14 | 28 | 29 | 3.4 | 5.1 | 88 | 19560 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
141 | Mr. Holmes (2015) | 87 | 78 | 67 | 7.9 | 7.4 | 33 | 7367 |
142 | '71 (2015) | 97 | 82 | 83 | 7.5 | 7.2 | 60 | 24116 |
143 | Two Days, One Night (2014) | 97 | 78 | 89 | 8.8 | 7.4 | 123 | 24345 |
144 | Gett: The Trial of Viviane Amsalem (2015) | 100 | 81 | 90 | 7.3 | 7.8 | 19 | 1955 |
145 | Kumiko, The Treasure Hunter (2015) | 87 | 63 | 68 | 6.4 | 6.7 | 19 | 5289 |
146 rows × 8 columns
all_sites.head(5)
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | |
---|---|---|---|---|---|---|---|---|
0 | Avengers: Age of Ultron (2015) | 74 | 86 | 66 | 7.1 | 7.8 | 1330 | 271107 |
1 | Cinderella (2015) | 85 | 80 | 67 | 7.5 | 7.1 | 249 | 65709 |
2 | Ant-Man (2015) | 80 | 90 | 64 | 8.1 | 7.8 | 627 | 103660 |
3 | Do You Believe? (2015) | 18 | 84 | 22 | 4.7 | 5.4 | 31 | 3136 |
4 | Hot Tub Time Machine 2 (2015) | 14 | 28 | 29 | 3.4 | 5.1 | 88 | 19560 |
all_sites.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 146 entries, 0 to 145 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 146 non-null object 1 RottenTomatoes 146 non-null int64 2 RottenTomatoes_User 146 non-null int64 3 Metacritic 146 non-null int64 4 Metacritic_User 146 non-null float64 5 IMDB 146 non-null float64 6 Metacritic_user_vote_count 146 non-null int64 7 IMDB_user_vote_count 146 non-null int64 dtypes: float64(2), int64(5), object(1) memory usage: 9.2+ KB
all_sites.describe()
RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | |
---|---|---|---|---|---|---|---|
count | 146.000000 | 146.000000 | 146.000000 | 146.000000 | 146.000000 | 146.000000 | 146.000000 |
mean | 60.849315 | 63.876712 | 58.808219 | 6.519178 | 6.736986 | 185.705479 | 42846.205479 |
std | 30.168799 | 20.024430 | 19.517389 | 1.510712 | 0.958736 | 316.606515 | 67406.509171 |
min | 5.000000 | 20.000000 | 13.000000 | 2.400000 | 4.000000 | 4.000000 | 243.000000 |
25% | 31.250000 | 50.000000 | 43.500000 | 5.700000 | 6.300000 | 33.250000 | 5627.000000 |
50% | 63.500000 | 66.500000 | 59.000000 | 6.850000 | 6.900000 | 72.500000 | 19103.000000 |
75% | 89.000000 | 81.000000 | 75.000000 | 7.500000 | 7.400000 | 168.500000 | 45185.750000 |
max | 100.000000 | 94.000000 | 94.000000 | 9.600000 | 8.600000 | 2375.000000 | 334164.000000 |
Let's first take a look at Rotten Tomatoes. RT has two sets of reviews, their critics reviews (ratings published by official critics) and user reviews.
TASK: Create a scatterplot exploring the relationship between RT Critic reviews and RT User reviews.
# RT critic score against RT user score, both axes pinned to the 0-100 scale.
fig, ax = plt.subplots(figsize=[12, 8], dpi=200)
sns.scatterplot(x='RottenTomatoes', y='RottenTomatoes_User', data=all_sites, s=80, ax=ax)
ax.set_xlim(0, 100)
ax.set_ylim(0, 100);
Let's quantify this difference by comparing the critics' ratings with the RT user ratings. We will calculate it as RottenTomatoes - RottenTomatoes_User. Note: diff here is Critics Score - User Score, so values close to 0 mean agreement between critics and users. Large positive values mean critics rated the film much higher than users did; large negative values mean users rated it much higher than critics did.
TASK: Create a new column based off the difference between critics ratings and users ratings for Rotten Tomatoes. Calculate this with RottenTomatoes-RottenTomatoes_User
# Critics minus users on Rotten Tomatoes. Both columns are integers, so the
# difference is already exact and rounding it changes nothing.
all_sites['diff'] = all_sites['RottenTomatoes'] - all_sites['RottenTomatoes_User']
all_sites
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | diff | |
---|---|---|---|---|---|---|---|---|---|
0 | Avengers: Age of Ultron (2015) | 74 | 86 | 66 | 7.1 | 7.8 | 1330 | 271107 | -12 |
1 | Cinderella (2015) | 85 | 80 | 67 | 7.5 | 7.1 | 249 | 65709 | 5 |
2 | Ant-Man (2015) | 80 | 90 | 64 | 8.1 | 7.8 | 627 | 103660 | -10 |
3 | Do You Believe? (2015) | 18 | 84 | 22 | 4.7 | 5.4 | 31 | 3136 | -66 |
4 | Hot Tub Time Machine 2 (2015) | 14 | 28 | 29 | 3.4 | 5.1 | 88 | 19560 | -14 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
141 | Mr. Holmes (2015) | 87 | 78 | 67 | 7.9 | 7.4 | 33 | 7367 | 9 |
142 | '71 (2015) | 97 | 82 | 83 | 7.5 | 7.2 | 60 | 24116 | 15 |
143 | Two Days, One Night (2014) | 97 | 78 | 89 | 8.8 | 7.4 | 123 | 24345 | 19 |
144 | Gett: The Trial of Viviane Amsalem (2015) | 100 | 81 | 90 | 7.3 | 7.8 | 19 | 1955 | 19 |
145 | Kumiko, The Treasure Hunter (2015) | 87 | 63 | 68 | 6.4 | 6.7 | 19 | 5289 | 24 |
146 rows × 9 columns
Let's now compare the overall mean difference. Since the differences can be negative or positive, first take the absolute value of every difference, then take the mean. This reports the average absolute difference between the critics' rating and the user rating.
TASK: Calculate the Mean Absolute Difference between RT scores and RT User scores as described above.
# Mean absolute critic-vs-user difference (vectorised .abs() instead of apply).
all_sites['diff'].abs().mean()
15.095890410958905
TASK: Plot the distribution of the differences between RT Critics Score and RT User Score. There should be negative values in this distribution plot. Feel free to use KDE or Histograms to display this distribution.
# Distribution of the signed critic-minus-user difference.
# displot is figure-level: it builds its own figure, so a preceding
# plt.figure() only leaves an empty figure behind. Size is set with
# height/aspect (6in x 2 = 12in wide), and the returned FacetGrid is bound to
# `plot` because the label/title calls below need it.
plot = sns.displot(data=all_sites, x='diff', kde=True, bins=25, height=6, aspect=2)
plot.fig.set_dpi(200)
plot.set_axis_labels("Rotten_Diff")
plot.fig.suptitle('RT Critics Score minus RT User Score', y=1.02)  # y > 1 lifts the title above the axes
plt.show()
<Figure size 2400x1200 with 0 Axes>
TASK: Now create a distribution showing the absolute value difference between Critics and Users on Rotten Tomatoes.
# Mean of the absolute critic-minus-user differences.
abs_mean = all_sites['diff'].apply(abs).mean()
# abs_mean minus each film's signed diff, rounded to two decimals.
# NOTE(review): this is not the absolute difference the task asks for, and the
# plots that follow use diff.apply(abs) directly — confirm whether this column
# is still needed (it is carried into the later merge).
all_sites['task_need'] = round(abs_mean-all_sites['diff'], 2)
all_sites
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | diff | task_need | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Avengers: Age of Ultron (2015) | 74 | 86 | 66 | 7.1 | 7.8 | 1330 | 271107 | -12 | 27.1 |
1 | Cinderella (2015) | 85 | 80 | 67 | 7.5 | 7.1 | 249 | 65709 | 5 | 10.1 |
2 | Ant-Man (2015) | 80 | 90 | 64 | 8.1 | 7.8 | 627 | 103660 | -10 | 25.1 |
3 | Do You Believe? (2015) | 18 | 84 | 22 | 4.7 | 5.4 | 31 | 3136 | -66 | 81.1 |
4 | Hot Tub Time Machine 2 (2015) | 14 | 28 | 29 | 3.4 | 5.1 | 88 | 19560 | -14 | 29.1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
141 | Mr. Holmes (2015) | 87 | 78 | 67 | 7.9 | 7.4 | 33 | 7367 | 9 | 6.1 |
142 | '71 (2015) | 97 | 82 | 83 | 7.5 | 7.2 | 60 | 24116 | 15 | 0.1 |
143 | Two Days, One Night (2014) | 97 | 78 | 89 | 8.8 | 7.4 | 123 | 24345 | 19 | -3.9 |
144 | Gett: The Trial of Viviane Amsalem (2015) | 100 | 81 | 90 | 7.3 | 7.8 | 19 | 1955 | 19 | -3.9 |
145 | Kumiko, The Treasure Hunter (2015) | 87 | 63 | 68 | 6.4 | 6.7 | 19 | 5289 | 24 | -8.9 |
146 rows × 10 columns
# Distribution of the absolute critic-vs-user difference.
# displot creates its own figure, so the size goes through height/aspect
# (8in x 1.5 = 12in wide) instead of a separate plt.figure(), which would only
# leave an empty figure in the output.
plot = sns.displot(x=all_sites['diff'].abs(), bins=25, kde=True, height=8, aspect=1.5)
plot.fig.set_dpi(200)
plt.title('Abs Difference between RT Critics Score and RT User Score')
plt.show()
<Figure size 2400x1600 with 0 Axes>
# Same distribution drawn with histplot, which plots onto the axes created
# here. Apparently the histplot is neater and crisper than the displot.
fig, ax = plt.subplots(figsize=[12, 8], dpi=200)
sns.histplot(x=all_sites['diff'].apply(abs), bins=25, kde=True, ax=ax)
ax.set_title('Abs Difference between RT Critics Score and RT User Score')
plt.show()
TASK: What are the top 5 movies users rated higher than critics on average:
all_sites.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 146 entries, 0 to 145 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 146 non-null object 1 RottenTomatoes 146 non-null int64 2 RottenTomatoes_User 146 non-null int64 3 Metacritic 146 non-null int64 4 Metacritic_User 146 non-null float64 5 IMDB 146 non-null float64 6 Metacritic_user_vote_count 146 non-null int64 7 IMDB_user_vote_count 146 non-null int64 8 diff 146 non-null int64 9 task_need 146 non-null float64 dtypes: float64(3), int64(6), object(1) memory usage: 11.5+ KB
print('Users Love but Critics Hate')
all_sites.nsmallest(5, 'diff')[['FILM', 'diff']]
Users Love but Critics Hate
FILM | diff | |
---|---|---|
3 | Do You Believe? (2015) | -66 |
85 | Little Boy (2015) | -61 |
105 | Hitman: Agent 47 (2015) | -42 |
134 | The Longest Ride (2015) | -42 |
125 | The Wedding Ringer (2015) | -39 |
print('Users Love but Critics Hate')
all_sites.sort_values('diff', ascending=True)[['FILM', 'diff']].head(5)
Users Love but Critics Hate
FILM | diff | |
---|---|---|
3 | Do You Believe? (2015) | -66 |
85 | Little Boy (2015) | -61 |
134 | The Longest Ride (2015) | -42 |
105 | Hitman: Agent 47 (2015) | -42 |
125 | The Wedding Ringer (2015) | -39 |
print('Users Love but Critics Hate')
all_sites.sort_values('diff', ascending=False)[['FILM', 'diff']].tail(5)
Users Love but Critics Hate
FILM | diff | |
---|---|---|
125 | The Wedding Ringer (2015) | -39 |
105 | Hitman: Agent 47 (2015) | -42 |
134 | The Longest Ride (2015) | -42 |
85 | Little Boy (2015) | -61 |
3 | Do You Believe? (2015) | -66 |
TASK: Now show the top 5 movies critics scores higher than users on average.
print('Critics love, but Users Hate')
all_sites.nlargest(5, 'diff')[['FILM', 'diff']]
Critics love, but Users Hate
FILM | diff | |
---|---|---|
69 | Mr. Turner (2014) | 42 |
112 | It Follows (2015) | 31 |
115 | While We're Young (2015) | 31 |
37 | Welcome to Me (2015) | 24 |
40 | I'll See You In My Dreams (2015) | 24 |
print('Critics love, but Users Hate')
all_sites.sort_values('diff', ascending=False)[['FILM', 'diff']].head(5)
Critics love, but Users Hate
FILM | diff | |
---|---|---|
69 | Mr. Turner (2014) | 42 |
112 | It Follows (2015) | 31 |
115 | While We're Young (2015) | 31 |
145 | Kumiko, The Treasure Hunter (2015) | 24 |
37 | Welcome to Me (2015) | 24 |
Now let's take a quick look at the ratings from MetaCritic. Metacritic also shows an average user rating versus their official displayed rating.
TASK: Display a scatterplot of the Metacritic Rating versus the Metacritic User rating.
all_sites.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 146 entries, 0 to 145 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 146 non-null object 1 RottenTomatoes 146 non-null int64 2 RottenTomatoes_User 146 non-null int64 3 Metacritic 146 non-null int64 4 Metacritic_User 146 non-null float64 5 IMDB 146 non-null float64 6 Metacritic_user_vote_count 146 non-null int64 7 IMDB_user_vote_count 146 non-null int64 8 diff 146 non-null int64 9 task_need 146 non-null float64 dtypes: float64(3), int64(6), object(1) memory usage: 11.5+ KB
plt.figure(figsize=[12, 8], dpi=200)
sns.scatterplot(data= all_sites, x= 'Metacritic',y= 'Metacritic_User', s=80)
plt.xlim(0, 100)
plt.ylim(0, 10)
plt.show()
Finally let's explore IMDB. Notice that both Metacritic and IMDB report back vote counts. Let's analyze the most popular movies.
TASK: Create a scatterplot for the relationship between vote counts on MetaCritic versus vote counts on IMDB.
all_sites.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 146 entries, 0 to 145 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 146 non-null object 1 RottenTomatoes 146 non-null int64 2 RottenTomatoes_User 146 non-null int64 3 Metacritic 146 non-null int64 4 Metacritic_User 146 non-null float64 5 IMDB 146 non-null float64 6 Metacritic_user_vote_count 146 non-null int64 7 IMDB_user_vote_count 146 non-null int64 8 diff 146 non-null int64 9 task_need 146 non-null float64 dtypes: float64(3), int64(6), object(1) memory usage: 11.5+ KB
plt.figure(figsize=[12, 8], dpi=200)
sns.scatterplot(data=all_sites, x= 'Metacritic_user_vote_count', y='IMDB_user_vote_count',s=70)
plt.show()
Notice there are two outliers here. The movie with the highest vote count on IMDB only has about 500 Metacritic ratings. What is this movie?
TASK: What movie has the highest IMDB user vote count?
# Film with the most IMDB user votes, taken over the whole table.
# The earlier `Metacritic <= 500` mask compared the 0-100 critic score, so it
# was always true; filtering on the Metacritic vote count instead would have
# excluded the answer, which has more than 500 Metacritic user votes.
largest = all_sites.nlargest(1, 'IMDB_user_vote_count')
largest
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | diff | task_need | |
---|---|---|---|---|---|---|---|---|---|---|
14 | The Imitation Game (2014) | 90 | 92 | 73 | 8.2 | 8.1 | 566 | 334164 | -2 | 17.1 |
# Same question answered by sorting: most IMDB user votes first, keep the top row.
# No pre-filter is applied — `Metacritic` is the 0-100 critic score, so the old
# `<= 500` mask never removed anything.
largest = all_sites.sort_values('IMDB_user_vote_count', ascending=False).head(1)
largest
TASK: What movie has the highest Metacritic User Vote count?
all_sites.nlargest(1, 'Metacritic_user_vote_count')
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | diff | task_need | |
---|---|---|---|---|---|---|---|---|---|---|
88 | Mad Max: Fury Road (2015) | 97 | 88 | 89 | 8.7 | 8.3 | 2375 | 292023 | 9 | 6.1 |
all_sites.sort_values( 'Metacritic_user_vote_count', ascending=False).head(1)
FILM | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | diff | task_need | |
---|---|---|---|---|---|---|---|---|---|---|
88 | Mad Max: Fury Road (2015) | 97 | 88 | 89 | 8.7 | 8.3 | 2375 | 292023 | 9 | 6.1 |
Finally let's begin to explore whether or not Fandango artificially displays higher ratings than warranted to boost ticket sales.
TASK: Combine the Fandango Table with the All Sites table. Not every movie in the Fandango table is in the All Sites table, since some Fandango movies have very little or no reviews. We only want to compare movies that are in both DataFrames, so do an inner merge to merge together both DataFrames based on the FILM columns.
fandango.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 504 entries, 0 to 503 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 504 non-null object 1 STARS 504 non-null float64 2 RATING 504 non-null float64 3 VOTES 504 non-null int64 4 YEAR 504 non-null object dtypes: float64(2), int64(1), object(2) memory usage: 19.8+ KB
all_sites.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 146 entries, 0 to 145 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 146 non-null object 1 RottenTomatoes 146 non-null int64 2 RottenTomatoes_User 146 non-null int64 3 Metacritic 146 non-null int64 4 Metacritic_User 146 non-null float64 5 IMDB 146 non-null float64 6 Metacritic_user_vote_count 146 non-null int64 7 IMDB_user_vote_count 146 non-null int64 8 diff 146 non-null int64 9 task_need 146 non-null float64 dtypes: float64(3), int64(6), object(1) memory usage: 11.5+ KB
# Inner join on the film title: keep only films present in both tables.
df = fandango.merge(all_sites, on='FILM', how='inner')
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 145 entries, 0 to 144 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FILM 145 non-null object 1 STARS 145 non-null float64 2 RATING 145 non-null float64 3 VOTES 145 non-null int64 4 YEAR 145 non-null object 5 RottenTomatoes 145 non-null int64 6 RottenTomatoes_User 145 non-null int64 7 Metacritic 145 non-null int64 8 Metacritic_User 145 non-null float64 9 IMDB 145 non-null float64 10 Metacritic_user_vote_count 145 non-null int64 11 IMDB_user_vote_count 145 non-null int64 12 diff 145 non-null int64 13 task_need 145 non-null float64 dtypes: float64(5), int64(7), object(2) memory usage: 17.0+ KB
df.head(5)
FILM | STARS | RATING | VOTES | YEAR | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | diff | task_need | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 | 25 | 42 | 46 | 3.2 | 4.2 | 778 | 179506 | -17 | 32.1 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 | 71 | 81 | 59 | 7.0 | 7.3 | 1281 | 241807 | -10 | 25.1 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 | 72 | 85 | 72 | 6.6 | 7.4 | 850 | 251856 | -13 | 28.1 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 | 81 | 84 | 67 | 6.8 | 7.4 | 764 | 207211 | -3 | 18.1 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 | 98 | 90 | 94 | 8.9 | 8.6 | 807 | 96252 | 8 | 7.1 |
Notice that RT,Metacritic, and IMDB don't use a score between 0-5 stars like Fandango does. In order to do a fair comparison, we need to normalize these values so they all fall between 0-5 stars and the relationship between reviews stays the same.
TASK: Create new normalized columns for all ratings so they match up within the 0-5 star range shown on Fandango. There are many ways to do this.
# Rescale every site's score onto Fandango's 0-5 range, to one decimal.
# Scores on a 0-100 scale are divided by 20, scores on a 0-10 scale by 2.
scale_plan = [
    ('RT_norm', 'RottenTomatoes', 20),
    ('RTU_norm', 'RottenTomatoes_User', 20),
    ('Meta_norm', 'Metacritic', 20),
    ('MetaU_norm', 'Metacritic_User', 2),
    ('IMDB_norm', 'IMDB', 2),
]
for norm_col, source_col, divisor in scale_plan:
    df[norm_col] = np.round(df[source_col] / divisor, 1)
df.head()
FILM | STARS | RATING | VOTES | YEAR | RottenTomatoes | RottenTomatoes_User | Metacritic | Metacritic_User | IMDB | Metacritic_user_vote_count | IMDB_user_vote_count | diff | task_need | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Fifty Shades of Grey (2015) | 4.0 | 3.9 | 34846 | 2015 | 25 | 42 | 46 | 3.2 | 4.2 | 778 | 179506 | -17 | 32.1 | 1.2 | 2.1 | 2.3 | 1.6 | 2.1 |
1 | Jurassic World (2015) | 4.5 | 4.5 | 34390 | 2015 | 71 | 81 | 59 | 7.0 | 7.3 | 1281 | 241807 | -10 | 25.1 | 3.6 | 4.0 | 3.0 | 3.5 | 3.6 |
2 | American Sniper (2015) | 5.0 | 4.8 | 34085 | 2015 | 72 | 85 | 72 | 6.6 | 7.4 | 850 | 251856 | -13 | 28.1 | 3.6 | 4.2 | 3.6 | 3.3 | 3.7 |
3 | Furious 7 (2015) | 5.0 | 4.8 | 33538 | 2015 | 81 | 84 | 67 | 6.8 | 7.4 | 764 | 207211 | -3 | 18.1 | 4.0 | 4.2 | 3.4 | 3.4 | 3.7 |
4 | Inside Out (2015) | 4.5 | 4.5 | 15749 | 2015 | 98 | 90 | 94 | 8.9 | 8.6 | 807 | 96252 | 8 | 7.1 | 4.9 | 4.5 | 4.7 | 4.4 | 4.3 |
TASK: Now create a norm_scores DataFrame that contains only the normalized ratings. Include both STARS and RATING from the original Fandango table.
norm_scores = df[['STARS', 'RATING', 'RT_norm', 'RTU_norm', 'Meta_norm', 'MetaU_norm', 'IMDB_norm']]
norm_scores
STARS | RATING | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | |
---|---|---|---|---|---|---|---|
0 | 4.0 | 3.9 | 1.2 | 2.1 | 2.3 | 1.6 | 2.1 |
1 | 4.5 | 4.5 | 3.6 | 4.0 | 3.0 | 3.5 | 3.6 |
2 | 5.0 | 4.8 | 3.6 | 4.2 | 3.6 | 3.3 | 3.7 |
3 | 5.0 | 4.8 | 4.0 | 4.2 | 3.4 | 3.4 | 3.7 |
4 | 4.5 | 4.5 | 4.9 | 4.5 | 4.7 | 4.4 | 4.3 |
... | ... | ... | ... | ... | ... | ... | ... |
140 | 3.5 | 3.5 | 4.4 | 3.2 | 3.4 | 3.2 | 3.4 |
141 | 4.0 | 3.6 | 4.8 | 4.0 | 4.4 | 3.2 | 3.5 |
142 | 4.5 | 4.2 | 4.6 | 4.2 | 3.4 | 3.5 | 3.9 |
143 | 4.0 | 3.9 | 4.8 | 4.3 | 4.3 | 3.6 | 3.7 |
144 | 3.5 | 3.1 | 3.0 | 2.3 | 3.4 | 2.9 | 3.2 |
145 rows × 7 columns
norm_scores.head(5)
STARS | RATING | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | |
---|---|---|---|---|---|---|---|
0 | 4.0 | 3.9 | 1.2 | 2.1 | 2.3 | 1.6 | 2.1 |
1 | 4.5 | 4.5 | 3.6 | 4.0 | 3.0 | 3.5 | 3.6 |
2 | 5.0 | 4.8 | 3.6 | 4.2 | 3.6 | 3.3 | 3.7 |
3 | 5.0 | 4.8 | 4.0 | 4.2 | 3.4 | 3.4 | 3.7 |
4 | 4.5 | 4.5 | 4.9 | 4.5 | 4.7 | 4.4 | 4.3 |
Now the moment of truth! Does Fandango display abnormally high ratings? We already know it pushes the displayed STARS higher than the true RATING, but are the ratings themselves higher than average?
TASK: Create a plot comparing the distributions of normalized ratings across all sites. There are many ways to do this, but explore the Seaborn KDEplot docs for some simple ways to quickly show this. Don't worry if your plot format does not look exactly the same as ours, as long as the differences in distribution are clear.
def move_legend(ax, new_loc, **kws):
    """Re-create the existing legend of ``ax`` at a new location.

    The handles, label texts and title of the legend currently attached to
    ``ax`` are read and handed back to ``ax.legend`` so that the legend is
    redrawn with the same content at ``new_loc``.

    Parameters
    ----------
    ax
        Axes-like object exposing ``legend_`` and ``legend(...)``.
    new_loc
        Value forwarded as the ``loc`` argument of ``ax.legend``.
    **kws
        Extra keyword arguments forwarded unchanged to ``ax.legend``.
    """
    old_legend = ax.legend_
    # Matplotlib 3.7 renamed ``legendHandles`` to ``legend_handles`` and later
    # dropped the old spelling, so prefer the new attribute and only fall back
    # to the old one on older releases.
    handles = getattr(old_legend, "legend_handles", None)
    if handles is None:
        handles = old_legend.legendHandles
    labels = [t.get_text() for t in old_legend.get_texts()]
    title = old_legend.get_title().get_text()
    ax.legend(handles, labels, loc=new_loc, title=title, **kws)
# One filled KDE curve per column of norm_scores, drawn on a shared axis.
fig, ax = plt.subplots(figsize=(15, 6), dpi=150)
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot;
# clip=[0, 5] keeps the density from being evaluated outside 0-5.
sns.kdeplot(data=norm_scores, clip=[0, 5], fill=True, palette='Set1', ax=ax)
move_legend(ax, "upper left")
Clearly Fandango has an uneven distribution. We can also see that RT critics have the most uniform distribution. Let's directly compare these two.
TASK: Create a KDE plot that compares the distribution of RT critic ratings against the STARS displayed by Fandango.
# Check column dtypes and non-null counts before plotting.
norm_scores.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 145 entries, 0 to 144 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 STARS 145 non-null float64 1 RATING 145 non-null float64 2 RT_norm 145 non-null float64 3 RTU_norm 145 non-null float64 4 Meta_norm 145 non-null float64 5 MetaU_norm 145 non-null float64 6 IMDB_norm 145 non-null float64 dtypes: float64(7) memory usage: 9.1 KB
# Compare only RT_norm and STARS by passing a two-column slice of norm_scores.
fig, ax = plt.subplots(figsize=[12, 8], dpi=200)
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
sns.kdeplot(data=norm_scores[['RT_norm', 'STARS']], clip=[0, 5], fill=True, ax=ax)
move_legend(ax, 'upper left')
# Same comparison drawn as two separate kdeplot calls on the current figure,
# each with an explicit legend label.
fig = plt.figure(figsize=[12, 8], dpi=200)
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
sns.kdeplot(norm_scores['RT_norm'], clip=[0, 5], fill=True, label='RT_norm')
sns.kdeplot(norm_scores['STARS'], clip=[0, 5], fill=True, label='Stars')
plt.legend(loc=(0.01, 0.9))
plt.show()
OPTIONAL TASK: Create a histplot comparing all normalized scores.
# Check column dtypes and non-null counts before drawing the histogram.
norm_scores.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 145 entries, 0 to 144 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 STARS 145 non-null float64 1 RATING 145 non-null float64 2 RT_norm 145 non-null float64 3 RTU_norm 145 non-null float64 4 Meta_norm 145 non-null float64 5 MetaU_norm 145 non-null float64 6 IMDB_norm 145 non-null float64 dtypes: float64(7) memory usage: 9.1 KB
# Histogram of every column in norm_scores on one axis (bins=50), with the
# legend relocated to the left edge.
fig, ax = plt.subplots(dpi=200, figsize=[12, 8])
sns.histplot(norm_scores, bins=50, ax=ax)
move_legend(ax, new_loc="center left")
plt.show()
TASK: Create a clustermap visualization of all normalized scores. Note the differences in ratings, highly rated movies should be clustered together versus poorly rated movies. Note: This clustermap does not need to have the FILM titles as the index, feel free to drop it for the clustermap.
# Clustered heatmap of the normalized scores with seaborn's default settings.
sns.clustermap(norm_scores);
# Same data with column clustering switched off and the 'magma' colormap.
sns.clustermap(norm_scores, col_cluster=False, cmap='magma');
TASK: Clearly Fandango is rating movies much higher than other sites, especially considering that it is then displaying a rounded up version of the rating. Let's examine the top 10 worst movies. Based off the Rotten Tomatoes Critic Ratings, what are the top 10 lowest rated movies? What are the normalized scores across all platforms for these movies? You may need to add the FILM column back in to your DataFrame of normalized scores to see the results.
# Show the current normalized-score table (it has no FILM column yet).
norm_scores
STARS | RATING | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | |
---|---|---|---|---|---|---|---|
0 | 4.0 | 3.9 | 1.2 | 2.1 | 2.3 | 1.6 | 2.1 |
1 | 4.5 | 4.5 | 3.6 | 4.0 | 3.0 | 3.5 | 3.6 |
2 | 5.0 | 4.8 | 3.6 | 4.2 | 3.6 | 3.3 | 3.7 |
3 | 5.0 | 4.8 | 4.0 | 4.2 | 3.4 | 3.4 | 3.7 |
4 | 4.5 | 4.5 | 4.9 | 4.5 | 4.7 | 4.4 | 4.3 |
... | ... | ... | ... | ... | ... | ... | ... |
140 | 3.5 | 3.5 | 4.4 | 3.2 | 3.4 | 3.2 | 3.4 |
141 | 4.0 | 3.6 | 4.8 | 4.0 | 4.4 | 3.2 | 3.5 |
142 | 4.5 | 4.2 | 4.6 | 4.2 | 3.4 | 3.5 | 3.9 |
143 | 4.0 | 3.9 | 4.8 | 4.3 | 4.3 | 3.6 | 3.7 |
144 | 3.5 | 3.1 | 3.0 | 2.3 | 3.4 | 2.9 | 3.2 |
145 rows × 7 columns
# Rebuild norm_scores from df, this time keeping FILM so rows can be identified.
norm_scores = df.loc[:, [
    'STARS', 'RATING',
    'RT_norm', 'RTU_norm', 'Meta_norm', 'MetaU_norm', 'IMDB_norm',
    'FILM',
]]
norm_scores
STARS | RATING | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | FILM | |
---|---|---|---|---|---|---|---|---|
0 | 4.0 | 3.9 | 1.2 | 2.1 | 2.3 | 1.6 | 2.1 | Fifty Shades of Grey (2015) |
1 | 4.5 | 4.5 | 3.6 | 4.0 | 3.0 | 3.5 | 3.6 | Jurassic World (2015) |
2 | 5.0 | 4.8 | 3.6 | 4.2 | 3.6 | 3.3 | 3.7 | American Sniper (2015) |
3 | 5.0 | 4.8 | 4.0 | 4.2 | 3.4 | 3.4 | 3.7 | Furious 7 (2015) |
4 | 4.5 | 4.5 | 4.9 | 4.5 | 4.7 | 4.4 | 4.3 | Inside Out (2015) |
... | ... | ... | ... | ... | ... | ... | ... | ... |
140 | 3.5 | 3.5 | 4.4 | 3.2 | 3.4 | 3.2 | 3.4 | Kumiko, The Treasure Hunter (2015) |
141 | 4.0 | 3.6 | 4.8 | 4.0 | 4.4 | 3.2 | 3.5 | The Diary of a Teenage Girl (2015) |
142 | 4.5 | 4.2 | 4.6 | 4.2 | 3.4 | 3.5 | 3.9 | The Wrecking Crew (2015) |
143 | 4.0 | 3.9 | 4.8 | 4.3 | 4.3 | 3.6 | 3.7 | Tangerine (2015) |
144 | 3.5 | 3.1 | 3.0 | 2.3 | 3.4 | 2.9 | 3.2 | Maps to the Stars (2015) |
145 rows × 8 columns
# Ten films with the lowest normalized Rotten Tomatoes critic score (RT_norm).
norm_scores.nsmallest(10, 'RT_norm')
STARS | RATING | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | FILM | |
---|---|---|---|---|---|---|---|---|
49 | 3.5 | 3.5 | 0.2 | 1.8 | 0.6 | 1.2 | 2.2 | Paul Blart: Mall Cop 2 (2015) |
25 | 4.5 | 4.1 | 0.4 | 2.3 | 1.3 | 2.3 | 3.0 | Taken 3 (2015) |
28 | 3.0 | 2.7 | 0.4 | 1.0 | 1.4 | 1.2 | 2.0 | Fantastic Four (2015) |
54 | 4.0 | 3.7 | 0.4 | 1.8 | 1.6 | 1.8 | 2.4 | Hot Pursuit (2015) |
84 | 4.0 | 3.9 | 0.4 | 2.4 | 1.4 | 1.6 | 3.0 | Hitman: Agent 47 (2015) |
50 | 4.0 | 3.6 | 0.5 | 1.8 | 1.5 | 2.8 | 2.3 | The Boy Next Door (2015) |
77 | 3.5 | 3.2 | 0.6 | 1.8 | 1.5 | 2.0 | 2.8 | Seventh Son (2015) |
78 | 3.5 | 3.2 | 0.6 | 1.5 | 1.4 | 1.6 | 2.8 | Mortdecai (2015) |
83 | 3.5 | 3.3 | 0.6 | 1.7 | 1.6 | 2.5 | 2.8 | Sinister 2 (2015) |
87 | 3.5 | 3.2 | 0.6 | 1.4 | 1.6 | 1.9 | 2.7 | Unfinished Business (2015) |
# Same query via an ascending sort. Rows tied on RT_norm may come back in a
# different order than with nsmallest, so the films at the cutoff can differ.
norm_scores.sort_values('RT_norm', ascending = True).head(10)
# norm_scores.sort_values('RT_norm', ascending = False).tail(10)
STARS | RATING | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | FILM | |
---|---|---|---|---|---|---|---|---|
49 | 3.5 | 3.5 | 0.2 | 1.8 | 0.6 | 1.2 | 2.2 | Paul Blart: Mall Cop 2 (2015) |
25 | 4.5 | 4.1 | 0.4 | 2.3 | 1.3 | 2.3 | 3.0 | Taken 3 (2015) |
28 | 3.0 | 2.7 | 0.4 | 1.0 | 1.4 | 1.2 | 2.0 | Fantastic Four (2015) |
84 | 4.0 | 3.9 | 0.4 | 2.4 | 1.4 | 1.6 | 3.0 | Hitman: Agent 47 (2015) |
54 | 4.0 | 3.7 | 0.4 | 1.8 | 1.6 | 1.8 | 2.4 | Hot Pursuit (2015) |
50 | 4.0 | 3.6 | 0.5 | 1.8 | 1.5 | 2.8 | 2.3 | The Boy Next Door (2015) |
109 | 3.0 | 2.9 | 0.6 | 1.0 | 1.8 | 2.7 | 2.3 | The Vatican Tapes (2015) |
88 | 4.0 | 3.6 | 0.6 | 2.0 | 1.2 | 1.2 | 3.2 | The Loft (2015) |
87 | 3.5 | 3.2 | 0.6 | 1.4 | 1.6 | 1.9 | 2.7 | Unfinished Business (2015) |
83 | 3.5 | 3.3 | 0.6 | 1.7 | 1.6 | 2.5 | 2.8 | Sinister 2 (2015) |
FINAL TASK: Visualize the distribution of ratings across all sites for the top 10 worst movies.
# Show the normalized-score table, which now includes the FILM column.
norm_scores
STARS | RATING | RT_norm | RTU_norm | Meta_norm | MetaU_norm | IMDB_norm | FILM | |
---|---|---|---|---|---|---|---|---|
0 | 4.0 | 3.9 | 1.2 | 2.1 | 2.3 | 1.6 | 2.1 | Fifty Shades of Grey (2015) |
1 | 4.5 | 4.5 | 3.6 | 4.0 | 3.0 | 3.5 | 3.6 | Jurassic World (2015) |
2 | 5.0 | 4.8 | 3.6 | 4.2 | 3.6 | 3.3 | 3.7 | American Sniper (2015) |
3 | 5.0 | 4.8 | 4.0 | 4.2 | 3.4 | 3.4 | 3.7 | Furious 7 (2015) |
4 | 4.5 | 4.5 | 4.9 | 4.5 | 4.7 | 4.4 | 4.3 | Inside Out (2015) |
... | ... | ... | ... | ... | ... | ... | ... | ... |
140 | 3.5 | 3.5 | 4.4 | 3.2 | 3.4 | 3.2 | 3.4 | Kumiko, The Treasure Hunter (2015) |
141 | 4.0 | 3.6 | 4.8 | 4.0 | 4.4 | 3.2 | 3.5 | The Diary of a Teenage Girl (2015) |
142 | 4.5 | 4.2 | 4.6 | 4.2 | 3.4 | 3.5 | 3.9 | The Wrecking Crew (2015) |
143 | 4.0 | 3.9 | 4.8 | 4.3 | 4.3 | 3.6 | 3.7 | Tangerine (2015) |
144 | 3.5 | 3.1 | 3.0 | 2.3 | 3.4 | 2.9 | 3.2 | Maps to the Stars (2015) |
145 rows × 8 columns
print('\n\n\n\n\n')
# Blank lines printed above leave some space before the plot or whatever comes next.
plt.figure(figsize=[12, 8], dpi=200)
# Ten lowest RT_norm rows; FILM is dropped so only the score columns are plotted.
worst_films = norm_scores.nsmallest(10, 'RT_norm').drop('FILM', axis=1)
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
sns.kdeplot(data=worst_films, fill=True, clip=[0, 5])
plt.title("Ratings for RT Critics' Worst Reviewed Films")
plt.show()
Final thoughts: Wow! Fandango is showing around 3-4 star ratings for films that are clearly bad! Notice the biggest offender, Taken 3! Fandango is displaying 4.5 stars on their site for a film with an average rating of 1.86 across the other platforms!
# Positional lookup of row 25, which is Taken 3 (2015) in this table.
norm_scores.iloc[25]
STARS 4.5 RATING 4.1 RT_norm 0.4 RTU_norm 2.3 Meta_norm 1.3 MetaU_norm 2.3 IMDB_norm 3.0 FILM Taken 3 (2015) Name: 25, dtype: object