!wget -q --show-progress http://files.grouplens.org/datasets/movielens/ml-latest-small.zip !unzip ml-latest-small.zip import matplotlib.pyplot as plt import pandas as pd import zipfile import urllib.request import sys import os DOWNLOAD_DESTINATION_DIR = '/content/ml-latest-small' ratings_path = os.path.join(DOWNLOAD_DESTINATION_DIR, 'ratings.csv') ratings = pd.read_csv( ratings_path, sep=',', names=["userid", "itemid", "rating", "timestamp"], skiprows=1 ) movies_path = os.path.join(DOWNLOAD_DESTINATION_DIR, 'movies.csv') movies = pd.read_csv( movies_path, sep=',', names=["itemid", "title", "genres"], encoding='latin-1', skiprows=1 ) ratings.head() movies.head() ratings.groupby('rating').size().plot(kind='bar') movie_means = ratings.join(movies['title'], on='itemid').groupby('title').rating.mean() movie_means[:50].plot(kind='bar', grid=True, figsize=(16,6), title="mean ratings of 50 movies") fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16,4), sharey=True) movie_means.nlargest(30).plot(kind='bar', ax=ax1, title="Top 30 movies in data set") movie_means.nsmallest(30).plot(kind='bar', ax=ax2, title="Bottom 30 movies in data set") !pip install -q watermark %reload_ext watermark %watermark -a "Sparsh A." -m -iv -u -t -d