!pip install -q tensorflow_text
import numpy as np
import pandas as pd
import nltk
import json
import re
import csv
import pickle
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity
import tensorflow_hub as hub
import tensorflow_text
data = pd.read_json('https://raw.githubusercontent.com/sparsh-ai/reco-data/master/books.json', lines=True)
data.head()
df = data[['title', 'authors', 'isbn','shortDescription','thumbnailUrl']].copy()
df['authors'] = df['authors'].str[0]
df.dropna(subset = ["shortDescription"], inplace=True)
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3")
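# Illustrative sanity check (not part of the original flow): the multilingual Universal
# Sentence Encoder maps any piece of text to a 512-dimensional vector, which is what we
# will store for each book description below.
sample_vec = embed(["A quick test sentence."])[0]
print(sample_vec.shape)  # expected: (512,)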
full_data = df.to_dict('records')
# add vectors to the data
for element in full_data:
    # the encoder expects a batch of strings; take the single resulting vector
    element['vector'] = embed([element['shortDescription']])[0]
vectors = [item['vector'] for item in full_data]
X = np.array(vectors)
# calculate similarity based on Euclidean distance (smaller distance = more similar, so sort each row in ascending order)
sim = euclidean_distances(X)
indices = np.vstack([np.argsort(arr) for arr in sim])
# calculate similarity using the cosine metric (larger value = more similar, so sort each row in descending order)
cos_sim = cosine_similarity(X)
cos_indices = np.vstack([np.argsort(-arr) for arr in cos_sim])
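# Illustrative comparison (assumes the cells above have run): the top-3 neighbours of the
# first book under each metric. Position 0 of each sorted row is the book itself, which is
# why the per-book loop below slices [1:topk+1].
print(full_data[0]['title'])
print([full_data[i]['title'] for i in indices[0][1:4]])      # nearest by Euclidean distance
print([full_data[i]['title'] for i in cos_indices[0][1:4]])  # most similar by cosine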
# find top-k most similar books for each case
topk = 20
for i, book in enumerate(full_data):
    book['euclidean'] = indices[i][1:topk+1]
    book['cosine'] = cos_indices[i][1:topk+1]
# remove vectors from dict
for book in full_data:
    book.pop('vector')
full_data[0]
# save the data
with open('model_01.pkl', 'wb') as f:
    pickle.dump(full_data, f)
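# Illustrative round-trip check: reload the pickle and confirm the records kept their fields.
with open('model_01.pkl', 'rb') as f:
    reloaded = pickle.load(f)
print(len(reloaded), list(reloaded[0].keys()))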
%%writefile Procfile
web: gunicorn app:app
!mkdir templates static
!wget -O ./static/logo.png https://images-platform.99static.com//JY78phRJ6tT1yo1QGGfhZOVlrAw=/68x2062:932x2926/fit-in/500x500/99designs-contests-attachments/87/87917/attachment_87917977
%%writefile ./templates/index.html
<!-- Minimal reconstructed template: the field and variable names match what app.py below passes in and reads back -->
<img src="{{ url_for('static', filename='logo.png') }}" alt="logo">
<form method="POST" action="/">
  <select name="selected_title">
    {% for title in list_books %}<option value="{{ title }}">{{ title }}</option>{% endfor %}
  </select>
  <select name="selected_metric"><option>cosine</option><option>euclidean</option></select>
  <button type="submit">Recommend</button>
</form>
{% if book_selected is defined %}
  <h2>SELECTED BOOK</h2>
  <img src="{{ book_selected.thumbnailUrl }}" alt="{{ book_selected.title }}">
{% endif %}
{% if similar_books is defined %}
  <p>Here are your other reading suggestions:</p>
  {% for book in similar_books %}
    <img src="{{ book.thumbnailUrl }}" alt="{{ book.title }}" title="{{ book.title }}">
  {% endfor %}
{% endif %}
!pip install flask_ngrok
from flask_ngrok import run_with_ngrok
%%writefile app.py
import pickle
from flask import Flask, request, render_template, jsonify
from flask_ngrok import run_with_ngrok
import numpy as np
app = Flask(__name__)
run_with_ngrok(app)
# load data and extract all the vectors
with open('model_01.pkl', 'rb') as f:
    book_data = pickle.load(f)
list_books = sorted([book['title'] for book in book_data])
isbn_list = [item['isbn'] for item in book_data]
@app.route("/", methods=['GET', 'POST'])
def template_test():
if request.method == 'POST':
selected_title = request.form.get('selected_title')
selected_metric = request.form.get('selected_metric')
selected_book = next(item for item in book_data if item['title'] == selected_title)
similar_books = [book_data[i] for i in selected_book[selected_metric]]
return render_template('index.html',
list_books=list_books,
book_selected=selected_book,
similar_books=similar_books[:6])
else:
return render_template('index.html', list_books=list_books)
@app.route("/recommendations", methods=['GET'])
def get_recommendations():
isbn = request.args.get('isbn', default=None, type=str)
num_reco = request.args.get("number", default=5, type=int)
distance = request.args.get("distance", default="cosine", type=str)
field = request.args.get("field", default="isbn", type=str)
if not isbn:
return jsonify("Missing ISBN for the book"), 400
elif distance not in ["cosine", "euclidean"]:
return jsonify("Distance can only be cosine or euclidean"), 400
elif num_reco not in range(1, 21):
return jsonify("Can only request between 1 and 20 books"), 400
elif isbn not in isbn_list:
return jsonify("ISBN not in supported books"), 400
elif field not in book_data[0].keys():
return jsonify("Field not available in the data"), 400
else:
try:
selected_book = next(item for item in book_data if item['isbn'] == isbn)
similar_books = [book_data[i][field] for i in selected_book[distance]]
return jsonify(similar_books[:num_reco]), 200
except Exception as e:
return jsonify(str(e)), 500
if __name__ == '__main__':
    app.run()
!python app.py
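# Example client call (sketch): once flask_ngrok prints its public URL, the JSON endpoint can be
# queried like this from another cell or machine. NGROK_URL is a placeholder for that URL.
import requests
resp = requests.get("http://NGROK_URL/recommendations",
                    params={"isbn": full_data[0]['isbn'], "number": 3,
                            "distance": "cosine", "field": "title"})
print(resp.status_code, resp.json())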