#!/usr/bin/env python
# coding: utf-8
"""Pairwise cosine similarity in NumPy, checked against scikit-learn."""

# In[1]:

import numpy as np


# In[2]:

def _normalize_rows(A):
    """Return a copy of the 2-D array-like *A* with each row scaled to unit L2 norm.

    Rows whose norm is zero are returned unchanged (all zeros) rather than
    being divided by zero.
    """
    A = np.asarray(A)
    # Promote bool/integer input to float64 first: squaring narrow integer
    # dtypes (e.g. int8) would silently overflow and corrupt the norms.
    if A.dtype.kind in "biu":
        A = A.astype(float)
    norms = np.sqrt(np.sum(np.square(A), axis=1))[:, np.newaxis]
    # A zero row has no direction. Dividing by 1 instead of 0 keeps the row
    # at zero, so its similarity to everything is 0 instead of NaN.
    norms[norms == 0] = 1
    return A / norms


def cosine_similarity(X, Y=None):
    """Compute the cosine similarity between every row of X and every row of Y.

    Parameters
    ----------
    X : array-like of shape (n_samples_X, n_features)
        First set of row vectors.
    Y : array-like of shape (n_samples_Y, n_features), optional
        Second set of row vectors. When None, X is compared with itself.

    Returns
    -------
    numpy.ndarray of shape (n_samples_X, n_samples_Y)
        Entry ``[i, j]`` is the cosine of the angle between ``X[i]`` and
        ``Y[j]``. Any pair involving an all-zero row gets similarity 0.
    """
    X_normalized = _normalize_rows(X)
    # Reuse the normalised X when no Y is given to avoid redundant work.
    Y_normalized = X_normalized if Y is None else _normalize_rows(Y)
    return np.dot(X_normalized, Y_normalized.T)


# In[3]:

def main():
    """Check ``cosine_similarity`` against scikit-learn on the iris dataset."""
    # scikit-learn is needed only for this verification, so it is imported
    # here; importing this module for its function requires NumPy alone.
    from sklearn.datasets import load_iris
    from sklearn.metrics.pairwise import cosine_similarity as skcosine_similarity

    X, _ = load_iris(return_X_y=True)

    # Tolerances match np.allclose's defaults; np.testing is used instead of
    # a bare ``assert`` so the checks still run under ``python -O``.
    ans1 = cosine_similarity(X)
    ans2 = skcosine_similarity(X)
    np.testing.assert_allclose(ans1, ans2, rtol=1e-5, atol=1e-8)

    ans1 = cosine_similarity(X[:100], X[100:])
    ans2 = skcosine_similarity(X[:100], X[100:])
    np.testing.assert_allclose(ans1, ans2, rtol=1e-5, atol=1e-8)


if __name__ == "__main__":
    main()