#!/usr/bin/env python
# coding: utf-8

# Notebook export: embeds a few strings with the OpenAI embeddings endpoint
# and compares them with cosine similarity. The `get_ipython()` calls below
# only work when this file is run under IPython/Jupyter.

# In[ ]:


get_ipython().run_line_magic('pip', 'install openai python-dotenv')


# In[ ]:


import os

import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (if any) into the environment.
load_dotenv()

API_KEY = os.getenv("OPENAI_API_KEY", "")
# Raise explicitly instead of using an `assert` statement: asserts are removed
# under `python -O`, which would let an empty key reach the client. The
# exception type and message are the same as the assert would have produced.
if not API_KEY:
    raise AssertionError("ERROR: OpenAI Key is missing")

client = OpenAI(api_key=API_KEY)


# In[ ]:


# Dependencies for embeddings_utils
get_ipython().run_line_magic('pip', 'install matplotlib plotly scikit-learn pandas')


# In[ ]:


def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors *a* and *b*.

    Computed as ``dot(a, b) / (||a|| * ||b||)`` with NumPy, so any inputs
    accepted by ``np.dot`` and ``np.linalg.norm`` work (e.g. lists of floats).
    No special handling is done for zero-length vectors: a zero norm leads to
    a division by zero in the expression below.
    """
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def get_embedding(value, model_name):
    """Request an embedding for *value* and return the first result's vector.

    *value* is forwarded unchanged as the ``input`` argument of
    ``client.embeddings.create`` and *model_name* as its ``model`` argument;
    the ``embedding`` attribute of ``data[0]`` in the response is returned.
    """
    response = client.embeddings.create(input=value, model=model_name)
    return response.data[0].embedding


# In[ ]:


text = 'the quick brown fox jumped over the lazy dog'
model = 'text-embedding-ada-002'

# Bare expression: in a notebook this cell displays the embedding vector.
get_embedding([text], model)


# In[ ]:


# compare several words
automobile_embedding = get_embedding('automobile', model)
vehicle_embedding = get_embedding('vehicle', model)
dinosaur_embedding = get_embedding('dinosaur', model)
stick_embedding = get_embedding('stick', model)

# Comparing cosine similarity: automobile vs. automobile is the same vector
# compared with itself, so the score should be 1.0 (up to floating-point
# rounding). The other pairs are different vectors and are expected to score
# below 1.0, i.e. not the same.
print(cosine_similarity(automobile_embedding, automobile_embedding))
print(cosine_similarity(automobile_embedding, vehicle_embedding))
print(cosine_similarity(automobile_embedding, dinosaur_embedding))
print(cosine_similarity(automobile_embedding, stick_embedding))