#!/usr/bin/env python
# coding: utf-8

# # Using Weaviate with Generative OpenAI module for Generative Search
#
# This notebook is prepared for a scenario where:
# * Your data is already in Weaviate
# * You want to use Weaviate with the Generative OpenAI module ([generative-openai](https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-openai)).
#
#
# ## Prerequisites
#
# This cookbook only covers Generative Search examples; it doesn't cover the configuration and data imports.
#
# In order to make the most of this cookbook, please complete the [Getting Started cookbook](./getting-started-with-weaviate-and-openai.ipynb) first, where you will learn the essentials of working with Weaviate and import the demo data.
#
# Checklist:
# * completed [Getting Started cookbook](./getting-started-with-weaviate-and-openai.ipynb),
# * created a `Weaviate` instance,
# * imported data into your `Weaviate` instance,
# * you have an [OpenAI API key](https://beta.openai.com/account/api-keys)

# ===========================================================
# ## Prepare your OpenAI API key
#
# The `OpenAI API key` is used for vectorization of your data at import, and for running queries.
#
# If you don't have an OpenAI API key, you can get one from [https://beta.openai.com/account/api-keys](https://beta.openai.com/account/api-keys).
#
# Once you get your key, please add it to your environment variables as `OPENAI_API_KEY`.

# In[ ]:


# Export OpenAI API Key
# NOTE(review): `get_ipython()` only exists when this file runs under IPython/Jupyter.
# The command is handed to a shell, so the exported variable presumably does not
# reach this Python process - confirm, and prefer the `os.environ` alternative
# shown in the next cell if the key is not picked up.
get_ipython().system('export OPENAI_API_KEY="your key"')


# In[ ]:


# Test that your OpenAI API key is correctly set as an environment variable
# Note. if you run this notebook locally, you will need to reload your terminal and the notebook for the env variables to be live.
import os

# Note. alternatively you can set a temporary env variable like this:
# os.environ["OPENAI_API_KEY"] = 'your-key-goes-here'

if os.getenv("OPENAI_API_KEY") is not None:
    print("OPENAI_API_KEY is ready")
else:
    print("OPENAI_API_KEY environment variable not found")


# ## Connect to your Weaviate instance
#
# In this section, we will:
#
# 1. test env variable `OPENAI_API_KEY` – **make sure** you completed the step in [#Prepare-your-OpenAI-API-key](#Prepare-your-OpenAI-API-key)
# 2. connect to your Weaviate with your `OpenAI API Key`
# 3. and test the client connection
#
# ### The client
#
# After this step, the `client` object will be used to perform all Weaviate-related operations.

# In[ ]:


import weaviate
from datasets import load_dataset
import os

# Connect to your Weaviate instance
client = weaviate.Client(
    url="https://your-wcs-instance-name.weaviate.network/",
    # url="http://localhost:8080/",
    auth_client_secret=weaviate.auth.AuthApiKey(api_key=""),  # comment out this line if you are not using authentication for your Weaviate instance (i.e. for locally deployed instances)
    additional_headers={
        # The OpenAI key is read from the environment; it is None when the
        # variable checked in the previous cell is not set.
        "X-OpenAI-Api-Key": os.getenv("OPENAI_API_KEY")
    }
)

# Check if your instance is live and ready
# This should return `True`
client.is_ready()


# ## Generative Search
# Weaviate offers a [Generative Search OpenAI](https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-openai) module, which generates responses based on the data stored in your Weaviate instance.
#
# The way you construct a generative search query is very similar to a standard semantic search query in Weaviate.
#
# For example:
# * search in "Article",
# * return "title", "content", "url"
# * look for objects related to "football clubs"
# * limit results to 5 objects
#
# ```
# result = (
#     client.query
#     .get("Article", ["title", "content", "url"])
#     .with_near_text({"concepts": ["football clubs"]})
#     .with_limit(5)
#     # generative query will go here
#     .do()
# )
# ```
#
# Now, you can add `with_generate()` function to apply generative transformation. `with_generate` takes either:
# - `single_prompt` - to generate a response for each returned object,
# - `grouped_task` – to generate a single response from all returned objects.
#

# In[ ]:


def _extract_objects(result, collection_name):
    """Return the objects stored under ``collection_name`` in a query response.

    Args:
        result: The dictionary returned by the query builder's ``.do()`` call.
        collection_name: The collection whose objects should be returned.

    Returns:
        The value found at ``result["data"]["Get"][collection_name]``.

    Raises:
        RuntimeError: If ``result`` contains an ``"errors"`` entry. The message
            joins the ``message`` field of every reported error.
    """
    if "errors" in result:
        # The trailing reset code stops the red colour from leaking into
        # whatever is printed after this hint.
        print("\033[91mYou probably have run out of OpenAI API calls for the current minute – the limit is set at 60 per minute.\033[0m")
        messages = "; ".join(str(error["message"]) for error in result["errors"])
        raise RuntimeError(messages)

    return result["data"]["Get"][collection_name]


def generative_search_per_item(query, collection_name, *, weaviate_client=None):
    """Run a near-text search and generate one response per returned object.

    Args:
        query: The concept to search for.
        collection_name: The collection to query.
        weaviate_client: Optional client to query with. When omitted, the
            module-level ``client`` is used.

    Returns:
        The objects returned for ``collection_name`` (at most 5 are requested).
        The demo cell below reads each generated text from
        ``_additional.generate.singleResult``.

    Raises:
        RuntimeError: If the response reports errors.
    """
    prompt = "Summarize in a short tweet the following content: {content}"
    active_client = client if weaviate_client is None else weaviate_client

    result = (
        active_client.query
        .get(collection_name, ["title", "content", "url"])
        .with_near_text({"concepts": [query], "distance": 0.7})
        .with_limit(5)
        .with_generate(single_prompt=prompt)
        .do()
    )

    return _extract_objects(result, collection_name)


# In[ ]:


# `__name__` is "__main__" both in a notebook kernel and when this file is run
# as a script, so the demo still executes there but not on plain import.
if __name__ == "__main__":
    query_result = generative_search_per_item("football clubs", "Article")

    for i, article in enumerate(query_result, start=1):
        print(f"{i}. {article['title']}")
        print(article['_additional']['generate']['singleResult'])  # print generated response
        print("-----------------------")


# In[79]:


def generative_search_group(query, collection_name, *, weaviate_client=None):
    """Run a near-text search and generate a single response for all results.

    Args:
        query: The concept to search for.
        collection_name: The collection to query.
        weaviate_client: Optional client to query with. When omitted, the
            module-level ``client`` is used.

    Returns:
        The objects returned for ``collection_name`` (at most 5 are requested).
        The demo cell below reads the generated text from the first object's
        ``_additional.generate.groupedResult``.

    Raises:
        RuntimeError: If the response reports errors.
    """
    generate_task = "Explain what these have in common"
    active_client = client if weaviate_client is None else weaviate_client

    result = (
        active_client.query
        .get(collection_name, ["title", "content", "url"])
        .with_near_text({"concepts": [query], "distance": 0.7})
        .with_generate(grouped_task=generate_task)
        .with_limit(5)
        .do()
    )

    return _extract_objects(result, collection_name)


# In[ ]:


if __name__ == "__main__":
    query_result = generative_search_group("football clubs", "Article")

    # An empty result list has no first object to read the response from.
    if query_result:
        print(query_result[0]['_additional']['generate']['groupedResult'])
    else:
        print("No matching objects found")


# Thanks for following along, you're now equipped to set up your own vector databases and use embeddings to do all kinds of cool things - enjoy! For more complex use cases please continue to work through other cookbook examples in this repo.