#!/usr/bin/env python
# coding: utf-8

# # Building Semantic Memory with Embeddings
#
# So far, we've mostly been treating the kernel as a stateless orchestration engine.
# We send text into a model API and receive text out.
#
# In a [previous notebook](04-kernel-arguments-chat.ipynb), we used `kernel arguments` to pass additional
# text into prompts to enrich them with more data. This allowed us to create a basic chat experience.
#
# However, if you relied solely on kernel arguments, you would quickly find that your prompt
# grows so large that you hit the model's token limit. What we need is a way to persist state
# and build both short-term and long-term memory to empower even more intelligent applications.
#
# To do this, we dive into the key concept of `Semantic Memory` in the Semantic Kernel.

# Import the Semantic Kernel SDK from pypi.org and the other dependencies for this example.

# In[ ]:

# Note: if using a virtual environment, do not run this cell
get_ipython().run_line_magic('pip', 'install -U semantic-kernel[azure]')
from semantic_kernel import __version__

__version__

# Initial configuration for the notebook to run properly.

# In[ ]:

# Make sure paths are correct for the imports

import os
import sys

notebook_dir = os.path.abspath("")
parent_dir = os.path.dirname(notebook_dir)
grandparent_dir = os.path.dirname(parent_dir)

sys.path.append(grandparent_dir)

# ### Configuring the Kernel
#
# Let's get started with the necessary configuration to run Semantic Kernel. For notebooks, we require a `.env` file with the proper settings for the model you use. Create a new file named `.env`, place it in this directory, and copy the contents of the `.env.example` file from this directory into it.
#
# **NOTE: Please make sure to include `GLOBAL_LLM_SERVICE` set to either OpenAI, AzureOpenAI, or HuggingFace in your `.env` file. If this setting is not included, the service will default to AzureOpenAI.**
#
# #### Option 1: using OpenAI
#
# Add your [OpenAI key](https://openai.com/product/) to your `.env` file (the org ID is only needed if you belong to multiple orgs):
#
# ```
# GLOBAL_LLM_SERVICE="OpenAI"
# OPENAI_API_KEY="sk-..."
# OPENAI_ORG_ID=""
# OPENAI_CHAT_MODEL_ID=""
# OPENAI_TEXT_MODEL_ID=""
# OPENAI_EMBEDDING_MODEL_ID=""
# ```
# The names should match the names used in the `.env` file, as shown above.
#
# #### Option 2: using Azure OpenAI
#
# Add your [Azure OpenAI Service key](https://learn.microsoft.com/azure/cognitive-services/openai/quickstart?pivots=programming-language-studio) settings to the `.env` file in the same folder:
#
# ```
# GLOBAL_LLM_SERVICE="AzureOpenAI"
# AZURE_OPENAI_API_KEY="..."
# AZURE_OPENAI_ENDPOINT="https://..."
# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="..."
# AZURE_OPENAI_TEXT_DEPLOYMENT_NAME="..."
# AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME="..."
# AZURE_OPENAI_API_VERSION="..."
# ```
# The names should match the names used in the `.env` file, as shown above.
#
# For more advanced configuration, please follow the steps outlined in the [setup guide](./CONFIGURING_THE_KERNEL.md).

# We will load our settings and get the LLM service to use for the notebook.
# In[ ]:

from services import Service
from samples.service_settings import ServiceSettings

service_settings = ServiceSettings.create()

# Select a service to use for this notebook (available services: OpenAI, AzureOpenAI, HuggingFace)
selectedService = (
    Service.AzureOpenAI
    if service_settings.global_llm_service is None
    else Service(service_settings.global_llm_service.lower())
)
print(f"Using service type: {selectedService}")

# In order to use memory, we need to instantiate the kernel with a memory store
# and an embedding service. In this example, we use the `VolatileMemoryStore`, which can be thought of as temporary in-memory storage. This memory is not written to disk and is only available during the app session.
#
# When developing your app, you can plug in persistent storage such as Azure AI Search, Azure Cosmos DB, PostgreSQL, or SQLite. Semantic Memory also lets you index external data sources without duplicating all of their content, as you will see further down in this notebook.

# In[ ]:

from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion
from semantic_kernel.connectors.ai.open_ai.services.azure_text_embedding import AzureTextEmbedding
from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding
from semantic_kernel.core_plugins.text_memory_plugin import TextMemoryPlugin
from semantic_kernel.kernel import Kernel
from semantic_kernel.memory.semantic_text_memory import SemanticTextMemory
from semantic_kernel.memory.volatile_memory_store import VolatileMemoryStore

kernel = Kernel()

chat_service_id = "chat"

# Configure the AI services used by the kernel
if selectedService == Service.AzureOpenAI:
    azure_chat_service = AzureChatCompletion(
        service_id=chat_service_id,
    )
    embedding_gen = AzureTextEmbedding(
        service_id="embedding",
    )
    kernel.add_service(azure_chat_service)
    kernel.add_service(embedding_gen)
elif selectedService == Service.OpenAI:
    oai_chat_service = OpenAIChatCompletion(
        service_id=chat_service_id,
    )
    embedding_gen = OpenAITextEmbedding(
        service_id="embedding",
    )
    kernel.add_service(oai_chat_service)
    kernel.add_service(embedding_gen)

memory = SemanticTextMemory(storage=VolatileMemoryStore(), embeddings_generator=embedding_gen)
kernel.add_plugin(TextMemoryPlugin(memory), "TextMemoryPlugin")

# At its core, Semantic Memory is a set of data structures that allow you to store the meaning of text that comes from different data sources, and optionally to store the source text too. These texts can come from the web, e-mail providers, chats, a database, or your local directory, and are hooked up to the Semantic Kernel through data source connectors.
#
# The texts are embedded, or compressed, into a vector of floats that mathematically represents the texts' content and meaning. You can read more about embeddings [here](https://aka.ms/sk/embeddings).
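# To get a feel for what such an embedding looks like, you can call the embedding service directly. The following is a minimal sketch, not part of the notebook's required flow; it assumes the `embedding_gen` service configured above and that its `generate_embeddings` method accepts a list of texts and returns one vector per text:
#
# ```python
# # Embed one of the facts we are about to store and inspect the raw vector.
# vectors = await embedding_gen.generate_embeddings(["Your budget for 2024 is $100,000"])
#
# print(f"Dimensions: {len(vectors[0])}")        # e.g. 1536 for text-embedding-ada-002
# print(f"First five values: {vectors[0][:5]}")
# ```
#
# Semantically similar texts produce vectors that lie close together, which is exactly what the memory searches below rely on.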
# ### Manually adding memories
#
# Let's create some initial memories "About Me". We can add memories to our `VolatileMemoryStore` by using `save_information`.

# In[ ]:

collection_id = "generic"


async def populate_memory(memory: SemanticTextMemory) -> None:
    # Add some documents to the semantic memory
    await memory.save_information(collection=collection_id, id="info1", text="Your budget for 2024 is $100,000")
    await memory.save_information(collection=collection_id, id="info2", text="Your savings from 2023 are $50,000")
    await memory.save_information(collection=collection_id, id="info3", text="Your investments are $80,000")


# In[ ]:

await populate_memory(memory)

# Let's try searching the memory:

# In[ ]:


async def search_memory_examples(memory: SemanticTextMemory) -> None:
    questions = [
        "What is my budget for 2024?",
        "What are my savings from 2023?",
        "What are my investments?",
    ]
    for question in questions:
        print(f"Question: {question}")
        result = await memory.search(collection_id, question)
        print(f"Answer: {result[0].text}\n")


# In[ ]:

await search_memory_examples(memory)

# Let's now revisit our chat sample from the [previous notebook](04-kernel-arguments-chat.ipynb).
# If you remember, we used kernel arguments to fill the prompt with a `history` that was continuously populated as we chatted with the bot. Let's also add memory to it!
#
# This is done by using the `TextMemoryPlugin`, which exposes the `recall` native function.
#
# `recall` takes an input ask, performs a similarity search over the contents that have
# been embedded in the memory store, and returns the most relevant memory.

# In[ ]:

from semantic_kernel.functions import KernelFunction
from semantic_kernel.prompt_template import PromptTemplateConfig


async def setup_chat_with_memory(
    kernel: Kernel,
    service_id: str,
) -> KernelFunction:
    prompt = """
    ChatBot can have a conversation with you about any topic.
    It can give explicit instructions or say 'I don't know' if
    it does not have an answer.

    Information about me, from previous conversations:
    - {{recall 'budget by year'}} What is my budget for 2024?
    - {{recall 'savings from previous year'}} What are my savings from 2023?
    - {{recall 'investments'}} What are my investments?

    {{$request}}
    """.strip()

    prompt_template_config = PromptTemplateConfig(
        template=prompt,
        execution_settings={
            service_id: kernel.get_service(service_id).get_prompt_execution_settings_class()(service_id=service_id)
        },
    )

    return kernel.add_function(
        function_name="chat_with_memory",
        plugin_name="chat",
        prompt_template_config=prompt_template_config,
    )


# The relevance score used in memory search is a measure from 0.0 to 1.0, where 1.0 means a perfect match. In the Python API it surfaces as the `min_relevance_score` parameter of `memory.search`. We encourage you to experiment with different values.
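# For example, you can pass a minimum relevance threshold directly to the memory search we used earlier. This is a small sketch, assuming the `memory` object and `collection_id` defined above (the same parameter is used with the GitHub collection later in this notebook):
#
# ```python
# # Only matches scoring at or above the threshold are returned.
# results = await memory.search(collection_id, "What is my budget for 2024?", limit=1, min_relevance_score=0.8)
#
# if results:
#     print(f"Match: {results[0].text} (relevance: {results[0].relevance})")
# else:
#     print("No memory met the relevance threshold; try lowering min_relevance_score.")
# ```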
#
# Now that we've included our memories, let's chat!

# In[ ]:

print("Populating memory...")
await populate_memory(memory)

print("Asking questions... (manually)")
await search_memory_examples(memory)

print("Setting up a chat (with memory!)")
chat_func = await setup_chat_with_memory(kernel, chat_service_id)

print("Begin chatting (type 'exit' to exit):\n")
print(
    "Welcome to the chat bot!\n"
    "  Type 'exit' to exit.\n"
    '  Try asking a question about your finances (e.g. "talk to me about my finances").'
)


async def chat(user_input: str):
    print(f"User: {user_input}")
    answer = await kernel.invoke(chat_func, request=user_input)
    print(f"ChatBot:> {answer}")


# In[ ]:

await chat("What is my budget for 2024?")

# In[ ]:

await chat("talk to me about my finances")

# ### Adding documents to your memory
#
# Many times in your applications you'll want to bring external documents into your memory. Let's see how we can do this using our `VolatileMemoryStore`.
#
# Let's first get some data using some of the links in the Semantic Kernel repo.

# In[ ]:

github_files = {}
github_files["https://github.com/microsoft/semantic-kernel/blob/main/README.md"] = (
    "README: Installation, getting started, and how to contribute"
)
github_files[
    "https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/02-running-prompts-from-file.ipynb"
] = "Jupyter notebook describing how to pass prompts from a file to a semantic plugin or function"
github_files["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/00-getting-started.ipynb"] = (
    "Jupyter notebook describing how to get started with the Semantic Kernel"
)
github_files["https://github.com/microsoft/semantic-kernel/tree/main/samples/plugins/ChatPlugin/ChatGPT"] = (
    "Sample demonstrating how to create a chat plugin interfacing with ChatGPT"
)
github_files[
    "https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/Volatile/VolatileMemoryStore.cs"
] = "C# class that defines a volatile embedding store"

# Now let's add these files to our `VolatileMemoryStore` using `save_reference`. We'll separate these memories from the chat memories by putting them in a different collection.

# In[ ]:

memory_collection_name = "SKGitHub"
print("Adding some GitHub file URLs and their descriptions to a volatile Semantic Memory.")

for index, (entry, value) in enumerate(github_files.items()):
    await memory.save_reference(
        collection=memory_collection_name,
        description=value,
        text=value,
        external_id=entry,
        external_source_name="GitHub",
    )
    print("  URL {} saved".format(index))

# In[ ]:

ask = "I love Jupyter notebooks, how should I get started?"
print("===========================\n" + "Query: " + ask + "\n")

memories = await memory.search(memory_collection_name, ask, limit=5, min_relevance_score=0.77)

for index, memory_result in enumerate(memories):
    print(f"Result {index}:")
    print("  URL      : " + memory_result.id)
    print("  Title    : " + memory_result.description)
    print("  Relevance: " + str(memory_result.relevance))
    print()

# Now you might be wondering: what happens if you have so much data that it doesn't fit into RAM? That's where you want to make use of an external vector database built specifically for storing and retrieving embeddings. Fortunately, Semantic Kernel makes this easy thanks to an extensive list of available connectors. In the following section, we will connect to an existing Azure AI Search service and use it as an external vector database to store and retrieve embeddings.
#
# _Please note you will need an Azure AI Search API key or token credential and endpoint for the following example to work properly._

# In[ ]:

from semantic_kernel.connectors.memory.azure_cognitive_search import AzureCognitiveSearchMemoryStore

acs_memory_store = AzureCognitiveSearchMemoryStore(vector_size=1536)

memory = SemanticTextMemory(storage=acs_memory_store, embeddings_generator=embedding_gen)
kernel.add_plugin(TextMemoryPlugin(memory), "TextMemoryPluginACS")

# Semantic Kernel makes it easy to swap one memory store for another, as sketched below.
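# Everything above the storage layer stays the same when you change the backing store; only the `storage` argument of `SemanticTextMemory` changes. The following is a rough sketch of that pattern (it assumes the `MemoryStoreBase` base class is importable from `semantic_kernel.memory.memory_store_base`; the two stores shown are the ones used in this notebook):
#
# ```python
# from semantic_kernel.memory.memory_store_base import MemoryStoreBase
#
#
# def build_memory(store: MemoryStoreBase) -> SemanticTextMemory:
#     # populate_memory and search_memory_examples work unchanged against either backend.
#     return SemanticTextMemory(storage=store, embeddings_generator=embedding_gen)
#
#
# volatile_memory = build_memory(VolatileMemoryStore())  # in-process, lost when the session ends
# azure_search_memory = build_memory(acs_memory_store)   # persistent, external
# ```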
# Here, we will reuse the functions we initially created for the `VolatileMemoryStore` with our new external vector store backed by Azure AI Search.

# In[ ]:

await populate_memory(memory)

# Let's now try to query from Azure AI Search!

# In[ ]:

await search_memory_examples(memory)

# We have laid the foundation to store an arbitrary amount of data in an external vector store, above and beyond what could fit in memory, at the expense of a little more latency.
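# When you are done experimenting, you may want to remove the collection this notebook created so it doesn't linger in your Azure AI Search resource. A possible cleanup sketch, assuming the memory store exposes a `delete_collection` method alongside the other store operations used above:
#
# ```python
# # Remove the "generic" collection that populate_memory created in Azure AI Search.
# await acs_memory_store.delete_collection(collection_id)
# ```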