#!/usr/bin/env python
# coding: utf-8

# # Multiple Results
#
# In this notebook we show how, in a single request, you can have the LLM return multiple results per prompt. This is useful for running experiments where you want to evaluate the robustness of your prompt and of your configuration parameters against a particular large language model.

# Import Semantic Kernel SDK from pypi.org

# In[ ]:


# Note: if using a virtual environment, do not run this cell
get_ipython().run_line_magic('pip', 'install -U semantic-kernel')

from semantic_kernel import __version__

__version__


# Initial configuration for the notebook to run properly.

# In[ ]:


# Make sure paths are correct for the imports

import os
import sys

notebook_dir = os.path.abspath("")
parent_dir = os.path.dirname(notebook_dir)
grandparent_dir = os.path.dirname(parent_dir)

sys.path.append(grandparent_dir)


# ### Configuring the Kernel
#
# Let's get started with the necessary configuration to run Semantic Kernel. For Notebooks, we require a `.env` file with the proper settings for the model you use. Create a new file named `.env` and place it in this directory. Copy the contents of the `.env.example` file from this directory and paste it into the `.env` file that you just created.
#
# **NOTE: Please make sure to include `GLOBAL_LLM_SERVICE` set to one of OpenAI, AzureOpenAI, or HuggingFace in your .env file. If this setting is not included, the service will default to AzureOpenAI.**
#
# #### Option 1: using OpenAI
#
# Add your [OpenAI key](https://openai.com/product/) to your `.env` file (the org ID is only needed if you belong to multiple orgs):
#
# ```
# GLOBAL_LLM_SERVICE="OpenAI"
# OPENAI_API_KEY="sk-..."
# OPENAI_ORG_ID=""
# OPENAI_CHAT_MODEL_ID=""
# OPENAI_TEXT_MODEL_ID=""
# OPENAI_EMBEDDING_MODEL_ID=""
# ```
# The variable names should match those used in your `.env` file, as shown above.
#
# #### Option 2: using Azure OpenAI
#
# Add your [Azure OpenAI Service key](https://learn.microsoft.com/azure/cognitive-services/openai/quickstart?pivots=programming-language-studio) settings to the `.env` file in the same folder:
#
# ```
# GLOBAL_LLM_SERVICE="AzureOpenAI"
# AZURE_OPENAI_API_KEY="..."
# AZURE_OPENAI_ENDPOINT="https://..."
# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="..."
# AZURE_OPENAI_TEXT_DEPLOYMENT_NAME="..."
# AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME="..."
# AZURE_OPENAI_API_VERSION="..."
# ```
# The variable names should match those used in your `.env` file, as shown above.
#
# For more advanced configuration, please follow the steps outlined in the [setup guide](./CONFIGURING_THE_KERNEL.md).

# We will load our settings and get the LLM service to use for the notebook.

# In[ ]:


from services import Service
from samples.service_settings import ServiceSettings

service_settings = ServiceSettings.create()

# Select a service to use for this notebook (available services: OpenAI, AzureOpenAI, HuggingFace)
selectedService = (
    Service.AzureOpenAI
    if service_settings.global_llm_service is None
    else Service(service_settings.global_llm_service.lower())
)
print(f"Using service type: {selectedService}")
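
# Before creating any services, it can be worth a quick check that the settings from your `.env` file are actually visible to the notebook process. The cell below is an optional, minimal sketch: it assumes the `python-dotenv` package is available, and the variable names it checks are taken from the `.env` examples above, not from the Semantic Kernel API.

# In[ ]:


# Optional sanity check (a sketch, not part of the SDK): confirm the .env
# settings were loaded. `load_dotenv` comes from the python-dotenv package,
# which is assumed to be installed here.
from dotenv import load_dotenv

load_dotenv()

required = {
    Service.OpenAI: ["OPENAI_API_KEY", "OPENAI_CHAT_MODEL_ID"],
    Service.AzureOpenAI: ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"],
    Service.HuggingFace: [],  # local Hugging Face models need no API key
}[selectedService]

missing = [name for name in required if not os.getenv(name)]
if missing:
    print(f"Missing settings in .env: {missing}")
else:
    print("All required settings found.")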
# First, we will set up the text and chat services we will be submitting prompts to.

# In[ ]:


from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import (
    AzureChatCompletion,
    AzureChatPromptExecutionSettings,  # noqa: F401
    AzureTextCompletion,
    OpenAIChatCompletion,
    OpenAIChatPromptExecutionSettings,  # noqa: F401
    OpenAITextCompletion,
    OpenAITextPromptExecutionSettings,  # noqa: F401
)

kernel = Kernel()

# Configure the OpenAI or Azure OpenAI service
service_id = None
if selectedService == Service.OpenAI:
    service_id = "default"
    oai_chat_service = OpenAIChatCompletion(
        service_id="oai_chat",
    )
    oai_text_service = OpenAITextCompletion(
        service_id="oai_text",
    )
elif selectedService == Service.AzureOpenAI:
    service_id = "default"
    aoai_chat_service = AzureChatCompletion(
        service_id="aoai_chat",
    )
    aoai_text_service = AzureTextCompletion(
        service_id="aoai_text",
    )

# Configure the Hugging Face service
if selectedService == Service.HuggingFace:
    from semantic_kernel.connectors.ai.hugging_face import (  # noqa: F401
        HuggingFacePromptExecutionSettings,
        HuggingFaceTextCompletion,
    )

    hf_text_service = HuggingFaceTextCompletion(service_id="hf_text", ai_model_id="distilgpt2", task="text-generation")


# Next, we'll set up the completion request settings for text completion services.

# In[ ]:


oai_text_prompt_execution_settings = OpenAITextPromptExecutionSettings(
    service_id="oai_text",
    extension_data={
        "max_tokens": 80,
        "temperature": 0.7,
        "top_p": 1,
        "frequency_penalty": 0.5,
        "presence_penalty": 0.5,
        "number_of_responses": 3,
    },
)


# ## Multiple OpenAI Text Completions

# In[ ]:


if selectedService == Service.OpenAI:
    prompt = "What is the purpose of a rubber duck?"

    results = await oai_text_service.get_text_contents(prompt=prompt, settings=oai_text_prompt_execution_settings)

    for i, result in enumerate(results):
        print(f"Result {i + 1}: {result}")


# ## Multiple Azure OpenAI Text Completions

# In[ ]:


if selectedService == Service.AzureOpenAI:
    prompt = "Provide me a list of possible meanings for the acronym 'ORLD'."

    results = await aoai_text_service.get_text_contents(prompt=prompt, settings=oai_text_prompt_execution_settings)

    for i, result in enumerate(results):
        print(f"Result {i + 1}: {result}")


# ## Multiple Hugging Face Text Completions

# In[ ]:


if selectedService == Service.HuggingFace:
    hf_prompt_execution_settings = HuggingFacePromptExecutionSettings(
        service_id="hf_text",
        extension_data={"max_new_tokens": 80, "temperature": 0.7, "top_p": 1, "num_return_sequences": 3},
    )


# In[ ]:


if selectedService == Service.HuggingFace:
    prompt = "The purpose of a rubber duck is"

    results = await hf_text_service.get_text_contents(prompt=prompt, settings=hf_prompt_execution_settings)

    for i, result in enumerate(results):
        print(f"Result {i + 1}: {result}")
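
# So far we have only printed the results. Since the point of requesting multiple results is to probe how robust a prompt is, it can also help to compare them programmatically. The cell below is an optional sketch; the `summarize_results` helper is defined here for illustration and is not part of the Semantic Kernel SDK.

# In[ ]:


# A small helper (ours, not part of the SDK) to compare the n returned results.
def summarize_results(results):
    texts = [str(r).strip() for r in results]
    print(f"{len(texts)} results, {len(set(texts))} distinct")
    for i, text in enumerate(texts):
        # Show the word count plus a short preview of each completion
        print(f"Result {i + 1} ({len(text.split())} words): {text[:80]!r}")


if selectedService == Service.OpenAI:
    summarize_results(
        await oai_text_service.get_text_contents(
            prompt="What is the purpose of a rubber duck?", settings=oai_text_prompt_execution_settings
        )
    )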
# Here, we're setting up the settings for Chat completions.

# In[ ]:


oai_chat_prompt_execution_settings = OpenAIChatPromptExecutionSettings(
    service_id="oai_chat",
    max_tokens=80,
    temperature=0.7,
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0.5,
    number_of_responses=3,
)


# ## Multiple OpenAI Chat Completions

# In[ ]:


from semantic_kernel.contents import ChatHistory

if selectedService == Service.OpenAI:
    chat = ChatHistory()
    chat.add_user_message(
        "It's a beautiful day outside, birds are singing, flowers are blooming. On days like these, kids like you..."
    )
    results = await oai_chat_service.get_chat_message_contents(
        chat_history=chat, settings=oai_chat_prompt_execution_settings
    )

    for i, result in enumerate(results):
        print(f"Result {i + 1}: {result!s}")


# ## Multiple Azure OpenAI Chat Completions

# In[ ]:


az_oai_prompt_execution_settings = AzureChatPromptExecutionSettings(
    service_id="aoai_chat",
    max_tokens=80,
    temperature=0.7,
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0.5,
    number_of_responses=3,
)


# In[ ]:


if selectedService == Service.AzureOpenAI:
    content = (
        "Tomorrow is going to be a great day, I can feel it. I'm going to wake up early, go for a run, and then..."
    )
    chat = ChatHistory()
    chat.add_user_message(content)
    results = await aoai_chat_service.get_chat_message_contents(
        chat_history=chat, settings=az_oai_prompt_execution_settings
    )

    for i, result in enumerate(results):
        print(f"Result {i + 1}: {result!s}")


# ## Streaming Multiple Results
#
# Here is an example pattern if you want to stream your multiple results. Note that this is not supported for Hugging Face text completions at this time.

# In[ ]:


if selectedService == Service.OpenAI:
    import time

    from IPython.display import clear_output

    chat = ChatHistory()
    chat.add_user_message("What is the purpose of a rubber duck?")

    stream = oai_chat_service.get_streaming_chat_message_contents(
        chat_history=chat, settings=oai_chat_prompt_execution_settings
    )
    number_of_responses = oai_chat_prompt_execution_settings.number_of_responses
    texts = [""] * number_of_responses

    last_clear_time = time.time()
    clear_interval = 0.5  # seconds

    # Note: there are some quirks with displaying the output, which sometimes flashes and disappears.
    # This could be influenced by a few factors specific to Jupyter notebooks and asynchronous processing.
    # The following code attempts to buffer the results to avoid the output flashing on/off the screen.
    async for results in stream:
        current_time = time.time()

        # Update texts with new results
        for result in results:
            texts[result.choice_index] += str(result)

        # Clear and redisplay the output at intervals to reduce flicker
        if current_time - last_clear_time > clear_interval:
            clear_output(wait=True)
            for idx, text in enumerate(texts):
                print(f"Result {idx + 1}: {text}")
            last_clear_time = current_time

    # Final redraw so chunks that arrived after the last interval are not lost
    clear_output(wait=True)
    for idx, text in enumerate(texts):
        print(f"Result {idx + 1}: {text}")
    print("----------------------------------------")
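
# If the timed clear-and-redraw pattern above is more than you need, a simpler variant is to accumulate every chunk first and print each completed result once at the end. The cell below is a sketch of that trade-off, reusing the same streaming API shown above: stable output, but no live progress while the stream runs.

# In[ ]:


# A simpler streaming pattern (sketch): accumulate all chunks, then print once.
# This avoids clear_output() flicker entirely, at the cost of showing no
# intermediate progress while the stream is running.
if selectedService == Service.OpenAI:
    chat = ChatHistory()
    chat.add_user_message("What is the purpose of a rubber duck?")

    texts = [""] * oai_chat_prompt_execution_settings.number_of_responses
    async for results in oai_chat_service.get_streaming_chat_message_contents(
        chat_history=chat, settings=oai_chat_prompt_execution_settings
    ):
        # Each streamed batch carries one chunk per response; route each chunk
        # to its response buffer by choice_index, as in the cell above.
        for result in results:
            texts[result.choice_index] += str(result)

    for idx, text in enumerate(texts):
        print(f"Result {idx + 1}: {text}")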