#!/usr/bin/env python
# coding: utf-8

# # Using Hugging Face With Plugins
#
# In this notebook, we demonstrate how to use Hugging Face models with Plugins, covering both SemanticMemory and text completions.
#
# SK supports downloading models from the Hugging Face Hub that can perform the following tasks: text-generation, text2text-generation, summarization, and sentence-similarity. You can search for models by task at https://huggingface.co/models.

# In[ ]:

# Note: if using a virtual environment, do not run this cell
get_ipython().run_line_magic('pip', 'install -U semantic-kernel')
from semantic_kernel import __version__

__version__

# In[ ]:

from services import Service

# Select a service to use for this notebook (available services: OpenAI, AzureOpenAI, HuggingFace)
selectedService = Service.HuggingFace
print(f"Using service type: {selectedService}")

# First, we will create a kernel and add both text completion and embedding services.
#
# For text completion, we are choosing HuggingFaceM4/tiny-random-LlamaForCausalLM, a small text-generation model. (Note: text-generation models repeat the input in the output; text2text-generation models do not.)
# For embeddings, we are using sentence-transformers/all-MiniLM-L6-v2. Vectors generated by this model are of length 384 (compared to a length of 1536 from OpenAI ADA).
#
# The following step may take a few minutes when run for the first time, as the models will be downloaded to your local machine.

# In[ ]:

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.hugging_face import HuggingFaceTextCompletion, HuggingFaceTextEmbedding
from semantic_kernel.core_plugins import TextMemoryPlugin
from semantic_kernel.memory import SemanticTextMemory, VolatileMemoryStore

kernel = Kernel()

# Configure LLM service
if selectedService == Service.HuggingFace:
    # Feel free to update this model to any other model available on Hugging Face
    text_service_id = "HuggingFaceM4/tiny-random-LlamaForCausalLM"
    kernel.add_service(
        service=HuggingFaceTextCompletion(
            service_id=text_service_id, ai_model_id=text_service_id, task="text-generation"
        ),
    )
    embed_service_id = "sentence-transformers/all-MiniLM-L6-v2"
    embedding_svc = HuggingFaceTextEmbedding(service_id=embed_service_id, ai_model_id=embed_service_id)
    kernel.add_service(
        service=embedding_svc,
    )
    memory = SemanticTextMemory(storage=VolatileMemoryStore(), embeddings_generator=embedding_svc)
    kernel.add_plugin(TextMemoryPlugin(memory), "TextMemoryPlugin")
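# Before moving on, we can sanity-check the embedding service directly and confirm the 384-dimensional vectors mentioned above. This is a minimal sketch: it assumes the embedding_svc created in the previous cell and SK's base generate_embeddings embedding-generator API.

# In[ ]:

# Embed a single sentence and inspect the vector length (expected: 384 for all-MiniLM-L6-v2).
vectors = await embedding_svc.generate_embeddings(["Sharks are fish."])
print(f"Vector length: {len(vectors[0])}")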
# ### Add Memories and Define a plugin to use them
#
# Most models available on huggingface.co are not as powerful as OpenAI GPT-3+. Your plugins will likely need to be simpler to accommodate this.

# In[ ]:

from semantic_kernel.connectors.ai.hugging_face import HuggingFacePromptExecutionSettings
from semantic_kernel.prompt_template import PromptTemplateConfig

collection_id = "generic"

await memory.save_information(collection=collection_id, id="info1", text="Sharks are fish.")
await memory.save_information(collection=collection_id, id="info2", text="Whales are mammals.")
await memory.save_information(collection=collection_id, id="info3", text="Penguins are birds.")
await memory.save_information(collection=collection_id, id="info4", text="Dolphins are mammals.")
await memory.save_information(collection=collection_id, id="info5", text="Flies are insects.")

# Define a prompt function using the SK prompt template language
my_prompt = """I know these animal facts:
- {{recall 'fact about sharks'}}
- {{recall 'fact about whales'}}
- {{recall 'fact about penguins'}}
- {{recall 'fact about dolphins'}}
- {{recall 'fact about flies'}}
Now, tell me something about: {{$request}}"""

execution_settings = HuggingFacePromptExecutionSettings(
    service_id=text_service_id,
    ai_model_id=text_service_id,
    max_tokens=45,
    temperature=0.5,
    top_p=0.5,
)

prompt_template_config = PromptTemplateConfig(
    template=my_prompt,
    name="text_complete",
    template_format="semantic-kernel",
    execution_settings=execution_settings,
)

my_function = kernel.add_function(
    function_name="text_complete",
    plugin_name="TextCompletionPlugin",
    prompt_template_config=prompt_template_config,
)

# Let's now see what the completion looks like! Remember, this tiny model is nowhere near as capable as ChatGPT, so expect a much simpler answer.

# In[ ]:

output = await kernel.invoke(
    my_function,
    request="What are whales?",
)

output = str(output).strip()

query_result1 = await memory.search(
    collection=collection_id, query="What are sharks?", limit=1, min_relevance_score=0.3
)

print(f"The queried result for 'What are sharks?' is {query_result1[0].text}")

print(f"{text_service_id} completed prompt with: '{output}'")
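# As noted at the top of the notebook, text2text-generation models do not repeat the prompt in their output, which often yields a cleaner answer. The cell below is a minimal sketch of swapping one in: it assumes google/flan-t5-small (any text2text-generation model from the Hub should work) and simply reuses the prompt and execution-settings pattern from above.

# In[ ]:

# Register a second Hugging Face service that uses the text2text-generation task.
t2t_service_id = "google/flan-t5-small"  # assumed model; pick any text2text-generation model
kernel.add_service(
    service=HuggingFaceTextCompletion(
        service_id=t2t_service_id, ai_model_id=t2t_service_id, task="text2text-generation"
    ),
)

# Build a second prompt function bound to the new service, reusing my_prompt from above.
t2t_settings = HuggingFacePromptExecutionSettings(
    service_id=t2t_service_id,
    ai_model_id=t2t_service_id,
    max_tokens=45,
    temperature=0.5,
    top_p=0.5,
)
t2t_config = PromptTemplateConfig(
    template=my_prompt,
    name="text_complete_t2t",
    template_format="semantic-kernel",
    execution_settings=t2t_settings,
)
t2t_function = kernel.add_function(
    function_name="text_complete_t2t",
    plugin_name="TextCompletionPlugin",
    prompt_template_config=t2t_config,
)

# Invoke it with the same request; note the prompt is not echoed back.
t2t_output = await kernel.invoke(t2t_function, request="What are whales?")
print(f"{t2t_service_id} completed prompt with: '{str(t2t_output).strip()}'")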