#!/usr/bin/env python
# coding: utf-8

# # Hybrid Router
#
# The `HybridRouter` in the Semantic Router library can improve routing
# performance, particularly for niche use-cases that contain specific
# terminology, such as finance or medical.
#
# It helps us give more importance to routing based on the keywords contained
# in our utterances and user queries.

# ## Getting Started
#
# We start by installing the library:

# In[1]:


#!pip install -qU semantic-router==0.1.0


# We start by defining a mapping of routes to example phrases that should
# trigger those routes.

# In[1]:


from semantic_router.route import Route

politics = Route(
    name="politics",
    utterances=[
        "isn't politics the best thing ever",
        "why don't you tell me about your political opinions",
        "don't you just love the president",
        "don't you just hate the president",
        "they're going to destroy this country!",
        "they will save the country!",
    ],
)


# Let's define another route for good measure:

# In[2]:


chitchat = Route(
    name="chitchat",
    utterances=[
        "how's the weather today?",
        "how are things going?",
        "lovely weather today",
        "the weather is horrendous",
        "let's go to the chippy",
    ],
)

routes = [politics, chitchat]


# Now we initialize our embedding models. We use a dense encoder from
# [OpenAI](https://platform.openai.com/) and a sparse encoder from
# [Aurelio](https://platform.aurelio.ai/). The `AurelioSparseEncoder` we use
# here provides a remote sparse encoder that can significantly improve routing
# accuracy when combined with dense embeddings.
#
# Semantic Router supports other _local_ sparse encoders like `TfidfEncoder`
# or `BM25Encoder`. Compared to these, the `AurelioSparseEncoder`:
#
# 1. Doesn't require local fitting (training) on your dataset
# 2. Handles out-of-vocabulary words better
# 3. Works better with asymmetric retrieval (different encoding for queries
#    vs. documents)
#
# We initialize both like so:

# In[4]:


import os
from getpass import getpass

from semantic_router.encoders import OpenAIEncoder, AurelioSparseEncoder

# get OpenAI API key from https://platform.openai.com/
# (an existing environment variable wins; otherwise prompt interactively)
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") or getpass(
    "Enter OpenAI API Key: "
)

dense_encoder = OpenAIEncoder(name="text-embedding-3-small", score_threshold=0.3)

# get Aurelio API key from https://platform.aurelio.ai
# use "SRHYBRIDROUTER" for free credits
os.environ["AURELIO_API_KEY"] = os.getenv("AURELIO_API_KEY") or getpass(
    "Enter Aurelio API Key: "
)

# Using Aurelio's BM25 sparse encoder
sparse_encoder = AurelioSparseEncoder(name="bm25")


# Now we define the `HybridRouter`. When called, the router will consume text
# (a query) and output the category (`Route`) it belongs to — to initialize a
# `HybridRouter` we need our dense `encoder`, our `sparse_encoder`, and a list
# of `routes`.

# In[5]:


from semantic_router.routers import HybridRouter

router = HybridRouter(
    encoder=dense_encoder,
    sparse_encoder=sparse_encoder,
    routes=routes,
    # alpha balances the dense and sparse embeddings; 0.5 sits midway.
    # NOTE(review): the earlier comment claimed dense (0) / sparse (1) —
    # confirm which end favours which encoder against the HybridRouter docs.
    alpha=0.5,
)


# In[6]:


# A politics-flavoured query; when run as a notebook cell the value returned
# by the router is displayed as the cell output.
router("don't you love politics?")


# In[7]:


# A query that matches one of the chitchat utterances verbatim.
router("how's the weather today?")


# ---
#