#!/usr/bin/env python
# coding: utf-8

# [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/07-multi-modal.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/07-multi-modal.ipynb)

# # Multi-Modal Routes
#
# The Semantic Router library can also be used for detection of specific images
# or videos, for example the detection of **N**ot **S**hrek **F**or **W**ork
# (NSFW) and **S**hrek **F**or **W**ork (SFW) images as we will demonstrate in
# this walkthrough.

# ## Getting Started
#
# We start by installing the library:

# In[ ]:

# NOTE(review): the duplicate unpinned `pip install datasets` that followed this
# line was removed — it was redundant (datasets is already installed here) and,
# being unpinned, risked drifting away from the 2.17.0 pin on a re-run.
get_ipython().system('pip install -qU "semantic-router[vision]" datasets==2.17.0')

# We start by downloading a multi-modal dataset, we'll be using the
# `aurelio-ai/shrek-detection` dataset from Hugging Face.

# In[ ]:

from datasets import load_dataset

data = load_dataset("aurelio-ai/shrek-detection", split="train", trust_remote_code=True)
# Bare expression: in a notebook this displays the PIL image; it is a no-op
# when run as a plain script.
data[3]["image"]

# We will grab the images that are labeled with `is_shrek`:

# In[ ]:

shrek_pics = [d["image"] for d in data if d["is_shrek"]]
not_shrek_pics = [d["image"] for d in data if not d["is_shrek"]]

print(f"We have {len(shrek_pics)} shrek pics, and {len(not_shrek_pics)} not shrek pics")

# We start by defining a dictionary mapping routes to example phrases that
# should trigger those routes.

# In[ ]:

from semantic_router import Route

# Routes are defined with images as "utterances" — the multi-modal encoder
# embeds both text and images into the same vector space.
shrek = Route(
    name="shrek",
    utterances=shrek_pics,
)

# Let's define another for good measure:

# In[ ]:

not_shrek = Route(
    name="not_shrek",
    utterances=not_shrek_pics,
)

routes = [shrek, not_shrek]

# Now we initialize our embedding model:

# In[ ]:

from semantic_router.encoders.clip import CLIPEncoder

encoder = CLIPEncoder()

# Now we define the `RouteLayer`.
# When called, the route layer will consume text (a query) and output the
# category (`Route`) it belongs to — to initialize a `RouteLayer` we need our
# `encoder` model and a list of `routes`.

# In[ ]:

from semantic_router.layer import RouteLayer

rl = RouteLayer(encoder=encoder, routes=routes)

# Now we can test it with _text_ to see if we hit the routes that we defined
# with images:

# In[ ]:

rl("don't you love politics?")

# In[ ]:

rl("shrek")

# In[ ]:

rl("dwayne the rock johnson")

# Everything is being classified accurately, let's pull in some images that we
# haven't seen before and see if we can classify them as NSFW or SFW.

# In[ ]:

test_data = load_dataset(
    "aurelio-ai/shrek-detection", split="test", trust_remote_code=True
)
test_data

# In[ ]:

# For each of the first three test images: display the image (no-op outside a
# notebook), then route it and read the matched route's name.
for sample_idx in range(3):
    test_data[sample_idx]["image"]
    rl(test_data[sample_idx]["image"]).name

# These all look great, we've managed to accurately classify these images using
# our relatively small route space.
#
# Taking this further, we can easily use
# [route optimization](https://github.com/aurelio-labs/semantic-router/blob/main/docs/06-threshold-optimization.ipynb),
# the addition of more utterances, or more routes to improve performance
# further when and where required.

# ---