The Stable Conceptualizer lets you use pre-learned concepts with Stable Diffusion via textual inversion, using the 🤗 Hugging Face 🧨 Diffusers library.
Browse the library of pre-learned concepts in the [SD Concepts Library](https://huggingface.co/sd-concepts-library). To teach the model new concepts of your own with Textual Inversion, use the training notebook.
#@title Install the required libs
!pip install -qq diffusers==0.16.1 transformers ftfy accelerate
#@title Login to the Hugging Face Hub
from huggingface_hub import notebook_login
notebook_login()
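If you are running this as a plain Python script rather than a notebook, you can authenticate with `huggingface_hub.login()` or the `huggingface-cli login` command instead (a hedged alternative, not part of the original notebook):

# Alternative to notebook_login() outside of a notebook environment
# from huggingface_hub import login
# login(token="hf_...")  # supply your own access token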
#@title Import required libraries
import os
import torch
import PIL
from PIL import Image
from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
def image_grid(imgs, rows, cols):
    """Paste a list of equally sized PIL images into a single rows x cols grid image."""
    assert len(imgs) == rows * cols
    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))
    for i, img in enumerate(imgs):
        grid.paste(img, box=(i % cols * w, i // cols * h))
    return grid
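As a quick sanity check (a hypothetical example, not part of the original notebook), `image_grid` can be exercised with solid-color placeholder images before any generation happens:

# Hypothetical smoke test: tile six 64x64 solid-color images into a 2x3 grid
demo_imgs = [Image.new("RGB", (64, 64), color=c)
             for c in ["red", "green", "blue", "white", "black", "gray"]]
demo_grid = image_grid(demo_imgs, rows=2, cols=3)  # 192x128 composite image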
If you want to teach Stable Diffusion your own concepts, use this notebook.
#@markdown `pretrained_model_name_or_path`: which Stable Diffusion checkpoint to use. It should match the checkpoint the embeddings were trained against.
pretrained_model_name_or_path = "CompVis/stable-diffusion-v1-4" #@param {type:"string"}
#@title Load your concept here
#@markdown Enter the `repo_id` for a concept you like (you can find pre-learned concepts in the public [SD Concepts Library](https://huggingface.co/sd-concepts-library))
repo_id_embeds = "sd-concepts-library/cat-toy" #@param {type:"string"}
The concept repository ships a `learned_embeds.bin` file and a `token_identifier.txt` that contains the placeholder token for this concept: `<cat-toy>`.
#@title Load the Stable Diffusion pipeline
pipe = StableDiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path,
    torch_dtype=torch.float16,
).to("cuda")
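If you are on a GPU with limited memory, attention slicing can reduce VRAM usage at a small speed cost. This is an optional extra, not part of the original notebook:

# Optional: trade a little speed for lower VRAM usage
pipe.enable_attention_slicing()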
#@title Load the concept into pipeline
pipe.load_textual_inversion(repo_id_embeds)
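As an alternative sketch (assuming the `token` argument available in recent diffusers releases), `load_textual_inversion` can also register the embedding under a placeholder token of your choosing instead of the one stored in the repository:

# Sketch: load the same embedding under a custom placeholder token
# (assumes the `token` argument of load_textual_inversion; adjust for your diffusers version)
# pipe.load_textual_inversion(repo_id_embeds, token="<my-cat-toy>")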
#@title Run the Stable Diffusion pipeline
#@markdown Don't forget to use the placeholder token in your prompt
prompt = "graffiti on a favela wall with a <cat-toy> on it" #@param {type:"string"}
num_samples = 2 #@param {type:"number"}
num_rows = 2 #@param {type:"number"}
all_images = []
for _ in range(num_rows):
    # Generate num_samples images per row of the final grid
    images = pipe(prompt, num_images_per_prompt=num_samples, num_inference_steps=50, guidance_scale=7.5).images
    all_images.extend(images)

grid = image_grid(all_images, rows=num_rows, cols=num_samples)
grid
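To keep the results around after the session ends, the grid and the individual images can be saved to disk (hypothetical filenames):

# Optionally persist the results
grid.save("cat_toy_grid.png")
for i, img in enumerate(all_images):
    img.save(f"cat_toy_{i}.png")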