# Transformers installation
! pip install transformers datasets
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+https://github.com/huggingface/transformers.git

# Tokenize text with a pretrained tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

encoded_input = tokenizer("Do not meddle in the affairs of wizards, for they are subtle and quick to anger.")
print(encoded_input)

tokenizer.decode(encoded_input["input_ids"])

# Tokenize a batch of sentences.
batch_sentences = [
    "But what about second breakfast?",
    "Don't think he knows about second breakfast, Pip.",
    "What about elevensies?",
]
encoded_inputs = tokenizer(batch_sentences)
print(encoded_inputs)

# Pad the shorter sentences to the length of the longest one in the batch.
batch_sentences = [
    "But what about second breakfast?",
    "Don't think he knows about second breakfast, Pip.",
    "What about elevensies?",
]
encoded_input = tokenizer(batch_sentences, padding=True)
print(encoded_input)

# Truncate sequences that exceed the model's maximum length.
batch_sentences = [
    "But what about second breakfast?",
    "Don't think he knows about second breakfast, Pip.",
    "What about elevensies?",
]
encoded_input = tokenizer(batch_sentences, padding=True, truncation=True)
print(encoded_input)

# Return PyTorch tensors.
batch_sentences = [
    "But what about second breakfast?",
    "Don't think he knows about second breakfast, Pip.",
    "What about elevensies?",
]
encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="pt")
print(encoded_input)

# Return TensorFlow tensors.
batch_sentences = [
    "But what about second breakfast?",
    "Don't think he knows about second breakfast, Pip.",
    "What about elevensies?",
]
encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf")
print(encoded_input)

# Audio: load the MInDS-14 dataset and inspect an example.
from datasets import load_dataset, Audio

dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")
dataset[0]["audio"]

# Resample the audio to the 16kHz sampling rate Wav2Vec2 was pretrained on.
dataset = dataset.cast_column("audio", Audio(sampling_rate=16_000))
dataset[0]["audio"]

from transformers import AutoFeatureExtractor

feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")

audio_input = [dataset[0]["audio"]["array"]]
feature_extractor(audio_input, sampling_rate=16000)

# The raw audio arrays have different lengths, so pad/truncate them in a batch.
dataset[0]["audio"]["array"].shape
dataset[1]["audio"]["array"].shape

def preprocess_function(examples):
    audio_arrays = [x["array"] for x in examples["audio"]]
    inputs = feature_extractor(
        audio_arrays,
        sampling_rate=16000,
        padding=True,
        max_length=100000,
        truncation=True,
    )
    return inputs

processed_dataset = preprocess_function(dataset[:5])
processed_dataset["input_values"][0].shape
processed_dataset["input_values"][1].shape

# Vision: load a subset of the food101 dataset.
from datasets import load_dataset

dataset = load_dataset("food101", split="train[:100]")
dataset[0]["image"]

from transformers import AutoFeatureExtractor

feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224")

# Augment and normalize the images with torchvision transforms.
from torchvision.transforms import Compose, Normalize, RandomResizedCrop, ColorJitter, ToTensor

normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
_transforms = Compose(
    [RandomResizedCrop(feature_extractor.size), ColorJitter(brightness=0.5, hue=0.5), ToTensor(), normalize]
)

def transforms(examples):
    examples["pixel_values"] = [_transforms(image.convert("RGB")) for image in examples["image"]]
    return examples

# Apply the transforms on the fly whenever an example is accessed.
dataset.set_transform(transforms)
dataset[0]["image"]

# Visualize a transformed image.
import numpy as np
import matplotlib.pyplot as plt

img = dataset[0]["pixel_values"]
plt.imshow(img.permute(1, 2, 0))
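# Not part of the original notebook: a minimal sketch of batching the transformed image
# dataset with a PyTorch DataLoader. It assumes the food101 split keeps its "label" column
# and that dataset.set_transform(transforms) above has already been applied.
import torch
from torch.utils.data import DataLoader

def collate_fn(batch):
    # Stack the on-the-fly transformed images and collect the class labels.
    pixel_values = torch.stack([example["pixel_values"] for example in batch])
    labels = torch.tensor([example["label"] for example in batch])
    return {"pixel_values": pixel_values, "labels": labels}

dataloader = DataLoader(dataset, batch_size=4, collate_fn=collate_fn)
batch = next(iter(dataloader))
batch["pixel_values"].shape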
# Multimodal (automatic speech recognition): load the LJ Speech dataset.
from datasets import load_dataset, Audio

lj_speech = load_dataset("lj_speech", split="train")
lj_speech = lj_speech.map(remove_columns=["file", "id", "normalized_text"])

lj_speech[0]["audio"]
lj_speech[0]["text"]

# Resample to the 16kHz sampling rate Wav2Vec2 expects.
lj_speech = lj_speech.cast_column("audio", Audio(sampling_rate=16_000))

# A processor combines a feature extractor (for audio) and a tokenizer (for text).
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")

def prepare_dataset(example):
    audio = example["audio"]

    # The feature extractor processes the audio array into input_values.
    example["input_values"] = processor(audio["array"], sampling_rate=16000)

    # The tokenizer processes the transcription into label ids.
    with processor.as_target_processor():
        example["labels"] = processor(example["text"]).input_ids
    return example

prepare_dataset(lj_speech[0])
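# Not part of the original notebook: a sketch of running prepare_dataset over every example
# with datasets.Dataset.map. Dropping the raw "audio" and "text" columns afterwards is an
# assumption about which fields are no longer needed for training.
lj_speech = lj_speech.map(prepare_dataset, remove_columns=["audio", "text"])
lj_speech[0].keys()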