#!/usr/bin/env python
# coding: utf-8

# [![image](https://raw.githubusercontent.com/visual-layer/visuallayer/main/imgs/vl_horizontal_logo.png)](https://www.visual-layer.com)

# # Hugging Face Datasets
# This notebook shows how to load VL Datasets from Hugging Face Datasets and train a model in PyTorch.
#
# We will load the [`vl-food101`](https://huggingface.co/datasets/visual-layer/vl-food101) dataset - a sanitized version of the original [Food-101 dataset](https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/). Learn more [here](https://docs.visual-layer.com/docs/available-datasets#vl-food101).
#
# `vl-food101` is curated to remove duplicates, outliers, and blurry, overly dark, and overly bright images.
# The following table summarizes the issues found in the original Food-101 dataset and removed in `vl-food101`.
#
# | Category   | Percentage | Count |
# |------------|------------|-------|
# | Duplicates | 0.23%      | 235   |
# | Outliers   | 0.08%      | 77    |
# | Blur       | 0.18%      | 185   |
# | Dark       | 0.04%      | 43    |
# | Leakage    | 0.086%     | 87    |
# | **Total**  | 0.62%      | 627   |
# ## Installation

# In[1]:

get_ipython().system('pip install -Uq datasets torchvision')

# ## Load Dataset
#
# Now load the `vl-food101` dataset from Hugging Face Datasets. See the dataset card [here](https://huggingface.co/datasets/visual-layer/vl-food101).

# In[2]:

from datasets import load_dataset

train_dataset = load_dataset("visual-layer/vl-food101", split="train", cache_dir="images_dir")
valid_dataset = load_dataset("visual-layer/vl-food101", split="test", cache_dir="images_dir")

# In[3]:

train_dataset

# In[4]:

train_dataset[0]

# ## Transform Dataset

# In[5]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms

train_transforms = transforms.Compose(
    [
        transforms.RandomResizedCrop(64),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

valid_transforms = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


def preprocess_train(example_batch):
    """Apply train_transforms across a batch."""
    example_batch["pixel_values"] = [
        train_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch


def preprocess_valid(example_batch):
    """Apply valid_transforms across a batch."""
    example_batch["pixel_values"] = [
        valid_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch

# In[6]:

train_dataset.set_transform(preprocess_train)
valid_dataset.set_transform(preprocess_valid)

# In[7]:

train_dataset[0]

# In[8]:

train_dataset[0]["pixel_values"].shape

# In[9]:

def collate_fn(examples):
    # Stack the transformed images into a single batch tensor and collect labels.
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

# In[10]:

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=collate_fn)
# No need to shuffle the validation set; evaluation order does not affect accuracy.
valid_loader = DataLoader(valid_dataset, batch_size=256, shuffle=False, collate_fn=collate_fn)

# ## Define Model and Hyperparameters

# In[11]:

# Start from an ImageNet-pretrained ResNet-18 and replace the classification
# head with one output per food class.
model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.DEFAULT)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_dataset.features["label"].names))

# In[12]:

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ## Train and Evaluate

# In[13]:

from tqdm.auto import tqdm

num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model.to(device)

model.train()
for epoch in tqdm(range(num_epochs), desc="Epochs"):
    running_loss = 0.0
    for i, data in tqdm(enumerate(train_loader), total=len(train_loader), leave=False):
        inputs, labels = data["pixel_values"], data["labels"]
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f"Epoch {epoch+1} - Loss: {running_loss/len(train_loader)}")

# In[14]:

# Switch to eval mode so batch norm uses its running statistics during validation.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in tqdm(valid_loader, desc="Validation"):
        inputs, labels = data["pixel_values"], data["labels"]
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")
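# ## Going Further (optional)
#
# The cells below are optional additions, not part of the original recipe. First, a
# minimal sketch of running the fine-tuned model on a single validation image and
# mapping the predicted index back to a food name with the standard
# `datasets.ClassLabel.int2str` helper. It reuses `model`, `device`, and the
# datasets defined above.

# In[15]:

# Grab one validation example; `set_transform` already produced "pixel_values".
sample = valid_dataset[0]
model.eval()
with torch.no_grad():
    logits = model(sample["pixel_values"].unsqueeze(0).to(device))
pred_idx = logits.argmax(dim=1).item()

label_feature = train_dataset.features["label"]
print(f"Predicted: {label_feature.int2str(pred_idx)}")
print(f"Actual:    {label_feature.int2str(sample['label'])}")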
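# A single accuracy number can hide weak classes, so you may also want per-class
# accuracy. This is a sketch under the same assumptions as above (it reuses
# `valid_loader`, `model`, and `device`); the choice to report the five weakest
# classes is arbitrary.

# In[16]:

num_classes = len(train_dataset.features["label"].names)
class_correct = torch.zeros(num_classes)
class_total = torch.zeros(num_classes)

model.eval()
with torch.no_grad():
    for data in valid_loader:
        inputs, labels = data["pixel_values"].to(device), data["labels"]
        preds = model(inputs).argmax(dim=1).cpu()
        for label, pred in zip(labels, preds):
            class_total[label] += 1
            class_correct[label] += int(label == pred)

# Report the five weakest classes as a starting point for error analysis.
per_class_acc = class_correct / class_total.clamp(min=1)
for idx in per_class_acc.argsort()[:5]:
    name = train_dataset.features["label"].int2str(idx.item())
    print(f"{name}: {per_class_acc[idx].item():.2%} ({int(class_total[idx])} images)")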
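# Finally, if you want to keep the fine-tuned weights, a common PyTorch pattern is
# to save the model's `state_dict` and reload it into a freshly constructed model
# of the same architecture. The file name below is an arbitrary choice.

# In[17]:

# Save only the parameters, not the full module object.
torch.save(model.state_dict(), "resnet18_vl_food101.pth")

# Rebuild the architecture (including the replaced head) and load the weights.
restored = torchvision.models.resnet18()
restored.fc = nn.Linear(num_ftrs, len(train_dataset.features["label"].names))
restored.load_state_dict(torch.load("resnet18_vl_food101.pth", map_location=device))
restored.to(device).eval()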
print(f"Accuracy: {100 * correct / total}%") # ## Wrap Up # # In this notebook we showed how you can load VL Datasets from the Hugging Face and train with PyTorch. You can choose to load other datasets and also train using other frameworks of your choice. # # [**Try our free cloud product VL Profiler**](https://app.visual-layer.com) - VL Profiler is our first no-code commercial product that lets you visualize and inspect your dataset in your browser. # # [![image](https://raw.githubusercontent.com/visual-layer/fastdup/main/gallery/vl_profiler_promo.svg)](https://app.visual-layer.com) # In[ ]: