This notebook shows how you can load VL Datasets from Hugging Face Datasets and train in PyTorch.
We will load the vl-food101
dataset - a sanitized version of the original Food-101 dataset. Learn more here.
The vl-food101
is curated to minimize duplicates, outliers, blurry, overly dark and bright images.
The following table summarizes the issues we found in the original Food101 dataset and were removed in in vl-food101.
Category | Percentage | Count |
---|---|---|
Duplicates | 0.23% |
235 |
Outliers | 0.08% |
77 |
Blur | 0.18% |
185 |
Dark | 0.04% |
43 |
Leakage | 0.086% |
87 |
Total | 0.62% |
627 |
!pip install -Uq datasets torchvision
from datasets import load_dataset
train_dataset = load_dataset("visual-layer/vl-food101", split="train", cache_dir='images_dir')
valid_dataset = load_dataset("visual-layer/vl-food101", split="test", cache_dir='images_dir')
Found cached dataset parquet (/media/dnth/Active-Projects/vl-datasets/notebooks/images_dir/visual-layer___parquet/visual-layer--vl-food101-bd3d25b1793d94e4/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7) Found cached dataset parquet (/media/dnth/Active-Projects/vl-datasets/notebooks/images_dir/visual-layer___parquet/visual-layer--vl-food101-bd3d25b1793d94e4/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)
train_dataset
Dataset({ features: ['image', 'label'], num_rows: 75284 })
train_dataset[0]
{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x512>, 'label': 0}
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
train_transforms = transforms.Compose(
[
transforms.RandomResizedCrop(64),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
)
valid_transform = transforms.Compose(
[
transforms.Resize((64, 64)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
)
def preprocess_train(example_batch):
"""Apply train_transforms across a batch."""
example_batch["pixel_values"] = [
train_transforms(image.convert("RGB")) for image in example_batch["image"]
]
return example_batch
def preprocess_valid(example_batch):
"""Apply valid_transforms across a batch."""
example_batch["pixel_values"] = [
valid_transform(image.convert("RGB")) for image in example_batch["image"]
]
return example_batch
train_dataset.set_transform(preprocess_train)
valid_dataset.set_transform(preprocess_valid)
train_dataset[0]
{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x512>, 'label': 0, 'pixel_values': tensor([[[ 2.1119, 2.0948, 2.0605, ..., 0.8618, 1.3755, 1.7523], [ 1.8893, 1.8379, 1.7523, ..., 0.8961, 1.2557, 1.3070], [ 1.9749, 1.8893, 1.8037, ..., 0.9132, 0.9988, 0.9646], ..., [-1.0048, -0.9877, -1.0219, ..., 0.4679, 0.8104, 0.6392], [-1.0390, -1.0390, -1.0219, ..., 1.4098, 0.8789, 0.3481], [-1.0733, -1.0219, -1.0390, ..., 1.2043, 0.5878, 0.3823]], [[ 2.3410, 2.3235, 2.2885, ..., -0.4601, -0.0574, 0.3627], [ 2.0784, 2.0259, 1.9384, ..., -0.4601, -0.1975, -0.1450], [ 2.2185, 2.0959, 1.9734, ..., -0.4426, -0.3901, -0.4426], ..., [-1.7031, -1.7206, -1.7556, ..., -0.2675, 0.0301, -0.0924], [-1.7031, -1.7381, -1.7381, ..., 0.8880, 0.1352, -0.3725], [-1.7206, -1.7206, -1.7206, ..., 0.5903, -0.2325, -0.3901]], [[ 2.4831, 2.4657, 2.4483, ..., -1.4210, -1.2119, -0.9678], [ 2.0823, 2.0300, 1.9603, ..., -1.4036, -1.2467, -1.2990], [ 2.1868, 2.0823, 1.9603, ..., -1.3687, -1.3513, -1.3861], ..., [-1.6127, -1.5953, -1.6127, ..., -0.8284, -0.6715, -0.7936], [-1.6127, -1.6127, -1.5953, ..., 0.1128, -0.6193, -1.0550], [-1.6302, -1.6127, -1.5953, ..., -0.2010, -0.8807, -1.0027]]])}
train_dataset[0]["pixel_values"].shape
torch.Size([3, 64, 64])
def collate_fn(examples):
pixel_values = torch.stack([example["pixel_values"] for example in examples])
labels = torch.tensor([example["label"] for example in examples])
return {"pixel_values": pixel_values, "labels": labels}
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True , collate_fn=collate_fn)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=256, shuffle=True, collate_fn=collate_fn)
model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.DEFAULT)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_dataset.features["label"].names))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
from tqdm.auto import tqdm
num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model.to(device)
for epoch in tqdm(range(num_epochs), desc="Epochs"):
running_loss = 0.0
for i, data in tqdm(enumerate(train_loader), total=len(train_loader), leave=False):
inputs, labels = data["pixel_values"], data["labels"]
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
print(f"Epoch {epoch+1} - Loss: {running_loss/len(train_loader)}")
Using device: cuda
Epochs: 0%| | 0/5 [00:00<?, ?it/s]
0%| | 0/295 [00:00<?, ?it/s]
Epoch 1 - Loss: 3.287348061901028
0%| | 0/295 [00:00<?, ?it/s]
Epoch 2 - Loss: 2.7990124314518297
0%| | 0/295 [00:00<?, ?it/s]
Epoch 3 - Loss: 2.61252308295945
0%| | 0/295 [00:00<?, ?it/s]
Epoch 4 - Loss: 2.4790741589109775
0%| | 0/295 [00:00<?, ?it/s]
Epoch 5 - Loss: 2.375496632365857
correct = 0
total = 0
with torch.no_grad():
for data in tqdm(valid_loader, desc="Validation"):
inputs, labels = data["pixel_values"], data["labels"]
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print(f"Accuracy: {100 * correct / total}%")
Validation: 0%| | 0/99 [00:00<?, ?it/s]
Accuracy: 42.05566600397614%
In this notebook we showed how you can load VL Datasets from the Hugging Face and train with PyTorch. You can choose to load other datasets and also train using other frameworks of your choice.
Try our free cloud product VL Profiler - VL Profiler is our first no-code commercial product that lets you visualize and inspect your dataset in your browser.