model_checkpoint = "facebook/convnext-tiny-224" # pre-trained model from which to fine-tune batch_size = 32 # batch size for training and evaluation !pip install -q datasets transformers !pip install -q albumentations from huggingface_hub import notebook_login notebook_login() %%capture !sudo apt -qq install git-lfs !git config --global credential.helper store from datasets import load_dataset # load a custom dataset from local/remote files using the ImageFolder feature # option 1: local/remote files (supporting the following formats: tar, gzip, zip, xz, rar, zstd) dataset = load_dataset("imagefolder", data_files="https://madm.dfki.de/files/sentinel/EuroSAT.zip") # note that you can also provide several splits: # dataset = load_dataset("imagefolder", data_files={"train": ["path/to/file1", "path/to/file2"], "test": ["path/to/file3", "path/to/file4"]}) # note that you can push your dataset to the hub very easily (and reload afterwards using load_dataset)! # dataset.push_to_hub("nielsr/eurosat") # dataset.push_to_hub("nielsr/eurosat", private=True) # option 2: local folder # dataset = load_dataset("imagefolder", data_dir="path_to_folder") # option 3: just load any existing dataset from the hub ... # dataset = load_dataset("cifar10") from datasets import load_metric metric = load_metric("accuracy") dataset example = dataset["train"][10] example dataset["train"].features example['image'] example['image'].resize((200, 200)) example['label'] dataset["train"].features["label"] labels = dataset["train"].features["label"].names label2id, id2label = dict(), dict() for i, label in enumerate(labels): label2id[label] = i id2label[i] = label id2label[2] from transformers import AutoFeatureExtractor feature_extractor = AutoFeatureExtractor.from_pretrained(model_checkpoint) feature_extractor import cv2 import albumentations as A import numpy as np size = feature_extractor.size train_transforms = A.Compose([ A.Resize(height=size, width=size), A.RandomRotate90(), A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(p=0.2), A.Normalize(), ]) val_transforms = A.Compose([ A.Resize(height=size, width=size), A.Normalize(), ]) def preprocess_train(examples): examples["pixel_values"] = [ train_transforms(image=np.array(image))["image"] for image in examples["image"] ] return examples def preprocess_val(examples): examples["pixel_values"] = [ val_transforms(image=np.array(image))["image"] for image in examples["image"] ] return examples # split up training into training + validation splits = dataset["train"].train_test_split(test_size=0.1) train_ds = splits['train'] val_ds = splits['test'] train_ds.set_transform(preprocess_train) val_ds.set_transform(preprocess_val) train_ds[0] from transformers import AutoModelForImageClassification, TrainingArguments, Trainer num_labels = len(id2label) model = AutoModelForImageClassification.from_pretrained( model_checkpoint, label2id=label2id, id2label=id2label, ignore_mismatched_sizes = True, # provide this in case you'd like to fine-tune an already fine-tuned checkpoint ) model_name = model_checkpoint.split("/")[-1] args = TrainingArguments( f"{model_name}-finetuned-eurosat-albumentations", remove_unused_columns=False, evaluation_strategy = "epoch", save_strategy = "epoch", learning_rate=5e-5, per_device_train_batch_size=batch_size, gradient_accumulation_steps=4, per_device_eval_batch_size=batch_size, num_train_epochs=3, warmup_ratio=0.1, logging_steps=10, load_best_model_at_end=True, metric_for_best_model="accuracy", push_to_hub=True, ) import numpy as np # the compute_metrics 
import numpy as np

# the compute_metrics function takes an EvalPrediction named tuple as input:
# predictions, which are the logits of the model as Numpy arrays,
# and label_ids, which are the ground-truth labels as Numpy arrays.
def compute_metrics(eval_pred):
    """Computes accuracy on a batch of predictions"""
    predictions = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)

import torch

def collate_fn(examples):
    images = []
    labels = []
    for example in examples:
        # Albumentations returns HxWxC arrays; the model expects CxHxW tensors
        image = np.moveaxis(example["pixel_values"], source=2, destination=0)
        images.append(torch.from_numpy(image))
        labels.append(example["label"])
    pixel_values = torch.stack(images)
    labels = torch.tensor(labels)
    return {"pixel_values": pixel_values, "labels": labels}

trainer = Trainer(
    model,
    args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)

trainer.train()

metrics = trainer.evaluate()
print(metrics)

trainer.push_to_hub()

from PIL import Image
import requests

url = 'https://huggingface.co/nielsr/convnext-tiny-224-finetuned-eurosat-albumentations/resolve/main/highway.jpg'
image = Image.open(requests.get(url, stream=True).raw)
image

from transformers import AutoModelForImageClassification, AutoFeatureExtractor

repo_name = "nielsr/convnext-tiny-224-finetuned-eurosat-albumentations"

feature_extractor = AutoFeatureExtractor.from_pretrained(repo_name)
model = AutoModelForImageClassification.from_pretrained(repo_name)

# prepare image for the model
encoding = feature_extractor(image.convert("RGB"), return_tensors="pt")
print(encoding.pixel_values.shape)

import torch

# forward pass
with torch.no_grad():
    outputs = model(**encoding)
    logits = outputs.logits

predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])

from transformers import pipeline

pipe = pipeline("image-classification", "nielsr/convnext-tiny-224-finetuned-eurosat-albumentations")

pipe(image)

pipe = pipeline("image-classification", model=model, feature_extractor=feature_extractor)

pipe(image)
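# (added usage note, not part of the original notebook) the image-classification
# pipeline returns a list of {"label", "score"} dicts sorted by descending score,
# so the top prediction can be read off directly:
preds = pipe(image)
print(preds[0]["label"], round(preds[0]["score"], 3))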