!pip install timm # For our convenience, take a peek at what we're working with !nvidia-smi # Import the core modules, check which GPU we end up with and scale batch size accordingly import torch torch.backends.cudnn.benchmark = True import timm from timm.data import * from timm.utils import * import pandas as pd import numpy as np import pynvml from collections import OrderedDict import logging import time def log_gpu_memory(): handle = pynvml.nvmlDeviceGetHandleByIndex(0) info = pynvml.nvmlDeviceGetMemoryInfo(handle) info.free = round(info.free / 1024**2) info.used = round(info.used / 1024**2) logging.info('GPU memory free: {}, memory used: {}'.format(info.free, info.used)) return info.used def get_gpu_memory_total(): handle = pynvml.nvmlDeviceGetHandleByIndex(0) info = pynvml.nvmlDeviceGetMemoryInfo(handle) info.total = round(info.total / 1024**2) return info.total setup_default_logging() print('PyTorch version:', torch.__version__) if torch.cuda.is_available(): print('CUDA available') device='cuda' else: print('CUDA is not available') device='cpu' BATCH_SIZE = 128 if device == 'cuda': pynvml.nvmlInit() log_gpu_memory() total_gpu_mem = get_gpu_memory_total() HAS_T4 = False if total_gpu_mem > 12300: HAS_T4 = True logging.info('Running on a T4 GPU or other with > 12GB memory, setting batch size to {}'.format(BATCH_SIZE)) else: BATCH_SIZE = 64 logging.info('Running on a K80 GPU or other with < 12GB memory, batch size set to {}'.format(BATCH_SIZE)) # Download and extract the dataset (note it's not actually a gz like the file says) if not os.path.exists('./imagenetv2-matched-frequency'): !curl -s https://s3-us-west-2.amazonaws.com/imagenetv2public/imagenetv2-matched-frequency.tar.gz | tar x dataset = Dataset('./imagenetv2-matched-frequency/') assert len(dataset) == 10000 from torchvision.utils import make_grid import torchvision.transforms as transforms import matplotlib.pyplot as plt def show_img(ax, img): npimg = img.numpy() ax.imshow(np.transpose(npimg, (1,2,0)), interpolation='bicubic') fig = plt.figure(figsize=(8, 16), dpi=100) ax = fig.add_subplot('111') num_images = 4*8 images = [] dataset.transform = transforms.Compose([ transforms.Resize(320), transforms.CenterCrop(320), transforms.ToTensor()]) for i in np.random.permutation(np.arange(len(dataset)))[:num_images]: images.append(dataset[i][0]) grid_img = make_grid(images, nrow=4, padding=10, normalize=True, scale_each=True) show_img(ax, grid_img) # a basic validation routine and runner that configures each model and loader from timm.models import TestTimePoolHead def validate(model, loader, criterion=None, device='cuda'): # metrics batch_time = timm.utils.AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() # for collecting per sample prediction/loss details losses_val = [] top5_idx = [] top5_val = [] end = time.time() with torch.no_grad(): for i, (input, target) in enumerate(loader): target = target.to(device) input = input.to(device) output = model(input) if criterion is not None: loss = criterion(output, target) if not loss.size(): losses.update(loss.item(), input.size(0)) else: # only bother collecting top5 we're also collecting per-example loss output = output.softmax(1) top5v, top5i = output.topk(5, 1, True, True) top5_val.append(top5v.cpu().numpy()) top5_idx.append(top5i.cpu().numpy()) losses_val.append(loss.cpu().numpy()) losses.update(loss.mean().item(), input.size(0)) prec1, prec5 = timm.utils.accuracy(output, target, topk=(1, 5)) top1.update(prec1.item(), input.size(0)) top5.update(prec5.item(), input.size(0)) batch_time.update(time.time() - end) end = time.time() if i % 20 == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s) \t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, top1=top1, top5=top5)) results = OrderedDict( top1=top1.avg, top1_err=100 - top1.avg, top5=top5.avg, top5_err=100 - top5.avg, ) if criterion is not None: results['loss'] = losses.avg if len(top5_idx): results['top5_val'] = np.concatenate(top5_val, axis=0) results['top5_idx'] = np.concatenate(top5_idx, axis=0) if len(losses_val): results['losses_val'] = np.concatenate(losses_val, axis=0) print(' * Prec@1 {:.3f} ({:.3f}) Prec@5 {:.3f} ({:.3f})'.format( results['top1'], results['top1_err'], results['top5'], results['top5_err'])) return results def runner(model_args, dataset, device='cuda', collect_loss=False): model_name = model_args['model'] model = timm.create_model(model_name, pretrained=True) ttp = False if 'ttp' in model_args and model_args['ttp']: ttp = True logging.info('Applying test time pooling to model') model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size']) model = model.to(device) model.eval() if HAS_T4: model = model.half() data_config = timm.data.resolve_data_config(model_args, model=model, verbose=True) loader = timm.data.create_loader( dataset, input_size=data_config['input_size'], batch_size=BATCH_SIZE, use_prefetcher=True, interpolation='bicubic', mean=data_config['mean'], std=data_config['std'], fp16=HAS_T4, crop_pct=1.0 if ttp else data_config['crop_pct'], num_workers=2) criterion = None if collect_loss: criterion = torch.nn.CrossEntropyLoss(reduction='none').to(device) results = validate(model, loader, criterion, device) # cleanup checkpoint cache to avoid running out of disk space shutil.rmtree(os.path.join(os.environ['HOME'], '.cache', 'torch', 'checkpoints'), True) # add some non-metric values for charting / comparisons results['model'] = model_name results['img_size'] = data_config['input_size'][-1] # create key to identify model in charts key = [model_name, str(data_config['input_size'][-1])] if ttp: key += ['ttp'] key = '-'.join(key) return key, results models = [ dict(model='mobilenetv3_100'), dict(model='dpn68b'), dict(model='gluon_resnet50_v1d'), dict(model='efficientnet_b2'), dict(model='gluon_seresnext50_32x4d'), dict(model='dpn92'), dict(model='gluon_seresnext101_32x4d'), dict(model='inception_resnet_v2'), dict(model='pnasnet5large'), dict(model='tf_efficientnet_b5'), dict(model='ig_resnext101_32x8d'), dict(model='ig_resnext101_32x16d'), dict(model='ig_resnext101_32x32d'), dict(model='ig_resnext101_32x48d'), ] results = OrderedDict() for ma in models: mk, mr = runner(ma, dataset, device) results[mk] = mr results_df = pd.DataFrame.from_dict(results, orient='index') results_df.to_csv('./cached-results.csv') import numpy as np import matplotlib.pyplot as plt plt.rcParams['figure.figsize'] = [16, 10] names_all = list(results.keys()) top1_all = np.array([results[m]['top1'] for m in names_all]) top1_sort_ix = np.argsort(top1_all) top1_sorted = top1_all[top1_sort_ix] top1_names_sorted = np.array(names_all)[top1_sort_ix] top5_all = np.array([results[m]['top5'] for m in names_all]) top5_sort_ix = np.argsort(top5_all) top5_sorted = top5_all[top5_sort_ix] top5_names_sorted = np.array(names_all)[top5_sort_ix] fig = plt.figure() ax1 = fig.add_subplot(111) ax1.barh(top1_names_sorted, top1_sorted, color='lightcoral') ax1.set_title('Top-1 by Model') ax1.set_xlabel('Top-1 Accuracy (%)') ax1.set_yticklabels(top1_names_sorted) ax1.autoscale(True, axis='both') acc_min = top1_sorted[0] acc_max = top1_sorted[-1] plt.xlim([math.ceil(acc_min - .3*(acc_max - acc_min)), math.ceil(acc_max)]) plt.vlines(plt.xticks()[0], *plt.ylim(), color='0.5', alpha=0.2, linestyle='--') plt.show() print('Results by top-1 accuracy:') results_by_top1 = list(sorted(results.keys(), key=lambda x: results[x]['top1'], reverse=True)) for m in results_by_top1: print(' Model: {:30} Top-1 {:4.2f}, Top-5 {:4.2f}'.format(m, results[m]['top1'], results[m]['top5'])) !wget -q https://raw.githubusercontent.com/rwightman/pytorch-image-models/master/results/results-all.csv original_df = pd.read_csv('./results-all.csv', index_col=0) original_results = original_df.to_dict(orient='index') # helper methods for dumbbell plot import matplotlib.lines as mlines def label_line_horiz(ax, line, label, color='0.5', fs=14, halign='center'): xdata, ydata = line.get_data() x1, x2 = xdata xx = 0.5 * (x1 + x2) text = ax.annotate( label, xy=(xx, ydata[0]), xytext=(0, 1), textcoords='offset points', size=fs, color=color, zorder=3, bbox=dict(boxstyle="round", fc="w", color='0.5'), horizontalalignment='center', verticalalignment='center') return text def draw_line_horiz(ax, p1, p2, label, color='black'): l = mlines.Line2D(*zip(p1, p2), color=color, zorder=0) ax.add_line(l) label_line(ax, l, label) return l def label_line_vert(ax, line, label, color='0.5', fs=14, halign='center'): xdata, ydata = line.get_data() y1, y2 = ydata yy = 0.5 * (y1 + y2) text = ax.annotate( label, xy=(xdata[0], yy), xytext=(0, 0), textcoords='offset points', size=fs, color=color, zorder=3, bbox=dict(boxstyle="round", fc="w", color='0.5'), horizontalalignment='center', verticalalignment='center') return text def draw_line_vert(ax, p1, p2, label, color='black'): l = mlines.Line2D(*zip(p1, p2), color=color, zorder=0) ax.add_line(l) label_line_vert(ax, l, label) return l fig = plt.figure() ax1 = fig.add_subplot(111) # draw the ImageNet-V2 dots, we're sorted on this ax1.scatter(x=top1_names_sorted, y=top1_sorted, s=64, c='lightcoral',marker="o", label='ImageNet-V2 Matched-Freq') # draw the original ImageNet-1k validation dots orig_top1 = [original_results[results[n]['model']]['top1'] for n in top1_names_sorted] ax1.scatter(x=top1_names_sorted, y=orig_top1, s=64, c='steelblue', marker="o", label='ImageNet-1K') for n, vo, vn in zip(top1_names_sorted, orig_top1, top1_sorted): draw_line_vert(ax1, (n, vo), (n, vn), str(round(vo - vn, 2)), 'skyblue') ax1.set_title('Top-1 Difference') ax1.set_ylabel('Top-1 Accuracy (%)') ax1.set_xlabel('Model') yl, yh = ax1.get_ylim() yl = 5 * ((yl + 1) // 5 + 1) yh = 5 * (yh // 5 + 1) for y in plt.yticks()[0][1:-1]: ax1.axhline(y, 0.02, 0.98, c='0.5', alpha=0.2, linestyle='-.') ax1.set_xticklabels(top1_names_sorted, rotation='-30', ha='left') ax1.legend(loc='upper left') plt.show() print('Results by absolute accuracy gap between ImageNet-Sketch and original ImageNet top-1:') gaps = {x: (results[x]['top1'] - original_results[results[x]['model']]['top1']) for x in results.keys()} sorted_keys = list(sorted(results.keys(), key=lambda x: gaps[x], reverse=True)) for m in sorted_keys: print(' Model: {:30} {:4.2f}%'.format(m, gaps[m])) print() print('Results by relative accuracy gap between ImageNet-Sketch and original ImageNet top-1:') gaps = {x: 100 * (results[x]['top1'] - original_results[results[x]['model']]['top1']) / original_results[results[x]['model']]['top1'] for x in results.keys()} sorted_keys = list(sorted(results.keys(), key=lambda x: gaps[x], reverse=True)) for m in sorted_keys: print(' Model: {:30} {:4.2f}%'.format(m, gaps[m])) print() fig = plt.figure() ax1 = fig.add_subplot(111) # draw the ImageNet-V2 top-5 dots, we're sorted on this ax1.scatter(x=top5_names_sorted, y=top5_sorted, s=64, c='lightcoral',marker="o", label='ImageNet-V2 Matched-Freq') # draw the original ImageNet-1k validation dots orig_top5 = [original_results[results[n]['model']]['top5'] for n in top5_names_sorted] ax1.scatter(x=top5_names_sorted, y=orig_top5, s=64, c='steelblue', marker="o", label='ImageNet-1K') for n, vo, vn in zip(top5_names_sorted, orig_top5, top5_sorted): draw_line_vert(ax1, (n, vo), (n, vn), str(round(vo - vn, 2)), 'skyblue') ax1.set_title('Top-5 Difference') ax1.set_ylabel('Top-5 Accuracy (%)') ax1.set_xlabel('Model') yl, yh = ax1.get_ylim() yl = 5 * ((yl + 1) // 5 + 1) yh = 5 * (yh // 5 + 1) for y in plt.yticks()[0][2:-2]: ax1.axhline(y, 0.02, 0.98, c='0.5', alpha=0.2, linestyle='-.') ax1.set_xticklabels(top5_names_sorted, rotation='-30', ha='left') ax1.legend(loc='upper left') plt.show() print('Results by relative accuracy gap between ImageNet-Sketch and original ImageNet top-5:') gaps = {x: (results[x]['top5'] - original_results[results[x]['model']]['top5']) for x in results.keys()} sorted_keys = list(sorted(results.keys(), key=lambda x: gaps[x], reverse=True)) for m in sorted_keys: print(' Model: {:30} {:4.2f}%'.format(m, gaps[m])) print() print('Results by relative accuracy gap between ImageNet-Sketch and original ImageNet top-5:') gaps = {x: 100 * (results[x]['top5'] - original_results[results[x]['model']]['top5']) / original_results[results[x]['model']]['top5'] for x in results.keys()} sorted_keys = list(sorted(results.keys(), key=lambda x: gaps[x], reverse=True)) for m in sorted_keys: print(' Model: {:30} {:4.2f}%'.format(m, gaps[m])) # some code to display images in a grid and ground truth vs predictions for specified indices from torchvision.utils import make_grid import torchvision.transforms as transforms import matplotlib.pyplot as plt def show_img(ax, img): npimg = img.numpy() ax.imshow(np.transpose(npimg, (1,2,0)), interpolation='bicubic') def show_summary(indices, dataset, nrows): col_scale = len(indices) // nrows top5_idx = mr['top5_idx'][indices] top5_val = mr['top5_val'][indices] images = [] labels = [] filenames = [] dataset.transform = transforms.Compose([ transforms.Resize(320, Image.BICUBIC), transforms.CenterCrop(320), transforms.ToTensor()]) for i in indices: img, label = dataset[i] images.append(img) labels.append(label) filenames = dataset.filenames(list(indices), basename=True) fig = plt.figure(figsize=(10, 10 * col_scale), dpi=100) ax = fig.add_subplot('111') grid_best = make_grid(images, nrow=nrows, padding=10, normalize=True, scale_each=True) show_img(ax, grid_best) plt.show() for i, l in enumerate(labels): print('{} ground truth = {}'.format( id_to_synset[i] + '/' + filenames[i], id_to_text[l])) print('Predicted:') for pi, pv in zip(top5_idx[i], top5_val[i]): if pv > 2e-5: print(' {:.3f} {}'.format(100*pv, id_to_text[pi])) print() # create mappings of label id to text and synset !wget -q https://raw.githubusercontent.com/HoldenCaulfieldRye/caffe/master/data/ilsvrc12/synset_words.txt with open('./synset_words.txt', 'r') as f: split_lines = [l.strip().split(' ') for l in f.readlines()] id_to_synset = dict(enumerate([l[0] for l in split_lines])) id_to_text = dict(enumerate([' '.join(l[1:]) for l in split_lines])) BATCH_SIZE=128 mk, mr = runner(dict(model='ig_resnext101_32x32d'), dataset, device, collect_loss=True) nrows = 2 num_images = 10 best_idx = np.argsort(mr['losses_val'])[:num_images] show_summary(best_idx, dataset, nrows) nrows = 2 num_images = 20 worst_idx = np.argsort(mr['losses_val'])[-num_images:][::-1] show_summary(worst_idx, dataset, nrows) # only doing this one if we're on a T4 if HAS_T4: mk, mr = runner(dict(model='ig_resnext101_32x32d', img_size=288, ttp=True), dataset, device, collect_loss=True) nrows = 2 num_images = 20 worst_idx = np.argsort(mr['losses_val'])[-num_images:][::-1] show_summary(worst_idx, dataset, nrows)