#!/usr/bin/env python # coding: utf-8 # # Periodic Spectral Ergodicity: A measure for Neural Architecture Search # # (c) 2019 # Developed by # Mehmet Süzen # suzen at acm dot org # ## Sketch # # * Introduce a measure for spectral ergodicity on the set of different length square matrices. # This is called `periodic spectral ergodicity (PSE)`. # * Report `periodic spectral ergodicity (PSE)` measure for pre-trained networks. # * ResNet/VGG variants with top-1/top-5 test errors vs. PSE. # * VGG-11, VGG-13, VGG-16, VGG-19, VGG-11-bn, VGG-13-bn, VGG-16-bn, VGG-19-bn, # RestNet-18, ResNet-34, ResNet-50, ResNet-101, ResNet-152, ResNeXt-101-32x8d, # resnext50_32x4d # * extract 4d/2d weights. # * reshape 4d to 2d. # * Report ensemble sizes/weight matrix sizes # * Answering questions: # * Are there any relationship between network predictive performance and PSE? # * Can we say anyting about PSE causing better architecure? # * How can we use PSE in Neural Architecture Search (NAS)? # ``` # ('numpy version:', # '1.17.2', # 'matplotlib :', # '3.1.1', # 'Python version:', # '3.7.3 (default, Mar 27 2019, 16:54:48) \n[Clang 4.0.1 (tags/RELEASE_401/final)]', # 'Bristol version', # '0.2.5', # 'pytorch:', # '1.3.0', # 'torchvision:', # '0.4.1a0+d94043a') # ``` # In[ ]: get_ipython().run_line_magic('load_ext', 'lab_black') get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import sys import matplotlib import matplotlib.pyplot as plt import bristol from bristol.spectral import Ergodicity import json ergo = Ergodicity() import torchvision import torchvision.models as models import torch "numpy version:", np.__version__, "matplotlib :", matplotlib.__version__, "Python version:", sys.version, "Bristol version", bristol.__version__, "pytorch:", torch.__version__, "torchvision:", torchvision.__version__ # ## Links # # * [pytorch models](https://pytorch.org/docs/stable/torchvision/models.html) # Pytorch model pre-trained with top1 top5 errors. 
# * pedrodiamel [visualise layers and networks](https://github.com/pedrodiamel/nettutorial/blob/master/pytorch/pytorch_visualization.ipynb)
# * Charles Martin's extraction of weight matrices
#   | [slice CNN](https://github.com/CalculatedContent/ImplicitSelfRegularization/blob/master/All-pytorch-models-wCNNs-Slices.ipynb) | [pytorch CV calls](https://github.com/CalculatedContent/WeightWatcher/blob/master/WeightWatcher-Full-PyTorchCV.ipynb) |
#    * `Charles pulls slices from a single layer multiple times on convolutional layers, we will do a reshape and
#       get a single weight matrix per layer rather than slices.`
# * Cyclic list for PSE computation
#   [itertools cycle overflow](https://stackoverflow.com/questions/23416381/circular-list-iterator-in-python)

# ## Python Functions

# ### Get Layer Matrix Set of pretrained network

# In[ ]:


def get_layer_matrix_set(pmodel):
    """
    Return layer matrix set of a given pre-trained model.

    Every sub-module yielded by ``pmodel.modules()`` whose type string
    contains ``torch.nn.modules`` and which carries a tensor ``weight`` with
    at least two dimensions is used. The weight is reshaped to an ``N x M``
    matrix ``W`` (``N`` is the first dimension, ``M`` the product of the
    remaining dimensions) and the square matrix ``W W^T`` is collected.
    Layers with ``N <= 1`` or ``M <= 1`` are skipped.

    Input

    pmodel : pytorch torchvision pre-trained model

    Returns:
       A tuple (A_set, A_set_N, A_set_types)
       A_set : A list of 2D np-array, the ``W W^T`` matrices
       A_set_N : Shape of NxN matrices.
       A_set_types : Layer type, pytorch object type that is extracted
                     as 2D weight matrix.
    """
    A_set = []
    A_set_N = []
    A_set_types = []
    for module in pmodel.modules():
        type_mod = str(type(module))  # module/method name
        if "torch.nn.modules" not in type_mod:
            continue
        # Containers, activations, pooling layers etc. have no weight (or a
        # `None` weight); they are skipped explicitly instead of relying on
        # a bare `except`.
        weight = getattr(module, "weight", None)
        if not isinstance(weight, torch.Tensor):
            continue
        shape_layer = list(weight.shape)
        if len(shape_layer) < 2:
            continue
        N = shape_layer[0]
        M = int(np.prod(shape_layer[1:]))
        if N > 1 and M > 1:
            try:
                # np.array(...) copies, so the result does not share memory
                # with the model parameters.
                Ap = np.array(weight.detach().cpu().reshape(N, M).numpy())
            except (TypeError, RuntimeError):
                # Best effort: a weight that cannot be converted to numpy
                # is skipped rather than aborting the whole extraction.
                continue
            A = np.matmul(Ap, np.transpose(Ap))
            A_set.append(A)
            A_set_N.append(A.shape)
            A_set_types.append(type_mod)
    return (A_set, A_set_N, A_set_types)


# ### Get Layer Matrix Set of pretrained network, test with resnet18

# In[ ]:


pmodel = models.resnet18(pretrained=True)


# In[ ]:


A_t = get_layer_matrix_set(pmodel)


# ### Get Eigenvalues of Layer Matrix set

# In[ ]:


def get_eigenvals_layer_matrix_set(A_set):
    """
    Compute eigenvalues of given set of matrices.

    For each matrix ``A`` the eigenvalues of ``A A^T`` are computed with
    ``np.linalg.eigvals``.

    NOTE(review): `get_layer_matrix_set` already returns ``W W^T``, so this
    computes the spectrum of the squared Gram matrix. Left unchanged because
    downstream results depend on it -- confirm this is intended.

    Input:

    A_set : list of 2D ndarrays, square real

    Output

    eigenvals_set : List of arrays of eigenvalues, one per input matrix
    """
    eigenvals_set = []
    for A in A_set:
        eigen_values = np.linalg.eigvals(np.matmul(A, np.transpose(A)))
        eigenvals_set.append(eigen_values)
    return eigenvals_set


# ### Get Eigenvalues of Layer Matrix set test with resnet18

# In[ ]:


eset = get_eigenvals_layer_matrix_set(A_t[0])


# In[ ]:


# len(eset)


# In[ ]:


# [len(ei) for ei in eset]


# ### Convert layer matrix set eigenvalues to periodic set.

# In[ ]:


from itertools import cycle, islice


def list2plist(lst, upper_bound):
    """
    Extend a list periodically up to a given length.

    The elements of `lst` are repeated cyclically until `upper_bound`
    values are produced. Each value is stored as the absolute value of its
    real part.

    Input:

    lst : sequence of (possibly complex) numbers
    upper_bound : number of elements of the returned list

    Output:

    lst_period : list of `upper_bound` non-negative values; empty when
                 `lst` is empty or `upper_bound` is not positive
    """
    lst_period = []
    for item in islice(cycle(lst), max(int(upper_bound), 0)):
        # Any complex type (complex64, complex128, Python complex): keep the
        # real part only, to catch numerically small-unstable imaginary parts.
        if np.iscomplexobj(item):
            item = item.real
        lst_period.append(abs(item))
    return lst_period


def eigenvals_set_to_periodic(layer_eigens):
    """
    Layer matrix set eigenvalues to periodic set.

    Every eigenvalue list is extended cyclically (see `list2plist`) to the
    length of the longest list in `layer_eigens`.

    Input:

    layer_eigens : list of eigenvalue sequences, one per layer

    Output:

    eset_period : list of equally long lists; empty for empty input
    """
    if len(layer_eigens) == 0:
        return []
    # Use the argument, not a notebook global, so the function works for
    # any eigenvalue set that is passed in.
    upper_bound = max(len(e) for e in layer_eigens)
    eset_period = [list2plist(e, upper_bound) for e in layer_eigens]
    return eset_period


# ### Convert layer matrix set eigenvalues to periodic set test with resnet18

# In[ ]:


eset_per = eigenvals_set_to_periodic(eset)


# In[ ]:


np.array(eset_per)[0][999]


# ### Compute PSE using periodic set coming from pretrained network
#
# These methods actually do not know if eigenvalues come from a pretrained network.
#
# PSE is quantified by symmetric distance.

# In[ ]:


def d_layers_pse(eset_per):
    """
    Progression of D_layers given periodic set.

    For every depth ``l`` in ``1 .. len(eset_per) - 2`` the periodic spectra
    of the first ``l`` and the first ``l + 1`` layers are flattened, passed
    to ``ergo.thirumalai_mountain`` and the two results are compared with
    ``ergo.kl_distance_symmetric``. The last layer is left out on purpose.
    The number of layers considered is printed.

    Input:

    eset_per : list of equally long eigenvalue lists, as returned by
               `eigenvals_set_to_periodic`

    Output:

    D_layer : list of distances between consecutive depths
    """
    nlayers = len(eset_per) - 1  # minus 1 for the last layer
    print(nlayers)
    N = len(eset_per[0])
    D_layer = []
    for l in np.arange(1, nlayers):
        eigen_l = np.ravel(np.array(eset_per[0:l]))
        l1 = l + 1
        eigen_l1 = np.ravel(np.array(eset_per[0:l1]))
        omega_l = ergo.thirumalai_mountain(eigen_l, l, N)
        omega_l1 = ergo.thirumalai_mountain(eigen_l1, l1, N)
        dl = ergo.kl_distance_symmetric(omega_l, omega_l1)
        D_layer.append(dl)
    return D_layer


# ### Compute PSE using periodic set coming from pretrained network test with resnet18

# ## Data generation and results
#
#
# ### Generate data for list of pretrained networks.
# In[ ]:


# torchvision model constructors to analyse (attribute names of `models`).
netnames = [
    "vgg11",
    "vgg13",
    "vgg16",
    "vgg19",
    "vgg11_bn",
    "vgg13_bn",
    "vgg16_bn",
    "vgg19_bn",
    "resnet18",
    "resnet34",
    "resnet50",
    "resnet101",
    "resnet152",
]


# In[ ]:


# Run the pipeline for every network:
# weights -> layer matrices -> eigenvalues -> periodic set -> D_layers.
d_layers_dict = {}
for netname in netnames:
    print("d_layer for ", netname)
    pmodel = getattr(models, netname)(pretrained=True)
    print(type(pmodel))
    A_t = get_layer_matrix_set(pmodel)
    eset = get_eigenvals_layer_matrix_set(A_t[0])
    eset_per = eigenvals_set_to_periodic(eset)
    d_layers = d_layers_pse(eset_per)
    d_layers_dict[netname] = d_layers


# ### Save data

# In[ ]:


with open("/Users/msuzen/data/d_layers_dict.json", "w") as fp:
    json.dump(d_layers_dict, fp)


# ### Load data.

# In[ ]:


# read
with open("/Users/msuzen/data/d_layers_dict.json", "r") as fp:
    d_layers_dict = json.load(fp)


# In[ ]:


d_layers_dict.keys()


# ### Resnet results

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')
font = {"family": "normal", "weight": "bold", "size": 14}
plt.rc("font", **font)
# Start a fresh figure so this plot does not share axes with any other plot
# when the export is run outside an inline notebook.
plt.figure()
Dl_18 = d_layers_dict["resnet18"]
Dl_18l = np.log10(Dl_18)
m = len(Dl_18)
plt.plot(np.arange(1, m + 1), Dl_18l, "-", label="resnet18")
Dl_34 = d_layers_dict["resnet34"]
Dl_34l = np.log10(Dl_34)
m = len(Dl_34)
plt.plot(np.arange(1, m + 1), Dl_34l, "x-", label="resnet34")
Dl_50 = d_layers_dict["resnet50"]
Dl_50l = np.log10(Dl_50)
m = len(Dl_50)
plt.plot(np.arange(1, m + 1), Dl_50l, "o-", label="resnet50")
Dl_101 = d_layers_dict["resnet101"]
Dl_101l = np.log10(Dl_101)
m = len(Dl_101)
plt.plot(np.arange(1, m + 1), Dl_101l, "--", label="resnet101")
Dl_152 = d_layers_dict["resnet152"]
Dl_152l = np.log10(Dl_152)
m = len(Dl_152)
plt.plot(np.arange(1, m + 1), Dl_152l, ".-", label="resnet152")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("Resnet Pre-trained Architectures ", **font)
plt.savefig(
    "plots/resnet_symmetric_resnet.eps", format="eps", dpi=1000, bbox_inches="tight"
)
# plt.cla()
# plt.clf()
# plt.gca()
# plt.gcf()
# plt.close()


# In[ ]:


# Mean of log10 D_pse per ResNet variant, in the same order as the error
# lists below (18, 34, 50, 101, 152).
mean_pse = [
    np.mean(Dl_18l),
    np.mean(Dl_34l),
    np.mean(Dl_50l),
    np.mean(Dl_101l),
    np.mean(Dl_152l),
]


# In[ ]:


mean_pse


# In[ ]:


# https://pytorch.org/docs/stable/torchvision/models.html
top1_err = [30.24, 26.70, 23.85, 22.63, 21.69]
top5_err = [10.92, 8.58, 7.13, 6.44, 5.94]


# In[ ]:


np.corrcoef(mean_pse, top1_err)


# In[ ]:


# Top-5 counterpart of the cell above (the original repeated top1_err here
# and never used top5_err).
np.corrcoef(mean_pse, top5_err)


# In[ ]:


np.corrcoef(
    mean_pse, [1.1, 4.5, 2.3, 2.6, 0.5]
)  # some random seq. to see the correlation


# ### VGG results

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')
font = {"family": "normal", "weight": "bold", "size": 14}
# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'
plt.rc("font", **font)
# Fresh figure so the VGG curves are not drawn on top of an earlier plot
# when the export is run outside an inline notebook.
plt.figure()
Dl = d_layers_dict["vgg11"]
Dl_11 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_11, "-", label="vgg11")
Dl = d_layers_dict["vgg13"]
Dl_13 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_13, "x-", label="vgg13")
Dl = d_layers_dict["vgg16"]
Dl_16 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_16, "o-", label="vgg16")
Dl = d_layers_dict["vgg19"]
Dl_19 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_19, "--", label="vgg19")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("VGG Pre-trained Architectures", **font)
plt.savefig("plots/vgg_symmetric_pse.eps", format="eps", dpi=1000, bbox_inches="tight")


# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')
font = {"family": "normal", "weight": "bold", "size": 14}
# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'
# Font settings are applied before the first plot, as in the other plot cells.
plt.rc("font", **font)
plt.figure()
Dl = d_layers_dict["vgg11_bn"]
Dl_11_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_11_bn, "-", label="vgg11_bn")
Dl = d_layers_dict["vgg13_bn"]
Dl_13_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_13_bn, "x-", label="vgg13_bn")
Dl = d_layers_dict["vgg16_bn"]
Dl_16_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_16_bn, "o-", label="vgg16_bn")
Dl = d_layers_dict["vgg19_bn"]
Dl_19_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_19_bn, "--", label="vgg19_bn")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("VGG Pre-trained Architectures with BN", **font)
plt.savefig(
    "plots/vgg_symmetric_pse_bn.eps", format="eps", dpi=1000, bbox_inches="tight"
)


# In[ ]:


# VGG without batch normalisation (11, 13, 16, 19).
mean_pse = [np.mean(Dl_11), np.mean(Dl_13), np.mean(Dl_16), np.mean(Dl_19)]


# In[ ]:


mean_pse


# In[ ]:


# https://pytorch.org/docs/stable/torchvision/models.html
top1_err = [30.98, 30.07, 28.41, 27.62]
top5_err = [11.37, 10.75, 9.62, 9.12]


# In[ ]:


np.corrcoef(mean_pse, top1_err)


# In[ ]:


# Evaluated before mean_pse is switched to the BN variants, so that PSE and
# errors of the same (non-BN) networks are correlated.
np.corrcoef(mean_pse, top5_err)


# In[ ]:


mean_pse = [np.mean(Dl_11_bn), np.mean(Dl_13_bn), np.mean(Dl_16_bn), np.mean(Dl_19_bn)]


# In[ ]:


# VGG with batch normalisation (11, 13, 16, 19).
mean_pse = [np.mean(Dl_11_bn), np.mean(Dl_13_bn), np.mean(Dl_16_bn), np.mean(Dl_19_bn)]
# https://pytorch.org/docs/stable/torchvision/models.html
top1_err = [29.62, 28.45, 26.63, 25.76]
top5_err = [10.19, 9.63, 8.5, 8.15]


# In[ ]:


mean_pse


# In[ ]:


np.corrcoef(mean_pse, top1_err)


# In[ ]:


np.corrcoef(mean_pse, top5_err)


# # Compiled results table
#
# ```
# \begin{table}[]
# \centering
# \begin{tabular}{|l|l|l|l|}
# \hline
# Architecture & Top-1 error & Top-5 error & cPSE \\ \hline
# vgg11     & 30.98 & 11.37 & 0.04 \\ \hline
# vgg13     & 30.07 & 10.75 & 0.41 \\ \hline
# vgg16     & 28.41 & 9.62  & 0.14 \\ \hline
# vgg19     & 27.62 & 9.12  &-0.10 \\ \hline
# vgg11bn   & 29.62 & 10.19 & 0.38 \\ \hline
# vgg13bn   & 28.45 & 9.63  & 0.36 \\ \hline
# vgg16bn   & 26.63 & 8.50  & 0.18 \\ \hline
# vgg19bn   & 25.76 & 8.15  &-0.07 \\ \hline
# resnet18  & 30.24 & 10.92 &-0.19 \\ \hline
# resnet34  & 26.70 & 8.58  &-0.74 \\ \hline
# resnet50  & 23.85 & 7.13  &-1.03 \\ \hline
# resnet101 & 22.63 & 6.44  &-1.77 \\ \hline
# resnet152 & 21.69 & 5.94  &-2.29 \\ \hline
# \end{tabular}
# \caption{Classification performance and cPSE of investigated architectures. The correlation between
#  both classification performances and cPSE for ResNet ($\rho=0.94$) for VGG ($\rho=0.44$ and $\rho_{bn}=0.93$
#  with batch normalisation).}
# \label{corr}
# \end{table}
# ```