Periodic Spectral Ergodicity: A measure for Neural Architecture Search

(c) 2019
Developed by
Mehmet Süzen
suzen at acm dot org

## Sketch

* Introduce a measure for spectral ergodicity on the set of different length square matrices.
This is called `periodic spectral ergodicity (PSE)`.
* Report `periodic spectral ergodicity (PSE)` measure for pre-trained networks.
* ResNet/VGG variants with top-1/top-5 test errors vs. PSE.
* VGG-11, VGG-13, VGG-16, VGG-19, VGG-11-bn, VGG-13-bn, VGG-16-bn, VGG-19-bn,
ResNet-18, ResNet-34, ResNet-50, ResNet-101, ResNet-152, ResNeXt-101-32x8d,
ResNeXt-50-32x4d
* extract 4d/2d weights.
* reshape 4d to 2d.
* Report ensemble sizes/weight matrix sizes
* Answering questions:
* Is there any relationship between network predictive performance and PSE?
* Can we say anything about PSE causing a better architecture?
* How can we use PSE in Neural Architecture Search (NAS)?

import numpy as np
import sys
import matplotlib
import matplotlib.pyplot as plt
import bristol
from bristol.spectral import Ergodicity
import json
ergo = Ergodicity()
import torchvision
import torchvision.models as models
import torch

## Links

* [pytorch models](https://pytorch.org/docs/stable/torchvision/models.html)
Pytorch model pre-trained with top1 top5 errors.
* pedrodiamel [visualise layers and networks](https://github.com/pedrodiamel/nettutorial/blob/master/pytorch/pytorch_visualization.ipynb)
* Charles Martin's extraction of weight matrices
| [slice CNN](https://github.com/CalculatedContent/ImplicitSelfRegularization/blob/master/All-pytorch-models-wCNNs-Slices.ipynb) | [pytorch CV calls](https://github.com/CalculatedContent/WeightWatcher/blob/master/WeightWatcher-Full-PyTorchCV.ipynb) |
* `Charles pulls slices from a single layer multiple times on convolutional layers, we will do a reshape and
get a single weight matrix per layer rather than slices.`
* Cyclic list for PSE computation
[itertools cycle (Stack Overflow)](https://stackoverflow.com/questions/23416381/circular-list-iterator-in-python)

## Python Functions

### Get Layer Matrix Set of pretrained network

def get_layer_matrix_set(pmodel):
    """
    Return layer matrix set of a given pre-trained model

    Every sub-module whose type string contains ``torch.nn.modules`` and
    that carries a ``weight`` tensor with at least two dimensions is
    reshaped to a 2D matrix ``Ap`` of shape (N, M), where N is the first
    dimension and M the product of the remaining ones.  The square matrix
    ``Ap . Ap^T`` (N x N) is what gets stored.  Modules without a weight
    tensor, 1D weights, and weights with N <= 1 or M <= 1 are skipped.

    Input
       pmodel : pytorch torchvision pre-trained model
    Returns:
       A tuple (A_set, A_set_N, A_set_types)
       A_set       : A list of 2D np-array, the N x N matrices Ap . Ap^T
       A_set_N     : Shape of NxN matrices.
       A_set_types : Layer type, pytorch object type that is extracted
                     as 2D weight matrix.
    """
    A_set = []
    A_set_N = []
    A_set_types = []
    for x in pmodel.modules():
        type_mod = str(type(x))  # module/method name
        if "torch.nn.modules" not in type_mod:
            continue
        # Containers and activations have no weight; some layers expose
        # weight=None.  Skip those explicitly instead of relying on a
        # catch-all exception handler.
        layer_weights = getattr(x, "weight", None)
        if not isinstance(layer_weights, torch.Tensor):
            continue
        try:
            shape_layer = list(layer_weights.shape)
            if len(shape_layer) < 2:
                continue
            N = shape_layer[0]
            M = int(np.prod(shape_layer[1:]))
            if N <= 1 or M <= 1:
                continue
            # .cpu() so that weights living on another device are not dropped.
            Ap = layer_weights.detach().cpu().reshape(N, M).numpy()
        except (RuntimeError, TypeError, ValueError):
            # Best effort: a weight that cannot be turned into a dense numpy
            # matrix is skipped, but unrelated errors are no longer hidden.
            continue
        A = np.matmul(Ap, np.transpose(Ap))
        A_set.append(A)
        A_set_N.append(A.shape)
        A_set_types.append(type_mod)
    return (A_set, A_set_N, A_set_types)

### Get Layer Matrix Set of pretrained network, test with resnet18

# Instantiate torchvision's resnet18 with pretrained=True as a test input.
pmodel = models.resnet18(pretrained=True)

# A_t is the tuple (A_set, A_set_N, A_set_types) returned by get_layer_matrix_set.
A_t = get_layer_matrix_set(pmodel)

### Get Eigenvalues of Layer Matrix set

def get_eigenvals_layer_matrix_set(A_set):
    """
    Eigenvalues of A . A^T for every matrix A in a set.

    Input:
       A_set : list of 2D ndarrays, square real
    Output
       eigenvals_set : list with one array of eigenvalues per input
                       matrix, in input order, as returned by
                       np.linalg.eigvals
    """

    def _spectrum(matrix):
        # Form matrix . matrix^T first, then take its eigenvalues.
        mat = np.asarray(matrix)
        return np.linalg.eigvals(mat @ mat.T)

    return [_spectrum(matrix) for matrix in A_set]

### Get Eigenvalues of Layer Matrix set test with resnet18

# A_t[0] is A_set, the list of layer matrices; eset holds one eigenvalue array per layer.
eset = get_eigenvals_layer_matrix_set(A_t[0])

### Convert layer matrix set eigenvalues to periodic set.

from itertools import cycle


def list2plist(lst, upper_bound):
    """
    Given list lst and upper_bound, return the periodic list.

    The items of lst are repeated cyclically until exactly upper_bound
    entries have been produced.  Each entry is the absolute value of the
    item; complex items are first reduced to their real part.

    Input:
       lst         : sequence of numbers (e.g. eigenvalues of one layer)
       upper_bound : number of entries in the returned list
    Output:
       lst_period  : list of length upper_bound; empty when lst is empty
                     or upper_bound <= 0
    """
    lst_period = []
    # zip() stops after upper_bound items, and immediately when lst is empty.
    for _, item in zip(range(upper_bound), cycle(lst)):
        # Catch for numerically small, unstable imaginary parts.  Covers
        # complex64, complex128 and builtin complex alike.
        if isinstance(item, (complex, np.complexfloating)):
            item = item.real
        lst_period.append(abs(item))
    return lst_period


def eigenvals_set_to_periodic(layer_eigens):
    """
    Layer matrix set eigenvalues to periodic set

    Every per-layer eigenvalue list is extended cyclically (via
    list2plist) to the length of the longest list in the set, so that all
    layers end up with the same number of entries.

    Input:
       layer_eigens : list of per-layer eigenvalue sequences
    Output:
       eset_period  : list of equal-length lists, one per layer; empty
                      when layer_eigens is empty
    """
    if len(layer_eigens) == 0:
        return []
    # Use the argument, not a module-level variable, so the function works
    # on whatever set the caller passes in.
    upper_bound = max(len(e) for e in layer_eigens)
    eset_period = [list2plist(e, upper_bound) for e in layer_eigens]
    return eset_period

### Convert layer matrix set eigenvalues to periodic set test with resnet18

# Periodic (equal-length) eigenvalue lists, one per layer.
eset_per = eigenvals_set_to_periodic(eset)

# Notebook-style inspection: entry 999 of the first layer's periodic list.
# Raises IndexError if that list has fewer than 1000 entries; the value is
# discarded when this file is run as a script.
np.array(eset_per)[0][999]

### Compute PSE using periodic set coming from pretrained network

These methods actually do not know whether the eigenvalues come from a pretrained network.

PSE is quantified by symmetric distance.

def d_layers_pse(eset_per):
    """
    Progression of D_layers given periodic set

    For each depth l = 1 .. len(eset_per) - 2 the flattened eigenvalues of
    the first l layers and of the first l + 1 layers are each passed to
    ergo.thirumalai_mountain, and the two results are compared with
    ergo.kl_distance_symmetric.  Prints the layer count as a side effect.

    Input:
       eset_per : list of equal-length per-layer eigenvalue lists
    Output:
       list of distances, one per depth
    """
    layer_count = len(eset_per) - 1  # minus 1 for the last layer
    print(layer_count)
    spectrum_size = len(eset_per[0])

    def _omega(depth):
        # Flatten the eigenvalues of the first `depth` layers.
        flat = np.ravel(np.array(eset_per[0:depth]))
        return ergo.thirumalai_mountain(flat, depth, spectrum_size)

    distances = []
    for depth in np.arange(1, layer_count):
        omega_now = _omega(depth)
        omega_next = _omega(depth + 1)
        distances.append(ergo.kl_distance_symmetric(omega_now, omega_next))
    return distances


### Compute PSE using periodic set coming from pretrained network test with resnet18

## Data generate and results

### Generate data for list of pretrained networks.

# Names of the torchvision model constructors to evaluate; each is looked up
# on `models` with getattr below.
netnames = [
    "vgg11",
    "vgg13",
    "vgg16",
    "vgg19",
    "vgg11_bn",
    "vgg13_bn",
    "vgg16_bn",
    "vgg19_bn",
    "resnet18",
    "resnet34",
    "resnet50",
    "resnet101",
    "resnet152",
]

# Maps network name -> list returned by d_layers_pse for that network.
d_layers_dict = {}
for netname in netnames:
    print("d_layer for ", netname)
    pmodel = getattr(models, netname)(pretrained=True)
    print(type(pmodel))
    # Full pipeline: layer matrices -> eigenvalues -> periodic set -> distances.
    # The module-level names pmodel, A_t, eset and eset_per are rebound on
    # every iteration.
    A_t = get_layer_matrix_set(pmodel)
    eset = get_eigenvals_layer_matrix_set(A_t[0])
    eset_per = eigenvals_set_to_periodic(eset)
    d_layers = d_layers_pse(eset_per)
    d_layers_dict[netname] = d_layers

### Save data

# Write the per-network results to JSON.
# NOTE(review): the path is absolute and user-specific; adjust before running elsewhere.
with open("/Users/msuzen/data/d_layers_dict.json", "w") as fp:
    json.dump(d_layers_dict, fp)

### Load data.

# Read the results back from the same file, replacing the in-memory dict.
with open("/Users/msuzen/data/d_layers_dict.json", "r") as fp:
    d_layers_dict = json.load(fp)

# Notebook-style inspection of the available network names.
d_layers_dict.keys()

### Resnet results

# Plot log10 of the per-depth distances for every ResNet variant on one
# figure, save it, then collect the mean log10 distance per network.
# NOTE(review): "normal" may not be a font family matplotlib recognises — confirm.
font = {"family": "normal", "weight": "bold", "size": 14}
plt.rc("font", **font)
Dl_18 = d_layers_dict["resnet18"]
Dl_18l = np.log10(Dl_18)
m = len(Dl_18)
plt.plot(np.arange(1, m + 1), Dl_18l, "-", label="resnet18")
Dl_34 = d_layers_dict["resnet34"]
Dl_34l = np.log10(Dl_34)
m = len(Dl_34)
plt.plot(np.arange(1, m + 1), Dl_34l, "x-", label="resnet34")
Dl_50 = d_layers_dict["resnet50"]
Dl_50l = np.log10(Dl_50)
m = len(Dl_50)
plt.plot(np.arange(1, m + 1), Dl_50l, "o-", label="resnet50")
Dl_101 = d_layers_dict["resnet101"]
Dl_101l = np.log10(Dl_101)
m = len(Dl_101)
plt.plot(np.arange(1, m + 1), Dl_101l, "--", label="resnet101")
Dl_152 = d_layers_dict["resnet152"]
Dl_152l = np.log10(Dl_152)
m = len(Dl_152)
plt.plot(np.arange(1, m + 1), Dl_152l, ".-", label="resnet152")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("Resnet Pre-trained Architectures ", **font)
plt.savefig(
    "plots/resnet_symmetric_resnet.eps", format="eps", dpi=1000, bbox_inches="tight"
)

# Mean log10 distance per network, in the order 18, 34, 50, 101, 152.
mean_pse = [
    np.mean(Dl_18l),
    np.mean(Dl_34l),
    np.mean(Dl_50l),
    np.mean(Dl_101l),
    np.mean(Dl_152l),
]

# Notebook-style inspection of the ResNet means.
mean_pse

# Error rates in the same network order as mean_pse (resnet18 ... resnet152).
top1_err = [30.24, 26.70, 23.85, 22.63, 21.69]  # https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [10.92, 8.58, 7.13, 6.44, 5.94]

# Correlation of mean PSE with top-1 error ...
np.corrcoef(mean_pse, top1_err)

# ... and with top-5 error (the top-1 correlation was previously computed
# twice and top5_err was never used).
np.corrcoef(mean_pse, top5_err)

np.corrcoef(
    mean_pse, [1.1, 4.5, 2.3, 2.6, 0.5]
)  # some random seq. to see the correlation

### VGG results

# Plot log10 of the per-depth distances for the VGG variants without batch
# normalisation on one figure and save it.
font = {"family": "normal", "weight": "bold", "size": 14}
# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'
plt.rc("font", **font)
Dl = d_layers_dict["vgg11"]
Dl_11 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_11, "-", label="vgg11")
Dl = d_layers_dict["vgg13"]
Dl_13 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_13, "x-", label="vgg13")
Dl = d_layers_dict["vgg16"]
Dl_16 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_16, "o-", label="vgg16")
Dl = d_layers_dict["vgg19"]
Dl_19 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_19, "--", label="vgg19")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("VGG Pre-trained Architectures", **font)
plt.savefig("plots/vgg_symmetric_pse.eps", format="eps", dpi=1000, bbox_inches="tight")

# Font settings for the next figure (was fused onto the savefig line above).
font = {"family": "normal", "weight": "bold", "size": 14}
# Plot log10 of the per-depth distances for the batch-normalised VGG variants.
# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'
Dl = d_layers_dict["vgg11_bn"]
Dl_11_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_11_bn, "-", label="vgg11_bn")
plt.rc("font", **font)
Dl = d_layers_dict["vgg13_bn"]
Dl_13_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_13_bn, "x-", label="vgg13_bn")
Dl = d_layers_dict["vgg16_bn"]
Dl_16_bn = np.log10(Dl)
# NOTE(review): the source was truncated right after "np.log10(" above.  The
# rest of this plotting cell is reconstructed by analogy with the plain-VGG
# cell (same marker order, legend and axis labels).  The original title and
# savefig call are lost and are not guessed here — restore them from the
# notebook.
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_16_bn, "o-", label="vgg16_bn")
Dl = d_layers_dict["vgg19_bn"]
Dl_19_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_19_bn, "--", label="vgg19_bn")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)

# Mean log10 distance for the plain VGG variants (11, 13, 16, 19).
mean_pse = [np.mean(Dl_11), np.mean(Dl_13), np.mean(Dl_16), np.mean(Dl_19)]

mean_pse

top1_err = [30.98, 30.07, 28.41, 27.62]  # https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [11.37, 10.75, 9.62, 9.12]

np.corrcoef(mean_pse, top1_err)

# NOTE(review): mean_pse is rebound to the batch-normalised variants here,
# before the correlation against the plain-VGG top5_err below.  Statement
# order is kept as found; confirm whether the plain-VGG top-5 correlation
# was meant to run before this reassignment.
mean_pse = [np.mean(Dl_11_bn), np.mean(Dl_13_bn), np.mean(Dl_16_bn), np.mean(Dl_19_bn)]

np.corrcoef(mean_pse, top5_err)

# Batch-normalised VGG variants (11, 13, 16, 19) and their error rates.
mean_pse = [np.mean(Dl_11_bn), np.mean(Dl_13_bn), np.mean(Dl_16_bn), np.mean(Dl_19_bn)]
top1_err = [29.62, 28.45, 26.63, 25.76]  # https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [10.19, 9.63, 8.5, 8.15]

mean_pse

np.corrcoef(mean_pse, top1_err)

np.corrcoef(mean_pse, top5_err)

# # Compiled results table
#
# ```
# \begin{table}[]
# \centering
# \begin{tabular}{|l|l|l|l|}
# \hline
# Architecture & Top-1 error & Top-5 error & cPSE \\ \hline
# vgg11 & 30.98 & 11.37 & 0.04 \\ \hline
# vgg13 & 30.07 & 10.75 & 0.41 \\ \hline
# vgg16 & 28.41 & 9.62 & 0.14 \\ \hline
# vgg19 & 27.62 & 9.12 &-0.10 \\ \hline
# vgg11bn & 29.62 & 10.19 & 0.38 \\ \hline
# vgg13bn & 28.45 & 9.63 & 0.36 \\ \hline
# vgg16bn & 26.63 & 8.50 & 0.18 \\ \hline
# vgg19bn & 25.76 & 8.15 &-0.07 \\ \hline
# resnet18 & 30.24 & 10.92 &-0.19 \\ \hline
# resnet34 & 26.70 & 8.58 &-0.74 \\ \hline
# resnet50 & 23.85 & 7.13 &-1.03 \\ \hline
# resnet101 & 22.63 & 6.44 &-1.77 \\ \hline
# resnet152 & 21.69 & 5.94 &-2.29 \\ \hline
# \end{tabular}
# \caption{Classification performance and cPSE of investigated architectures. The correlation between
# both classification performances and cPSE for ResNet ($\rho=0.94$) and for VGG ($\rho=0.44$, and
# $\rho_{bn}=0.93$ with batch normalisation).}
# \label{corr}
# \end{table}
# ```