#!/usr/bin/env python
# coding: utf-8

# # Periodic Spectral Ergodicity: A measure for Neural Architecture Search
# 
# (c) 2019   
#     Developed by  
#     Mehmet Süzen        
#     suzen at acm dot org  

# ## Sketch
# 
# * Introduce a measure for spectral ergodicity on the set of different length square matrices.  
#   This is called `periodic spectral ergodicity (PSE)`.
# * Report `periodic spectral ergodicity (PSE)` measure for pre-trained networks.
#     * ResNet/VGG variants with top-1/top-5 test errors vs. PSE.
#         * VGG-11, VGG-13, VGG-16, VGG-19, VGG-11-bn, VGG-13-bn, VGG-16-bn, VGG-19-bn,   
#           ResNet-18, ResNet-34, ResNet-50, ResNet-101, ResNet-152, ResNeXt-101-32x8d, 
#           resnext50_32x4d
#         * extract 4d/2d weights.
#         * reshape 4d to 2d.
#     * Report ensemble sizes/weight matrix sizes
# * Answering questions:
#   * Is there any relationship between network predictive performance and PSE?
#   * Can we say anything about PSE leading to a better architecture?
#   * How can we use PSE in Neural Architecture Search (NAS)?

# ```
# ('numpy version:',
#  '1.17.2',
#  'matplotlib :',
#  '3.1.1',
#  'Python version:',
#  '3.7.3 (default, Mar 27 2019, 16:54:48) \n[Clang 4.0.1 (tags/RELEASE_401/final)]',
#  'Bristol version',
#  '0.2.5',
#  'pytorch:',
#  '1.3.0',
#  'torchvision:',
#  '0.4.1a0+d94043a')
#  ```

# In[ ]:


# Notebook setup: IPython magics, third-party imports, and the shared
# Ergodicity helper used by the PSE computations further down.
# NOTE(review): get_ipython() only exists inside an IPython/Jupyter session,
# so this exported script cannot be run with a plain `python` interpreter.
get_ipython().run_line_magic('load_ext', 'lab_black')
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import sys
import matplotlib
import matplotlib.pyplot as plt
import bristol
from bristol.spectral import Ergodicity
import json

# Single module-level Ergodicity instance; d_layers_pse() reads it as a global.
ergo = Ergodicity()

import torchvision
import torchvision.models as models
import torch

# Bare tuple expression: in a notebook cell this displays the library versions
# in use (it has no effect when the file is executed as a script).
"numpy version:", np.__version__, "matplotlib :", matplotlib.__version__, "Python version:", sys.version, "Bristol version", bristol.__version__, "pytorch:", torch.__version__, "torchvision:", torchvision.__version__


# ## Links
# 
# * [pytorch models](https://pytorch.org/docs/stable/torchvision/models.html)   
#   Pytorch model pre-trained with top1 top5 errors.
# * pedrodiamel [visualise layers and networks](https://github.com/pedrodiamel/nettutorial/blob/master/pytorch/pytorch_visualization.ipynb)
# * Charles Martin's extraction of weight matrices
#   | [slice CNN](https://github.com/CalculatedContent/ImplicitSelfRegularization/blob/master/All-pytorch-models-wCNNs-Slices.ipynb) | [pytorch CV calls](https://github.com/CalculatedContent/WeightWatcher/blob/master/WeightWatcher-Full-PyTorchCV.ipynb) | 
#       * `Charles pulls slices from a single layer multiple times on convolutional layers, we will do a reshape and 
#         get a single weight matrix per layer rather than slices.`
# * Cyclic list for PSE computation 
#   [itertools cycle, Stack Overflow](https://stackoverflow.com/questions/23416381/circular-list-iterator-in-python)

# ## Python Functions

# ### Get Layer Matrix Set of pretrained network

# In[ ]:


def get_layer_matrix_set(pmodel):
    """
    Return the layer matrix set of a given pre-trained model.

    Every sub-module whose type lives under ``torch.nn.modules`` and that
    carries a weight tensor with at least two dimensions is considered.
    Its weight of shape (N, d1, d2, ...) is flattened to a 2D matrix Ap of
    shape (N, M) with M = d1*d2*..., and the square matrix A = Ap Ap^T
    (N x N) is stored. Layers with N <= 1 or M <= 1 are skipped.

    Input

    pmodel : pytorch torchvision pre-trained model (anything exposing
             ``modules()`` works)

    Returns:

    A tuple (A_set, A_set_N, A_set_types)
        A_set       : A list of 2D np-array, the square matrices Ap Ap^T
        A_set_N     : Shape (N, N) of each matrix in A_set.
        A_set_types : Layer type, str(type(module)) of the pytorch object
                      the 2D weight matrix was extracted from.

    Modules without a usable weight tensor (containers, activations,
    layers with ``weight=None``, ...) are skipped rather than raising.
    """
    A_set = []
    A_set_N = []
    A_set_types = []
    for module in pmodel.modules():
        type_mod = str(type(module))  # module/method name
        if "torch.nn.modules" not in type_mod:
            continue
        # Containers and parameter-free layers have no weight attribute;
        # some layers expose weight=None or a non-tensor object.
        weight = getattr(module, "weight", None)
        if not isinstance(weight, torch.Tensor):
            continue
        if weight.dim() < 2:
            continue  # e.g. 1D batch-norm scale vectors
        n_rows = int(weight.shape[0])
        n_cols = int(np.prod(weight.shape[1:]))
        if n_rows <= 1 or n_cols <= 1:
            continue
        try:
            # detach(): drop autograd tracking; cpu(): allow models that
            # live on an accelerator; works for any numpy-exportable dtype.
            Ap = weight.detach().cpu().reshape(n_rows, n_cols).numpy()
        except (RuntimeError, TypeError):
            # Best effort: tensors that cannot be exported to numpy
            # (unsupported dtype/layout) are skipped, not fatal.
            continue
        A = np.matmul(Ap, np.transpose(Ap))
        A_set.append(A)
        A_set_N.append(A.shape)
        A_set_types.append(type_mod)
    return (A_set, A_set_N, A_set_types)


# ### Get Layer Matrix Set of pretrained network, test with resnet18

# In[ ]:


# Smoke test on a single network: build torchvision's resnet18 with
# pretrained=True (NOTE(review): presumably downloads the weights on first
# use - confirm for your environment).
pmodel = models.resnet18(pretrained=True)


# In[ ]:


# A_t is the tuple (A_set, A_set_N, A_set_types) returned by
# get_layer_matrix_set; A_t[0] is the list of square layer matrices.
A_t = get_layer_matrix_set(pmodel)


# ### Get Eigenvalues of Layer Matrix set

# In[ ]:


def get_eigenvals_layer_matrix_set(A_set):
    """
    Eigenvalue spectra for a collection of matrices.

    For every matrix ``A`` in ``A_set`` the product ``A A^T`` is formed and
    its eigenvalues are computed with ``np.linalg.eigvals``.

    Input:

    A_set : list of 2D ndarrays, square real

    Output
    eigenvals_set : list with one eigenvalue array per input matrix, in the
                    same order as ``A_set`` (empty list for empty input)
    """
    return [
        np.linalg.eigvals(np.matmul(matrix, np.transpose(matrix)))
        for matrix in A_set
    ]


# ### Get Eigenvalues of Layer Matrix set test with resnet18

# In[ ]:


# Eigenvalue spectra for the resnet18 layer matrices (A_t[0] is A_set).
eset = get_eigenvals_layer_matrix_set(A_t[0])


# In[ ]:


# len(eset)


# In[ ]:


# [len(ei) for ei in eset]


# ### Convert layer matrix set eigenvalues to periodic set.

# In[ ]:


from itertools import cycle


def list2plist(lst, upper_bound):
    """
    Extend a list periodically to a fixed length.

    The items of ``lst`` are repeated cyclically until ``upper_bound``
    values have been produced; each value is stored as an absolute value.
    Complex items (any complex type, not only ``np.complex64``) first have
    their imaginary part dropped, since it is treated as numerical noise.

    Input

    lst         : iterable of numbers (e.g. eigenvalues of one layer)
    upper_bound : number of items to return; values <= 0 give an empty list

    Returns

    lst_period : list of length ``upper_bound`` (empty if ``lst`` is empty)
    """
    from itertools import islice  # local import keeps the block self-contained

    n_items = max(int(upper_bound), 0)
    lst_period = []
    for item in islice(cycle(lst), n_items):
        if isinstance(item, (complex, np.complexfloating)):
            item = item.real  # catch for numerical small-unstable numbers
        lst_period.append(abs(item))
    return lst_period


def eigenvals_set_to_periodic(layer_eigens):
    """
    Layer matrix set eigenvalues to periodic set.

    Every eigenvalue list is cyclically extended (via ``list2plist``) to the
    length of the longest list, so that all layers end up with spectra of
    equal length.

    Input

    layer_eigens : list of eigenvalue sequences, one per layer

    Returns

    eset_period : list of lists, one per layer, each as long as the longest
                  input spectrum (empty list for empty input)
    """
    # Work on the argument itself, not on a notebook global, so the result
    # always corresponds to the data that was passed in.
    layer_eigens = list(layer_eigens)
    upper_bound = max((len(e) for e in layer_eigens), default=0)
    eset_period = [list2plist(e, upper_bound) for e in layer_eigens]
    return eset_period


# ### Convert layer matrix set eigenvalues to periodic set test with resnet18

# In[ ]:


# Periodic (equal-length) eigenvalue set for the resnet18 spectra.
eset_per = eigenvals_set_to_periodic(eset)


# In[ ]:


# Spot check: element 999 of the first layer's periodic spectrum.
# NOTE(review): the hard-coded index assumes the periodic length is at
# least 1000 - confirm for the network being inspected.
np.array(eset_per)[0][999]


# ### Compute PSE using periodic set coming from pretrained network
# 
# These methods actually do not know whether the eigenvalues come from a pretrained network.
# 
# PSE is quantified by symmetric distance.

# In[ ]:


def d_layers_pse(eset_per):
    """
    Progression of D_layers given a periodic eigenvalue set.

    For each depth l = 1 .. len(eset_per) - 2 the spectra of the first l
    layers and of the first l + 1 layers are flattened, passed to
    ``ergo.thirumalai_mountain`` (together with the layer count and the
    common spectrum length), and the two results are compared with
    ``ergo.kl_distance_symmetric``. The last layer of ``eset_per`` is never
    included.

    Input

    eset_per : list of equal-length eigenvalue lists, one per layer

    Returns

    List of the symmetric distances, one per depth.

    Side effect: prints the number of layers considered.
    """
    nlayers = len(eset_per) - 1  # the final layer is left out
    print(nlayers)
    spectrum_len = len(eset_per[0])
    distances = []
    for depth in np.arange(1, nlayers):
        next_depth = depth + 1
        spectra_upto = np.array(eset_per[:depth]).ravel()
        spectra_next = np.array(eset_per[:next_depth]).ravel()
        omega_upto = ergo.thirumalai_mountain(spectra_upto, depth, spectrum_len)
        omega_next = ergo.thirumalai_mountain(spectra_next, next_depth, spectrum_len)
        distances.append(ergo.kl_distance_symmetric(omega_upto, omega_next))
    return distances


# ### Compute PSE using periodic set coming from pretrained network test with resnet18

# ## Data generate and results
# 
# 

# ### Generate data for list of pretrained networks.

# In[ ]:


# Names of the torchvision model constructors to analyse; each name is looked
# up as an attribute of torchvision.models in the loop below.
netnames = [
    "vgg11",
    "vgg13",
    "vgg16",
    "vgg19",
    "vgg11_bn",
    "vgg13_bn",
    "vgg16_bn",
    "vgg19_bn",
    "resnet18",
    "resnet34",
    "resnet50",
    "resnet101",
    "resnet152",
]


# In[ ]:


# Full pipeline per network: pretrained model -> layer matrices ->
# eigenvalues -> periodic eigenvalue set -> D_layers progression.
# Results are collected by network name in d_layers_dict.
d_layers_dict = {}
for netname in netnames:
    print("d_layer for ", netname)
    pmodel = getattr(models, netname)(pretrained=True)
    print(type(pmodel))
    A_t = get_layer_matrix_set(pmodel)  # A_t[0] holds the square matrices
    eset = get_eigenvals_layer_matrix_set(A_t[0])
    eset_per = eigenvals_set_to_periodic(eset)
    d_layers = d_layers_pse(eset_per)
    d_layers_dict[netname] = d_layers


# ### Save data

# In[ ]:


# Single location of the cached results, shared by the save and load steps.
d_layers_json_path = "/Users/msuzen/data/d_layers_dict.json"

# Persist the per-network D_layers progressions as JSON.
with open(d_layers_json_path, "w") as out_file:
    json.dump(d_layers_dict, out_file)


# ### Load data.

# In[ ]:


# Read the cached results back; from here on d_layers_dict is the JSON copy.
with open(d_layers_json_path, "r") as in_file:
    d_layers_dict = json.load(in_file)


# In[ ]:


# Notebook display: which networks are available in the loaded results.
d_layers_dict.keys()


# ### Resnet results

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')
# "normal" is not a font family matplotlib can resolve (it only triggers a
# findfont fallback warning); request the generic sans-serif family instead.
font = {"family": "sans-serif", "weight": "bold", "size": 14}

plt.rc("font", **font)
# Start from a fresh figure so curves drawn by other plotting cells never end
# up in this plot when the file is executed top to bottom as a script.
plt.figure()

Dl_18 = d_layers_dict["resnet18"]
Dl_34 = d_layers_dict["resnet34"]
Dl_50 = d_layers_dict["resnet50"]
Dl_101 = d_layers_dict["resnet101"]
Dl_152 = d_layers_dict["resnet152"]

# log10 of the D_layers progressions; kept as module-level names because the
# correlation cells below average them.
Dl_18l = np.log10(Dl_18)
Dl_34l = np.log10(Dl_34)
Dl_50l = np.log10(Dl_50)
Dl_101l = np.log10(Dl_101)
Dl_152l = np.log10(Dl_152)

# One curve per architecture: x is the 1-based position in the progression.
for log_dl, line_style, net_label in (
    (Dl_18l, "-", "resnet18"),
    (Dl_34l, "x-", "resnet34"),
    (Dl_50l, "o-", "resnet50"),
    (Dl_101l, "--", "resnet101"),
    (Dl_152l, ".-", "resnet152"),
):
    m = len(log_dl)
    plt.plot(np.arange(1, m + 1), log_dl, line_style, label=net_label)

plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE:  $\log_{10} D_{pse}$", **font)
plt.title("Resnet Pre-trained Architectures  ", **font)
# NOTE(review): the relative "plots" directory must already exist.
plt.savefig(
    "plots/resnet_symmetric_resnet.eps", format="eps", dpi=1000, bbox_inches="tight"
)


# In[ ]:


# Mean of log10 D_pse per ResNet architecture, ordered by depth
# (18, 34, 50, 101, 152) to line up with the error lists below.
mean_pse = [
    np.mean(Dl_18l),
    np.mean(Dl_34l),
    np.mean(Dl_50l),
    np.mean(Dl_101l),
    np.mean(Dl_152l),
]


# In[ ]:


mean_pse


# In[ ]:


# Top-1 / top-5 errors for resnet18, 34, 50, 101, 152 (same order as mean_pse).
top1_err = [30.24, 26.70, 23.85, 22.63, 21.69]
# https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [10.92, 8.58, 7.13, 6.44, 5.94]


# In[ ]:


# Correlation of mean PSE with top-1 error.
np.corrcoef(mean_pse, top1_err)


# In[ ]:


# Correlation of mean PSE with top-5 error (this cell previously repeated the
# top-1 computation, leaving top5_err unused).
np.corrcoef(mean_pse, top5_err)


# In[ ]:


np.corrcoef(
    mean_pse, [1.1, 4.5, 2.3, 2.6, 0.5]
)  # some random seq. to see the correlation


# ### VGG results

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')
# "normal" is not a font family matplotlib can resolve (it only triggers a
# findfont fallback warning); request the generic sans-serif family instead.
font = {"family": "sans-serif", "weight": "bold", "size": 14}

# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'

plt.rc("font", **font)
# Start from a fresh figure so curves drawn by other plotting cells never end
# up in this plot when the file is executed top to bottom as a script.
plt.figure()

# One curve per plain VGG architecture; the log10 progressions are collected
# by name so they can be exposed under their usual variable names afterwards.
vgg_log_dl = {}
for vgg_name, line_style in (
    ("vgg11", "-"),
    ("vgg13", "x-"),
    ("vgg16", "o-"),
    ("vgg19", "--"),
):
    Dl = d_layers_dict[vgg_name]
    m = len(Dl)
    vgg_log_dl[vgg_name] = np.log10(Dl)
    plt.plot(np.arange(1, m + 1), vgg_log_dl[vgg_name], line_style, label=vgg_name)

# Module-level names used by the correlation cells further down.
Dl_11 = vgg_log_dl["vgg11"]
Dl_13 = vgg_log_dl["vgg13"]
Dl_16 = vgg_log_dl["vgg16"]
Dl_19 = vgg_log_dl["vgg19"]


plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("VGG Pre-trained Architectures", **font)
# NOTE(review): the relative "plots" directory must already exist.
plt.savefig("plots/vgg_symmetric_pse.eps", format="eps", dpi=1000, bbox_inches="tight")


# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')
# "normal" is not a font family matplotlib can resolve (it only triggers a
# findfont fallback warning); request the generic sans-serif family instead.
font = {"family": "sans-serif", "weight": "bold", "size": 14}

# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'

# Apply the font settings before anything is drawn, and start from a fresh
# figure so curves from other plotting cells never end up in this plot when
# the file is executed top to bottom as a script.
plt.rc("font", **font)
plt.figure()

# One curve per batch-normalised VGG architecture; the log10 progressions are
# collected by name so they can be exposed under their usual variable names.
vgg_bn_log_dl = {}
for vgg_bn_name, line_style in (
    ("vgg11_bn", "-"),
    ("vgg13_bn", "x-"),
    ("vgg16_bn", "o-"),
    ("vgg19_bn", "--"),
):
    Dl = d_layers_dict[vgg_bn_name]
    m = len(Dl)
    vgg_bn_log_dl[vgg_bn_name] = np.log10(Dl)
    plt.plot(
        np.arange(1, m + 1), vgg_bn_log_dl[vgg_bn_name], line_style, label=vgg_bn_name
    )

# Module-level names used by the correlation cells further down.
Dl_11_bn = vgg_bn_log_dl["vgg11_bn"]
Dl_13_bn = vgg_bn_log_dl["vgg13_bn"]
Dl_16_bn = vgg_bn_log_dl["vgg16_bn"]
Dl_19_bn = vgg_bn_log_dl["vgg19_bn"]

plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE:  $\log_{10} D_{pse}$", **font)
plt.title("VGG Pre-trained Architectures with BN", **font)
# NOTE(review): the relative "plots" directory must already exist.
plt.savefig(
    "plots/vgg_symmetric_pse_bn.eps", format="eps", dpi=1000, bbox_inches="tight"
)


# In[ ]:


# Plain VGG (no batch normalisation): mean log10 D_pse for vgg11, 13, 16, 19.
mean_pse = [np.mean(Dl_11), np.mean(Dl_13), np.mean(Dl_16), np.mean(Dl_19)]


# In[ ]:


mean_pse


# In[ ]:


# Top-1 / top-5 errors for vgg11, 13, 16, 19 (same order as mean_pse).
top1_err = [30.98, 30.07, 28.41, 27.62]
# https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [11.37, 10.75, 9.62, 9.12]


# In[ ]:


np.corrcoef(mean_pse, top1_err)


# In[ ]:


# mean_pse still holds the plain-VGG values here, so it is paired with the
# plain-VGG top-5 errors (previously it had already been overwritten with the
# batch-normalised values, mixing the two model families).
np.corrcoef(mean_pse, top5_err)


# In[ ]:


# VGG with batch normalisation: mean log10 D_pse and the matching errors for
# vgg11_bn, 13_bn, 16_bn, 19_bn.
mean_pse = [np.mean(Dl_11_bn), np.mean(Dl_13_bn), np.mean(Dl_16_bn), np.mean(Dl_19_bn)]
top1_err = [29.62, 28.45, 26.63, 25.76]
# https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [10.19, 9.63, 8.5, 8.15]


# In[ ]:


mean_pse


# In[ ]:


np.corrcoef(mean_pse, top1_err)


# In[ ]:


np.corrcoef(mean_pse, top5_err)


# # Compiled results table
# 
# ```
# \begin{table}[]
# \centering
# \begin{tabular}{|l|l|l|l|}
# \hline
# Architecture  & Top-1 error  &  Top-5 error  & cPSE   \\ \hline
# vgg11         & 30.98        &   11.37       & 0.04   \\ \hline
# vgg13         & 30.07        &   10.75       & 0.41   \\ \hline
# vgg16         & 28.41        &    9.62       & 0.14   \\ \hline
# vgg19         & 27.62        &    9.12       &-0.10   \\ \hline
# vgg11bn       & 29.62        &   10.19       & 0.38   \\ \hline
# vgg13bn       & 28.45        &    9.63       & 0.36   \\ \hline
# vgg16bn       & 26.63        &    8.50       & 0.18   \\ \hline
# vgg19bn       & 25.76        &    8.15       &-0.07   \\ \hline
# resnet18      & 30.24        &   10.92       &-0.19   \\ \hline
# resnet34      & 26.70        &    8.58       &-0.74   \\ \hline
# resnet50      & 23.85        &    7.13       &-1.03   \\ \hline
# resnet101     & 22.63        &    6.44       &-1.77   \\ \hline
# resnet152     & 21.69        &    5.94       &-2.29   \\ \hline
# \end{tabular}
# \
# \caption{Classification performance and cPSE of investigated architectures. The correlation between
# both classification performances and cPSE for ResNet ($\rho=0.94$) and for VGG ($\rho=0.44$, and $\rho_{bn}=0.93$
# with batch normalisation).}
# \label{corr}
# \end{table}
# ```