%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
# Importing needed libraries
import matplotlib
import networkx as nx
import random
import numpy as np
import copy
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.sparse import csgraph
from scipy.sparse.linalg import eigsh, svds
from scipy.linalg import eigh, orth, null_space
from IPython.display import display, clear_output
import ipywidgets as widgets
from ipywidgets import Button, Layout, GridspecLayout
# nbi:hide_in
# Creating the graph class
class Graph(object):
    r"""Weighted undirected graph with a designated set of "sample" nodes.

    Args:
        numNodes (int): number of nodes; they are labelled 0..numNodes-1.
        edges (list of (u, v, w)): connectivity in COO-like format — each
            entry holds the two endpoint labels plus the edge weight.
        samples (list): node labels currently marked as samples.
    """

    def __init__(self, numNodes=1, edges=None, samples=None, **kwargs):
        # Fresh lists per instance: the original mutable defaults ([]) were
        # shared between every Graph object ever constructed.
        self.edges = [] if edges is None else edges
        self.numNodes = numNodes
        self.nodes = list(range(numNodes))
        self.samples = [] if samples is None else samples
        self.pos = None  # cached node layout for drawing; filled lazily

    def adj(self):
        """Return the symmetric (numNodes x numNodes) weighted adjacency matrix."""
        A = np.zeros([self.numNodes, self.numNodes])
        for row in self.edges:
            i = self.nodes.index(row[0])
            j = self.nodes.index(row[1])
            A[i, j] = row[2]
            A[j, i] = row[2]
        return A

    def degrees(self):
        """Weighted degree of every node (column sums of the adjacency matrix)."""
        return np.sum(self.adj(), axis=0)

    def laplacian(self):
        """Combinatorial graph Laplacian L = D - A."""
        # Build the adjacency once instead of twice (degrees() rebuilt it).
        Adj = self.adj()
        return np.diag(np.sum(Adj, axis=0)) - Adj

    def add_node(self):
        """Append one isolated node with a fresh label (max existing + 1)."""
        self.numNodes += 1
        # Guard the empty case so add_node works even after all nodes were deleted.
        self.nodes.append(max(self.nodes) + 1 if self.nodes else 0)

    def add_edge(self, edge):
        """Append an (u, v, w) edge; None is silently ignored."""
        if edge is not None:
            self.edges.append(edge)

    def add_sample(self, node):
        """Mark `node` as a sample (no duplicates)."""
        if node not in self.samples:
            self.samples.append(node)

    def del_sample(self, node):
        """Unmark `node` as a sample if it was one."""
        if node in self.samples:
            self.samples.remove(node)

    def del_node(self, node):
        """Delete `node` together with all incident edges and its sample mark."""
        if node in self.nodes:
            self.numNodes -= 1
            self.edges = [item for item in self.edges
                          if item[0] != node and item[1] != node]
            self.nodes.remove(node)
            self.del_sample(node)

    def del_edge(self, pair):
        """Remove the edge between pair[0] and pair[1], in either orientation."""
        rev = (pair[1], pair[0])
        self.edges[:] = [item for item in self.edges
                         if item[:2] != pair and item[:2] != rev]

    def change_edge(self, newedge):
        """Replace an existing edge's weight with newedge's; no-op if absent."""
        key = (newedge[0], newedge[1])
        for edge in self.edges:
            if (edge[0], edge[1]) == key or (edge[1], edge[0]) == key:
                self.del_edge(key)
                self.add_edge(newedge)
                # Fix: stop here. The original kept iterating the list it had
                # just mutated and re-processed the edge it had just added.
                return

    def reset(self):
        """Reset to a single-node, edgeless graph (samples are kept, as before)."""
        self.numNodes = 1
        self.nodes = [0]
        self.edges = []
        self.pos = None

    def lapl_eigen(self, dim=None):
        """Return the `dim` smallest Laplacian eigenpairs (all when dim is None)."""
        Lap = self.laplacian()
        if dim is None:
            dim = self.numNodes  # fix: the original read the global G here
        vals, U = eigh(Lap, subset_by_index=[0, dim - 1])
        return vals, U

    def adjacent2(self):
        """Return adjacency as a dict: node -> list of neighbouring nodes."""
        adjacency = {node: [] for node in self.nodes}
        for edge in self.edges:
            adjacency[edge[0]].append(edge[1])
            adjacency[edge[1]].append(edge[0])
        return adjacency

    def is_connected(self):
        """Check connectivity with a breadth-first search."""
        if not self.nodes:
            return True
        adjacency = self.adjacent2()
        # Fix: start from an existing node (label 0 may have been deleted)
        # and count the start itself, so a single-node graph is connected.
        start = self.nodes[0]
        found = {node: False for node in self.nodes}
        found[start] = True
        count = 1
        queue = [start]
        head = 0  # index-based queue avoids O(n) pop(0)
        while head < len(queue):
            for nb in adjacency[queue[head]]:
                if not found[nb]:
                    found[nb] = True
                    count += 1
                    queue.append(nb)
            head += 1
        return count == self.numNodes

    def draw(self, ax, output=None, update=False, title=None, show_eigen=False,
             k=None, labels=None):
        """Draw the graph on `ax`, optionally inside an ipywidgets Output.

        NOTE(review): rendering relies on the module-level `fig` figure;
        consider passing it in explicitly.
        """
        Gnx = nx.Graph()
        Gnx.add_nodes_from(self.nodes)
        Gnx.add_weighted_edges_from(self.edges)
        if self.pos is None or update:
            self.pos = nx.spring_layout(Gnx)
        # Sampled nodes are pale yellow; others use their cluster colour
        # (when labels are given) or a fixed blue.
        if labels is not None:
            if k is None:
                k = len(set(labels))
            cmap = plt.cm.get_cmap('tab20', k)
            # NOTE: indexing color_label by node assumes labels/nodes are 0..n-1.
            color_label = cmap(labels)
            # Fix: use self, not the global G, so any instance draws itself.
            node_colors = [(1.0, 1.0, 0.7, 1.0) if node in self.samples
                           else color_label[node] for node in self.nodes]
            node_edges = [color_label[node] for node in self.nodes]
        else:
            node_colors = [(1.0, 1.0, 0.7, 1.0) if node in self.samples
                           else (0.15, 0.5, 0.7, 1.) for node in self.nodes]
            node_edges = (0.15, 0.5, 0.7, 1.)

        def _render():
            # Shared rendering path for both the direct and the widget case.
            ax.cla()
            nx.draw_networkx(Gnx, ax=ax, node_color=node_colors,
                             edgecolors=node_edges, node_size=400,
                             pos=self.pos)
            ax.set_title(title)
            display(fig)

        if output is None:
            _render()
        else:
            with output:
                _render()
        if show_eigen:
            eig, U = self.lapl_eigen()
            display("Laplacian eigenvalues are")
            display(eig)
            display("Laplacian eigenvectors are")
            display(U)
def dynamic(A, numIter, V):
    """Stack the orbit V, A V, ..., A^(numIter-1) V column-wise.

    Returns an array of shape (A.shape[0], numIter * V.shape[1]) whose
    columns are grouped per column v of V as [v, A v, ..., A^(numIter-1) v].
    """
    orbit = [V]
    for _ in range(numIter - 1):
        orbit.append(A @ orbit[-1])
    stacked = np.concatenate(orbit, axis=0)
    return stacked.reshape(A.shape[0], numIter * V.shape[1], order="F")
def gds(G, pw_dim, numIter, output, options=0):
    """Frame bounds of the dynamical-sampling system for G's current samples.

    Projects the iterated sampling vectors onto the pw_dim-dimensional
    Paley-Wiener space and reports the extreme singular values of the frame
    operator.  With output=None the condition number is returned instead of
    being displayed.  options=0 iterates the Laplacian, options=1 the
    adjacency matrix.
    """
    # Node-sampling matrix: one indicator column per sampled node.
    S = np.zeros([G.numNodes, len(G.samples)])
    for col, node in enumerate(G.samples):
        S[G.nodes.index(node), col] = 1
    # Paley-Wiener eigenvectors of the Laplacian.
    _, U = G.lapl_eigen(pw_dim)
    # Iterate the sampling vectors under the chosen evolution operator.
    if options == 0:
        B = dynamic(G.laplacian(), numIter, S)
    if options == 1:
        B = dynamic(G.adj(), numIter, S)
    # Project onto the PW space and form the frame operator.
    projected = U.transpose() @ B
    Frame_op = projected @ projected.transpose()
    # Extreme singular values give the lower/upper frame bounds.
    low = svds(Frame_op, k=1, which='SM', return_singular_vectors=False)[0]
    up = svds(Frame_op, k=1, which='LM', return_singular_vectors=False)[0]
    if output is None:
        return np.linalg.cond(Frame_op)
    with output:
        display("Lower frame bound = {:.2e}".format(low), "Upper frame bound = {:.2e}".format(up))
        if low != 0:
            display("Condition number = {:.2e}".format(up/low))
# Generate a random connected graph
def random_connected_graph(numNodes, numEdges):
    """Uniformly sample a connected simple graph via rejection sampling.

    Repeatedly draws numEdges distinct unit-weight edges at random and keeps
    the first draw whose graph is connected.

    Raises:
        ValueError: if numEdges cannot yield a connected simple graph.
    """
    G = Graph(numNodes)
    if numEdges < numNodes - 1:
        raise ValueError("Not enough edges")
    # Fix: a simple undirected graph on n nodes has at most n*(n-1)/2 edges;
    # the original bound of n*n let random.sample fail with an opaque error.
    if numEdges > numNodes * (numNodes - 1) // 2:
        raise ValueError("Too many edges")
    all_edges = [(i, j, 1) for i in range(numNodes) for j in range(i)]
    while True:
        G.edges = random.sample(all_edges, numEdges)
        if G.is_connected():
            break
    return G
# Samples from the cluster using sklearn
def Kmeans_sklrn(X, k):
    """Cluster the rows of X into k groups with scikit-learn's k-means.

    Returns (labels, clusters): labels[i] is the cluster index of row i and
    clusters maps every cluster index 0..k-1 to the list of its row indices
    (possibly empty).
    """
    model = KMeans(n_clusters=k).fit(X)
    labels = list(model.labels_)
    clusters = {c: [] for c in range(k)}
    for row, lab in enumerate(labels):
        clusters[lab].append(row)
    return labels, clusters
def clustered_samples(clusters, degrees, order="min"):
    """Pick one representative node per cluster by degree.

    Args:
        clusters (dict): cluster index -> list of node indices.
        degrees (np.ndarray): per-node degrees, indexable by a list of nodes.
        order (str): "min" picks the smallest-degree node in each cluster,
            "max" the largest.  (The original docstring said "largest" even
            though the default picks the minimum.)

    Raises:
        ValueError: for an unknown `order` (the original died with an
            UnboundLocalError instead).
    """
    if order == "min":
        pick = np.argmin
    elif order == "max":
        pick = np.argmax
    else:
        raise ValueError("order must be 'min' or 'max'")
    return [clusters[key][pick(degrees[clusters[key]])] for key in clusters.keys()]
# Greedy sample
def dynamic_basis(G, numIter, tol=0.1):
    """Orthonormal basis of each node's dynamical column space.

    For every node, iterates the Dirac delta at that node numIter times under
    the Laplacian and orthonormalizes the resulting columns (tol is forwarded
    to scipy's orth as the rank cutoff).  Returns a dict node -> basis matrix.
    """
    Lap = G.laplacian()
    basis = {}
    for node in G.nodes:
        # Dirac vector supported on this node.
        delta = np.zeros([G.numNodes, 1])
        delta[G.nodes.index(node)] = 1
        # Orthonormalized iterated system.
        basis[node] = orth(dynamic(Lap, numIter, delta), tol)
    return basis
def greedy_node(G, samples, basis, U, criterion):
    """Greedily add one node to `samples` and deflate the recovery space U.

    Scores every unsampled node by how its dynamical basis correlates with
    the current recovery space: criterion "frob" maximizes the Frobenius
    norm of the correlation, "cond" minimizes its condition number.  The
    chosen node's contribution is then projected out of U.
    Returns the updated (samples, U) pair; `samples` is mutated in place.
    """
    scores = {}
    for node in G.nodes:
        if node in samples:
            continue
        corr = U.transpose() @ basis[node]
        if criterion == "frob":
            scores[node] = np.linalg.norm(corr)
        elif criterion == "cond":
            scores[node] = np.linalg.cond(corr)
    if criterion == "frob":
        # Largest correlation wins.
        chosen = max(scores, key=scores.get)
    elif criterion == "cond":
        # Best-conditioned candidate wins.
        chosen = min(scores, key=scores.get)
    samples.append(chosen)
    # Deflate: remove the chosen node's directions from the recovery space.
    coeff = U.transpose() @ basis[chosen]
    Q = orth(coeff)
    complement = np.eye(U.shape[1]) - Q @ Q.transpose()
    U = orth(U @ complement)
    return samples, U
def greedy_samples(G, pw_dim, numIter, numSamples, criterion="frob"):
    """Greedily select numSamples nodes for dynamical sampling.

    Starts from the pw_dim-dimensional Paley-Wiener eigenspace and calls
    greedy_node once per sample, stopping early if the recovery space is
    exhausted.  Returns the list of chosen nodes.
    """
    _, U = G.lapl_eigen(pw_dim)
    basis = dynamic_basis(G, numIter)
    samples = []
    for _ in range(numSamples):
        samples, U = greedy_node(G, samples, basis, U, criterion)
        if U.shape[1] == 0:
            # Nothing left to recover — further samples are pointless.
            break
    return samples
# Sample using the A^n
def degree_sample(Adj, k, s, order="min"):
    """Pick s nodes ranked by the row sums of Adj^k.

    The row sum of the k-th matrix power acts as an importance score
    (k=1 reduces to the weighted degree).  order="max" returns the s most
    important nodes, "min" the s least important; the returned list is in
    argpartition (unsorted) order.
    """
    importance = np.linalg.matrix_power(Adj, k) @ np.ones(Adj.shape[0])
    if order == "max":
        chosen = np.argpartition(importance, -s)[-s:]
    if order == "min":
        chosen = np.argpartition(importance, s)[:s]
    return chosen.tolist()
# Sample using largest minor
from itertools import combinations
def all_subsets(N, s):
    """Return every size-s subset of {0, ..., N-1} as a list of tuples.

    Subsets come out in the lexicographic order produced by
    itertools.combinations.  (The local variable no longer shadows the
    builtin ``set``.)
    """
    indices = np.arange(N)
    return list(combinations(indices, s))
def minor_sample_double(Adj, k, s):
Mat = np.linalg.matrix_power(Adj, k)
samples_list = samples(Adj.shape[0],s)
numOptions = len(samples_list)
if numOptions>10000:
raise ValueError("Matrix is too large")
vals = np.zeros([N_options, numOptions])
for i in range(numOptions):
for j in range(numOptions):
row_slice = samples_list[i]
column_slice = samples_list[j]
vals[i,j] = np.sum(Mat[row_slice, columns_slice])
i,j = np.unravel_index(np.argmin(vals), vals.shape)
return samples_list[i], samples_list[j]
def minor_sample(Adj, k, s):
    """Return the size-s index subset whose principal minor of Adj^k has
    the smallest entry sum.

    Exhaustive search over all subsets; raises ValueError when there are
    more than 1000000 candidates.
    """
    power = np.linalg.matrix_power(Adj, k)
    subsets = all_subsets(Adj.shape[0], s)
    if len(subsets) > 1000000:
        raise ValueError("Too large")
    # Score each subset by the entry sum of its principal submatrix.
    totals = [np.sum(power[np.ix_(sub, sub)]) for sub in subsets]
    return subsets[int(np.argmin(totals))]
# The figure
# One shared matplotlib figure/axes reused by every Graph.draw call below;
# the axes frame (spines) is hidden so only the graph drawing shows.
# NOTE: Graph.draw and the display calls depend on this module-level `fig`.
fig, ax = plt.subplots(figsize=(10, 10))
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["left"].set_visible(False)
plt.close()
# Widget area that collects all textual output (frame bounds, eigenvalues).
output = widgets.Output()
# generate a random graph
numNodes=30
numEdges=36
G = random_connected_graph(numNodes, numEdges)
# set up samples for different algorithms and plot
# numSamples: sample-set size; numIter: dynamical iterations; pwDim: Paley-
# Wiener space dimension used by gds for the frame-bound computation.
numSamples=5
numIter=10
pwDim=5
output.clear_output()
# add eigenvalues to the output
eig, U = G.lapl_eigen()
with output:
    display("Laplacian eigenvalues")
    display(eig)
# sampled with spectral clusters
# Spectral clustering: k-means on the first numSamples Laplacian
# eigenvectors, then one representative (minimum degree) per cluster.
_, X = G.lapl_eigen(numSamples)
labels, clusters = Kmeans_sklrn(X, k=numSamples)
degrees = G.degrees()
G.samples = clustered_samples(clusters, degrees, order="min")
gds(G, pwDim, numIter, output)
G.draw(ax, output, labels=labels, title="Clustered")
# greedy sample with frob
G.samples = greedy_samples(G, pwDim, numIter, numSamples, criterion="frob")
gds(G, pwDim, numIter, output)
G.draw(ax, output, title="Greedy")
# # greedy sample with cond
# G.samples = greedy_samples(G, pwDim, numIter, numSamples, criterion="cond")
# gds(G, pwDim, numIter, output)
# G.draw(ax, output)
# best sample
# Brute force over every size-numSamples subset: keep the one whose frame
# operator has the smallest condition number (gds returns it when
# output=None).
samples_list = all_subsets(G.numNodes, numSamples)
numOptions = len(samples_list)
if numOptions>1000000:
    raise ValueError("Too large")
vals = np.zeros(numOptions)
for i in range(numOptions):
    G.samples = samples_list[i]
    vals[i] = gds(G, pwDim, numIter, output=None)
ind = np.argmin(vals)
G.samples = samples_list[ind]
gds(G, pwDim, numIter, output)
G.draw(ax, output, title="Best")
# # sampled with random samples
# G.samples = random.sample(G.nodes, numSamples)
# gds(G, pwDim, numIter, output)
# G.draw(ax, output)
# sampled with A^n
# Importance sampling by row sums of A^k (here k = numSamples).
G.samples = degree_sample(G.adj(), numSamples, numSamples, order="min")
gds(G, pwDim, numIter, output)
G.draw(ax, output, title="A^n")
# # sampled with A^n minors
# G.samples = minor_sample(G.adj(), numSamples, numSamples)
# gds(G, pwDim, numIter, output)
# G.draw(ax, output)
display(output)
# NOTE(review): bare Output() below looks like a leftover notebook artifact —
# it constructs a widget that is never assigned or displayed; confirm intent.
Output()