import numpy as np
import itertools
from scipy.sparse import csr_matrix
import networkx as nx
from scipy.sparse import diags, bmat
from scipy.sparse.linalg import eigsh
from scipy.io import mmread
from sklearn.cluster import KMeans
def DCSBM(C_matrix,c, label, theta):
''' Function that generates the adjacency matrix A with n nodes and k communities
Use: A, label = adj(C_matrix,c, label, theta)
Input:
* C_matrix (array of size k x k) : affinity matrix of the network C
* c (scalar) : average connectivity of the network
* label (array of size n) : vector containing the label of each node
* theta (array of size n) : vector with the intrinsic probability connection of each node
Output:
* A (sparse matrix of size n x n) : symmetric adjacency matrix
* label (array): label vector of the nodes in the giant component if giant = True
'''
# number of communities
k = len(np.unique(label))
fs = list()
ss = list()
n = len(theta)
c_v = C_matrix[label].T # (k x n) matrix where we store the value of the affinity wrt a given label for each node
first = np.random.choice(n,int(n*c),p = theta/n) # we choose the nodes that should get connected wp = theta_i/n: the number of times the node appears equals to the number of connection it will have
for i in range(k):
v = theta*c_v[i]
first_selected = first[label[first] == i] # among the nodes of first, select those with label i
fs.append(first_selected.tolist())
second_selected = np.random.choice(n,len(first_selected), p = v/np.sum(v)) # choose the nodes to connect to the first_selected
ss.append(second_selected.tolist())
fs = list(itertools.chain(*fs))
ss = list(itertools.chain(*ss))
fs = np.array(fs)
ss = np.array(ss)
edge_list = np.column_stack((fs,ss)) # create the edge list from the connection defined earlier
edge_list = np.unique(edge_list, axis = 0) # remove edges appearing more than once
edge_list = edge_list[edge_list[:,0] > edge_list[:,1]] # keep only the edges such that A_{ij} = 1 and i > j
A = csr_matrix((np.ones(len(edge_list[:,0])), (edge_list[:,0], edge_list[:,1])), shape=(n, n))
A = A + A.transpose() # symmetrize the matrix
return A, label
def ErdosRenyi(n, c):
'''This function returns the sparse adjacency matrix of a Erdos-Renyi random graph
Use: A = ErdosRenyi(n, c)
Input:
* n (int): number of nodes
* c (float): expected average degree
Output:
* A (scipy sparse array): (n x n) sparse adjacency matrix
'''
A, _ = DCSBM(np.ones(([c])),c, np.zeros(n).astype(int), np.ones(n))
return A
def ComputeModularity(A, label):
'''This function computes the modularity of a partition on a network.
Use: mod = ComputeModularity(A, label)
Input:
* A (scipy sparse array): graph adjacency matrix
* label (array): label vector
Output:
* mod (float): modularity
'''
all_labels = np.unique(label)
mod = 0
m = np.sum(A)
for a in all_labels:
idx = label == a
Σ_in = np.sum(A[idx][:,idx])
Σ_tot= np.sum(A[idx])
mod += Σ_in/m - (Σ_tot/m)**2
return mod
def BuildBp(A):
'''This function ccreates the 2nx2n matrix Bp sharing the spectrum with the non-backtracking matrix
Use: Bp = BuildBp(A)
Input:
* A (scipy sparse array): graph adjacency matrix
Outoput:
* Bp (scipy sparse array)
'''
n, _ = A.shape
D = diags(A@np.ones(n))
Id = diags(np.ones(n))
Bp = bmat([[A, -Id], [D-Id, None]])
return Bp
def SpectralClustering(M, which, k):
'''
This algorithm performs spectral clustering
Use: label = SpectralClustering(M, which, k)
Input:
* M (scipy sparse array): matrix used to perform SC
* which (string): sets which eigenvalues to compute: 'LA' are the largest, 'SA' the smallest
* k (int): number of communities
Output:
* label (array): estimated label vector
'''
γ, X = eigsh(M, k = k, which = which)
if which == 'LA':
idx = np.argsort(γ)[::-1]
else:
idx = np.argsort(γ)
X = X[:,idx]
X = X[:,1:]
label = KMeans(n_clusters = k).fit(X).labels_
return label