13. Temporal networks#
In this notebook we show how to perform some basic operations on temporal graphs, reproducing the main plots appearing in the notes. Besides the classical packages, we will use some functions contained in the Tnet.py
file, whose content is provided in the next notebook. If you want to run this notebook, create a folder named src
and add to it a file named Tnet.py
with those codes inside.
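For reference, the folder layout assumed by the import statement and the data paths used below is roughly the following (this is inferred from the code in this notebook, not prescribed anywhere else):
# expected layout, inferred from the import and the read_csv paths used below
# notebook.ipynb
# src/
#     Tnet.py            <- the functions listed in the next notebook
# Data/
#     SP/
#         School_tij.csv, Conference_tij.csv, ...        (tij format)
#         School_tijtau.csv, Conference_tijtau.csv, ...  (tijtau format)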
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.cm as cm
from copy import copy
import networkx as nx
# sparse-matrix utilities used below to build the aggregated adjacency matrices
from scipy.sparse import csr_matrix, diags
# import the codes from the src folder
from src.Tnet import *
import warnings
warnings.filterwarnings("ignore")
# load the datasets in the tij format
tij_school = pd.read_csv('Data/SP/School_tij.csv')
tij_conference = pd.read_csv('Data/SP/Conference_tij.csv')
tij_office = pd.read_csv('Data/SP/Office_tij.csv')
tij_village = pd.read_csv('Data/SP/Village_tij.csv')
# we print the first rows of one of these datasets to provide an example
tij_school.head()
# load the datasets in the tijtau format
tijtau_school = pd.read_csv('Data/SP/School_tijtau.csv')
tijtau_conference = pd.read_csv('Data/SP/Conference_tijtau.csv')
tijtau_office = pd.read_csv('Data/SP/Office_tijtau.csv')
tijtau_village = pd.read_csv('Data/SP/Village_tijtau.csv')
# we print the first rows of one of these datasets to provide an example
tijtau_school.head()
13.1. Create and plot the aggregated graph#
We consider the school
dataset and create its static aggregated version. We plot it, color-coding the nodes according to their class affiliation. As shown in the cell above, the dataframe tij_school
contains two additional columns (C1
and C2
) that indicate the class to which i
and j
belong, respectively.
# copy tij_school into df. This is done for two reasons:
# * first, if you want to repeat the analysis on a different graph, you only have to change the name once;
# * secondly, remember that writing x = y in Python binds a reference to the same object, not a copy. Using the copy() command you avoid modifying the original input variable.
df = copy(tij_school)
# add a weight column to the dataframe: in the tij representation all contacts have equal weight
df['w'] = 1
# aggregate the contacts by the interacting indices
df = df.groupby(['i', 'j']).sum().reset_index()[['i', 'j', 'w']]
df.head()
|   | i   | j   | w   |
|---|-----|-----|-----|
| 0 | 600 | 609 | 53  |
| 1 | 600 | 610 | 20  |
| 2 | 600 | 613 | 567 |
| 3 | 600 | 616 | 1   |
| 4 | 600 | 623 | 4   |
In the dataframe df
we built, we removed the column t
and encoded in w
the number of appearances of each link. We now build a weighted adjacency matrix of this graph using the scipy
package and then pass it to networkx
to obtain the graph. To do so, we must first create a dictionary that converts the indices contained in df
into integers between \(0\) and \(n-1\), where \(n\) is the number of nodes.
# make a list of all nodes
all_nodes = np.unique(df[['i', 'j']])
# count how many nodes there are
n = len(all_nodes)
# create the dictionary between each node id and an integer
NodeMapper = dict(zip(all_nodes, np.arange(n)))
# map the node ids to the integers
df.i = df.i.map(lambda x: NodeMapper[x])
df.j = df.j.map(lambda x: NodeMapper[x])
df.head()
|   | i | j  | w   |
|---|---|----|-----|
| 0 | 0 | 7  | 53  |
| 1 | 0 | 8  | 20  |
| 2 | 0 | 11 | 567 |
| 3 | 0 | 14 | 1   |
| 4 | 0 | 21 | 4   |
# convert the edge list into a sparse matrix: df.i, df.j are the coordinates of the non-zero entries of W and df.w is the value
W = csr_matrix((df.w, (df.i, df.j)), shape = (n,n))
# symmetrize the matrix
W = W + W.T
# build the graph from the matrix W
G = nx.from_scipy_sparse_array(W)
Now we plot the aggregated graph.
# this returns a dictionary with the x and y coordinates of each node, chosen so that densely connected nodes lie close to one another, which gives a better plot
pos = nx.spring_layout(G)
# we set the node size proportional to the total interaction time
node_size = 0.25*(W@np.ones(n))
edge_size = 0.3
# create the class label vector: read the class from C1 if the node appears as i, otherwise from C2
classes = []
for node in all_nodes:
    _df = tij_school[tij_school.i == node]
    if len(_df) > 0:
        classes.append(_df.iloc[0].C1)
    else:
        _df = tij_school[tij_school.j == node]
        classes.append(_df.iloc[0].C2)
# plot functions
all_classes = np.unique(classes)
ClassMapper = dict(zip(all_classes, np.arange(len(all_classes))))
norm = mpl.colors.Normalize(vmin = 0, vmax = len(all_classes))
cmap = cm.Set1
m = cm.ScalarMappable(norm = norm, cmap = cmap)
colors = [m.to_rgba(ClassMapper[c]) for c in classes]
plt.figure(figsize = (12,8))
nx.draw_networkx_edges(G, pos, alpha = 0.5, width = edge_size, edge_color = "forestgreen")
nx.draw_networkx_nodes(G, pos, node_size = node_size, node_color = colors, alpha = 0.8, edgecolors = 'k')
plt.axis('off')
# uncomment to save
# plt.savefig('Figures/graphPlot.pdf', bbox_inches = 'tight', dpi = 400)
plt.show();
13.2. Reachability#
As we mentioned in the course, the reachability (i.e. the fraction of nodes that can be reached by a walk on the graph, as a function of time) is smaller in temporal graphs than in static ones. Here we consider the \(4\) SocioPatterns datasets and, for each of them, we compare the reachability over the first \(8\) hours of measurement for the temporal graph and its aggregated counterpart.
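The cell below relies on the ComputeReachabilityVector and ReachabilityFromAdjacencyList functions from Tnet.py. To fix ideas, here is a simplified sketch (not the Tnet implementation; the name reachability_sketch and the convention that a walk may wait at a node are assumptions) of how reachability can be propagated through a list of adjacency matrices, one per time step:
from scipy.sparse import identity

def reachability_sketch(Wt):
    # Wt: list of (n x n) sparse adjacency matrices, one per time step
    n = Wt[0].shape[0]
    # R[i, j] != 0 means that j can be reached from i; initially each node reaches itself
    R = identity(n, format = 'csr')
    reach = []
    for W in Wt:
        # extend every existing time-respecting path with the edges active at this time step
        R = R + R@W
        R.data = np.ones_like(R.data)
        # store the fraction of reachable (i, j) pairs at this time
        reach.append(R.nnz/n**2)
    return np.array(reach)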
T = 1440 # first 8 hours of measurement, with one time step every 20 seconds (8*3600/20 = 1440)
names = ['Conference', 'Office', 'School', 'Village']
dfv = [tij_conference, tij_office, tij_school, tij_village]
R, Ragg = [], []
for counter, tij in enumerate(dfv):
    print(f'Dataset: {names[counter]}')
    # select iteratively one of the four graphs and limit the observations to the first 8 hours
    df = copy(tij)
    df = df[df.t < T]
    # create a mapping between the nodes and integers
    all_nodes = np.unique(df[['i', 'j']])
    n = len(all_nodes)
    NodeMapper = dict(zip(all_nodes, np.arange(n)))
    df.i = df.i.map(lambda x: NodeMapper[x])
    df.j = df.j.map(lambda x: NodeMapper[x])
    # compute the reachability vector
    R.append(ComputeReachabilityVector(df, T))
    ################################################################
    # create the aggregated (unweighted) version of the graph
    df_g = df.groupby(['i', 'j']).mean().reset_index()
    W = csr_matrix((np.ones(len(df_g)), (df_g.i, df_g.j)), shape = (n,n))
    W = W + W.T + diags(np.ones(n))
    Wt = [W for t in range(T)]
    # compute the reachability vector
    Ragg.append(ReachabilityFromAdjacencyList(Wt))
Dataset: Conference
Dataset: Office
Dataset: School
Dataset: Village
Progress: 99%
# PLOT
fig, ax = plt.subplots(2,2, figsize = (8, 6))
tv = np.arange(T)
for counter in range(4):
    a, b = int(counter/2), counter%2
    ax[a,b].plot(np.log(tv+1), R[counter], color = 'dodgerblue', label = 'temporal', linewidth = 3)
    ax[a,b].plot(np.log(tv+1), Ragg[counter], color = 'crimson', linestyle = '--', label = 'aggregated', linewidth = 3)
    if b == 0:
        ax[a,b].set_ylabel('Reachability', fontsize = 18)
    if a == 1:
        ax[a,b].set_xlabel('log(t)', fontsize = 18)
        ax[a,b].tick_params(axis = 'x', labelsize = 15)
    else:
        ax[a,b].tick_params(axis = 'x', labelsize = 0)
    ax[a,b].tick_params(axis = 'y', labelsize = 15)
    ax[a,b].set_title(names[counter], fontsize = 18)
ax[0,1].legend(fontsize = 14)
plt.tight_layout()
# uncomment to save
# plt.savefig('Figures/tnet_reachability.pdf', bbox_inches = 'tight', dpi = 400)
plt.show();
13.3. Contact duration and interevent statistics#
We now look at the distribution of the event duration and of the interevent duration.
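The interevent statistics are computed by the ComputeIntereventStatistic function of Tnet.py. As a rough sketch only (the helper name interevent_sketch is hypothetical, and we assume the tijtau columns i, j, t, τ, taking the interevent time of a pair as the gap between the end of one contact and the start of the next), the computation amounts to the following:
def interevent_sketch(df):
    gaps = []
    # group the contacts by the interacting pair and sort them in time
    for _, g in df.groupby(['i', 'j']):
        g = g.sort_values('t')
        starts = g.t.values
        ends = g.t.values + g['τ'].values
        # gap between the end of one contact and the start of the next one
        gaps.extend(starts[1:] - ends[:-1])
    return np.array(gaps)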
names = ['School', 'Conference', 'Office', 'Village']
dfv = [tijtau_school, tijtau_conference, tijtau_office, tijtau_village]
intervals = []
for i, df in enumerate(dfv):
    print(names[i])
    intervals.append(ComputeIntereventStatistic(copy(df)))
    print('\n')
School
Progress: 99%
Conference
Progress: 99%
Office
Progress: 99%
Village
Progress: 99%
# PLOT
fig, ax = plt.subplots(1, 2, figsize = (15,5))
markers = ['o', 'd', 'D', 's']
norm = mpl.colors.Normalize(vmin = 0, vmax = 3)
cmap = cm.Accent
m = cm.ScalarMappable(norm = norm, cmap = cmap)
allt = np.logspace(0, 5, 100)
for i, df in enumerate(dfv):
    # compute the probability that the event duration is larger than a threshold
    cdf = np.array([np.mean(df.τ >= t) for t in allt])
    idx = cdf > 0
    ax[0].scatter(np.log(allt[idx]), np.log(cdf[idx]), marker = markers[i], s = 120, color = m.to_rgba(i), edgecolor = 'white', alpha = .7, label = names[i])
    # keep only the interevent durations below 1000. Those above this number are likely associated with different days
    x = intervals[i][intervals[i] < 1000]
    cdf = np.array([np.mean(x >= t) for t in allt])
    idx = cdf > 0
    ax[1].scatter(np.log(allt[idx]), np.log(cdf[idx]), marker = markers[i], s = 120, color = m.to_rgba(i), edgecolor = 'white', alpha = .7)
ax[0].tick_params(axis = 'both', which = 'major', labelsize = 20)
ax[1].tick_params(axis = 'both', which = 'major', labelsize = 20)
ax[0].legend(fontsize = 18)
ax[0].set_xlabel(r'${\rm log}(\tau)$', fontsize = 24)
ax[0].set_ylabel(r'${\rm log(1-cdf)}$', fontsize = 24)
ax[1].set_xlabel(r'${\rm log}(\tau_{\rm inter})$', fontsize = 24)
ax[1].set_ylabel(r'${\rm log(1-cdf)}$', fontsize = 0)
plt.tight_layout()
# uncomment to save
# plt.savefig('Figures/tnet_pl.pdf', bbox_inches = 'tight', dpi = 400)
plt.show();
13.4. Burstiness#
We now visualize the effect of burstiness. We take the Conference
network, select one of its nodes \(i\), and look at the time series of its interaction times. We then show a simple modeled quantity \(\mathscr{Q}\) that increases by one for every contact \(i\) has and decreases by \(3\%\) at each time stamp in which there is no interaction.
We compare the results with synthetically generated data in which the time series is obtained from a Poisson process. The results show that, even though the number of contact events is very similar, \(\mathscr{Q}\) reaches much larger values for the bursty dynamics.
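In formulas, restating the model implemented in the cell below (with \(x_0 = 1\) and \(\alpha = 0.03\)), the modeled quantity evolves as

\[
\mathscr{Q}(t) = \begin{cases}
\mathscr{Q}(t-1) + x_0 & \text{if } i \text{ has a contact at time } t\\
(1-\alpha)\,\mathscr{Q}(t-1) & \text{otherwise,}
\end{cases}
\]

and the burstiness coefficient computed from the interevent times, with mean \(\mu\) and standard deviation \(\sigma\), is

\[
B = \frac{\sigma/\mu - 1}{\sigma/\mu + 1} = \frac{\sigma - \mu}{\sigma + \mu},
\]

which is close to \(-1\) for a regular (periodic) contact sequence, close to \(0\) for Poisson-like dynamics, and approaches \(1\) for strongly bursty dynamics.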
# model parameters
x0 = 1
α = 0.03
T = 2500
########################################
########### POISSON DYNAMICS ###########
########################################
tv_Poi = [0]
flag = 0
# create the sequence of event times, with Poisson-distributed interevent times
while flag == 0:
    newt = tv_Poi[-1] + np.random.poisson(7)
    tv_Poi.append(newt)
    if newt > T:
        flag = 1
tv_Poi = np.array(tv_Poi)
# generate the signal Q
Q_Poi = np.zeros(T)
for t in range(1, T):
    if t in tv_Poi:
        Q_Poi[t] = Q_Poi[t-1] + x0
    else:
        Q_Poi[t] = Q_Poi[t-1]*(1-α)
# compute the burstiness
inter = (tv_Poi - np.roll(tv_Poi, 1))[1:]
s = np.std(inter)/np.mean(inter)
B = (s-1)/(s+1)
print('Poissonian case')
print(f'Number of events: {len(tv_Poi)}')
print(f'Burstiness: B = {B}')
print('\n')
########################################
############## REAL DATA ###############
########################################
# select only the contacts involving node index
df = copy(tij_conference)
index = 1467
idx = np.logical_or(df.i == index, df.j == index)
df = df[idx]
# generate the signal Q
Q = np.zeros(T)
tvr = np.sort(df.t.values)
for t in range(1, T):
    if t in tvr:
        Q[t] = Q[t-1] + x0
    else:
        Q[t] = Q[t-1]*(1-α)
# compute the burstiness
inter = (df.t - np.roll(df.t, 1))[1:].values
s = np.std(inter)/np.mean(inter)
B = (s-1)/(s+1)
print('Real data')
print(f'Number of events: {len(df)}')
print(f'Burstiness: B = {B}')
Poissonian case
Number of events: 359
Burstiness: B = -0.44566561125641657
Real data
Number of events: 349
Burstiness: B = 0.6981801093753959
# PLOT
fig, ax = plt.subplots(2, 2, figsize = (14, 10))
for t in tv_Poi:
    if t < T:
        ax[0,0].plot([t, t], [0,1], color = 'white')
ax[0,0].set_facecolor('silver')
ax[0,0].set_ylim(-.5, 1.5)
ax[0,0].tick_params(axis = 'both', labelsize = 0)
ax[0,0].text(0, 1.6, '(a)', fontsize = 25)
######################################################
for t in df.t:
    if t < T:
        ax[0,1].plot([t, t], [0,1], color = 'white')
ax[0,1].set_facecolor('silver')
ax[0,1].set_ylim(-.5, 1.5)
ax[0,1].tick_params(axis = 'both', labelsize = 0)
ax[0,1].text(0, 1.6, '(b)', fontsize = 25)
#######################################################
ax[1, 0].plot(Q_Poi, linewidth = 2, color = 'orange')
ax[1,0].tick_params(axis = 'both', labelsize = 15)
ax[1,0].set_xlabel('time', fontsize = 25)
ax[1,0].set_ylabel(r'$\mathscr{Q}$', fontsize = 40)
ax[1,0].text(0, 62, '(c)', fontsize = 25)
ax[1,0].set_ylim(-0.3,60)
ax[1, 0].axhline(20, color = 'forestgreen', linewidth = 3, linestyle = '--')
# ##################################################
ax[1,1].plot(Q, linewidth = 2, color = 'orange')
ax[1,1].tick_params(axis = 'x', labelsize = 15)
ax[1,1].tick_params(axis = 'y', labelsize = 0)
ax[1,1].set_xlabel('time', fontsize = 25)
ax[1,1].text(0, 62, '(d)', fontsize = 25)
ax[1,1].set_ylim(-1,60)
ax[1,1].axhline(20, color = 'forestgreen', linewidth = 3, linestyle = '--')
plt.tight_layout()
# uncomment to save
# plt.savefig('Figures/tnet_burst.pdf', bbox_inches = 'tight', dpi = 400)
plt.show();