5. Basics of network analysis#
import networkx as nx
import seaborn as sns
%pylab inline
%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib
5.1. Connectivity and clustering of a graph#
We study the network of coauthorships of Astro-Ph, from the SNAP database.
filepath = "./../datasets/ca-AstroPh.txt"
!more ./../datasets/cma-AstroPh.txt
./../datasets/cma-AstroPh.txt: No such file or directory
G = nx.Graph()
fh = open(filepath, "r")
for line in fh.readlines():
s = line.strip().split()
if s[0] != "#":
origin = int(s[0])
dest = int(s[1])
G.add_edge(origin, dest)
fh.close()
print("The graph has", len(G), "nodes and", len(G.edges()), "edges")
The graph has 18772 nodes and 198110 edges
G
<networkx.classes.graph.Graph at 0x158bfc160>
print("Is the graph simply connected?", nx.is_connected(G))
Is the graph simply connected? False
5.1.1. Show the components of the graph#
print("The graph has", nx.number_connected_components(G), "connected components")
The graph has 290 connected components
for k in nx.connected_components(G):
print(len(k))
17903
2
3
4
8
2
2
4
3
5
3
3
2
2
5
5
2
7
2
3
10
3
4
4
4
2
2
3
6
4
2
4
2
3
2
2
5
4
6
2
2
5
3
2
2
2
2
4
3
2
4
2
3
3
2
3
3
2
3
3
5
3
2
3
5
3
2
3
2
2
3
2
3
4
3
3
3
3
2
5
2
4
2
4
10
2
3
3
3
4
3
2
3
2
2
3
5
2
3
2
3
2
3
2
3
9
2
3
4
3
4
3
3
5
5
2
3
2
2
2
2
2
2
2
3
4
2
2
2
2
2
3
3
2
3
2
2
4
2
2
4
4
3
3
2
2
3
3
3
2
3
2
2
2
2
4
3
2
12
2
3
8
3
2
2
7
4
3
3
2
2
2
2
3
2
3
2
2
3
2
2
3
2
4
2
2
2
2
2
2
2
3
3
2
2
2
4
2
3
2
3
2
4
2
4
2
4
4
2
5
2
2
2
3
2
4
2
2
3
3
3
3
8
2
3
2
2
3
2
2
2
2
4
1
3
2
2
2
2
3
2
2
3
2
2
2
2
3
2
2
5
2
4
2
6
18
3
2
3
3
2
2
3
4
2
2
4
3
3
4
7
5
2
3
3
2
2
4
2
4
2
2
2
2
2
2
2
3
2
2
5.1.2. Extract the largest Connected Component as a subgraph#
nx.connected_components(G)
<generator object connected_components at 0x158be69e0>
graphs = list(nx.connected_components(G))
H = G.subgraph(graphs[0])
len(H)
17903
print(len(G) - len(H))
869
print("Check that the graph is now connected")
nx.is_connected(H)
Check that the graph is now connected
True
5.2. Global clustering coefficient#
The global clustering coefficient measures the number of triangles in the network and it’s defined as:
nx.triangles(H)
{84424: 210,
276: 54,
1662: 218,
5089: 1,
6058: 918,
6229: 107,
10639: 820,
16442: 137,
19325: 828,
19834: 3,
20113: 44,
21937: 13,
25452: 1906,
26902: 3,
29829: 11,
30222: 506,
32432: 128,
33040: 2759,
39238: 16,
39521: 1793,
41418: 487,
45009: 2384,
45098: 189,
45242: 702,
47005: 27,
47968: 5646,
47999: 148,
49934: 360,
50220: 44,
50897: 1594,
51730: 1,
53681: 23,
57537: 10,
58458: 10,
59326: 158,
61571: 354,
63552: 934,
64124: 76,
64568: 1587,
66200: 73,
69839: 101,
72391: 11,
73543: 537,
76259: 205,
77098: 423,
77915: 8,
78627: 396,
83560: 1145,
85420: 236,
88768: 15,
89131: 6,
89308: 117,
89994: 72,
90506: 121,
91060: 236,
92387: 32,
93296: 143,
94138: 513,
94329: 113,
95070: 11,
95531: 285,
96570: 114,
97101: 32,
98506: 491,
99104: 3003,
104802: 1907,
106611: 526,
107829: 412,
109016: 29,
112605: 623,
117751: 41,
122908: 295,
124023: 89,
125190: 1,
130825: 42,
132445: 1246,
560: 3,
15829: 50,
42972: 1001,
55528: 1,
57618: 142,
60310: 1,
63707: 90,
63769: 110,
68320: 22,
75607: 780,
92552: 650,
93762: 68,
93889: 29,
95860: 49,
96909: 41,
99870: 1409,
112293: 84,
113138: 1104,
116343: 45,
116453: 104,
118859: 233,
121831: 111,
122003: 5,
123268: 28,
125345: 172,
127886: 5187,
129530: 187,
129910: 45,
132104: 57,
2175: 9,
3365: 1081,
4279: 7,
4930: 41,
5335: 3028,
8445: 46,
9039: 378,
10976: 4324,
11050: 73,
11580: 12,
19204: 1930,
23986: 6157,
24075: 969,
26526: 58,
28516: 4399,
28787: 486,
31404: 5,
33777: 13,
34608: 6711,
35729: 71,
36907: 5282,
37861: 0,
38921: 1102,
42512: 11,
42994: 1248,
44011: 122,
46847: 2251,
47856: 2318,
50991: 0,
52870: 30,
53890: 1201,
55233: 462,
56117: 108,
58532: 17,
59151: 1000,
59173: 79,
60390: 378,
61461: 1429,
61472: 4,
61686: 543,
61929: 147,
62496: 555,
64054: 4975,
65114: 1377,
65794: 20,
66766: 14,
68639: 195,
69582: 999,
70618: 548,
72233: 386,
72941: 1590,
74612: 18,
77580: 629,
79936: 187,
83003: 642,
84150: 0,
87214: 1003,
87237: 130,
92504: 7,
94235: 6826,
95822: 2152,
101953: 196,
105827: 110,
110499: 23,
112243: 0,
115344: 35,
118103: 1133,
118630: 382,
118958: 51,
120454: 400,
126152: 81,
130165: 380,
63225: 42,
2969: 1,
8744: 28,
19113: 41,
20188: 0,
23579: 11,
26065: 15,
27281: 76,
30400: 18,
30864: 19,
39902: 166,
42077: 5,
49633: 1,
57772: 1052,
59444: 10,
65213: 7,
69459: 3,
99504: 15,
104589: 479,
106833: 85,
110210: 430,
113335: 15,
118286: 10,
120586: 32,
121399: 1832,
121988: 68,
122460: 18,
127393: 216,
4526: 11,
15406: 142,
17485: 376,
22055: 8,
25095: 132,
26713: 1541,
29421: 599,
31334: 1369,
38861: 193,
39298: 1954,
40400: 2,
43227: 685,
45858: 131,
51604: 3,
51631: 586,
52480: 59,
55508: 120,
65713: 2256,
66511: 516,
67988: 154,
68577: 106,
68629: 28,
69935: 82,
70637: 853,
71296: 58,
71722: 2,
78060: 409,
78070: 357,
79077: 8,
86411: 429,
91401: 1762,
92357: 1405,
93661: 23,
98621: 822,
100099: 1666,
102617: 1939,
106954: 21,
107534: 671,
107797: 320,
107933: 260,
112779: 206,
112903: 22,
115420: 205,
117953: 875,
123695: 62,
124913: 238,
131639: 55,
132766: 2,
106274: 3070,
811: 490,
1086: 3006,
2158: 1880,
3335: 36,
4239: 4132,
5099: 33,
5866: 888,
5959: 422,
6358: 859,
6540: 231,
7026: 859,
8381: 846,
8583: 1922,
8833: 2055,
10426: 1493,
12203: 269,
13219: 410,
14651: 922,
14859: 1637,
15223: 506,
16043: 351,
16400: 921,
16434: 1068,
17549: 36,
17756: 196,
18007: 610,
18344: 36,
19383: 1927,
20296: 948,
21192: 554,
21718: 8186,
21728: 843,
21729: 171,
22070: 683,
27862: 2529,
28128: 1769,
28210: 1460,
29606: 1489,
30422: 22,
30502: 496,
30625: 535,
32595: 473,
33659: 36,
34945: 635,
35290: 7065,
35302: 91,
35638: 153,
36814: 1067,
37179: 1007,
38109: 9008,
38898: 447,
38966: 4793,
39696: 2308,
40649: 289,
40745: 207,
41473: 1005,
42366: 899,
42551: 1820,
43850: 1835,
44022: 36,
44183: 1441,
44237: 198,
47128: 1119,
48423: 582,
49689: 902,
51306: 601,
51587: 1164,
51588: 253,
52412: 740,
52682: 4197,
53056: 362,
53213: 11269,
55066: 363,
55085: 919,
55999: 1880,
56361: 104,
57961: 844,
60088: 893,
60471: 2212,
61892: 25,
62459: 834,
66006: 540,
67410: 3315,
67670: 252,
67716: 1894,
68363: 2270,
69493: 676,
70950: 839,
71801: 1401,
72114: 109,
72380: 402,
74225: 153,
74361: 926,
75690: 2223,
76706: 1521,
76749: 2673,
76854: 1537,
77019: 1105,
77152: 193,
77208: 579,
79379: 346,
80218: 499,
80782: 964,
82347: 777,
82704: 2003,
84122: 4490,
86202: 1995,
86721: 829,
87325: 1322,
88348: 407,
89588: 1213,
89969: 1200,
93101: 380,
93271: 2154,
94560: 35,
95036: 2947,
95261: 28,
95911: 1330,
96786: 462,
97012: 89,
99023: 1835,
99239: 2134,
100394: 2275,
100461: 867,
100717: 820,
102653: 621,
103169: 1306,
104241: 598,
105680: 3343,
106763: 1432,
107846: 1397,
108722: 302,
109056: 253,
111385: 1950,
111660: 1041,
111709: 2061,
112157: 2382,
112695: 1113,
112848: 1861,
113726: 858,
113825: 671,
114249: 954,
115209: 3395,
115281: 206,
115607: 3644,
115700: 307,
116194: 21,
116337: 1529,
118896: 779,
119042: 36,
119664: 1682,
120199: 1889,
120618: 2541,
121461: 465,
122425: 1180,
122495: 827,
123233: 485,
125226: 871,
125267: 824,
125557: 338,
125678: 1090,
125766: 77,
125774: 312,
126948: 643,
127225: 259,
127274: 455,
127816: 670,
129273: 1271,
129410: 899,
129483: 509,
131234: 940,
131592: 820,
1513: 28,
4851: 332,
4893: 36,
6256: 1654,
11727: 56,
12494: 117,
14382: 17,
18042: 42,
26219: 853,
27124: 284,
31235: 180,
33883: 362,
35703: 117,
37378: 1291,
41567: 397,
43405: 152,
48115: 464,
48267: 184,
54771: 1512,
65484: 116,
65610: 54,
68921: 540,
79758: 100,
82866: 179,
87294: 2594,
92001: 195,
95855: 6,
101422: 47,
105901: 710,
106176: 425,
107608: 197,
108944: 189,
111957: 723,
112410: 37,
119031: 529,
119480: 412,
121026: 1868,
124998: 106,
125365: 696,
128934: 36,
131519: 77,
131914: 94,
132206: 646,
2689: 276,
2760: 276,
4148: 525,
6374: 38,
6934: 78,
10767: 47,
15921: 431,
15973: 870,
16035: 446,
18257: 81,
18505: 79,
20775: 145,
22036: 78,
23453: 3468,
23638: 279,
26849: 82,
29228: 78,
31097: 88,
31931: 78,
37918: 206,
38977: 282,
42886: 679,
43737: 168,
44244: 286,
47735: 140,
54608: 320,
59069: 518,
60427: 15,
68381: 727,
82214: 297,
93583: 276,
93984: 327,
95072: 6,
97387: 373,
101326: 4,
101994: 780,
112383: 285,
115063: 276,
117676: 442,
118912: 875,
121209: 91,
128878: 276,
131320: 489,
17737: 0,
9648: 4,
16793: 0,
27573: 7,
28827: 3,
37818: 1,
54011: 6,
56224: 1,
71157: 11,
87601: 0,
119665: 1,
122165: 1,
122336: 1,
130188: 1,
47255: 5,
38313: 30,
50634: 1,
76014: 23,
76026: 635,
84047: 3,
86017: 0,
103432: 1,
109229: 180,
122293: 1,
130391: 195,
48541: 3,
48546: 502,
58486: 531,
58719: 22,
64569: 1,
71627: 8,
73308: 23,
80089: 58,
80534: 3,
84851: 843,
86063: 3,
93360: 214,
116725: 3,
116919: 13,
1968: 184,
17447: 264,
29301: 283,
36197: 239,
43161: 153,
43731: 153,
56031: 490,
57916: 153,
61859: 8,
62844: 153,
63840: 11,
72171: 3,
78024: 471,
79455: 477,
85941: 6,
85993: 3,
95051: 163,
95109: 169,
99717: 1034,
110812: 235,
111014: 175,
111916: 7,
117311: 6,
122105: 159,
122740: 197,
124653: 166,
130921: 24,
17097: 821,
1500: 212,
3377: 618,
3420: 1331,
5088: 353,
5665: 941,
6331: 436,
7520: 373,
8332: 573,
8912: 257,
9103: 1331,
10479: 728,
12600: 564,
14783: 547,
16042: 522,
18553: 529,
19695: 527,
20598: 1220,
21333: 19,
22767: 1767,
22833: 656,
34097: 285,
35020: 1177,
35452: 171,
39597: 637,
46229: 1198,
54239: 698,
63399: 21,
65895: 22,
70035: 560,
71482: 869,
71602: 736,
73216: 668,
73714: 59,
75761: 530,
76529: 329,
76830: 708,
77194: 673,
78328: 1348,
78777: 364,
78800: 519,
79552: 1334,
91772: 234,
91787: 140,
91843: 954,
93530: 652,
94267: 200,
95558: 787,
96429: 368,
97491: 671,
97815: 606,
100548: 523,
104544: 913,
105528: 929,
105725: 42,
107436: 1048,
110773: 75,
114530: 1984,
120321: 650,
123670: 1027,
131704: 1718,
133062: 970,
97966: 3,
18877: 1,
33380: 12,
80396: 3,
84299: 30,
108170: 22,
112976: 2,
12890: 34,
23769: 17,
31638: 87,
39339: 9,
51539: 33,
61974: 42,
87885: 211,
123201: 41,
5412: 3,
20664: 14,
39978: 0,
110535: 3,
13353: 55,
242: 3,
680: 21,
1418: 6,
14706: 10,
17837: 2421,
24682: 81,
26624: 27,
29131: 6,
30601: 2,
30614: 9,
34051: 39,
35317: 14,
38226: 61,
42327: 2,
45826: 1,
46803: 1,
50264: 14,
52497: 3,
56970: 1,
58023: 40,
60078: 1,
68322: 6,
68830: 8,
68950: 284,
70804: 4,
71583: 3,
72370: 125,
72441: 3,
72444: 2,
74787: 122,
75665: 3,
78370: 1,
80482: 3,
80823: 2,
80960: 5,
82947: 27,
83877: 1,
88082: 69,
92276: 5,
96326: 23,
98853: 111,
100536: 39,
109585: 2,
109894: 63,
112256: 12,
120094: 3,
120169: 4,
127688: 4,
131741: 4,
132492: 1480,
132691: 2,
13640: 2,
26731: 4,
28273: 3,
47881: 3,
59716: 27,
68758: 4,
74685: 1,
92281: 3,
93585: 3,
97683: 15,
113717: 8,
119757: 4,
123558: 3,
118675: 36,
4932: 139,
8053: 702,
11020: 17,
12924: 214,
35111: 67,
38185: 18,
44467: 150,
48243: 18,
57276: 13,
58633: 79,
65657: 19,
75731: 115,
79176: 17,
105955: 40,
113867: 39,
125136: 61,
8546: 35,
9522: 7,
18079: 97,
20129: 3,
23626: 2776,
50316: 27,
55462: 56,
55675: 13,
59052: 19,
67060: 327,
67618: 1006,
70981: 0,
76641: 3,
86126: 11,
87760: 48,
101610: 166,
107618: 25,
109192: 17,
114995: 0,
120732: 167,
122852: 91,
128685: 1229,
291: 196,
3998: 107,
4959: 1329,
6619: 21,
12328: 1600,
16693: 69,
25625: 1,
26474: 3,
34218: 23,
34862: 1525,
34928: 8,
38868: 4,
41140: 271,
47237: 1487,
55155: 1746,
57559: 26,
57974: 4,
61750: 6,
66826: 1406,
71861: 3,
75546: 30,
83393: 21,
85862: 2,
86460: 236,
90269: 21,
91858: 28,
95291: 4,
95680: 39,
97431: 1095,
97788: 108,
99613: 32,
101271: 21,
102475: 0,
102874: 95,
104466: 6,
106392: 152,
110670: 109,
111496: 2053,
114677: 1350,
128657: 230,
128732: 1095,
129683: 908,
130919: 36,
24527: 108,
25831: 16,
28623: 784,
30018: 29,
37519: 28,
43765: 1065,
44674: 77,
56205: 27,
68941: 308,
91247: 0,
94743: 78,
98712: 32,
118945: 14,
59946: 46,
66875: 91,
126260: 1,
35690: 781,
2478: 1771,
2736: 2355,
4679: 1988,
4786: 3053,
5746: 1,
5979: 325,
6387: 366,
7208: 331,
8978: 524,
9049: 28,
11513: 1267,
14149: 3544,
16884: 55,
24128: 353,
25329: 3834,
27242: 69,
33351: 1806,
37907: 2231,
38413: 2206,
40208: 748,
44790: 222,
45732: 906,
47674: 490,
49486: 3361,
50163: 581,
50808: 5103,
53219: 360,
54681: 3936,
55554: 3,
55602: 1997,
56184: 314,
56302: 1018,
61574: 3,
65337: 4088,
70366: 1487,
71040: 3067,
77480: 3423,
77999: 605,
79811: 573,
80988: 175,
83171: 1845,
85668: 55,
85740: 3538,
92810: 619,
93624: 2092,
96525: 144,
97471: 190,
99798: 18,
100181: 580,
100237: 156,
101520: 445,
105651: 296,
108958: 609,
111161: 6457,
112453: 375,
113591: 1093,
117816: 26,
121653: 51,
121655: 15,
124529: 2813,
126451: 868,
126916: 3154,
128381: 325,
128985: 36,
129458: 595,
130997: 5373,
1155: 2522,
1324: 3217,
1334: 484,
1663: 1540,
1776: 3330,
2094: 3232,
5453: 1555,
6443: 231,
6548: 370,
7083: 1614,
7205: 3755,
7264: 3115,
8273: 3856,
8305: 190,
9048: 360,
10914: 1276,
11799: 5596,
19066: 2289,
19674: 1002,
21530: 527,
21560: 442,
23457: 171,
25850: 409,
28577: 2580,
29489: 1492,
32202: 1386,
34110: 791,
36781: 1471,
36833: 1524,
37479: 4185,
39264: 992,
40242: 1416,
40495: 2257,
41554: 1533,
42092: 1529,
43051: 511,
43601: 2369,
44434: 1976,
44785: 3365,
45637: 549,
46415: 1408,
48507: 1149,
48703: 3163,
49258: 1550,
51121: 687,
51697: 2516,
56221: 1516,
56933: 2531,
57156: 4234,
58278: 3683,
58926: 218,
59843: 1030,
59861: 2980,
60211: 1684,
60576: 1540,
62842: 379,
62855: 1540,
64374: 1276,
66522: 409,
66553: 933,
67299: 3944,
67755: 253,
70099: 967,
70339: 1540,
73589: 2300,
74570: 3857,
75947: 6515,
77373: 441,
80186: 3383,
80187: 171,
80429: 436,
80561: 1542,
82020: 0,
83351: 4834,
85009: 1667,
88155: 1794,
92092: 1036,
92790: 8357,
95179: 1515,
95765: 3312,
95934: 979,
97249: 4756,
97320: 3357,
99086: 1868,
99131: 105,
99539: 1579,
99825: 1804,
100621: 3,
100930: 1373,
101167: 3414,
101641: 312,
102282: 414,
103290: 714,
104749: 2372,
105049: 2917,
106945: 1057,
107877: 3566,
110402: 1379,
114352: 2914,
115480: 3788,
115796: 153,
115900: 4620,
119515: 3029,
122753: 190,
122787: 3086,
125954: 2915,
126258: 85,
128641: 1004,
130519: 2973,
130853: 2973,
131221: 2488,
...}
How many triangles are there in the whole network?
tt = sum(list(nx.triangles(H).values()))
tt / 3
1350014.0
The transitivity is the fraction of all possible triangles in the network.
5.3. Average clustering coefficient#
As an alternative to the global clustering coefficient, the overall level of clustering in a network is measured by Watts and Strogatz as the average of the local clustering coefficients of all the vertices \(n\):
It is worth noting that this metric places more weight on the low degree nodes, while the transitivity ratio places more weight on the high degree nodes. In fact, a weighted average where each local clustering score is weighted by \(k_i(k_i-1)\) is identical to the global clustering coefficient.
print("The average clustering coefficient of H is")
nx.average_clustering(H)
The average clustering coefficient of H is
0.6328232091518589
5.4. Average sorthest path length#
5.4.1. Warning! Calculating the shortest paths is intensive!!#
The graph is small world.
nx.average_shortest_path_length(H)
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In [22], line 1
----> 1 nx.average_shortest_path_length(H)
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/generic.py:417, in average_shortest_path_length(G, weight, method)
413 return nx.single_source_bellman_ford_path_length(G, v, weight=weight)
415 if method in single_source_methods:
416 # Sum the distances for each (ordered) pair of source and target node.
--> 417 s = sum(l for u in G for l in path_length(u).values())
418 else:
419 if method == "floyd-warshall":
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/generic.py:417, in <genexpr>(.0)
413 return nx.single_source_bellman_ford_path_length(G, v, weight=weight)
415 if method in single_source_methods:
416 # Sum the distances for each (ordered) pair of source and target node.
--> 417 s = sum(l for u in G for l in path_length(u).values())
418 else:
419 if method == "floyd-warshall":
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/generic.py:409, in average_shortest_path_length.<locals>.path_length(v)
407 def path_length(v):
408 if method == "unweighted":
--> 409 return nx.single_source_shortest_path_length(G, v)
410 elif method == "dijkstra":
411 return nx.single_source_dijkstra_path_length(G, v, weight=weight)
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/unweighted.py:59, in single_source_shortest_path_length(G, source, cutoff)
57 cutoff = float("inf")
58 nextlevel = {source: 1}
---> 59 return dict(_single_shortest_path_length(G.adj, nextlevel, cutoff))
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/unweighted.py:91, in _single_shortest_path_length(adj, firstlevel, cutoff)
89 return
90 for v in found:
---> 91 nextlevel.update(adj[v])
92 level += 1
93 del seen
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/classes/coreviews.py:282, in <genexpr>(.0)
280 if node_ok_shorter:
281 return (n for n in self.NODE_OK.nodes if n in self._atlas)
--> 282 return (n for n in self._atlas if self.NODE_OK(n))
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/classes/coreviews.py:337, in FilterAdjacency.__getitem__.<locals>.new_node_ok(nbr)
336 def new_node_ok(nbr):
--> 337 return self.NODE_OK(nbr) and self.EDGE_OK(node, nbr)
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/classes/filters.py:20, in no_filter(*items)
1 """Filter factories to hide or show sets of nodes and edges.
2
3 These filters return the function used when creating `SubGraph`.
4 """
5 __all__ = [
6 "no_filter",
7 "hide_nodes",
(...)
16 "show_multidiedges",
17 ]
---> 20 def no_filter(*items):
21 return True
24 def hide_nodes(nodes):
KeyboardInterrupt:
math.log(len(H), 10)
5.4.2. Compare the results with a random ER network#
We generate a random Erdos-Renyi graph with same average connectivity of H, i.e. same number of nodes and edges.
nnodes = 2000
plink = 0.0122
ER = nx.fast_gnp_random_graph(nnodes, plink)
nx.is_connected(ER)
True
print("The ER graph has", len(ER), "nodes")
print("and", len(ER.edges()), "edges")
The ER graph has 2000 nodes
and 24467 edges
print("The average clustering coefficient of ER is")
nx.average_clustering(ER)
The average clustering coefficient of ER is
0.011958816979123639
print(sum(list(nx.triangles(ER).values())) / 3)
2370.0
The ER graph is also small world!
The average shortest path scales approximately with the logarithm of the number of nodes
nx.average_shortest_path_length(ER)
math.log(len(ER), 10)
5.4.3. Compare the results with a random AB network#
AB = nx.barabasi_albert_graph(18000, 11)
print("The AB graph has", len(AB), "nodes")
print("and", len(AB.edges()), "edges")
The AB graph has 18000 nodes
and 197879 edges
from collections import Counter
degrees = dict(AB.degree()).values()
c = Counter(degrees)
import powerlaw as pwl
plt.figure(figsize=(10, 7))
x = []
y = []
for i in sorted(c):
x.append(i)
y.append(float(c[i]) / len(AB))
plt.plot(np.array(x), np.array(y))
pwl.plot_pdf(list(degrees))
plt.xlabel("$k$", fontsize=18)
plt.ylabel("$P(k)$", fontsize=18)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.yscale("log")
plt.xscale("log")
plt.axis([10, 1000, 0.0000001, 1])
plt.show()
fit_function = pwl.Fit(list(degrees), xmin=11)
fit_function.power_law.alpha
3.05323051233271
fit_function.power_law.sigma
0.015303876663508869
fit_function.power_law.xmin
11.0
print("The average clustering coefficient of AB is")
nx.average_clustering(AB)
The average clustering coefficient of AB is
0.007877683345432804
print("The number of triangles is ", sum(list(nx.triangles(AB).values())) / 3)
The number of triangles is 24257.0
The AB network is also small-world.
nx.average_shortest_path_length(AB)
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In [39], line 1
----> 1 nx.average_shortest_path_length(AB)
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/generic.py:417, in average_shortest_path_length(G, weight, method)
413 return nx.single_source_bellman_ford_path_length(G, v, weight=weight)
415 if method in single_source_methods:
416 # Sum the distances for each (ordered) pair of source and target node.
--> 417 s = sum(l for u in G for l in path_length(u).values())
418 else:
419 if method == "floyd-warshall":
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/generic.py:417, in <genexpr>(.0)
413 return nx.single_source_bellman_ford_path_length(G, v, weight=weight)
415 if method in single_source_methods:
416 # Sum the distances for each (ordered) pair of source and target node.
--> 417 s = sum(l for u in G for l in path_length(u).values())
418 else:
419 if method == "floyd-warshall":
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/generic.py:409, in average_shortest_path_length.<locals>.path_length(v)
407 def path_length(v):
408 if method == "unweighted":
--> 409 return nx.single_source_shortest_path_length(G, v)
410 elif method == "dijkstra":
411 return nx.single_source_dijkstra_path_length(G, v, weight=weight)
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/unweighted.py:59, in single_source_shortest_path_length(G, source, cutoff)
57 cutoff = float("inf")
58 nextlevel = {source: 1}
---> 59 return dict(_single_shortest_path_length(G.adj, nextlevel, cutoff))
File ~/.pyenv/versions/venv_xgi/lib/python3.9/site-packages/networkx/algorithms/shortest_paths/unweighted.py:91, in _single_shortest_path_length(adj, firstlevel, cutoff)
89 return
90 for v in found:
---> 91 nextlevel.update(adj[v])
92 level += 1
93 del seen
KeyboardInterrupt:
math.log(len(AB), 10)
5.4.4. Compare the results with a random WS network#
WS = nx.connected_watts_strogatz_graph(18000, 23, 0.2, 50)
print("The WS graph has", len(WS), "nodes")
print("and", len(WS.edges()), "edges")
The WS graph has 18000 nodes
and 198000 edges
nx.is_connected(WS)
True
ws_degrees = dict(WS.degree()).values()
plt.figure(figsize=(10, 7))
plt.hist(ws_degrees, bins=10)
plt.xlabel("$k$", fontsize=18)
plt.ylabel("$Freq.$", fontsize=18)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
(array([ 0., 1000., 2000., 3000., 4000., 5000., 6000.]),
[Text(0, 0.0, '0'),
Text(0, 1000.0, '1000'),
Text(0, 2000.0, '2000'),
Text(0, 3000.0, '3000'),
Text(0, 4000.0, '4000'),
Text(0, 5000.0, '5000'),
Text(0, 6000.0, '6000')])
print("The average clustering coefficient of WS is")
nx.average_clustering(WS)
The average clustering coefficient of WS is
0.3675862599323307
The Watt-Strogatz network is still small-world but with high clustering.
nx.average_shortest_path_length(WS)
math.log(len(WS), 10)
5.5. Closeness Centrality#
In connected graphs there is a natural distance metric between all pairs of nodes, defined by the length of their shortest paths. The ‘’’farness’’’ of a node ‘’x’’ is defined as the sum of its distances from all other nodes, and its closeness was defined by Bavelas as the reciprocal of the farness that is:
Thus, the more central a node is the lower its total distance from all other nodes. Note that taking distances ‘’from’’ or ‘’to’’ all other nodes is irrelevant in undirected graphs, whereas in directed graphs distances ‘’to’’ a node are considered a more meaningful measure of centrality, as in general (e.g., in, the web) a node has little control over its incoming links.
Be careful! Computing all the distances between pair of nodes can be intensive.
close_centr = nx.closeness_centrality(H, 84424)
print(close_centr)