目录
3.2 Centrality & Communicability
3.4 Dispersion in fully connected graphs
+ Networks are connected bi-directional graphs
+ Nodes mark the entities in a network
+ Edges mark the relationships in a network
- Shortest path between two nodes
- Connectedness
- Centrality closeness; betweenness
- Clustering
- Communicability
1. Networkx
1.1 A simple network
- Import
import networkx as nx
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
- draw
# Build a small undirected example graph.
simple_network = nx.Graph()
# Node 9 is used by the edges below, so it is listed with the other nodes.
nodes = [1, 2, 3, 4, 5, 6, 7, 8, 9]
# Each undirected edge is listed exactly once.
edges = [(1, 2), (2, 3), (1, 3), (4, 5), (2, 7), (1, 9), (3, 4), (4, 9), (5, 6), (7, 8), (8, 9)]
# Register the nodes explicitly so `nodes` and `edges` stay in sync.
simple_network.add_nodes_from(nodes)
simple_network.add_edges_from(edges)
nx.draw(simple_network)
- 数node数
# Number of nodes in the graph.
simple_network.order()
- node加label
# Compute one layout and reuse it for nodes, edges and labels so they line up.
pos = nx.spring_layout(simple_network)

# Nodes: red, slightly transparent.
nx.draw_networkx_nodes(
    simple_network,
    pos,
    node_color='r',
    node_size=500,
    alpha=0.8,
)

# Edges: `edgelist` could also be a subset of the graph's edges.
nx.draw_networkx_edges(
    simple_network,
    pos,
    edgelist=edges,
    width=8,
    alpha=0.5,
    edge_color='b',
)

# Label every node with the string form of its identifier.
node_name = {node: str(node) for node in simple_network.nodes()}
nx.draw_networkx_labels(simple_network, pos, node_name, font_size=16)

plt.axis('off')
plt.show()  # display
- 数degree,即每个node连接的节点数
# Degree of every node, as (node, degree) pairs.
simple_network.degree()
# Pick the (node, degree) pair with the largest degree.
max(simple_network.degree(), key=lambda pair: pair[1])
- 找到极大团(maximal cliques)
from networkx.algorithms.clique import find_cliques

# Print cliques one by one, stopping once more than 20 have been shown:
# listing every clique of a large graph can crash the notebook.
for i, clique in enumerate(find_cliques(simple_network), start=1):
    print(clique)
    if i > 20:
        break
- 节点的迭代器(节点、邻接字典)
# Materialize the adjacency iterator: one (node, adjacency-dict) entry per node.
list(simple_network.adjacency())
- 返回特性
# Basic queries on the graph.
print(simple_network.has_edge(2,9))  # is there an edge between nodes 2 and 9?
print(simple_network.has_node(2))  # is node 2 in the graph?
print(simple_network.number_of_edges())  # edge count
print(simple_network.number_of_nodes())  # node count
print(simple_network.order())  # node count again, via order()
print(len(simple_network))  # node count again, via len()
- 不同类型的图
# The four basic NetworkX graph classes.
G = nx.Graph()  # undirected simple graph
d = nx.DiGraph()  # directed simple graph
m = nx.MultiGraph()  # undirected, parallel edges allowed
h = nx.MultiDiGraph()  # directed, parallel edges allowed
- 最短路径
# Unweighted shortest path between nodes 6 and 8: the node sequence, then its length.
print(nx.shortest_path(simple_network,6,8))
print(nx.shortest_path_length(simple_network,6,8))
# Weighted variant: Dijkstra with the 'distance' edge attribute as the weight.
# NOTE(review): G_C is only created further down (section 1.2) and node1/node2
# are not assigned in the visible part of this file - treat them as placeholders.
print(nx.dijkstra_path(G_C,node1,node2,weight='distance'))
print(nx.dijkstra_path_length(G_C,node1,node2,weight='distance'))
1.2 Weighted Edge
- 画雏形(weighted edge)
address_list = [('A',"Columbia University, New York, NY")]
distances = [['A','B',10]]
import networkx as nx
%matplotlib inline
G_C=nx.Graph()
node_labels=dict()
nodes = list()
for n in address_list:
nodes.append(n[0])
node_labels[n[0]] = n[1]
for e in distances:
G_C.add_edge(e[0],e[1],distance=e[2])
nx.draw(G_C)
- 添加node&edge的label
# One layout shared by every drawing call below so nodes, edges and labels align.
pos = nx.spring_layout(G_C)

# Nodes.
nx.draw_networkx_nodes(G_C, pos, node_color='r', node_size=500, alpha=0.8)

# Edges.
nx.draw_networkx_edges(
    G_C,
    pos,
    edgelist=G_C.edges(),
    width=8,
    alpha=0.5,
    edge_color='b',
)

# Edge labels: no explicit `edge_labels` mapping is passed, so NetworkX
# chooses the label text itself.
nx.draw_networkx_edge_labels(G_C, pos, font_size=10)

# Node labels: the node -> text mapping only needs to be built once.
node_name = {node: str(node) for node in G_C.nodes()}
nx.draw_networkx_labels(G_C, pos, node_name, font_size=16)

plt.axis('off')
plt.show()  # display
1.3 标记不同的edge
- differentiating edges by weight
# Split the edges at a distance of 15: "large" above it, "small" at or below it.
weighted_edges = list(G_C.edges(data=True))
elarge = [(u, v) for u, v, attrs in weighted_edges if attrs['distance'] > 15]
esmall = [(u, v) for u, v, attrs in weighted_edges if attrs['distance'] <= 15]

pos = nx.spring_layout(G_C)  # positions for all nodes
plt.figure(1, figsize=(12, 12))  # a big figure keeps the graph readable

# Nodes.
nx.draw_networkx_nodes(G_C, pos, node_size=700)

# Edges: large distances as solid lines, small distances as dashed blue lines.
nx.draw_networkx_edges(G_C, pos, edgelist=elarge, width=6)
nx.draw_networkx_edges(
    G_C,
    pos,
    edgelist=esmall,
    width=6,
    alpha=0.5,
    edge_color='b',
    style='dashed',
)

# Node and edge labels.
nx.draw_networkx_labels(G_C, pos, font_size=20, font_family='sans-serif')
nx.draw_networkx_edge_labels(G_C, pos, font_size=7)

plt.axis('off')
#plt.savefig("address_graph.png") # save as png if you need to use it in a report or web app
plt.show()  # display
- 标记最短路径
origin = 'B'
destination = 'H'
# Weighted shortest path (by the 'distance' attribute) as a list of nodes.
shortest_path = nx.dijkstra_path(G_C, origin, destination, weight='distance')

# Record every hop of the path in both orientations, so the lookup below
# matches an edge whichever way G_C.edges() reports its endpoints.
shortest_path_edges = [
    hop
    for u, v in zip(shortest_path, shortest_path[1:])
    for hop in ((u, v), (v, u))
]
on_path = set(shortest_path_edges)  # constant-time membership tests

# Split the graph's edges into path / non-path edges. Only nodes touched by a
# path edge keep their name as a label; all others get an empty label.
path_edges = []
other_edges = []
node_label_list = {n: '' for n in G_C.nodes()}
for edge in G_C.edges():
    if edge in on_path:
        path_edges.append(edge)
        node_label_list[edge[0]] = edge[0]
        node_label_list[edge[1]] = edge[1]
    else:
        other_edges.append(edge)

pos = nx.spring_layout(G_C)  # positions for all nodes
fig = plt.figure(1, figsize=(12, 12))

# Nodes.
nx.draw_networkx_nodes(G_C, pos, node_size=700)

# Edges: shortest-path edges as solid lines, every other edge dashed blue.
nx.draw_networkx_edges(G_C, pos, edgelist=path_edges, width=6)
nx.draw_networkx_edges(
    G_C,
    pos,
    edgelist=other_edges,
    width=6,
    alpha=0.5,
    edge_color='b',
    style='dashed',
)

# Labels.
nx.draw_networkx_labels(
    G_C,
    pos,
    font_size=20,
    font_family='sans-serif',
    labels=node_label_list,
)
nx.draw_networkx_edge_labels(G_C, pos, font_size=7)

plt.axis('off')
#plt.savefig("address_graph.png") # save as png if you need to use it in a report or web app
plt.show()  # display
- 生成一个点到其他任意点的最短距离
from operator import itemgetter

location = 'G'
# A single Dijkstra run from `location` gives the weighted distance to every
# node reachable from it, instead of one separate run per target node.
reachable = nx.single_source_dijkstra_path_length(G_C, location, weight='distance')
# Keep the graph's node order, skip the start node itself and any node that
# cannot be reached from it.
distance_list = [
    (node, reachable[node])
    for node in G_C.nodes()
    if node != location and node in reachable
]
# Nearest nodes first.
print(sorted(distance_list, key=itemgetter(1)))
- 一个点到另一个点的所有路径
# All simple paths from 'A' to 'C', materialized as a list.
list(nx.all_simple_paths(G_C,'A','C'))
2. Social Networks
2.1 Friend Graph
- 读取 graph 并绘制
G = nx.read_gpickle('friend_graph')
%matplotlib inline
nx.draw(G)
- 去除不连接点,绘图
import matplotlib.pyplot as plt

# Drop isolated nodes (degree 0) before laying out the graph.
nodes_for_removal = [n for n in G.nodes() if G.degree(n) == 0]
G.remove_nodes_from(nodes_for_removal)

pos = nx.spring_layout(G)
fig = plt.figure(1, figsize=(12, 12))

# Nodes.
nx.draw_networkx_nodes(G, pos, node_color='r', node_size=500, alpha=0.8)

# Edges, drawn in two passes: a thin line first, then a wide translucent
# blue band over it.
nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.5)
nx.draw_networkx_edges(G, pos, width=8, alpha=0.5, edge_color='b')

# Label every node with the string form of its identifier.
node_name = {node: str(node) for node in G.nodes()}
nx.draw_networkx_labels(G, pos, node_name, font_size=16)

# plt.show() matches the other plotting cells; fig.show() can emit a warning
# instead of displaying anything when the backend is not a GUI backend.
plt.show()
- 分析有几个独立的连通分量(connected components)
# Compute the connected components once, then report how many there are and
# the node set of each.
components = list(nx.connected_components(G))
print(len(components))
for comp in components:
    print(comp)
- Largest connected component subgraph
def connected_component_subgraphs(G):
    """Yield, for each connected component of ``G``, the subgraph on its nodes."""
    yield from (G.subgraph(members) for members in nx.connected_components(G))


# Keep the first component that has the most nodes.
# (For the smallest component, take the minimum by size instead.)
largest_graph = max(connected_component_subgraphs(G), key=len, default=None)
largest_size = len(largest_graph) if largest_graph is not None else 0
nx.draw(largest_graph)
- max degree (with most friends)
# Degrees of the friend graph, turned into a list of (node, degree) pairs.
d=nx.degree(G)
l=list(d)
# The pair with the largest degree, i.e. the node with the most friends.
max(l,key=lambda x: x[1])
3. Analysis Algorithms
3.1 Clustering
Clustering is a measure of how closely knit the nodes in a graph are. We can measure the degree to which a node belongs to a cluster and the degree to which the graph is clustered
- Node clustering coefficient: A measure that shows the degree to which a node belongs to a cluster
- Graph clustering coefficient: A measure that shows the degree to which a graph is clustered
- 生成一个完全图(complete graph Kn:n 个节点,节点标签为 0 到 n-1)
# Complete graph on 4 nodes (K4), then draw it.
G1=nx.complete_graph(4)
nx.draw(G1)
- 计算clustering & average clustering
# Node clustering coefficients of G1.
nx.clustering(G1)
# Graph-level (average) clustering coefficient of G1.
nx.average_clustering(G1)
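Example — node 0 with neighbors 1, 2, 3: three triangles through node 0 are possible. If only 2 of the 3 exist (one edge between its neighbors is missing), its clustering coefficient is 2/3 ≈ 0.67. Note: in the complete graph K4 built above every triangle exists, so every node's coefficient is 1.0.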
- Clustering in weighted graph
# Weighted clustering of G_C, using the 'distance' edge attribute as the weight:
# the graph-level average first, then the per-node coefficients.
nx.average_clustering(G_C,weight='distance')
nx.clustering(G_C,weight='distance')
3.2 Centrality & Communicability
Centrality deals with identifying the most important nodes in a graph
Communicability measures how easy it is to send a message from node i to node j
+ closeness_centrality: (n-1)/sum(shortest path to all other nodes); how near a node is to every other node in a network;值越大,越central
+ betweenness_centrality: fraction of pair shortest paths that pass through node n; The number of shortest paths that go through node n/total number of shortest paths; the degree to which a node serves as a connector
+ degree centrality: fraction of nodes that n is connected to
+ communicability: the sum of all walks from one node to every other node; 两点间path越多,越高
- closeness centrality
from collections import OrderedDict

# Closeness centrality of every node (unweighted). The function lives in the
# networkx namespace, so it must be called as nx.closeness_centrality.
c_c = nx.closeness_centrality(G)
# Weighted alternative: use the 'distance' edge attribute as the path length.
# This rebinds c_c, so keep whichever of the two calls fits the graph at hand.
c_c = nx.closeness_centrality(G_C, distance='distance')

# Nodes ordered from the highest to the lowest closeness value.
cc = OrderedDict(sorted(c_c.items(), key=lambda item: item[1], reverse=True))
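Example (n = 4, assuming nodes 1 and 2 are not directly connected): the shortest-path lengths from node 2 are 2→0: 1, 2→3: 1, 2→1: 2, so closeness = (n-1)/sum = 3/(1+1+2) = 3/4 = 0.75.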
- Communicability
# Communicability of G1.
nx.communicability(G1)
# Adjacency matrix A for the graph above: 7 nodes, entry 1 where two nodes share an edge.
A = np.array([
    [0, 1, 0, 0, 0, 0, 0],
    [1, 0, 1, 0, 0, 1, 0],
    [0, 1, 0, 1, 1, 0, 0],
    [0, 0, 1, 0, 1, 0, 1],
    [0, 0, 1, 1, 0, 1, 0],
    [0, 1, 0, 0, 1, 0, 0],
    [0, 0, 0, 1, 0, 0, 0],
])
# Matrix powers of A: A*A, then A*A*A.
AA = A @ A
AAA = AA @ A
3.3 Betweenness Centrality
当图为完全图(fully connected,任意两点都直接相连)时,最短路径不经过任何中间节点,因此所有节点的 betweenness centrality 均为 0。
# Betweenness centrality of G1 (unweighted).
nx.betweenness_centrality(G1)
# Betweenness centrality of G_C, with the 'distance' edge attribute as the weight.
nx.betweenness_centrality(G_C,weight='distance')
3.4 Dispersion in fully connected graphs
+ Eccentricity: the max distance from one node to all other nodes (least eccentric is more central)
+ diameter: the max eccentricity of all nodes in a graph (the longest shortest path)
+ periphery: the set of nodes with eccentricity = diameter (The nodes with the longest shortest paths (the peripheral nodes))
+ Cliques: a subgraph in which every node is connected to every other node
+ Center: The set of nodes that are the most central (they have the smallest distance to any other node) [Graph must be fully connected]
- eccentricity
# Eccentricity of the nodes of G1.
nx.eccentricity(G1)
- diameter
# Diameter of G1.
nx.diameter(G1)
- periphery
# Periphery of G1.
nx.periphery(G1)
- Cliques
from networkx.algorithms.clique import find_cliques, cliques_containing_node

# Print cliques one by one, stopping once more than 10 have been shown:
# listing every clique of a large graph can crash the notebook.
for i, clique in enumerate(find_cliques(G), start=1):
    print(clique)
    if i > 10:
        break
- Center
from networkx.algorithms.distance_measures import center
# Center of G_C.
center(G_C)