文章目录
二分网络的投影和对称
# 查看数据
import pandas as pd
data_trade_total = pd.read_csv('./data/comtrade_trade_data_total_2003.csv')
print(len(data_trade_total)) # 44259
data_trade_total.sample(10)
# Special 'exclude_countiries' to be exluded when loading data
472 Africa CAMEU region, nes
899 Areas, nes
471 CACM, nes
129 Caribbean, nes
221 Eastern Europe, nes
97 EU-27
697 Europe EFTA, nes
492 Europe EU, nes
838 Free Zones
473 LAIA, nes
536 Neutral Zone
637 North America and Central America, nes
290 Northern Africa, nes
527 Oceania, nes
577 Other Africa, nes
490 Other Asia, nes
568 Other Europe, nes
636 Rest of America, nes
839 Special Categories
879 Western Asia, nes
0 World
网络的对称性
def net_symmetrisation(wtn_file, exclude_countries):
DG = nx.DiGraph()
Reporter_pos = 1
Flow_code_pos = 2
Partner_pos = 3
Value_pos = 9
dic_trade_flows = {}
hfile = open(wtn_file, 'r')
header = hfile.readline()
lines = hfile.readlines()
for l in lines:
l_split = l.split(',')
# the following is to prevent parsing lines without data
if len(l_split) < 2:
continue
reporter = int(l_split[Reporter_pos])
partner = int(l_split[Partner_pos])
flow_code = int(l_split[Flow_code_pos])
value = float(l_split[Value_pos])
if ((reporter in exclude_countries) or (partner in exclude_countries) or \
(reporter == partner)):
continue
if flow_code == 1 and value > 0.0:
# 1=Import, 2=Export
if (partner, reporter, 2) in dic_trade_flows:
DG[partner][reporter]['weight'] = (DG[partner][reporter]['weight'] + value) / 2.0
else:
DG.add_edge(partner, reporter, weight=value)
dic_trade_flows[(partner, reporter, 1)] = value
elif flow_code == 2 and value > 0.0:
# 1=Import, 2=Export
if (reporter, partner, 1) in dic_trade_flows:
DG[reporter][partner]['weight'] = (DG[reporter][partner]['weight'] + value) / 2.0
else:
DG.add_edge(reporter, partner, weight=value)
dic_trade_flows[( reporter, partner, 2)] = value
else:
print('trade flow not present\n')
hfile.close()
return DG
生成聚合网络
# importing the main modules
import networkx as nx
# countries to be excluded
exclude_countries = [472,899,471,129,221,97,697,492,838,473,536,\
637,290,527,577,490,568,636,839,879,0]
# this is the magic command to have the graphic embedded in the notebook
%pylab inline
DG = net_symmetrisation('./data/comtrade_trade_data_total_2003.csv', exclude_countries)
print('number of nodes: ', DG.number_of_nodes())
print('number of edges: ', DG.number_of_edges())
# number of nodes: 232
# number of edges: 27901
Reciprocity 互易性
在网络科学中,互易性是一种度量有向网络中顶点相互连接的可能性的方法。
在无权网络中定义网络互易性-wiki
r
=
L
↔
L
r=\frac{L^\leftrightarrow}{L}
r=LL↔
where
L
↔
{L^\leftrightarrow}
L↔ is the number of reciprocated links that for a connected network ammounts to
2
L
−
N
(
N
−
1
)
2L-N(N-1)
2L−N(N−1)
# weighted case
W = 0
W_rep =0
for n in DG.nodes():
for e in DG.out_edges(n, data=True):
W += e[2]['weight']
if DG.has_edge(e[1], e[0]):
W_rep += min(DG[e[0]][e[1]]['weight'], DG[e[1]][e[0]]['weight'])
print(W, W_rep, W_rep/W) # 0.07841151578718941
In the weighted case
the formula changes in:
r
=
W
↔
W
r=\frac{W^\leftrightarrow}{W}
r=WW↔
where
W
↔
=
∑
i
∑
j
≠
i
w
i
j
↔
W^\leftrightarrow=\sum_i\sum_{j\neq i}w^\leftrightarrow_{ij}
W↔=i∑j=i∑wij↔ is the sum of the reciprocated weights with
w
i
j
↔
=
m
i
n
[
w
i
j
,
w
j
i
]
=
w
j
i
↔
w^\leftrightarrow_{ij}=min[w_{ij},w_{ji}]=w^\leftrightarrow_{ji}
wij↔=min[wij,wji]=wji↔
and
W
=
∑
i
∑
j
≠
i
w
i
j
W=\sum_i\sum_{j\neq i}w_{ij}
W=i∑j=i∑wij
# weighted case
W = 0
W_rep =0
for n in DG.nodes():
for e in DG.out_edges(n, data=True):
W += e[2]['weight']
if DG.has_edge(e[1], e[0]):
W_rep += min(DG[e[0]][e[1]]['weight'], DG[e[1]][e[0]]['weight'])
print(W, W_rep, W_rep/W)
# 7170872443378.5 5195628162988.0 0.7245461698019161
Assortativity 同配性
- 同配:度
大
节点倾向于连接度大
节点,网络是度正相关的 - 异配:度
大
节点倾向于连接度小
节点,网络是度负相关的 - 中性:节点间的连接与它们自身的度值
无关
,网络具有不同的度相关性
平均度分布
# average degrees K_nn distribution
list_Knn=[]
for n in DG.nodes():
degree = 0.0
count = 0
for nn in DG.neighbors(n):
count += 1
degree = degree + DG.degree(nn)
temp = degree / len(list(DG.neighbors(n)))
list_Knn.append(temp)
#plot the histogram
hist(list_Knn,bins=12)
皮尔逊相关系数
Pearson相关性系数(Pearson Correlation
)是衡量向量相似度的一种方法 。 输出范围为-1
到+1
, 0
代表无相关性,负值
为负相关,正值
为正相关。知识链接
#basic Pearson correlation coefficient for the
r1 = nx.degree_assortativity_coefficient(DG)
print(r1) # -0.33500264363818966
Density and Strength (in and out)
Loading product Networks
data_product = pd.read_csv('./data/comtrade_trade_data_2003_product_09.csv')
data_product.sample(10)
dic_product_netowrk={}
commodity_codes=['09','10','27','29','30','39','52','71','72','84','85','87','90','93']
for c in commodity_codes:
dic_product_netowrk[c]=net_symmetrisation("./data/comtrade_trade_data_2003_product_"+ c +".csv", \
exclude_countries)
DG_aggregate=net_symmetrisation( "./data/comtrade_trade_data_total_2003.csv",exclude_countries)
rescale the weighted ajacency aggregate matrix
w i j t o t = w i j t o t ∑ h k w h k t o t w_{ij}^{tot}=\frac{ w_{ij}^{tot} }{ \sum_{hk}w_{hk}^{tot} } wijtot=∑hkwhktotwijtot
w_tot = 0
for u,v,d in DG_aggregate.edges(data=True): # data=True 边带权重值
w_tot += d['weight'] # 约等于数值1
for u,v,d in DG_aggregate.edges(data=True):
d['weight'] = d['weight'] / w_tot
rescale the weighted ajacency product matrices
w i j c = w i j c ∑ h k w h k c w_{ij}^c=\frac{w_{ij}^c}{\sum_{hk}w_{hk}^c} wijc=∑hkwhkcwijc
for c in commodity_codes:
w_tot = 0.0
for u,v,d in dic_product_netowrk[c].edges(data=True):
w_tot += d['weight']
for u,v,d in dic_product_netowrk[c].edges(data=True):
d['weight'] = d['weight'] / w_tot
Generate the table with the quantities
w i j w_{ij} wij D e n s i t y Density Density N S i n / N D i n NS_{in}/ND_{in} NSin/NDin N S o u t / N D o u t NS_{out}/ND_{out} NSout/NDout
# 测试节点数
print(len(DG_aggregate.nodes())) # 232
DG_aggregate.number_of_nodes() # 232
density_aggregate = DG_aggregate.number_of_edges() / \
(DG_aggregate.number_of_nodes() * DG_aggregate.number_of_nodes() - 1.0)
w_agg = []
NS_in = []
NS_out = []
for u,v,d in DG_aggregate.edges(data=True):
w_agg.append(d['weight'])
for n in DG_aggregate.nodes():
if DG_aggregate.in_degree(n) > 0:
NS_in.append(DG_aggregate.in_degree(n, weight='weight') / DG_aggregate.in_degree(n))
if DG_aggregate.out_degree(n) > 0:
NS_out.append(DG_aggregate.out_degree(n, weight='weight') / DG_aggregate.out_degree(n))
for c in commodity_codes:
density_commodity = dic_product_netowrk[c].number_of_edges() / \
(dic_product_netowrk[c].number_of_nodes() * dic_product_netowrk[c].number_of_nodes() - 1.0)
w_c = []
NS_c_in = []
NS_c_out = []
for u,v,d in dic_product_netowrk[c].edges(data=True):
w_c.append(d['weight'])
for n in dic_product_netowrk[c].nodes():
if dic_product_netowrk[c].in_degree(n) > 0:
NS_c_in.append(dic_product_netowrk[c].in_degree(n, weight='weight') / dic_product_netowrk[c].in_degree(n))
if dic_product_netowrk[c].out_degree(n) > 0:
NS_c_out.append(dic_product_netowrk[c].out_degree(n, weight='weight') / dic_product_netowrk[c].out_degree(n))
print(c, str(round(density_commodity/density_aggregate, 4)) + ' & ' + str(round(mean(w_c)/mean(w_agg), 4)) \
+ ' & ' + str(round(mean(NS_c_in)/mean(NS_in),4)) + ' & ' + str(round(mean(NS_c_out)/mean(NS_out), 4)) )
# output
09 0.3089 & 3.3811 & 2.553 & 2.3906
10 0.1961 & 5.5195 & 5.9919 & 2.5718
27 0.3057 & 3.3575 & 2.6786 & 3.2979
29 0.3103 & 3.3664 & 2.3579 & 1.6286
30 0.3662 & 2.803 & 2.3308 & 1.267
39 0.4926 & 2.0478 & 1.753 & 1.1385
52 0.2864 & 3.5839 & 2.7572 & 2.1254
71 0.2843 & 3.6746 & 1.9479 & 2.6704
72 0.3081 & 3.3315 & 2.5847 & 1.8484
84 0.6195 & 1.6281 & 1.3359 & 1.0259
85 0.5963 & 1.6917 & 1.3518 & 1.0692
87 0.4465 & 2.259 & 1.7488 & 1.1105
90 0.4734 & 2.1492 & 1.5879 & 1.0993
93 0.1414 & 8.4677 & 6.0618 & 4.0279
Revealed Comparative Advantage
It is necessary to weigh export of a good in relation to how much of the same product is produced worldwide, (i.e. ∑ c ′ M c ′ p \sum_{c^{\prime}} M_{c^{\prime} p} ∑c′Mc′p).
This must be compared with the importance of the export of single country, which is again a ratio between the total export ofc
(i.e. ∑ p ′ M c p ′ \sum_{p^{\prime}} M_{c p^{\prime}} ∑p′Mcp′) with respect to the global value of the exports for every country (i.e. ∑ c ′ , p ′ M c ′ p ′ \sum_{c^{\prime},p^{\prime}} M_{c^{\prime} p^{\prime}} ∑c′,p′Mc′p′).
We consider countryc
to be a competitive exporter of productp
if itsRCA
is greater than some threshold value.
R C A c p = M c p ∑ p ′ M c p ′ / ∑ c ′ M c ′ p ∑ c ′ , p ′ M c ′ p ′ RCA_{c p}=\frac{M_{c p}}{\sum_{p^{\prime}} M_{c p^{\prime}}} / \frac{\sum_{c^{\prime}} M_{c^{\prime} p}}{\sum_{c^{\prime}, p^{\prime}} M_{c^{\prime} p^{\prime}}} RCAcp=∑p′Mcp′Mcp/∑c′,p′Mc′p′∑c′Mc′p
def RCA(c, p):
X_cp = dic_product_netowrk[p].out_degree(c, weight='weight')
X_c = DG_aggregate.out_degree(c, weight='weight')
X_p = 0.0
for n in dic_product_netowrk[p].nodes():
X_p += dic_product_netowrk[p].out_degree(n, weight='weight')
X_tot = 0.0
for n in DG_aggregate.nodes():
X_tot += DG_aggregate.out_degree(n, weight='weight')
RCA_cp = (X_cp/X_c) / (X_p/X_tot)
return RCA_cp
p = '93'
c = 381
print(RCA(c, p)) # 2.104705551640614
Bipartite Network
defining the country-product matrix
C = M M T C =MM^T C=MMT P = M T M P=M^TM P=MTM ( 2.13 ) (2.13) (2.13)
M T M^T MT is the transposed matrix and the square matrices
C
andP
define the country–country network and the product–product network. The element C c c ′ C_{cc^{\prime}} Ccc′ defines the weight associated to the link between countries c c c and c ′ c^{\prime} c′ in the country–country network. Analogously P p p ′ P_{pp^{\prime}} Ppp′ gives the weight of the link between products p p p and p ′ p^{\prime} p′ in the product product network.
These weights have an interesting interpretation: if we write explicitly the expression of a generic element of the C C C matrix according to (2.13), we have that C c c ′ = ∑ p M c p M c ′ p C_{c c^{\prime}}=\sum_{p} M_{c p} M_{c^{\prime} p} Ccc′=∑pMcpMc′p . Therefore the element C c c ′ C_{cc^{\prime}} Ccc′ (since M c p M_{cp} Mcp is a binary unweighted matrix) counts the number of products exported by both countries c c c and c ′ c^{\prime} c′. In a similar way the the element P p p ′ P_{pp^{\prime}} Ppp′ counts the number of countries which export both products p p p and p ′ p^{\prime} p′.
The diagonal elements C c c C_{cc} Ccc and P p p P_{pp} Ppp are respectively the number of products exported by the country c c c and the number of exporters of the product p p p.
import numpy as np
num_countries = DG_aggregate.number_of_nodes()
num_products = len(commodity_codes)
# generate array indices
country_index = {}
i = 0
for c in DG_aggregate.nodes():
country_index[c] = i
i += 1
M = np.zeros((num_countries, num_products))
for pos_p, p in enumerate(commodity_codes):
for c in dic_product_netowrk[p].nodes():
if RCA(c, p) > 1.0: # 假定阈值为1
M[country_index[c]][pos_p] = 1.0
print('\n')
C = np.dot(M, M.transpose())
P = np.dot(M.transpose(), M)
print(C)
print(P)
[[1. 0. 0. ... 1. 0. 0.]
[0. 3. 1. ... 0. 0. 0.]
[0. 1. 3. ... 0. 1. 0.]
...
[1. 0. 0. ... 1. 0. 0.]
[0. 0. 1. ... 0. 2. 0.]
[0. 0. 0. ... 0. 0. 0.]]
[[83. 27. 28. 4. 6. 6. 29. 31. 20. 1. 3. 3. 5. 12.]
[27. 59. 19. 4. 4. 8. 27. 18. 19. 5. 3. 7. 3. 12.]
[28. 19. 71. 4. 2. 7. 20. 16. 14. 3. 4. 4. 1. 9.]
[ 4. 4. 4. 20. 9. 9. 2. 6. 5. 5. 4. 3. 7. 7.]
[ 6. 4. 2. 9. 27. 15. 7. 6. 10. 9. 3. 8. 9. 10.]
[ 6. 8. 7. 9. 15. 37. 10. 7. 15. 10. 10. 8. 9. 11.]
[29. 27. 20. 2. 7. 10. 69. 19. 18. 4. 5. 7. 5. 14.]
[31. 18. 16. 6. 6. 7. 19. 57. 10. 4. 3. 4. 6. 9.]
[20. 19. 14. 5. 10. 15. 18. 10. 56. 7. 7. 12. 2. 15.]
[ 1. 5. 3. 5. 9. 10. 4. 4. 7. 26. 12. 9. 7. 6.]
[ 3. 3. 4. 4. 3. 10. 5. 3. 7. 12. 26. 5. 8. 6.]
[ 3. 7. 4. 3. 8. 8. 7. 4. 12. 9. 5. 27. 5. 11.]
[ 5. 3. 1. 7. 9. 9. 5. 6. 2. 7. 8. 5. 20. 5.]
[12. 12. 9. 7. 10. 11. 14. 9. 15. 6. 6. 11. 5. 38.]]