#coding:utf-8
import scrapy
import xlwt, lxml
import re, json
import matplotlib.pyplot as plt
import numpy as np
import pylab
from scipy import linalg
from igraph import *
import csv
# Load the edge list from the CSV file as (source, target, weight) tuples.
edges = []
# newline='' lets the csv module handle line endings itself, so rows can be
# streamed from the file instead of reading it whole and splitting by hand.
with open('stormofswords.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        if not row:
            continue  # tolerate blank lines instead of failing on unpack
        u, v, weight = row
        edges.append((u, v, int(weight)))
# print(edges)
from igraph import Graph as IG

# Build a directed graph from the (source, target, weight) tuples. Vertex
# labels are stored in the 'name' vertex attribute; weights=True asks
# TupleList to keep the third tuple element as the edge weight.
g = IG.TupleList(
    edges,
    weights=True,
    directed=True,
    edge_attrs=None,
    vertex_name_attr='name',
)
print(g)

# Keep the vertex names and edge weights at hand as plain sequences.
names, weights = g.vs['name'], g.es['weight']
print(g.is_weighted())
# Network analysis.
# Number of characters (vertices in the graph).
print(g.vcount())
# Network diameter: the diameter (or geodesic) of a network is defined as
# the longest shortest path in the network.
print(g.diameter())
# Print that longest shortest path: first as vertex indices ...
print(g.get_diameter())
# ... then translated to character names.
diameter_path = g.get_diameter()
vnames = [names[vertex_index] for vertex_index in diameter_path]
print(vnames)
# Shortest paths.
# Shortest-path length from Jon to Margaery.
# NOTE(review): newer python-igraph releases appear to rename
# shortest_paths() to distances() -- confirm against the installed version.
print(g.shortest_paths('Jon', 'Margaery'))
print('----------------------')
# The first shortest path returned for that pair, as character names.
jon_to_margaery = g.get_shortest_paths('Jon', 'Margaery')[0]
print([names[idx] for idx in jon_to_margaery])
print('-----------------------')
# Every shortest path that starts at Jon, one path per line.
paths = g.get_all_shortest_paths('Jon')
for path in paths:
    path_names = [names[idx] for idx in path]
    print(path_names)
# Centrality measures.
# Degree centrality: simply the number of connections a node has in the
# network. In the context of the Game of Thrones graph, a character's degree
# centrality is the number of other characters that character interacted with.
print(g.maxdegree())
print('-----------------')
# List every character whose degree is above 15.
for vertex in g.vs:
    degree = vertex.degree()
    if degree <= 15:
        continue
    print(vertex['name'], degree)
# Weighted degree centrality: the sum of the weights of the edges incident to
# a vertex. Graph.strength() computes exactly that; summing the neighbours'
# degrees instead would ignore the edge weights altogether.
for name, weighted_degree in zip(g.vs['name'], g.strength(weights='weight')):
    if weighted_degree > 250:
        print(name, weighted_degree)
# Average neighbour degree.
# Only the first element of knn()'s result is used; it is paired with g.vs,
# i.e. read as one value per vertex.
per_vertex_average = g.knn()[0]
for vertex, average_degree in zip(g.vs, per_vertex_average):
    if average_degree > 20:
        print(vertex['name'], average_degree)
# Betweenness centrality.
# Betweenness centrality is an important metric because it can be used to
# identify "information brokers" in the network, or nodes that connect
# disparate clusters.
btvs = [
    {'name': vertex['name'], 'bt': score}
    for vertex, score in zip(g.vs, g.betweenness())
]
# list.append() returns None, so printing its result only emitted "None" per
# vertex; print the collected scores instead, highest betweenness first.
print(sorted(btvs, key=lambda k: k['bt'], reverse=True))
# Closeness centrality.
# Nodes with high closeness centrality are often highly connected within
# clusters in the graph, but not necessarily highly connected outside of
# their cluster.
ccvs = [
    {'name': vertex['name'], 'cc': score}
    for vertex, score in zip(g.vs, g.closeness())
]
# Print every character, highest closeness first.
print(sorted(ccvs, key=lambda entry: entry['cc'], reverse=True))
# PageRank.
# niter/eps are deliberately not passed: they only configured the old
# power-iteration implementation, had no effect with 'prpack', and are
# rejected as unknown keyword arguments by current python-igraph releases.
pg = g.pagerank(
    vertices=None,
    directed=True,
    damping=0.85,
    weights=weights,
    arpack_options=None,
    implementation='prpack',
)
pgvs = [
    {'name': vertex['name'], 'pg': score}
    for vertex, score in zip(g.vs, pg)
]
# Show the ten characters with the highest PageRank score.
print(sorted(pgvs, key=lambda k: k['pg'], reverse=True)[:10])
# Community detection.
# Community detection algorithms are used to find clusters in the graph. The
# walktrap method implemented in igraph is used here to find communities of
# characters that interact frequently within the community but do not have
# much interaction outside of it.
#
# community_walktrap: community detection algorithm of Latapy & Pons, based
# on random walks.
# Pascal Pons, Matthieu Latapy: Computing communities in large networks using
# random walks, http://arxiv.org/abs/physics/0512106.
dendrogram = g.community_walktrap(weights='weight')
clusters = dendrogram.as_clustering()
print(clusters)
# Tag every vertex with its community id, then group the names by community.
# clusters.membership is indexed by vertex index, so it runs parallel to g.vs
# and can be zipped with the names directly; looking each vertex up again by
# name would cost a linear scan per vertex.
nodes = [
    {'name': name, 'community': community_id}
    for name, community_id in zip(g.vs['name'], clusters.membership)
]
# Maps community id -> list of member names, in vertex order.
community = {}
for node in nodes:
    community.setdefault(node['community'], []).append(node['name'])
for community_id, members in community.items():
    print('Community--%s: %s' % (community_id, members))
# Network visualization is still to do; the networkx package (pip install networkx) could be used for it.