#coding:utf-8
import numpy as np
from scipy import integrate
import pandas as pd
# In[ ]:
import jieba
from collections import Counter
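# Disabled earlier approach: take the 1000 most frequent tokens and keep
# only the multi-character ones. The first line was truncated in the source
# ("esult=Counter="); the indentation below is reconstructed.
# result = Counter(...)
# top_1000 = result.most_common(1000)
# top = []
# for i in top_1000:
#     print(i[0])
#     length = len(i[0])
#     if length != 1:
#         top.append(i[0])
# print(top)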
# Keyword list used below as the node set of the co-occurrence graph
# (the name suggests the keywords were picked by TF-IDF -- TODO confirm).
# NOTE(review): the right-hand side of this assignment is missing from the
# source, so the file does not parse as-is; the keyword list must be restored.
toptfidf=
# Report how many keywords are in the list.
print(len(toptfidf))
import matplotlib.pyplot as plt
import networkx as nx
# Font configuration so that Chinese labels can be displayed in plots.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Undirected graph holding one node per keyword; edges are added further down.
G = nx.Graph()
G.add_nodes_from(toptfidf)
# df = pd.read_csv('聊天.csv',engine='python',sep=',',encoding='utf-8')
# Location of the text file whose lines are scanned for keywords below.
# NOTE(review): presumably word-segmented output, one record per line -- confirm.
file = r'C:\Users\20143\Desktop\数据\fenci.out'

# Open read-only: the script never writes to this file, and the previous
# 'r+' mode demanded write permission (failing on read-only files) for no gain.
with open(file, 'r', encoding='utf-8') as f:
    # One list entry per line, trailing newline included.
    a = f.readlines()

# Dump the loaded lines to stdout.
print(a)
# Alias of the keyword list; it is not referenced again in the visible code.
keyword = toptfidf
# togo.to_csv(r'E:\Python\togo.csv')
import itertools
# a=(df['content'].astype(str).values.tolist())

# Link every unordered pair of keywords with an edge whose weight starts at 0,
# so the counting pass can increment weights without creating edges.
G.add_edges_from(itertools.combinations(toptfidf, 2), weight=0)
# Original note (translated): this could be reworked into another
# representation -- there are too many pairwise interactions; keywords come
# from TF-IDF extraction.
#
# Co-occurrence pass: for every loaded line, collect the keywords that occur
# in it and add 1 to the weight of each edge joining two of those keywords.
for line in a:
    # Plain substring test of each keyword against the raw line text.
    present = [kw for kw in toptfidf if str(kw) in line]
    # combinations() yields nothing for fewer than two keywords, so no
    # separate emptiness check is needed.
    for left, right in itertools.combinations(present, 2):
        G[left][right]['weight'] += 1
# Draw the full keyword graph with node labels in bold.
# NOTE(review): plt.show() is commented out below and no savefig call is
# visible here, so the drawing does not appear to be displayed or saved --
# confirm whether this draw call is still needed.
nx.draw(G, with_labels=True, font_weight='bold')
# Write the weighted graph to a GEXF file in the current working directory.
nx.write_gexf(G, '图谱权重.gexf')
# plt.show()
# Build a pruned graph: the same keyword nodes, but only the edges whose
# weight is greater than 1000.
# (The original comment mentioned a threshold of 20; the code and the output
# file name both use 1000.)
FG = nx.Graph()
FG.add_nodes_from(toptfidf)
FG.add_weighted_edges_from(
    edge for edge in G.edges.data('weight') if edge[2] > 1000
)
nx.write_gexf(FG, '筛选大于1000边.gexf')
# plt.show()