建立共词网络

最新推荐文章于 2024-04-30 08:51:14 发布

作小寒

最新推荐文章于 2024-04-30 08:51:14 发布

阅读量684

点赞数 2

分类专栏：知识图谱

本文链接：https://blog.csdn.net/qq_34069180/article/details/108976244

版权

知识图谱专栏收录该内容

13 篇文章 0 订阅

订阅专栏

#coding:utf-8
import numpy as np
from scipy import integrate
import pandas as pd
# In[ ]:

import jieba
from collections import Counter

#esult=Counter=
# top_1000=result.most_common(1000)
# top=[]
# for i in top_1000:
#     print(i[0])
#     length=len(i[0])
#     if (length!=1):
#         top.append(i[0])
# print(top)

# NOTE(review): the right-hand side of this assignment is missing, so the
# script does not parse as written. Presumably this was a list of top TF-IDF
# keywords (the name suggests so, and it is used below as the graph's node
# set) -- TODO supply the actual value before running.
toptfidf=
# Report how many keywords are in the list.
print(len(toptfidf))

import matplotlib.pyplot as plt
import networkx as nx
# Matplotlib font setup: use the SimHei font so Chinese labels can be
# displayed, and turn off the 'axes.unicode_minus' option.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Undirected graph with one node per keyword.
G = nx.Graph()
G.add_nodes_from(toptfidf)
# df = pd.read_csv('聊天.csv',engine='python',sep=',',encoding='utf-8')
# Input text file (presumably word-segmentation output, judging by the file
# name -- TODO confirm). Each line is later scanned for keyword occurrences.
file=r'C:\Users\20143\Desktop\数据\fenci.out'
# The file is only read, so open it read-only: 'r+' would also request write
# access and fail on read-only files for no benefit.
with open(file, 'r', encoding='utf-8') as f:
    a = f.readlines()
print(a)
# Alias for the keyword list.
keyword = toptfidf

# togo.to_csv(r'E:\Python\togo.csv')
import itertools
# a=(df['content'].astype(str).values.tolist())

# Pre-create an edge with weight 0 between every pair of keywords, so the
# counting step can increment an existing 'weight' attribute.
# Author's note (translated): this could be re-expressed differently -- there
# are too many pairwise interactions; keywords come from TF-IDF extraction.
G.add_edges_from(itertools.combinations(toptfidf, 2), weight=0)

# Count keyword co-occurrence: for each line in `a`, collect the keywords that
# occur in it (plain substring test) and add one to the weight of the edge
# joining every pair of them.
for line in a:
    present = [kw for kw in toptfidf if str(kw) in line]
    # combinations() yields nothing for fewer than two keywords, so lines
    # with zero or one match leave the graph untouched.
    for u, v in itertools.combinations(present, 2):
        # Relies on the edge already existing with a numeric 'weight'.
        G[u][v]['weight'] += 1

# Draw the full weighted graph with bold node labels, then export it as GEXF.
draw_options = {'with_labels': True, 'font_weight': 'bold'}
nx.draw(G, **draw_options)
nx.write_gexf(G, '图谱权重.gexf')
# plt.show()

# Build a filtered graph that keeps every keyword node but only the edges
# whose weight is greater than 1000, then export it as GEXF.
FG = nx.Graph()
FG.add_nodes_from(toptfidf)

heavy_edges = (
    (u, v, wt)
    for u, v, wt in G.edges.data('weight')
    if wt > 1000
)
FG.add_weighted_edges_from(heavy_edges)
nx.write_gexf(FG, '筛选大于1000边.gexf')
# plt.show()

作小寒

关注

2
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
建立共词网络

#coding:utf-8import numpy as npfrom scipy import integrateimport pandas as pd# In[ ]:import jiebafrom collections import Counter#esult=Counter=# top_1000=result.most_common(1000)# top=[]# for i in top_1000:# print(i[0])# length=len(i.
复制链接

扫一扫

专栏目录