用 Python 写了一个简单版本的 TextRank，实现提取关键词的功能。
import numpy as np
import jieba
import jieba.posseg as pseg
class TextRank(object):
def __init__(self, sentence, window, alpha, iternum):
    """Store the input text and the TextRank parameters.

    Args:
        sentence: Text to extract keywords from; it is segmented later
            by ``cutSentence``.
        window: Window size used when linking neighbouring words.
        alpha: Coefficient for the ranking step (presumably the damping
            factor -- its use is not visible in this part of the file;
            confirm against the ranking code).
        iternum: Number of iterations.
    """
    self.sentence = sentence
    self.window = window
    self.alpha = alpha
    self.edge_dict = {}  # dictionary recording each node's edge connections
    self.iternum = iternum  # number of iterations
    # Filled in by cutSentence(); created here so the attribute always exists.
    self.word_list = []
# Segment the sentence into words.
def cutSentence(self):
    """Segment ``self.sentence`` and keep the candidate keyword words.

    Loads the user dictionary ``user_dict.txt``, runs jieba's
    part-of-speech segmentation over the sentence, and stores in
    ``self.word_list`` the words whose POS flag is exactly one of
    ``a``, ``d``, ``n`` or ``v``. The resulting list is also printed.
    """
    jieba.load_userdict('user_dict.txt')
    # Exact-match filter: a flag such as 'nr' does not match 'n'.
    tag_filter = {'a', 'd', 'n', 'v'}
    self.word_list = [
        pair.word
        for pair in pseg.cut(self.sentence)
        if pair.flag in tag_filter
    ]
    print(self.word_list)
# Build each node's neighbouring nodes according to the window; returns the set of edges.
def createNodes(self):
tmp_list = []
word_list_len = len(self.word_list)
for index, word in enum