import jieba
import nltk
def read_tokens(path):
    """Return the corpus at *path* as one flat list of jieba tokens.

    The file is read as UTF-8, line by line; each line is stripped of
    surrounding whitespace before being segmented with ``jieba.cut``.
    Tokens from all lines are concatenated in file order.
    """
    tokens = []
    # The context manager guarantees the handle is closed even if
    # segmentation raises part-way through the file.
    with open(path, 'r', encoding='utf-8') as corpus:
        for line in corpus:
            tokens.extend(jieba.cut(line.strip()))
    return tokens


def find_same_context_pairs(ranked_trigrams):
    """Pair up middle words that occur between the same two neighbours.

    Args:
        ranked_trigrams: sequence of ``((left, middle, right), count)``
            items. Only the trigram is inspected; the count is ignored.

    Returns:
        A list of ``[word_a, word_b]`` lists, in discovery order. Two
        words are paired when some trigram ``(left, word_a, right)`` and
        some trigram ``(left, word_b, right)`` are both present and the
        words differ. Each unordered pair is reported once.
    """
    pairs = []
    seen = set()
    for (left, middle, right), _ in ranked_trigrams:
        for (other_left, other_middle, other_right), _ in ranked_trigrams:
            if left != other_left or right != other_right:
                continue
            if middle == other_middle:
                continue
            key = frozenset((middle, other_middle))
            # Skip only this candidate. Leaving the inner loop here would
            # drop later, still-unseen partners of `middle`.
            if key in seen:
                continue
            seen.add(key)
            pairs.append([middle, other_middle])
    return pairs


if __name__ == "__main__":
    sents = read_tokens("corpus.txt")
    finder = nltk.collocations.TrigramCollocationFinder.from_words(sents)
    # Keep the 100 most frequent trigrams; ties are ordered by the trigram
    # itself so the selection is deterministic.
    resu = sorted(finder.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:100]
    tuples = find_same_context_pairs(resu)
    for first, second in tuples:
        print(first + "==" + second)
# python 三元组找上下位相同的词 (Python: using trigrams to find words that share the same preceding and following word)
# 最新推荐文章于 2024-07-05 03:34:42 发布 (site note: latest recommended article published 2024-07-05 03:34:42)