Python实现抢注大词的提词工具
假设通过爬虫获得了一个关键词文件“自媒体.txt”,其中每行是一个关键词。
想要从这些关键词中提取流量最大的关键词
可以通过如下算法实现:
from smoothnlp.algorithm.phrase import extract_phrase
import re
# Category being processed; it is also the base name of the keyword file.
class_name = '自媒体'
# Lower-cased copy, compared against lower-cased candidate words further down.
class_name_low = class_name.lower()
# Passed to extract_phrase as its top_k argument.
top_k = 100

# Load the keyword file (one keyword per line).
with open('%s.txt' % class_name, 'r', encoding='utf-8') as file:
    data_str = file.read()
keyword_list = data_str.split('\n')

# Load the exclusion list: candidate words found in this set are skipped
# by the loop that follows.
with open('dont.txt', 'r', encoding='utf-8') as file:
    dont_set = set(file.read().split('\n'))

# Starts empty here.
word_count_dict = dict()

# Candidate phrases mined from the keyword corpus.
# NOTE(review): presumably returns at most top_k phrases - confirm against
# the smoothnlp documentation.
new_word_list = extract_phrase(keyword_list, top_k=top_k)
for new_word in new_word_list:
if new_word in dont_set:
continue
new_word_low = new_word.lower()
if class_name_low in new_word_low or class_name_low