Doc2Vec、Word2Vec 文本相似度计算(Python 3)
接上篇 (https://blog.csdn.net/qq_33805714/article/details/109247757):
import jieba
# Demo: full-mode segmentation of the first six entries of the '工作内容' column.
# NOTE(review): `xl` is defined outside this excerpt; presumably a pandas
# DataFrame whose '工作内容' column holds strings -- confirm against the caller.
# jieba.cut takes ONE string and returns a lazy generator, so handing it the
# six-row slice would only fail once iterated and printing it would show just
# the generator object. Segment each entry separately and materialise the
# tokens with jieba.lcut so the print below displays the actual words.
all_list = [jieba.lcut(text, cut_all=True) for text in xl['工作内容'][0:6]]
print(all_list)
# Default-mode segmentation of every row. Each element stays a lazy,
# single-use generator (it can be iterated only once); it is deliberately left
# lazy so that tokenisation still happens when the consumer iterates it.
every_one = xl['工作内容'].apply(lambda x: jieba.cut(x))
import traceback
def filtered_punctuations(token_list):
try:
punctuations = [' ', '\n', '\t', ',', '.', ':', ';', '?', '(', ')', '[', ']', '&', '!', '*', '@', '#', '$', '%',':',
'/','\xa0','。',';','、']
token_list_without_punctuations = [word for word in token_list