# gensim Word2Vec documentation: https://radimrehurek.com/gensim/models/word2vec.html
# 接上篇 (continued from the previous post):
import jieba
# `xl` is defined outside this section -- presumably a pandas DataFrame with
# a '工作内容' column; TODO confirm against the preceding code.
# NOTE(review): jieba.cut returns a lazy generator, so the print below shows
# the generator object rather than the tokens. jieba.cut also expects a
# single string -- confirm that the [0:6] slice actually yields one.
all_list = jieba.cut(xl['工作内容'][0:6], cut_all=True)
print(all_list)

# Tokenize every entry of the column. Each resulting element is a one-shot
# generator of tokens, so it can be iterated only once.
every_one = xl['工作内容'].apply(jieba.cut)
import traceback
# Tokens dropped by filtered_punctuations: whitespace plus ASCII and CJK
# punctuation marks. Built once at import time instead of on every call.
# NOTE(review): the original list contained ':' and ';' twice; the duplicates
# may have been full-width variants lost in transcription -- confirm.
_PUNCTUATIONS = frozenset([
    ' ', '\n', '\t', ',', '.', ':', ';', '?', '(', ')', '[', ']',
    '&', '!', '*', '@', '#', '$', '%', '/', '\xa0', '。', '、',
])


def filtered_punctuations(token_list):
    """Return the tokens of *token_list* that are not punctuation/whitespace.

    Args:
        token_list: Iterable of string tokens (for example the generator
            produced by ``jieba.cut``). It is consumed exactly once.

    Returns:
        A new list holding, in their original order, the tokens that are
        not members of ``_PUNCTUATIONS``. ``None`` when filtering fails
        (for example when *token_list* is not iterable); the traceback is
        printed in that case.
    """
    try:
        return [word for word in token_list if word not in _PUNCTUATIONS]
    except Exception:
        # Best-effort behaviour kept from the original: report and return
        # None instead of raising. print_exc() writes the traceback itself
        # and returns None, so it must not be wrapped in print() -- doing so
        # emitted a stray "None" line.
        traceback.print_exc()
        return None
from gensim.models import Doc2Vec,Word2Vec
import gensim
def list_crea(everyone):
list_word = [