# Word segmentation — produces a list-of-lists format.
# (For the origin of df2, see the previous post: https://blog.csdn.net/dongzixian/article/details/103474094)
import jieba

# Segment each raw-text row of column 0 into a token list (list of lists overall),
# stored in a new 'cut' column. jieba.lcut returns a plain Python list per row.
df2['cut'] = df2[0].map(jieba.lcut)
df2.head()
from sklearn.model_selection import train_test_split

# Hold out 30% of the data for evaluation.
# random_state pins the shuffle so the split (and downstream results) is
# reproducible across runs — without it every execution yields a different split.
# NOTE(review): consider stratify=df2['class'] to keep class proportions
# balanced across train/test — confirm the label distribution warrants it.
x_train, x_test, y_train, y_test = train_test_split(
    df2['cut'], df2['class'], test_size=0.3, random_state=42
)
x_train
from gensim.models.word2vec import Word2Vec

# 400-dimensional embeddings; ignore tokens appearing fewer than 5 times.
# NOTE(review): in gensim >= 4.0 the `size` parameter was renamed to
# `vector_size` — keep `size` only if pinned to gensim 3.x.
model = Word2Vec(size=400, min_count=5)

# Scan the training corpus once to build the vocabulary.
model.build_vocab(x_train)

# BUG FIX: the original called build_vocab a second time, which raises a
# RuntimeError in gensim (a repeat scan needs update=True) and left the model
# untrained. The intended second step is to actually train the embeddings.
model.train(x_train, total_examples=model.corpus_count, epochs=model.epochs)