分词——list of list 格式
（df2 的来源请看上一篇博客：https://blog.csdn.net/dongzixian/article/details/103474094 ）
import jieba
# Run jieba's list-returning tokenizer over every entry of column 0 and store
# the result in a new 'cut' column, so each cell holds that row's token list
# (overall a "list of lists" layout).
# NOTE(review): df2 is created outside this snippet; column 0 presumably holds
# the raw text — confirm against the code that builds df2.
df2['cut'] = df2[0].apply(func=jieba.lcut)
# Look at the first five rows to check the new column.
df2.head(n=5)
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as the test set: the tokenized text in 'cut' is the
# feature and 'class' is the label.
# random_state pins the shuffle so the same rows land in train/test on every
# run; without it the partition (and every downstream result) changes each
# time the script is executed.
x_train, x_test, y_train, y_test = train_test_split(
    df2['cut'],
    df2['class'],
    test_size=0.3,
    random_state=42,
)
# Inspect the training features.
x_train