word2vec简介及词向量构建
word2vec简介及词向量构建
word2vec简介及词向量构建
# Import packages.
# NOTE: the pandas import used to sit on the same line as the comment above
# it, which commented it out and left `pd` undefined for the reads below.
import pandas as pd
import numpy as np
from gensim.models import Word2Vec

# Read the dataset splits. The files are tab-separated with no header row,
# so the two columns are named explicitly: 'label' and 'content'.
train = pd.read_csv('./cnews/train.tsv', sep='\t', header=None, names=['label', 'content'])
val = pd.read_csv('./cnews/dev.tsv', sep='\t', header=None, names=['label', 'content'])
test = pd.read_csv('./cnews/test.tsv', sep='\t', header=None, names=['label', 'content'])

# Preview the first rows of the training split; the result is only displayed
# when run interactively (e.g. in a notebook cell).
train.head()
# Word segmentation tools: jieba, pkuseg
import jieba
def content_cut(x: str) -> str:
    """Segment text with jieba and join the tokens with single spaces.

    Args:
        x: The text to segment; it is passed directly to ``jieba.lcut``.

    Returns:
        One string made of the tokens returned by ``jieba.lcut``,
        separated by a single space.
    """
    tokens = jieba.lcut(x)
    return " ".join(tokens)