一、训练词向量
import jieba
import numpy as np
import pandas as pd
from gensim.models.word2vec import Word2Vec
import joblib
from sklearn.svm import SVC
# Load the positive and negative corpora. header=None keeps the first row as
# data and exposes the text under the integer column label 0.
# NOTE(review): presumably one sentence per line -- confirm against the files.
pos = pd.read_csv('weather_pos.txt', header=None, encoding='UTF-8')
neg = pd.read_csv('weather_neg.txt', header=None, encoding='UTF-8')

# Tokenise every text with jieba and store the token lists in a 'words' column.
for corpus in (pos, neg):
    corpus['words'] = corpus[0].apply(jieba.lcut)

# Training sentences: positive token lists first, then the negative ones.
x = np.concatenate([pos['words'], neg['words']])
# Labels aligned with x: 1.0 for each positive row, 0.0 for each negative row.
y = np.concatenate([np.ones(len(pos)), np.zeros(len(neg))])

# NOTE: `size` and `iter` are the gensim 3.x keyword names; gensim >= 4.0
# renamed them to `vector_size` and `epochs`.
word2vec = Word2Vec(
    x,
    size=128,      # dimensionality of the word vectors
    window=3,      # context window size
    min_count=5,   # ignore words seen fewer than 5 times
    sg=1,          # 1 selects skip-gram
    hs=1,          # 1 selects hierarchical softmax
    iter=10,       # number of passes over the corpus
    workers=25,    # number of worker threads
)
word2vec.save('word2vec.model')
二、使用词向量
- 加载词向量
# Load the trained model from 'word2vec.model' into `word2vec_model`.
word2vec_model = Word2Vec.load('word2vec.model')
- 将所有词的词向量保存
# Export every word vector to 'word2vec.txt'; binary=False selects the
# text variant of the word2vec format.
word2vec_model.wv.save_word2vec_format('word2vec.txt', binary=False)
![保存后的 word2vec.txt 词向量文件内容截图](https://i-blog.csdnimg.cn/blog_migrate/9c754b921c3f9c0d3b8f530a36c09ebe.png)
- 获取某个词的词向量
# Look up the vector of a single word; a word missing from the vocabulary
# (e.g. dropped by min_count during training) raises KeyError.
word2vec_model.wv['温度']
![“温度”一词的词向量输出截图](https://i-blog.csdnimg.cn/blog_migrate/aad5a34fdb47ca7b8ef5ab9f904a02f2.png)
- 获取某个词的相似词
# Query the words most similar to '天气'. The call goes through the
# KeyedVectors object (`.wv`): `most_similar` on the model itself is
# deprecated in gensim 3.x and removed in gensim 4.0, while `.wv.most_similar`
# works on both.
word2vec_model.wv.most_similar('天气')
![与“天气”最相似的词及其相似度输出截图](https://i-blog.csdnimg.cn/blog_migrate/4869c3831c04dd82529b5f96733af4bb.png)