使用gensim word2vec训练英文语料—最简洁版
直接上代码:
# -*- coding: utf-8 -*-
import gensim.models.word2vec as word2vec
import gensim
import logging
# Train word2vec on the text8 corpus and save the resulting model
def train_save_model(corpus_path='text8', model_path='text.model',
                     vector_size=200, verbose=False):
    """Train a Word2Vec model on a text8-format corpus and save it to disk.

    Args:
        corpus_path: Path of the corpus file, read through ``Text8Corpus``.
        model_path: Path the trained model is written to.
        vector_size: Dimensionality of the learned word vectors.
        verbose: When true, configure INFO-level logging before training.
    """
    if verbose:
        logging.basicConfig(
            format='%(asctime)s : %(levelname)s : %(message)s',
            level=logging.INFO,
        )

    # Load the corpus
    sentences = word2vec.Text8Corpus(corpus_path)

    # gensim 4.0 renamed the ``size`` keyword to ``vector_size``; pass the
    # spelling that the installed release accepts.
    major_version = int(gensim.__version__.split('.')[0])
    dim_keyword = 'vector_size' if major_version >= 4 else 'size'
    model = word2vec.Word2Vec(sentences, **{dim_keyword: vector_size})

    model.save(model_path)
# Load the saved model and run two sample queries
def load_model(model_path='text.model'):
    """Load a saved Word2Vec model and print two example lookups.

    Prints the nearest neighbours of ``'man'`` and the vector stored for
    ``'red'``.

    Args:
        model_path: Path of a model previously written by ``model.save``.
    """
    model = word2vec.Word2Vec.load(model_path)

    # Query through the KeyedVectors object: calling most_similar() or
    # indexing directly on the model was deprecated and later removed in
    # gensim 4, while ``model.wv`` works on old and new releases alike.
    word_vectors = model.wv
    print(word_vectors.most_similar('man'))
    print(word_vectors['red'])
# Run the demo only when executed as a script, not on import.
# 'text.model' must already exist: call train_save_model() once beforehand.
if __name__ == "__main__":
    load_model()
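text8的下载地址:
http://mattmahoney.net/dc/text8.zip
(注意:原文给出的链接 http://cs.fit.edu/%7Emmahoney/compression/enwik8.zip 对应的是 enwik8 原始维基百科数据,并不是上面代码所加载的 text8 语料;下载并解压 text8.zip 后,把得到的 text8 文件放在脚本同一目录下即可。)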