from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
def gen_embeddings(in_file, out_file, size=100):
    """Train a Word2Vec model on ``in_file`` and export its vectors.

    Args:
        in_file: Corpus source handed to ``LineSentence``.
        out_file: Destination passed to ``save_word2vec_format``; the
            vectors are written with ``binary=False``.
        size: Forwarded to ``Word2Vec`` as its ``size`` argument.

    Every other hyperparameter is pinned to the literal values below.
    """
    training_options = dict(
        size=size,
        alpha=0.025,
        window=5,
        min_count=5,
        max_vocab_size=None,
        sample=1e-3,
        seed=1,
        workers=3,
        min_alpha=0.0001,
        sg=0,
        hs=0,
        negative=5,
        cbow_mean=1,
        hashfxn=hash,
        iter=5,
        null_word=0,
        trim_rule=None,
        sorted_vocab=1,
    )
    trained = Word2Vec(sentences=LineSentence(in_file), **training_options)
    # Only the word vectors are persisted, not the full trainable model.
    trained.wv.save_word2vec_format(out_file, binary=False)
# Train on the segmented corpus and write the vectors to 'model'.
gen_embeddings('seg.txt', 'model')

# Load previously exported vectors. load_word2vec_format returns the
# keyed-vectors object itself, so `model` here is NOT a full Word2Vec model.
# NOTE(review): this reads 'LianCheng', not the 'model' file written just
# above -- confirm that loading a different file is intentional.
model = Word2Vec().wv.load_word2vec_format('LianCheng', binary=False)

# Print the 10 nearest neighbours of the query word with their scores.
y2 = model.most_similar("供热", topn=10)
for word, score in y2:
    print(word, score)

# Look up the raw vector for the query word. The keyed-vectors object is
# indexed directly: going through `.wv` on it is a deprecated self-alias in
# gensim 3.x and is absent in 4.x.
# NOTE(review): the result is discarded when run as a script; it only shows
# up in an interactive session.
model["供热"]