一、Word2Vec词向量
1.分词
①经典工具
②方法比较
2.训练
①使用Word2Vec
pip install word2vec
import word2vec
# Train word vectors with the `word2vec` package.
# The two positional arguments are the input corpus path and the output path
# (presumably the already-segmented corpus and a binary vector file, going by
# the file names -- confirm against the installed package's signature).
word2vec.word2vec(
    'corpusSegDone.txt',
    'corpusWord2Vec.bin',
    verbose=True,
    size=300,
)
②使用gensim
pip install gensim
import logging
import multiprocessing
import os.path
import sys
import jieba
from gensim.models import Word2Vec
from gensim.models.word2vec import PathLineSentences
if __name__ == '__main__':
# 日志信息输出
program = os.path.basename(sys.argv[0])
logger = logging.getLogger(program)
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)
logger.info("running %s" % ' '.join(sys.argv))
# check and process input arguments
#