每个文本可以看作是由不同的主题构成了,各个主题可以当做这篇文本的特征。
# _*_coding:utf-8 _*_
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
import numpy as np
from gensim import corpora, models, similarities
from pprint import pprint
import time
import jieba
import os
from six import iteritems