1.为什么要重写
因为在 Django 上莫名其妙地不能 import gensim。
从而不能 from gensim.models import Word2Vec，
也就不能调用 load_model.word2vec_model.wmdistance(sentence1, sentence2)。
因此根据源码更改了引入 gensim 包的部分内容。
2.改写后的代码
import pyemd
from gensim.corpora.dictionary import Dictionary
from numpy import exp, log, dot, zeros, outer, random, dtype, float32 as REAL,\
double, uint32, seterr, array, uint8, vstack, fromstring, sqrt, newaxis,\
ndarray, empty, sum as np_sum, prod, ones, ascontiguousarray
from keras.models import load_model
import cPickle as pickle
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Python 2 only: `reload` is a builtin there and `sys.setdefaultencoding`
# is normally not reachable on `sys` until the module has been reloaded
# (NOTE(review): standard Python 2 behaviour — confirm for the target
# interpreter). This code will not run on Python 3.
import sys
reload(sys)
# Switch the interpreter-wide default string encoding to UTF-8.
# NOTE(review): this is a process-global setting, so it affects every
# module loaded afterwards, not just this file.
sys.setdefaultencoding('utf-8')
def sentence_distance(document1, document2):
len_pre_oov1 = len(document1)
len_pre_oo