class BaseAny2VecModel:
    """Root base class for any2vec-style models (outline/skeleton).

    Fixed from the original note: a class header needs a colon and
    a body to be valid Python.
    """

    def __init__(self):
        # Outline only: the real implementation lives in gensim.
        pass
class BaseWordEmbeddingsModel(BaseAny2VecModel):
    """Shared base class for word-embedding models (outline/skeleton).

    Fixes over the original note: colons added, `self` added to every
    method, and `sentences` is declared as a parameter since both
    `__init__` and `build_vocab` use it.
    """

    def __init__(self, sentences=None):
        """Initialize the model: build the vocabulary, then train.

        :param sentences: iterable of tokenized sentences (the corpus);
            may be None in this outline.
        """
        super(BaseWordEmbeddingsModel, self).__init__()
        # Two-phase setup mirrored from gensim: vocab first, then training.
        self.build_vocab(sentences)
        self.train()

    def build_vocab(self, sentences=None):
        """Scan the corpus and prepare the vocabulary.

        Delegates to the `self.vocabulary` helper object, which is
        expected to be set by a subclass (e.g. Word2Vec) before this runs.
        """
        total_words, corpus_count = self.vocabulary.scan_vocab()
        report_values = self.vocabulary.prepare_vocab()

    def train(self):
        # Outline only: training loop is implemented in gensim.
        pass

    def similarity(self):
        # Outline only: cosine similarity between two words.
        pass

    def most_similar(self):
        # Outline only: nearest neighbours of a word in vector space.
        pass
class Word2Vec(BaseWordEmbeddingsModel):
    """Word2Vec model (outline/skeleton).

    Fixes over the original note: colons and `self` added, the bare
    prose line inside `__init__` turned into a comment (it was a
    syntax error), and `super(...).init()` corrected to
    `super(...).__init__()`.
    """

    def __init__(self, size=100):
        """Set up keyed-vector storage and vocabulary, then run base init.

        :param size: dimensionality of the word vectors.
        """
        # wv: an object produced by the
        # ~gensim.models.keyedvectors.Word2VecKeyedVectors class; in
        # Word2Vec it is an attribute. To share word-vector lookup code
        # between different training algorithms (Word2Vec, FastText,
        # WordRank, VarEmbed), gensim separates the storage and querying
        # of word vectors into the standalone KeyedVectors class, which
        # holds the mapping from words to their vectors and supports
        # vector queries.
        self.wv = Word2VecKeyedVectors(size)
        self.vocabulary = Word2VecVocab()
        super(Word2Vec, self).__init__()

    def train(self):
        # Delegate to the shared training loop in the base class.
        return super(Word2Vec, self).train()

    def save(self):
        """Save the model (outline only)."""
        pass

    def load(self):
        """Load a saved model (outline only)."""
        pass
class Word2VecVocab:
    """Vocabulary management for Word2Vec (outline/skeleton).

    Fixes over the original note: colons and `self` added, and
    `def init()` corrected to the real constructor name `__init__`.
    """

    def __init__(self):
        # Outline only: real version stores min_count, sample, etc.
        pass

    def scan_vocab(self):
        """Build the word table from a sequence of sentences (outline only)."""
        pass

    def sort_vocab(self):
        # Outline only: sorts vocabulary words by descending frequency.
        pass

    def prepare_vocab(self):
        # Outline only: applies min_count / sampling thresholds.
        pass

    def create_binary_tree(self):
        """Create a binary Huffman tree from the stored vocabulary words
        and their frequencies (outline only)."""
        pass
# The internal shallow neural network that trains the word vectors.
class Word2VecTrainables:
    """Trainable weights of the Word2Vec shallow network (outline/skeleton).

    Fixes over the original note: colons and `self` added, and
    `def init()` corrected to the real constructor name `__init__`.
    """

    def __init__(self):
        # Outline only: real version stores layer size, seed, hashfxn.
        pass

    def prepare_weights(self):
        # Outline only: allocate/initialize the projection weights.
        pass

    def seeded_vector(self):
        # Outline only: deterministic random vector from a seed string.
        pass

    def reset_weights(self):
        # Outline only: re-randomize all weights.
        pass

    def update_weights(self):
        # Outline only: grow weights when new vocabulary words appear.
        pass