matlab lsmatvec,Python matutils.unitvec方法代碼示例-CSDN博客

本文整理匯總了Python中gensim.matutils.unitvec方法的典型用法代碼示例。如果您正苦於以下問題：Python matutils.unitvec方法的具體用法？Python matutils.unitvec怎麼用？Python matutils.unitvec使用的例子？那麼恭喜您，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊gensim.matutils的用法示例。

在下文中一共展示了matutils.unitvec方法的16個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於我們的系統推薦出更棒的Python代碼示例。

示例1: similarity_label

點讚 6

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def similarity_label(self, words, normalization=True):
    """
    Score one or more words against the label vectors.

    Several words can be scored in a single call.

    :param words: a single word (string) or a sequence of words; a lone
        string is wrapped into a one-element list.
    :param normalization: when true, the word vectors are scaled with
        ``unitvec(..., ax=0)`` and multiplied with ``self.Label_vec_u``;
        otherwise the raw vectors are multiplied with ``self.Label_vec``.
    :return: ``np.dot`` of the label matrix with the transposed word vectors
        (presumably one row per label and one column per word -- TODO
        confirm against the shapes of ``self.Label_vec``/``Label_vec_u``).
    :raises Exception: if ``self.model`` is None.
    """
    # Identity test: ``== None`` would be routed through the model's __eq__.
    if self.model is None:
        raise Exception('no model.')
    if isinstance(words, string_types):
        words = [words]
    # Transposed so the looked-up vectors can be right-multiplied by the
    # label matrix (assumes the lookup returns one row per word -- TODO confirm).
    vectors = np.transpose(self.model.wv.__getitem__(words))
    if normalization:
        # A single vectorised call over all columns; the original author
        # noted it is about twice as fast as calling matutils.unitvec on
        # each column in a Python loop.
        unit_vector = unitvec(vectors, ax=0)
        dists = np.dot(self.Label_vec_u, unit_vector)
    else:
        dists = np.dot(self.Label_vec, vectors)
    return dists

開發者ID:Coldog2333，項目名稱:Financial-NLP，代碼行數:20，

示例2: __getitem__

點讚 6

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def __getitem__(self, bow, eps=1e-12):
    """
    Convert a bag-of-words document, or a whole corpus, to tf-idf weights.

    A corpus is handed to ``self._apply``; a single document comes back as a
    list of ``(termid, weight)`` pairs whose absolute weight exceeds `eps`.
    """
    corpus_given, bow = utils.is_corpus(bow)
    if corpus_given:
        # a whole corpus was passed in: answer with a transformed corpus
        return self._apply(bow)

    # Terms without a non-zero idf entry (unknown/new terms included) are
    # left out, i.e. they get zero weight rather than a huge one.
    weighted = []
    for termid, tf in bow:
        idf = self.idfs.get(termid, 0.0)
        if idf != 0.0:
            weighted.append((termid, self.wlocal(tf) * idf))

    # ``normalize is True`` means unit length; any other truthy value is
    # treated as a user-supplied normalisation callable.
    if self.normalize is True:
        weighted = matutils.unitvec(weighted)
    elif self.normalize:
        weighted = self.normalize(weighted)

    # keep the result sparse: no explicit (near-)zero entries
    return [(termid, weight) for termid, weight in weighted if abs(weight) > eps]

#endclass TfidfModel

開發者ID:largelymfs，項目名稱:topical_word_embeddings，代碼行數:27，

示例3: calculate_text_similar

點讚 6

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def calculate_text_similar(vec_ques, matrix_org_norm, matrix_org_index, top_vec):
    """
    Find the stored sentences most similar to a question vector.

    The question vector is scaled to unit length and dot-multiplied with the
    matrix of stored sentence vectors.

    :param vec_ques: vector of the question
    :param matrix_org_norm: matrix of stored sentence vectors (presumably
        already unit length, judging by the name -- TODO confirm)
    :param matrix_org_index: lookup from a matrix row position to the value
        reported for that sentence
    :param top_vec: number of best matches to return
    :return: list of ``[matrix_org_index[row], score]`` pairs in the order
        produced by ``matutils.argsort(..., reverse=True)``
    """
    # Scale the question vector to unit length (a zero vector is returned
    # unchanged by matutils.unitvec). The mean runs over a single stacked row.
    stacked = np.array([vec_ques])
    query = matutils.unitvec(stacked.mean(axis=0)).astype(numpy_type)

    # Dot product of every stored sentence vector with the question vector.
    scores = np.dot(matrix_org_norm, query)

    # Rank the rows by score.
    ranked_rows = matutils.argsort(scores, topn=top_vec, reverse=True)

    # Pair each selected row's index entry with its score.
    return [
        [matrix_org_index[row], float(scores[row])]
        for row in ranked_rows[:top_vec]
    ]

開發者ID:yongzhuo，項目名稱:nlp_xiaojiang，代碼行數:22，

示例4: shift_clip_pmi

點讚 6

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def shift_clip_pmi(pmimtr, k_shift=1.0):
    """
    Turn a PMI matrix into a shifted positive PMI (SPPMI) matrix.

    Every value is first shifted by ``-log(k_shift)``, then negative values
    are clipped to 0, and finally each row is scaled to unit length. All
    three steps write into `pmimtr` itself, so the caller's matrix is
    modified.

    :param pmimtr: The matrix of PMI values (modified in place).
    :param k_shift: The shift factor k.
    :return: The normalised matrix passed through ``matutils.Dense2Corpus``
        and ``matutils.corpus2csc``, transposed.
    """
    # Logging arguments are passed separately so formatting only happens
    # when the record is actually emitted.
    logger.info("shifting PMI scores by log(k) with k=%s", k_shift)
    pmimtr -= np.log(k_shift)  # shifted PMI = log(#(w, c) * D / (#w * #c)) - log(k)

    logger.info("clipping PMI scores to be non-negative PPMI")
    pmimtr.clip(0.0, out=pmimtr)  # SPPMI = max(0, log(#(w, c) * D / (#w * #c)) - log(k))

    logger.info("normalizing PPMI word vectors to unit length")
    for i, vec in enumerate(pmimtr):
        pmimtr[i] = matutils.unitvec(vec)

    return matutils.corpus2csc(matutils.Dense2Corpus(pmimtr, documents_columns=False)).T

開發者ID:clips，項目名稱:dutchembeddings，代碼行數:23，

示例5: similarity_3_contexts

點讚 6

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def similarity_3_contexts(self, t, p):
    """Weighted sum of the similarities of the three context vectors of `t` and `p`.

    For each of the ``bef``/``bet``/``aft`` vectors, the dot product of the
    two unit-length vectors is taken when both sides have one; otherwise
    that context contributes 0. The three values are weighted with
    ``self.config.alpha``, ``beta`` and ``gamma`` respectively.
    """
    bef = bet = aft = 0

    if not (t.bef_vector is None or p.bef_vector is None):
        t_unit = matutils.unitvec(t.bef_vector)
        p_unit = matutils.unitvec(p.bef_vector)
        bef = dot(t_unit, p_unit)

    if not (t.bet_vector is None or p.bet_vector is None):
        t_unit = matutils.unitvec(t.bet_vector)
        p_unit = matutils.unitvec(p.bet_vector)
        bet = dot(t_unit, p_unit)

    if not (t.aft_vector is None or p.aft_vector is None):
        t_unit = matutils.unitvec(t.aft_vector)
        p_unit = matutils.unitvec(p.aft_vector)
        aft = dot(t_unit, p_unit)

    return self.config.alpha * bef + self.config.beta * bet + self.config.gamma * aft

開發者ID:davidsbatista，項目名稱:BREDS，代碼行數:23，

示例6: similarity_cluster

點讚 6

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def similarity_cluster(self, p1, p2):
    """Average pairwise similarity between the members of `p1` and `p2`.

    When both ``self.config.alpha`` and ``self.config.gamma`` are 0, only
    the "between" vectors are compared: ``merge_all_tuples_bet()`` is called
    on both arguments and every non-None pair from their
    ``bet_uniques_vectors`` is scored with the dot product of the unit-length
    vectors. Otherwise every pair from ``p1.tuples`` x ``p2.tuples`` is
    scored with ``self.similarity_3_contexts``.

    :return: the mean score as a float, or 0.0 when no pair was compared
        (previously this case raised ZeroDivisionError).
    """
    count = 0
    score = 0
    if self.config.alpha == 0 and self.config.gamma == 0:
        p1.merge_all_tuples_bet()
        p2.merge_all_tuples_bet()
        for v_bet1 in p1.bet_uniques_vectors:
            if v_bet1 is None:
                continue
            # Normalise once per outer vector instead of once per pair.
            unit1 = matutils.unitvec(asarray(v_bet1))
            for v_bet2 in p2.bet_uniques_vectors:
                if v_bet2 is None:
                    continue
                score += dot(unit1, matutils.unitvec(asarray(v_bet2)))
                count += 1
    else:
        for t1 in p1.tuples:
            for t2 in p2.tuples:
                score += self.similarity_3_contexts(t1, t2)
                count += 1

    # Nothing to average: empty inputs or only None vectors.
    if count == 0:
        return 0.0
    return float(score) / float(count)

開發者ID:davidsbatista，項目名稱:BREDS，代碼行數:23，

示例7: similarity

點讚 6

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def similarity(self, d1: int, d2: int) -> float:
    """Cosine similarity of two sentences, addressed by index.

    Parameters
    ----------
    d1 : int
        Index of the first sentence.
    d2 : int
        Index of the second sentence.

    Returns
    -------
    float
        Dot product of the two sentence vectors after each has been scaled
        to unit length.

    """
    first = matutils.unitvec(self[d1])
    second = matutils.unitvec(self[d2])
    return dot(first, second)

開發者ID:oborchers，項目名稱:Fast_Sentence_Embeddings，代碼行數:19，

示例8: safe_renew_label_vec

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def safe_renew_label_vec(self):
    """
    (Re)build the word-vector matrices for the words in ``Label_index``.

    This is the original, "safe" variant: a word the model cannot look up
    gets an all-zero row instead of aborting the rebuild.

    Fills ``self.Label_vec`` with the raw vectors and ``self.Label_vec_u``
    with the unit-length vectors. Both have shape
    ``(len(self.Label_dict), self.len_vector)``.
    """
    shape = (len(self.Label_dict), self.len_vector)
    self.Label_vec = np.empty(shape)
    self.Label_vec_u = np.empty(shape)
    for i, label in enumerate(self.Label_index):
        try:
            # Look the vector up once and reuse it for both matrices.
            vec = self.model.wv.__getitem__(label)
        except KeyError:
            # Unknown word: zero row in both matrices. Only the lookup
            # failure is handled; a bare ``except`` would also swallow
            # KeyboardInterrupt and genuine bugs.
            self.Label_vec[i, :] = 0.0
            self.Label_vec_u[i, :] = 0.0
        else:
            self.Label_vec[i, :] = vec
            self.Label_vec_u[i, :] = matutils.unitvec(vec)

開發者ID:Coldog2333，項目名稱:Financial-NLP，代碼行數:16，

示例9: renew_label_vec

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def renew_label_vec(self):
    """
    (Re)build the word-vector matrices for the words in ``Label_index``.

    Fast but unstable variant: all words are looked up in a single call, so
    use it only when every word in ``Label_index`` is known to have a word
    vector.

    Sets ``self.Label_vec`` to the looked-up vectors and ``self.Label_vec_u``
    to the result of ``unitvec`` on them.
    """
    label_vectors = self.model.wv.__getitem__(self.Label_index)
    self.Label_vec = label_vectors
    self.Label_vec_u = unitvec(label_vectors)

開發者ID:Coldog2333，項目名稱:Financial-NLP，代碼行數:10，

示例10: unitvec

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def unitvec(vector, ax=1):
    """
    Scale a vector, or the rows/columns of a matrix, to unit Euclidean length.

    :param vector: 1-D or 2-D array-like of numbers.
    :param ax: for 2-D input, the axis that is summed over when computing
        the norms: ``1`` normalises every row, ``0`` every column. Ignored
        for 1-D input.
    :return: array of the same shape as `vector`. Slices whose norm is zero
        are returned unchanged (all zeros) instead of turning into NaN.
    :raises Exception: if the input does not have one or two dimensions.
    """
    vector = np.asarray(vector)
    if vector.ndim == 1:
        axis = None
    elif vector.ndim == 2:
        axis = ax
    else:
        raise Exception('It\'s too large.')

    # keepdims keeps the norms broadcastable against the input for either
    # axis, so no transpose is needed.
    norms = np.sqrt(np.sum(vector * vector, axis=axis, keepdims=True))
    # Dividing by 1 leaves zero vectors as they are (avoids 0/0 -> NaN).
    norms[norms == 0] = 1
    return np.divide(vector, norms)

開發者ID:Coldog2333，項目名稱:Financial-NLP，代碼行數:15，

示例11: most_similar

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def most_similar(self, positive=None, negative=None, topn=10):
    """Find the words closest to a weighted combination of the inputs.

    :param positive: words (or vectors) that contribute positively. Each
        item is a word, an ndarray, or an explicit ``(item, weight)`` pair;
        bare items get weight 1.0. A single string is accepted as shorthand
        for a one-element list when `negative` is empty.
    :param negative: same as `positive`, but bare items get weight -1.0.
    :param topn: number of results; a falsy value returns the full vector
        of similarities instead.
    :return: list of ``(word, similarity, raw vector)`` triples excluding the
        input words, or the similarity vector when `topn` is falsy.
    :raises KeyError: if a word is not in the vocabulary.
    :raises ValueError: if there is no input at all.
    """
    # None instead of ``[]`` defaults: default lists are shared across calls.
    positive = [] if positive is None else positive
    negative = [] if negative is None else negative

    if isinstance(positive, string_types) and not negative:
        # allow calls like most_similar('dog'), as a shorthand for most_similar(['dog'])
        positive = [positive]

    # add weights for each word, if not already present;
    # default to 1.0 for positive and -1.0 for negative words
    bare_types = string_types + (ndarray,)
    positive = [(word, 1.0) if isinstance(word, bare_types) else word
                for word in positive]
    negative = [(word, -1.0) if isinstance(word, bare_types) else word
                for word in negative]

    # compute the weighted average of all words
    all_words, mean = set(), []
    for word, weight in positive + negative:
        if isinstance(word, ndarray):
            mean.append(weight * word)
        elif word in self.vocab:
            mean.append(weight * self.syn0norm[self.vocab[word].index])
            all_words.add(self.vocab[word].index)
        else:
            raise KeyError("word '%s' not in vocabulary" % word)
    if not mean:
        raise ValueError("cannot compute similarity with no input")
    mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL)

    dists = dot(self.syn0norm, mean)
    if not topn:
        return dists
    # Fetch extra candidates so that dropping the input words below still
    # leaves `topn` results.
    best = argsort(dists)[::-1][:topn + len(all_words)]
    # ignore (don't return) words from the input
    result = [(self.index2word[sim], float(dists[sim]), self.syn0[sim])
              for sim in best if sim not in all_words]
    return result[:topn]

開發者ID:loretoparisi，項目名稱:word2vec-twitter，代碼行數:36，

示例12: __getitem__

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def __getitem__(self, query):
    """Get similarities of document `query` to all documents in the corpus.

    **or**

    If `query` is a corpus (iterable of documents), return a matrix of similarities
    of all query documents vs. all corpus document. Using this type of batch
    query is more efficient than computing the similarities one document after
    another.

    When ``self.num_best`` is set, each similarity vector is reduced with
    ``matutils.full2sparse_clipped(..., self.num_best)``.
    """
    is_corpus, query = utils.is_corpus(query)

    # self.normalize only applies to plain gensim vectors/corpora (as
    # advertised in the doc). A numpy or scipy.sparse matrix is passed
    # through untouched: it is assumed to come already normalized.
    if self.normalize and not matutils.ismatrix(query):
        if is_corpus:
            query = [matutils.unitvec(v) for v in query]
        else:
            query = matutils.unitvec(query)

    result = self.get_similarities(query)
    if self.num_best is None:
        return result

    # if the input query was a corpus (=more documents), compute the top-n
    # most similar for each document in turn
    if matutils.ismatrix(result):
        return [matutils.full2sparse_clipped(v, self.num_best) for v in result]
    # otherwise, return top-n of the single input document
    return matutils.full2sparse_clipped(result, self.num_best)

開發者ID:largelymfs，項目名稱:topical_word_embeddings，代碼行數:38，

示例13: __getitem__

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def __getitem__(self, bow):
    """
    Convert a bag-of-words document, or a whole corpus, to log-entropy weights.

    A corpus is handed to ``self._apply``. For a single document each known
    term is weighted with ``log(tf + 1) * self.entr[term]``; the result is
    passed through ``matutils.unitvec`` when ``self.normalize`` is truthy.
    """
    corpus_given, bow = utils.is_corpus(bow)
    if corpus_given:
        # a whole corpus was passed in: answer with a transformed corpus
        return self._apply(bow)

    # Terms missing from self.entr (unknown/new terms) are skipped, which
    # amounts to a zero weight rather than an infinite/huge one.
    weighted = []
    for term_id, tf in bow:
        if term_id not in self.entr:
            continue
        weighted.append((term_id, math.log(tf + 1) * self.entr.get(term_id)))

    return matutils.unitvec(weighted) if self.normalize else weighted

開發者ID:largelymfs，項目名稱:topical_word_embeddings，代碼行數:17，

示例14: add_documents

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def add_documents(self, corpus):
    """
    Extend the index with new documents.

    Documents are normalised with ``matutils.unitvec`` and buffered in
    ``self.fresh_docs``; ``self.close_shard()`` is called whenever the
    buffer holds at least ``self.shardsize`` of them.
    """
    min_ratio = 1.0  # 0.5 to only reopen shards that are <50% complete
    last_is_partial = bool(self.shards) and len(self.shards[-1]) < min_ratio * self.shardsize
    if last_is_partial:
        # the last shard was incomplete: reopen it before adding more
        self.reopen_shard()

    for doc in corpus:
        # scipy sparse inputs report their size via nnz, everything else via len()
        if not isinstance(doc, numpy.ndarray) and scipy.sparse.issparse(doc):
            doclen = doc.nnz
        else:
            doclen = len(doc)

        # Documents with fewer than 30% of the features filled are stored
        # through corpus2csc, the others through sparse2full.
        if doclen < 0.3 * self.num_features:
            doc = matutils.unitvec(matutils.corpus2csc([doc], self.num_features).T)
        else:
            doc = matutils.unitvec(matutils.sparse2full(doc, self.num_features))

        self.fresh_docs.append(doc)
        self.fresh_nnz += doclen

        if len(self.fresh_docs) >= self.shardsize:
            self.close_shard()
        # re-read the length: close_shard() may have changed the buffer
        if len(self.fresh_docs) % 10000 == 0:
            logger.info("PROGRESS: fresh_shard size=%i" % len(self.fresh_docs))

開發者ID:largelymfs，項目名稱:topical_word_embeddings，代碼行數:30，

示例15: __init__

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def __init__(self, corpus, num_best=None, dtype=numpy.float32, num_features=None, chunksize=256):
    """
    `num_features` is the number of features in the corpus; when omitted it
    is determined by scanning the corpus with ``utils.get_max_id``. See the
    `Similarity` class for a description of the other parameters.

    When `corpus` is not None, ``self.index`` is filled with one row per
    document.
    """
    if num_features is None:
        logger.warning("scanning corpus to determine the number of features (consider setting `num_features` explicitly)")
        num_features = 1 + utils.get_max_id(corpus)

    self.num_features = num_features
    self.num_best = num_best
    self.normalize = True
    self.chunksize = chunksize

    if corpus is None:
        # nothing to index
        return

    if self.num_features <= 0:
        raise ValueError("cannot index a corpus with zero features (you must specify either `num_features` or a non-empty corpus in the constructor)")

    n_docs = len(corpus)
    logger.info("creating matrix for %s documents and %i features" %
                (n_docs, num_features))
    self.index = numpy.empty(shape=(n_docs, num_features), dtype=dtype)

    # walk the corpus and store one (normalized) document vector per row
    for docno, vector in enumerate(corpus):
        if docno % 1000 == 0:
            logger.debug("PROGRESS: at document #%i/%i" % (docno, len(corpus)))
        # Individual documents may also arrive as numpy arrays or
        # scipy.sparse matrices. This is undocumented because it is not
        # fully supported throughout: such inputs are stored without
        # normalization and `num_features` must be supplied explicitly.
        if not isinstance(vector, numpy.ndarray):
            if scipy.sparse.issparse(vector):
                vector = vector.toarray().flatten()
            else:
                vector = matutils.unitvec(matutils.sparse2full(vector, num_features))
        self.index[docno] = vector

開發者ID:largelymfs，項目名稱:topical_word_embeddings，代碼行數:40，

示例16: similarity_matrix

點讚 5

# 需要導入模塊: from gensim import matutils [as 別名]

# 或者: from gensim.matutils import unitvec [as 別名]

def similarity_matrix(query, doc, embeddings, OOV_dict):
    """Dot-product matrix between the words of `query` and the words of `doc`.

    Every word is mapped to a vector with ``get_word_vec`` and passed through
    ``unitvec``. The result has one row per query word and one column per
    document word.
    """
    dim = embeddings.vector_size

    # query words are stored as rows, document words as columns
    query_rows = np.zeros((len(query), dim))
    doc_cols = np.zeros((dim, len(doc)))

    for row, term in enumerate(query):
        query_rows[row, :] = unitvec(get_word_vec(term, embeddings, OOV_dict))
    for col, term in enumerate(doc):
        doc_cols[:, col] = unitvec(get_word_vec(term, embeddings, OOV_dict))

    return np.dot(query_rows, doc_cols)

開發者ID:ucasir，項目名稱:NPRF，代碼行數:16，

注：本文中的gensim.matutils.unitvec方法示例整理自Github/MSDocs等源碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。