#!/usr/bin/python3
import kashgari
from kashgari.embeddings import BERTEmbedding
import logging
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import jieba
import sys
chinese_bert_file = './data/chinese_L-12_H-768_A-12'  # path to the pretrained Chinese BERT-Base checkpoint
def cal_cosine():
    # load the pretrained BERT model as an embedding layer
    bert = BERTEmbedding(chinese_bert_file,
                         task=kashgari.CLASSIFICATION,
                         sequence_length=10)
    # read the two sentences to compare
    sen1 = input('sentence1:')
    sen2 = input('sentence2:')
    # segment each sentence into a word list with jieba
    seg_list1 = list(jieba.cut(sen1, cut_all=False))
    seg_list2 = list(jieba.cut(sen2, cut_all=False))
    # embed_one returns one vector per token: shape (sequence_length, embedding_size)
    embed_tensor1 = bert.embed_one(seg_list1)
    embed_tensor2 = bert.embed_one(seg_list2)
    embedding1 = np.zeros(shape=(1, 3072))
    embedding2 = np.zeros(shape=(1, 3072))
    # accumulate the per-token vectors into a single sentence vector
    for i in range(embed_tensor1.shape[0]):
        embedding1 += embed_tensor1[i]
    for i in range(embed_tensor2.shape[0]):
        embedding2 += embed_tensor2[i]
    # cosine similarity between the two sentence vectors
    sim = cosine_similarity(embedding1, embedding2)
    print('cosine similarity:', sim[0][0])

if __name__ == '__main__':
    cal_cosine()
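For reference, sklearn's cosine_similarity expects 2-D inputs and returns a matrix of pairwise scores, which is why the sentence vectors above are kept as row vectors of shape (1, 3072) and the result is read with sim[0][0]. Below is a minimal standalone sketch with toy vectors (the numbers are only illustrative and do not come from BERT; no model files are needed):

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

a = np.array([[1.0, 2.0, 3.0]])   # shape (1, 3), stands in for a sentence vector
b = np.array([[2.0, 4.0, 6.0]])   # parallel to a, so similarity is 1.0
c = np.array([[-3.0, 0.0, 1.0]])  # orthogonal to a, so similarity is 0.0

print(cosine_similarity(a, b))    # [[1.]]
print(cosine_similarity(a, c))    # [[0.]]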