# Python 匹配两个字符串的相似度 (compute the similarity between two strings)
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def matching_similarity(self, str1, str2):
    """Return the TF-IDF cosine similarity of two strings.

    Each string is segmented into words with jieba, the two segmented
    texts are vectorized together with TF-IDF, and the cosine similarity
    of the two resulting vectors is returned.

    Args:
        self: Unused by this function; kept in the signature so existing
            callers that pass a leading argument keep working.
        str1: First string to compare.
        str2: Second string to compare.

    Returns:
        The cosine similarity of the two TF-IDF vectors, or 0.0 when
        neither string yields any word token (e.g. both are empty or
        contain only punctuation).
    """
    # TfidfVectorizer splits on whitespace-delimited tokens, so the jieba
    # segments are re-joined with spaces.
    seg_str1 = " ".join(jieba.lcut(str1))
    seg_str2 = " ".join(jieba.lcut(str2))

    # The default token_pattern only keeps tokens of 2+ word characters,
    # which silently drops single-character words produced by the
    # segmentation. Accept tokens of length 1 as well.
    vectorizer = TfidfVectorizer(token_pattern=r"(?u)\b\w+\b")
    try:
        tfidf_matrix = vectorizer.fit_transform([seg_str1, seg_str2])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary, i.e.
        # when no token survives in either string. Report "no overlap"
        # instead of crashing, matching the score obtained when only one
        # of the two strings is empty.
        return 0.0

    return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
if __name__ == '__main__':
    # Demo: the same address written with its parts in a different order.
    str1 = "龙华区锦湖大厦C栋C403-3,民治街道新牛社区工业东路"
    str2 = "龙华区民治街道新牛社区工业东路锦湖大厦C栋C403-3"
    # matching_similarity declares a leading `self` parameter that it never
    # uses; calling it with only the two strings raises TypeError, so pass
    # a None placeholder. Print the score so the demo has visible output.
    print(matching_similarity(None, str1, str2))