#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Score string similarity using the longest common subsequence (LCS).

Each input line holds two tab-separated strings.  For every pair the script
prints the two strings, the LCS length, and a similarity score computed as
``2 * LCS / (len(first) + len(second))``.
"""
import sys

# Raw string: the backslashes are literal path separators, not escapes.
DEFAULT_INPUT_PATH = r'D:\share\lcs_input.data'


def cal_score(first_str, second_str):
    """Build the LCS dynamic-programming table for two strings.

    Args:
        first_str: First sequence to compare.
        second_str: Second sequence to compare.

    Returns:
        A ``(len(first_str) + 1) x (len(second_str) + 1)`` list of lists in
        which ``table[i][j]`` is the LCS length of ``first_str[:i]`` and
        ``second_str[:j]``.  The LCS length of the full strings is therefore
        ``table[len(first_str)][len(second_str)]``.
    """
    len1 = len(first_str)
    len2 = len(second_str)
    # Every row must be its own list.  ``[[0] * n] * m`` would repeat a
    # reference to one shared row, so writing row i would also overwrite
    # row i-1 and corrupt the recurrence.  Sizing the table from the inputs
    # also removes any limit on the string lengths.
    len_vv = [[0] * (len2 + 1) for _ in range(len1 + 1)]
    for i in range(1, len1 + 1):
        for j in range(1, len2 + 1):
            if first_str[i - 1] == second_str[j - 1]:
                # Matching characters extend the LCS of both prefixes.
                len_vv[i][j] = 1 + len_vv[i - 1][j - 1]
            else:
                # Otherwise drop one character from either string.
                len_vv[i][j] = max(len_vv[i - 1][j], len_vv[i][j - 1])
    return len_vv


def main(file_path=DEFAULT_INPUT_PATH):
    """Read tab-separated string pairs from *file_path* and print scores.

    Blank lines are skipped.  Lines with fewer than two fields are reported
    on stderr and skipped instead of aborting the whole run.

    Args:
        file_path: Path of the UTF-8 input file.
    """
    # The context manager guarantees the file is closed, even on error.
    with open(file_path, 'r', encoding='UTF-8') as handle:
        for line_no, line in enumerate(handle, 1):
            stripped = line.strip()
            if not stripped:
                continue
            ss = stripped.split('\t')
            if len(ss) < 2:
                sys.stderr.write(
                    'line %d skipped: expected two tab-separated fields\n'
                    % line_no)
                continue
            first_str = ss[0].strip()
            second_str = ss[1].strip()
            result = cal_score(first_str, second_str)
            len1 = len(first_str)
            len2 = len(second_str)
            length = float(result[len1][len2])
            # The line was stripped and is non-empty, so first_str has at
            # least one character and the denominator is never zero.
            score = length * 2 / float(len1 + len2)
            # The literal '\t' element is kept so the output stays
            # byte-compatible with the previous format (three tabs precede
            # the score).
            print('\t'.join(
                [first_str, second_str, str(length), '\t', str(score)]))


if __name__ == '__main__':
    # An optional first command-line argument overrides the default path.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_INPUT_PATH)
# Similarity computing based on LCS (longest common subsequence)
# 最新推荐文章于 2020-12-10 14:44:00 发布