有时候需要比较两个字符串的相似度。我在网上搜索了一下,没有找到 ABAP 版本的实现,于是自己写了一个,并将该功能封装成了一个类方法。
参数如下所示:
其中,参数所关联的类型 ZDRATE 是一个数据元素,其定义如下所示:
METHOD string_similarity_percent.
************************************************************************
* Author:         Hu Jie
* Created on:     2021.03.09 10:22:24
* Development ID:
* Description:    Compute the similarity of two strings
************************************************************************
* Version   Date   Author   Remarks
************************************************************************
* 001.
* 002.
************************************************************************
* Background
*   The Levenshtein distance (edit distance) between two strings is the
*   minimum number of single-character edits needed to turn one string
*   into the other. The permitted edits are: replace one character,
*   insert one character, delete one character.
*   Algorithm reference: https://www.iteye.com/blog/wdhdmx-1343856
*
* Parameters
*   iv_str1    - first string to compare
*   iv_str2    - second string to compare
*   ev_percent - 1 - ( edit distance / length of the longer string ),
*                i.e. a fraction where 1 means identical and 0 means
*                nothing in common. It is set to 0 when either input
*                has no characters to compare.
************************************************************************
  "One line per character of an input string
  TYPES: BEGIN OF ty_char,
           str TYPE char2,
         END OF ty_char.

  DATA: lt_row_chars TYPE TABLE OF ty_char, "characters of iv_str1 (rows)
        lt_col_chars TYPE TABLE OF ty_char, "characters of iv_str2 (columns)
        ls_row_char  TYPE ty_char,
        ls_col_char  TYPE ty_char.

  "Only two columns of the distance matrix are kept at any time.
  "Entry i of a column holds the distance for row i (1-based).
  DATA: lt_prev_col TYPE TABLE OF i,
        lt_curr_col TYPE TABLE OF i.

  DATA: lv_string1    TYPE string,
        lv_string2    TYPE string,
        lv_rows       TYPE i, "number of characters taken from iv_str1
        lv_cols       TYPE i, "number of characters taken from iv_str2
        lv_max        TYPE i, "length of the longer character sequence
        lv_col        TYPE i, "current column index
        lv_row        TYPE i, "current row index
        lv_up         TYPE i, "d(row-1, col)
        lv_left       TYPE i, "d(row,   col-1)
        lv_diag       TYPE i, "d(row-1, col-1)
        lv_cost       TYPE i, "0 if the two characters match, else 1
        lv_cand_up    TYPE i,
        lv_cand_left  TYPE i,
        lv_cand_diag  TYPE i,
        lv_value      TYPE i, "d(row, col)
        lv_distance   TYPE i. "final edit distance

  "If either string is empty the similarity is defined as 0
  IF strlen( iv_str1 ) = 0 OR strlen( iv_str2 ) = 0.
    ev_percent = 0.
    RETURN.
  ENDIF.

  lv_string1 = iv_str1.
  lv_string2 = iv_str2.

  "Split each string into an internal table, one character per line
  CALL FUNCTION 'CONVERT_STRING_TO_TABLE'
    EXPORTING
      i_string         = lv_string1
      i_tabline_length = 1
    TABLES
      et_table         = lt_row_chars.

  CALL FUNCTION 'CONVERT_STRING_TO_TABLE'
    EXPORTING
      i_string         = lv_string2
      i_tabline_length = 1
    TABLES
      et_table         = lt_col_chars.

  "Note: the number of table lines may differ from the string length,
  "so the line counts (not strlen) drive the calculation from here on.
  "A trailing line that is blank is dropped.
  lv_rows = lines( lt_row_chars ).
  IF lv_rows > 0.
    READ TABLE lt_row_chars INTO ls_row_char INDEX lv_rows.
    IF ls_row_char-str IS INITIAL.
      DELETE lt_row_chars INDEX lv_rows.
      lv_rows = lv_rows - 1.
    ENDIF.
  ENDIF.

  lv_cols = lines( lt_col_chars ).
  IF lv_cols > 0.
    READ TABLE lt_col_chars INTO ls_col_char INDEX lv_cols.
    IF ls_col_char-str IS INITIAL.
      DELETE lt_col_chars INDEX lv_cols.
      lv_cols = lv_cols - 1.
    ENDIF.
  ENDIF.

  "Nothing left to compare on one side (e.g. the input was a single
  "blank). Without this guard the loops below would not run and the
  "method would report full similarity or divide 0 by 0.
  IF lv_rows <= 0 OR lv_cols <= 0.
    ev_percent = 0.
    RETURN.
  ENDIF.

  "Length of the longer sequence, used to normalise the distance
  lv_max = nmax( val1 = lv_rows val2 = lv_cols ).

  "Column 0 of the matrix: d(row, 0) = row
  DO lv_rows TIMES.
    APPEND sy-index TO lt_prev_col.
  ENDDO.

  DO lv_cols TIMES. "loop over columns
    lv_col = sy-index.
    READ TABLE lt_col_chars INTO ls_col_char INDEX lv_col.

    CLEAR lt_curr_col.
    "Row 0 of the matrix: d(0, col) = col and d(0, col-1) = col - 1
    lv_up   = lv_col.
    lv_diag = lv_col - 1.

    DO lv_rows TIMES. "loop over rows
      lv_row = sy-index.
      READ TABLE lt_row_chars INTO ls_row_char INDEX lv_row.
      READ TABLE lt_prev_col  INTO lv_left     INDEX lv_row.

      IF ls_row_char-str = ls_col_char-str.
        lv_cost = 0.
      ELSE.
        lv_cost = 1.
      ENDIF.

      "Cheapest of: step from above, step from the left, diagonal step
      lv_cand_up   = lv_up   + 1.
      lv_cand_left = lv_left + 1.
      lv_cand_diag = lv_diag + lv_cost.
      lv_value = nmin( val1 = lv_cand_up
                       val2 = lv_cand_left
                       val3 = lv_cand_diag ).
      APPEND lv_value TO lt_curr_col.

      "Shift the window down by one row
      lv_diag = lv_left.
      lv_up   = lv_value.
    ENDDO.

    lt_prev_col = lt_curr_col.
  ENDDO.

  "The last entry of the last column is the edit distance
  READ TABLE lt_prev_col INTO lv_distance INDEX lv_rows.
  ev_percent = 1 - ( lv_distance / lv_max ).
ENDMETHOD.