import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
/**
 * Matches a field/parameter name to the most similar object name using a
 * case-insensitive Levenshtein similarity.
 *
 * <p>Known matches are read from a properties file through
 * {@code UnitTestBotUtils.loadPropertiesFromFile}; matches computed during this run are
 * collected in {@link #newMatchLib} and merged back into that file by
 * {@link #saveMatchLib()}.
 *
 * <p>The static caches are not synchronized.
 */
public class EditorDistance {
    /** File name handed to {@code UnitTestBotUtils} when loading and saving known matches. */
    private static final String MATCH_LIB_FILE = "matchlib.txt";

    /** Known {@code fieldName -> objectName} matches; loaded lazily, {@code null} until then. */
    private static Properties properties;

    /** Candidate object names; loaded lazily from {@code UnitTestBotUtils.getSObjects()}. */
    private static Set<String> objectNames;

    /** Matches computed during this run that {@link #saveMatchLib()} has yet to persist. */
    public static Map<String,String> newMatchLib = new HashMap<>();

    /**
     * Computes the case-insensitive Levenshtein edit distance between two words.
     *
     * @param word1 first word, must not be {@code null}
     * @param word2 second word, must not be {@code null}
     * @return the minimum number of single-character insertions, deletions and
     *         substitutions that turn one lower-cased word into the other
     */
    public int minDistance(String word1, String word2) {
        return editDistance(normalize(word1), normalize(word2));
    }

    /**
     * Computes a case-insensitive similarity score derived from the Levenshtein distance:
     * {@code 1 - distance / max(length1, length2)}.
     *
     * @param str1 first string, must not be {@code null}
     * @param str2 second string, must not be {@code null}
     * @return a value in {@code [0, 1]}; {@code 1} means the strings are equal ignoring
     *         case, which includes the case where both are empty
     */
    public static float Levenshtein(String str1, String str2)
    {
        String left = normalize(str1);
        String right = normalize(str2);
        int longest = Math.max(left.length(), right.length());
        if (longest == 0) {
            // Two empty strings are identical; dividing by zero here would yield NaN.
            return 1.0f;
        }
        return 1 - (float) editDistance(left, right) / longest;
    }

    /** Lower-cases with a fixed locale so results do not depend on the JVM default locale. */
    private static String normalize(String text) {
        return text.toLowerCase(Locale.ROOT);
    }

    /**
     * Iterative Levenshtein distance that keeps only two rows of the DP table, so memory is
     * proportional to {@code target.length()} and no recursion is involved.
     */
    private static int editDistance(String source, String target) {
        int width = target.length();
        int[] previous = new int[width + 1];
        int[] current = new int[width + 1];
        // Row 0: turning the empty prefix into target[0..j) takes j insertions.
        for (int j = 0; j <= width; j++) {
            previous[j] = j;
        }
        for (int i = 1; i <= source.length(); i++) {
            // Column 0: turning source[0..i) into the empty prefix takes i deletions.
            current[0] = i;
            char sourceChar = source.charAt(i - 1);
            for (int j = 1; j <= width; j++) {
                int substitution = previous[j - 1] + (sourceChar == target.charAt(j - 1) ? 0 : 1);
                int insertion = current[j - 1] + 1;
                int deletion = previous[j] + 1;
                current[j] = Math.min(substitution, Math.min(insertion, deletion));
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        // After the final swap the last computed row lives in 'previous'.
        return previous[width];
    }

    /**
     * Resolves the object name for a field name (used when the type is String/Id).
     *
     * <p>A known match from {@link #properties} wins. Otherwise the candidate with the
     * highest similarity is chosen and recorded in {@link #newMatchLib}. Candidates whose
     * similarity is 0 are never chosen, and when several candidates share the highest
     * score the one iterated last wins.
     *
     * @param fieldName    field or parameter name to resolve
     * @param sObjectNames candidate object names; {@code null} is treated as no candidates
     * @return the matched object name, or an empty string when nothing matches
     */
    private static String getObjectNameByParametersName(String fieldName,Set<String> sObjectNames){
        if (properties == null) {
            properties = new Properties();
        }
        // Prefer an already-known match.
        if (properties.containsKey(fieldName)) {
            return properties.get(fieldName).toString();
        }
        if (sObjectNames == null) {
            return "";
        }
        String bestName = "";
        float bestScore = 0f;
        for (String candidate : sObjectNames) {
            float score = Levenshtein(fieldName, candidate);
            // '>=' keeps the "last candidate wins a tie" rule; 'score > 0' rejects
            // candidates that share nothing with the field name.
            if (score > 0f && score >= bestScore) {
                bestScore = score;
                bestName = candidate;
            }
        }
        if (bestScore > 0f) {
            // Remember the info: fieldName=objectName
            newMatchLib.put(fieldName, bestName);
        }
        return bestName;
    }

    /**
     * Loads the known matches on first use. A load failure is printed and leaves the
     * cache unset; the lookup then falls back to an empty set of known matches.
     */
    private static void getCurrentMatchLib(){
        if (properties != null) {
            return;
        }
        try {
            properties = UnitTestBotUtils.loadPropertiesFromFile(MATCH_LIB_FILE);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Loads the candidate object names on first use. */
    private static void getSObjectLib(){
        if (objectNames == null) {
            objectNames = UnitTestBotUtils.getSObjects();
        }
    }

    /**
     * Merges the matches collected in {@link #newMatchLib} into the properties file.
     * Does nothing when there are no new matches. Failures are printed and reported
     * through {@code ErrorModel.insertErrorMsg}; they are not rethrown.
     */
    public static void saveMatchLib(){
        try {
            if (newMatchLib.isEmpty()) {
                return;
            }
            // Re-read the file so entries already stored there are kept.
            Properties stored = UnitTestBotUtils.loadPropertiesFromFile(MATCH_LIB_FILE);
            stored.putAll(newMatchLib);
            UnitTestBotUtils.writePropertiesToFile(stored, MATCH_LIB_FILE);
        } catch (Exception e) {
            e.printStackTrace();
            ErrorModel.insertErrorMsg(e.getMessage());
        }
    }

    /**
     * Returns the object name that best matches the given field name.
     *
     * @param field field or parameter name to resolve
     * @return the matched object name, or an empty string when nothing matches
     */
    public static String getMatchObject(String field){
        getCurrentMatchLib();
        getSObjectLib();
        return getObjectNameByParametersName(field, objectNames);
    }

    /** Prints the edit distance and the similarity of one pair, followed by a separator. */
    private static void printComparison(EditorDistance calculator, String left, String right) {
        System.out.println("String: " + left + "-->" + right);
        System.out.println("Min Distance: " + calculator.minDistance(left, right));
        System.out.println("Levenshtein: " + EditorDistance.Levenshtein(left, right));
        System.out.println("======================");
    }

    /**
     * Demo entry point: prints distance and similarity for a few sample pairs, then the
     * resolved object name for a few sample field names.
     */
    public static void main(String[] args) throws IOException {
        EditorDistance obj = new EditorDistance();
        String str = "Account";
        String str1 = "MyAcc";
        // Naming patterns the matcher is meant to handle.
        // Single:
        //   accId
        //   accountId
        //
        // Multi:
        //   accIds
        //   accountIds
        //   accIdList
        //   accountIdList
        //   accIdSet
        //   accountIdSet
        printComparison(obj, "myAccId", str);
        printComparison(obj, "myAccId", "Macro");
        printComparison(obj, "acc", str1);
        printComparison(obj, "accountId", str);
        printComparison(obj, "accIds", str);
        printComparison(obj, "accountIds", str);
        System.out.println("Test: "+EditorDistance.getMatchObject("accountId"));
        System.out.println("Test: "+EditorDistance.getMatchObject("accId"));
        System.out.println("Test: "+EditorDistance.getMatchObject("accIds"));
        System.out.println("Test: "+EditorDistance.getMatchObject("myAccId"));
    }
}
print:
String: myAccId-->Account
Min Distance: 6
Levenshtein: 0.14285713
======================
String: myAccId-->Macro
Min Distance: 4
Levenshtein: 0.4285714
======================
String: acc-->MyAcc
Min Distance: 2
Levenshtein: 0.6
======================
String: accountId-->Account
Min Distance: 2
Levenshtein: 0.7777778
======================
String: accIds-->Account
Min Distance: 4
Levenshtein: 0.4285714
======================
String: accountIds-->Account
Min Distance: 3
Levenshtein: 0.7
======================
本示例实际用于匹配的是基于 Levenshtein 的相似度算法；最小编辑距离算法（minDistance）仅在 main 中用于对比输出，并未参与匹配。两种算法的实现均引用自外部资料。