代码如下
/** * 从段落中查找与目标关键词最相似的关键词,并返回匹配到的关键词信息,可设置level * @param strSource 原字符串 * @param strCompared 比较字符串(标准字符串,被除数) * @param level 评分阀值 * @param matchRangeIndex 比较字符串比原字符串字符位置多多少个限制 * @return */ public static SimilarInfo getSimilarDetailsInfo(String strSource, String strCompared, int level, int matchRangeIndex){ SimilarInfo similarInfo=new SimilarInfo(); similarInfo.setStrCompared(strCompared); similarInfo.setStrSource(strSource); HashMap<Integer,List<MatchInfo>> mapMatches=new HashMap<>(); int count=strSource.length(); int targetCount=strCompared.length(); int maxSameCount=0; //遍历count次 for(int i=0;i<count;i++){ String strMatches=""; int nowSameCount=0; int c=0; int lastIndex=0;//记录上一次匹配的目标索引 int lastSrcIndex=0;//记录上一次匹配的源字符串索引 int firstTargetIndex=0; int firstSrcIndex=0; int targetStartIndex=0; int targetEndIndex=0; int sourceStartIndex=0; int sourceEndIndex=0; Boolean isFirst=true; //遍历每一次的原字符串所有字段 for(int j=i;j<strSource.length();j++){ if(nowSameCount>0){ if(j>lastSrcIndex+1+matchRangeIndex){ break; } } char charSource=strSource.charAt(j); for(;c<strCompared.length();c++){ char charCompare=strCompared.charAt(c); if(charSource==charCompare){ if(isFirst==true){ isFirst=false; firstTargetIndex=c; firstSrcIndex=j; //记录第一个匹配的索引 targetStartIndex=c; sourceStartIndex=j; } //记录最后一个匹配的索引 targetEndIndex=c; sourceEndIndex=j; lastSrcIndex=j; strMatches+=charCompare; nowSameCount++; lastIndex=++c;//如果匹配,手动加1 break; } } c=lastIndex;//遍历完目标字符串,记录当前匹配索引 } if(nowSameCount>maxSameCount){ maxSameCount=nowSameCount; } if(!isEmpty(strMatches)){ List<MatchInfo> list; if(mapMatches.containsKey(nowSameCount)){ list=mapMatches.get(nowSameCount); } else{ list=new ArrayList<>(); } MatchInfo matchInfo=new MatchInfo(); matchInfo.setMatchStr(strMatches); matchInfo.setTargetStartIndex(targetStartIndex); matchInfo.setTargetEndIndex(targetEndIndex); matchInfo.setSourceStartIndex(sourceStartIndex); matchInfo.setSourceEndIndex(sourceEndIndex); Boolean isHas=false; for(MatchInfo item : list){ if(item.getMatchStr().equals(matchInfo.getMatchStr()) &&item.getTargetStartIndex().equals( matchInfo.getTargetStartIndex()) &&item.getTargetEndIndex().equals(matchInfo.getTargetEndIndex()) &&item.getSourceStartIndex().equals(matchInfo.getSourceStartIndex()) &&item.getSourceEndIndex().equals(matchInfo.getSourceEndIndex())){ isHas=true; } } if(!isHas) { list.add(matchInfo); mapMatches.put(nowSameCount, list); } } } if(mapMatches.containsKey(maxSameCount)){ List<MatchInfo> matchInfoList=mapMatches.get(maxSameCount); for(MatchInfo item :matchInfoList){ int srcStartIndex=0; int srcEndIndex=0; if(item.getTargetStartIndex()==0){ srcStartIndex=item.getSourceStartIndex(); } else{ srcStartIndex=item.getSourceStartIndex()-item.getTargetStartIndex(); } if(item.getTargetEndIndex()==targetCount-1){ srcEndIndex=item.getSourceEndIndex(); } else{ srcEndIndex=item.getSourceEndIndex()+(targetCount-1-item.getTargetEndIndex()); } srcStartIndex=srcStartIndex<0?0:srcStartIndex; String sourceStr=strSource.substring(srcStartIndex,srcEndIndex+1); item.setSourceStr(sourceStr); } similarInfo.setMatchInfo(mapMatches.get(maxSameCount)); } double dLv= (double)100*maxSameCount/targetCount; int realLevel=(int) Math.rint(dLv); Boolean isSimilar; if(realLevel>=level){ isSimilar= true; }else{ isSimilar= false; } similarInfo.setRealScore(realLevel); similarInfo.setIsSimilar(isSimilar); return similarInfo; }
效果图如下: