import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decides whether {@code term} is accepted, using the Levenshtein distance
 * between the given term and the comparing term.
 *
 * <p>If the minSimilarity is &gt;= 1.0, this uses the maxEdits as the comparison
 * (presumably this is the case signalled by the {@code raw} flag; confirm where
 * that flag is assigned). Otherwise, this method uses the following logic to
 * calculate similarity:
 *
 * <pre>
 *   similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
 * </pre>
 *
 * where distance is the Levenshtein distance for the two words.
 *
 * <p>On acceptance the boost attribute is set to
 * {@code (similarity - minSimilarity) * scale_factor}.
 *
 * @param term the candidate term, as UTF-8 bytes
 * @return {@code AcceptStatus.END} if {@code term} does not start with
 *         {@code prefixBytesRef}; {@code AcceptStatus.NO} if the distance
 *         calculation stopped early, if {@code raw} is set and the distance
 *         exceeds {@code maxEdits}, or if {@code raw} is not set and the
 *         similarity is not greater than {@code minSimilarity};
 *         {@code AcceptStatus.YES} otherwise
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  // Guard: a term without the required prefix is answered with END.
  if (!StringHelper.startsWith(term, prefixBytesRef)) {
    return AcceptStatus.END;
  }

  // Decode the candidate into code points; the distance is computed on the
  // part that follows the prefix.
  UnicodeUtil.UTF8toUTF32(term, utf32);
  final int edits = calcDistance(utf32.ints, realPrefixLength, utf32.length - realPrefixLength);

  // Integer.MIN_VALUE is the sentinel that Levenshtein stopped early.
  final boolean stoppedEarly = edits == Integer.MIN_VALUE;
  // No need to calculate similarity when raw is set and the distance is
  // already above maxEdits.
  if (stoppedEarly || (raw && edits > maxEdits)) {
    return AcceptStatus.NO;
  }

  final float score = calcSimilarity(edits, (utf32.length - realPrefixLength), text.length);

  // With raw set, the distance is known to be <= maxEdits at this point, so
  // the term is accepted regardless of the similarity threshold.
  final boolean accepted = raw || score > minSimilarity;
  if (!accepted) {
    return AcceptStatus.NO;
  }

  boostAtt.setBoost((score - minSimilarity) * scale_factor);
  return AcceptStatus.YES;
}