java unicode32_Java UnicodeUtil.UTF8toUTF32方法代码示例

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类

/**
 * <p>The termCompare method in FuzzyTermEnum uses Levenshtein distance to
 * calculate the distance between the given term and the comparing term.
 * </p>
 * <p>If the minSimilarity is &gt;= 1.0, this uses the maxEdits as the comparison.
 * Otherwise, this method uses the following logic to calculate similarity.
 * </p>
 * <pre>
 *   similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
 * </pre>
 * <p>where distance is the Levenshtein distance for the two words.
 * </p>
 */

@Override
protected final AcceptStatus accept(BytesRef term) {
  // A term that does not start with prefixBytesRef yields END.
  if (!StringHelper.startsWith(term, prefixBytesRef)) {
    return AcceptStatus.END;
  }

  // Convert the UTF-8 term into the reusable utf32 buffer; the distance is
  // computed only over the part that follows the first realPrefixLength entries.
  UnicodeUtil.UTF8toUTF32(term, utf32);
  final int suffixLength = utf32.length - realPrefixLength;
  final int editDistance = calcDistance(utf32.ints, realPrefixLength, suffixLength);

  // Integer.MIN_VALUE is the sentinel that Levenshtein stopped early.
  if (editDistance == Integer.MIN_VALUE) {
    return AcceptStatus.NO;
  }

  // In raw mode the edit count alone can reject the term, so the similarity
  // does not need to be calculated.
  if (raw && editDistance > maxEdits) {
    return AcceptStatus.NO;
  }

  final float score = calcSimilarity(editDistance, suffixLength, text.length);

  // In raw mode the term is within maxEdits by now and is accepted as is;
  // otherwise the similarity has to be strictly greater than minSimilarity.
  if (!raw && !(score > minSimilarity)) {
    return AcceptStatus.NO;
  }

  boostAtt.setBoost((score - minSimilarity) * scale_factor);
  return AcceptStatus.YES;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值