clucene的highlighter

 clucene做高亮也有相应的highlighter包。

 

cppunit测试样例中有各种搜索的高亮,包括短语、模糊、范围等查询类型

 

 //testSimpleHighlighter();
 //testGetBestFragmentsSimpleQuery();
 //testGetFuzzyFragments();
 //testGetWildCardFragments();
 //testGetMidWildCardFragments();
 //testGetRangeFragments();
 //testGetBestFragmentsPhrase();
 //testGetBestFragmentsMultiTerm();
 //testGetBestFragmentsWithOr();

 

 

// Shared fixture state for the highlighter tests in this file.
// hl_ramDir, hl_analyzer and hl_reader are assigned by setupHighlighter().
RAMDirectory* hl_ramDir = NULL;
StandardAnalyzer* hl_analyzer = NULL;


// Name of the single field that is indexed and later highlighted.
// Declared pointer-to-const: it points at a string literal, and binding a
// literal to a non-const TCHAR* is ill-formed since C++11.
const TCHAR* hl_FIELD_NAME = _T("contents");

// Query/search state read by the individual tests.
// NOTE(review): presumably populated by doSearching(), which is not shown
// in this part of the file -- confirm.
Query* hl_originalquery = NULL;
Query* hl_query = NULL;
Query* hl_rewrittenquery = NULL;
IndexReader* hl_reader=NULL;
Searcher* hl_searcher=NULL;
Hits* hl_hits = NULL;

 

 

void setupHighlighter()
{

 hl_ramDir = _CLNEW RAMDirectory();

 hl_analyzer = _CLNEW StandardAnalyzer;

 IndexWriter writer(hl_ramDir, hl_analyzer, true);
 
 for (int i = 0; hl_texts[i] != NULL; i++)
 {
  Document d;
  d.add( *_CLNEW Field(hl_FIELD_NAME, hl_texts[i], true, true, true) );
  writer.addDocument(&d);
 }

 writer.optimize();
 writer.close();

 hl_reader = IndexReader::open(hl_ramDir);
}

 

void testSimpleHighlighter(){
 doSearching(_T("Kennedy") );
 QueryScorer scorer(hl_query);
 Highlighter highlighter(&scorer);
 SimpleFragmenter fragmenter(40);

 highlighter.setTextFragmenter(&fragmenter);
 int maxNumFragmentsRequired = 2;

 for (int i = 0; i < hl_hits->length(); i++)
 {
  const TCHAR* text = hl_hits->doc(i).get(hl_FIELD_NAME);
  StringReader reader( text );
  TokenStream* tokenStream=hl_analyzer->tokenStream(hl_FIELD_NAME, &reader);

  TCHAR* result = highlighter.getBestFragments(tokenStream,text,maxNumFragmentsRequired, _T("...") );
  
  setlocale(LC_ALL, "chs");

  wprintf(L"%s/n",result);

  _CLDELETE_CARRAY(result);
  _CLDELETE(tokenStream);
 }
 //Not sure we can assert anything here - just running to check we dont throw any exceptions
}

 

// Entry point for the highlighter tests: builds the index via
// setupHighlighter() and then runs the simple highlighter test.
// The remaining test cases are listed below but currently disabled.
void TestHighLighter()
{
 setupHighlighter();
 testSimpleHighlighter();

 //testGetBestFragmentsSimpleQuery();
 //testGetFuzzyFragments();
 //testGetWildCardFragments();
 //testGetMidWildCardFragments();
 //testGetRangeFragments();
 //testGetBestFragmentsPhrase();
 //testGetBestFragmentsMultiTerm();
 //testGetBestFragmentsWithOr();
}

被索引的文本如下:

 

// Texts indexed by setupHighlighter(), one document per entry.
// The array is NULL-terminated: setupHighlighter() iterates until it reaches
// the NULL sentinel, so the sentinel must remain the last element.
// The entries spell the name in different ways on purpose: lower-case
// "kennedy", capitalized "Kennedy", the abbreviation "JFK", and the
// misspelling "Keneddy".
const TCHAR* hl_texts[6] =
{
 _T("Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"),
 _T("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy"),
 _T("JFK has been shot"),
 _T("John Kennedy has been shot"),
 _T("This text has a typo in referring to Keneddy"),
 NULL
};

 

查询词为"Kennedy"

 

简单的高亮结果如:

 

John <B>Kennedy</B> has been shot
 to <B>Kennedy</B>...This piece of text refers to <B>Kennedy</B> at
 <B>kennedy</B> has been shot

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值