import dev.lzq.search.lucene4x.commons.Manager;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
/**
 * Small demo of Lucene's {@link MoreLikeThis}: prints one reference document and then
 * the documents that a MoreLikeThis query built from it ranks as most similar.
 */
public class TestMoreLikeThisQuery
{
    /** Lucene document id of the reference document the query is built from. */
    private static final int REF_DOC_ID = 1;

    /** Maximum number of similar documents printed. */
    private static final int MAX_SIMILAR_DOCS = 10;

    /**
     * Runs the demo against the index exposed by {@link Manager}.
     *
     * @param args unused
     * @throws IOException if reading from the index fails
     */
    public static void main(String[] args) throws IOException
    {
        IndexSearcher searcher = Manager.getIndexSearcher();
        // Document ids are only meaningful relative to a single reader, so MoreLikeThis
        // must read from the very reader the searcher uses; obtaining a separate reader
        // could resolve REF_DOC_ID to a different document.
        IndexReader reader = searcher.getIndexReader();

        Document refDoc = searcher.doc(REF_DOC_ID);
        System.out.println("关联文档: 【" + refDoc.get("Title") + "】" + refDoc.get("Introduction"));

        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setFieldNames(new String[]{"Search_Field_Title", "Introduction"});
        // The default minimum term frequency is 2; lower it explicitly, otherwise the
        // query may end up with no terms and return nothing.
        mlt.setMinTermFreq(1);
        // The default minimum document frequency is 5; lower it explicitly for the same reason.
        mlt.setMinDocFreq(1);
        mlt.setAnalyzer(Manager.getAnalyzer());

        // The resulting query is essentially a BooleanQuery: MoreLikeThis takes the
        // reference document and the configured fields, uses the Analyzer to tokenize
        // those fields, and combines the selected terms as Occur.SHOULD clauses.
        Query query = mlt.like(REF_DOC_ID);

        // The reference document normally matches its own query, so ask for one extra
        // hit and leave the reference document out of the printed results.
        TopDocs tds = searcher.search(query, MAX_SIMILAR_DOCS + 1);
        int printed = 0;
        for (ScoreDoc sd : tds.scoreDocs)
        {
            if (sd.doc == REF_DOC_ID)
            {
                continue;
            }
            if (printed == MAX_SIMILAR_DOCS)
            {
                break;
            }
            Document doc = searcher.doc(sd.doc);
            System.out.println("相似文档: 【" + doc.get("Title") + "】 " + doc.get("Introduction"));
            printed++;
        }
    }
}
其中,Manager 类的代码没有贴出来:它提供的 IndexReader、IndexSearcher、Analyzer 都只是按常规方式新建的对象,没有做额外的封装。
转载于:https://blog.51cto.com/lizhuquan0769/1789848