- package com.jadyer.lucene;
- import java.io.File;
- import java.io.IOException;
- import java.util.Random;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.NumericField;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TermQuery;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- import com.jadyer.custom.MyNameScoreQuery;
- /**
- * 【Lucene3.6.2入门系列】第08节_高级搜索之自定义评分
- * @create Aug 19, 2013 12:13:14 PM
- * @author 玄玉<http://blog.csdn.net/jadyer>
- */
- public class AdvancedSearchByScore {
- private Directory directory;
- private IndexReader reader;
- public AdvancedSearchByScore(){
- /**文件大小*/
- int[] sizes = {90, 10, 20, 10, 60, 50};
- /**文件名*/
- String[] names = {"Michael.java", "Scofield.ini", "Tbag.txt", "Jack", "Jade", "Jadyer"};
- /**文件内容*/
- String[] contents = {"my java blog is http://blog.csdn.net/jadyer",
- "my Java Website is http://www.jadyer.cn",
- "my name is jadyer",
- "I am a Java Developer",
- "I am from Haerbin",
- "I like java of Lucene"};
- IndexWriter writer = null;
- Document doc = null;
- try {
- directory = FSDirectory.open(new File("myExample/01_index/"));
- writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
- writer.deleteAll();
- for(int i=0; i<sizes.length; i++){
- doc = new Document();
- doc.add(new NumericField("size", Field.Store.YES, true).setIntValue(sizes[i]));
- doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
- doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
- //添加一个评分域,专门在自定义评分时使用
- //此时默认为Field.Store.NO和Field.Index.ANALYZED_NO_NORMS
- doc.add(new NumericField("fileScore").setIntValue(new Random().nextInt(600)));
- writer.addDocument(doc);
- }
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- if(null != writer){
- try {
- writer.close();
- } catch (IOException ce) {
- ce.printStackTrace();
- }
- }
- }
- }
- /**
- * 获取IndexReader实例
- */
- private IndexReader getIndexReader(){
- try {
- if(reader == null){
- reader = IndexReader.open(directory);
- }else{
- //if the index was changed since the provided reader was opened, open and return a new reader; else,return null
- //如果当前reader在打开期间index发生改变,则打开并返回一个新的IndexReader,否则返回null
- IndexReader ir = IndexReader.openIfChanged(reader);
- if(ir != null){
- reader.close(); //关闭原reader
- reader = ir; //赋予新reader
- }
- }
- return reader;
- }catch(Exception e) {
- e.printStackTrace();
- }
- return null; //发生异常则返回null
- }
- /**
- * 自定义评分搜索
- */
- public void searchByCustomScoreQuery(){
- IndexSearcher searcher = new IndexSearcher(this.getIndexReader());
- // //创建一个评分域
- // FieldScoreQuery fsq = new FieldScoreQuery("fileScore", FieldScoreQuery.Type.INT);
- // //创建自定义的CustomScoreQuery对象
- // Query query = new MyCustomScoreQuery(new TermQuery(new Term("content", "java")), fsq);
- Query query = new MyNameScoreQuery(new TermQuery(new Term("content", "java")));
- try {
- TopDocs tds = searcher.search(query, 10);
- for(ScoreDoc sd : tds.scoreDocs){
- Document doc = searcher.doc(sd.doc);
- System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " ");
- System.out.println("size=" + doc.get("size") + " name=" + doc.get("name"));
- }
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- if(searcher != null){
- try {
- searcher.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- }
- /**
- * 测试一下评分效果
- */
- public static void main(String[] args) {
- new AdvancedSearchByScore().searchByCustomScoreQuery();
- }
- }
下面是我们自定义的评分类MyCustomScoreQuery.java
- package com.jadyer.custom;
- import java.io.IOException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.function.CustomScoreProvider;
- import org.apache.lucene.search.function.CustomScoreQuery;
- import org.apache.lucene.search.function.ValueSourceQuery;
- /**
- * 自定义评分的步骤
- * @see 1)创建一个类继承于CustomScoreQuery
- * @see 2)覆盖CustomScoreQuery.getCustomScoreProvider()方法
- * @see 3)创建一个类继承于CustomScoreProvider
- * @see 4)覆盖CustomScoreProvider.customScore()方法(我们的自定义评分主要就是在此方法中完成的)
- * @create Aug 6, 2013 10:30:46 AM
- * @author 玄玉<http://blog.csdn.net/jadyer>
- */
- public class MyCustomScoreQuery extends CustomScoreQuery {
- private static final long serialVersionUID = -2373017691291184609L;
- public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {
- //ValueSourceQuery参数就是指专门用来做评分的Query,即评分域的FieldScoreQuery
- super(subQuery, valSrcQuery);
- }
- @Override
- protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
- //如果直接返回super的,就表示使用原有的评分规则,即通过[原有的评分*传入的评分域所获取的评分]来确定最终评分
- //return super.getCustomScoreProvider(reader);
- return new MyCustomScoreProvider(reader);
- }
- private class MyCustomScoreProvider extends CustomScoreProvider {
- public MyCustomScoreProvider(IndexReader reader) {
- super(reader);
- }
- @Override
- public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
- //subQueryScore--表示默认文档的打分,valSrcScore--表示评分域的打分
- //该方法的返回值就是文档评分,即ScoreDoc.score获取的结果
- System.out.println("subQueryScore=" + subQueryScore + " valSrcScore=" + valSrcScore);
- return subQueryScore/valSrcScore;
- }
- }
- }
下面是自定义的采用特殊文件名作为评分标准的评分类MyNameScoreQuery.java
- package com.jadyer.custom;
- import java.io.IOException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.search.FieldCache;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.function.CustomScoreProvider;
- import org.apache.lucene.search.function.CustomScoreQuery;
- /**
- * 采用特殊文件名作为评分标准
- * @create Aug 6, 2013 2:17:13 PM
- * @author 玄玉<http://blog.csdn.net/jadyer>
- */
- public class MyNameScoreQuery extends CustomScoreQuery {
- private static final long serialVersionUID = -2813985445544972520L;
- public MyNameScoreQuery(Query subQuery) {
- //由于这里是打算根据文件名来自定义评分,所以重写构造方法时不必传入评分域的ValueSourceQuery
- super(subQuery);
- }
- @Override
- protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException {
- return new FilenameScoreProvider(reader);
- }
- private class FilenameScoreProvider extends CustomScoreProvider {
- String[] filenames;
- public FilenameScoreProvider(IndexReader reader) {
- super(reader);
- try {
- //在IndexReader没有关闭之前,所有的数据都会存储到一个预缓存中(缺点是占用大量内存)
- //所以我们可以通过预缓存获取name域的值(获取到的是name域所有值,故使用数组)
- this.filenames = FieldCache.DEFAULT.getStrings(reader, "name");
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- @Override
- public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
- //由于FilenameScoreQuery构造方法没有传入ValueSourceQuery,故此处ValueSourceQuery默认为1.0
- System.out.println("subQueryScore=" + subQueryScore + " valSrcScore=" + valSrcScore);
- if(filenames[doc].endsWith(".java") || filenames[doc].endsWith(".ini")){
- //只加大java文件和ini文件的评分
- return subQueryScore*1.5f;
- }else{
- return subQueryScore/1.5f;
- }
- }
- }
- }