Lucene Suggest的使用

参考

源码分析:

// Excerpt of Lucene's AnalyzingInfixSuggester showing only its constants and fields;
// the remaining members are elided (the "...." line near the end).
public class AnalyzingInfixSuggester extends Lookup implements Closeable {

  // Name of the field the suggestion text is indexed under.
  /** Field name used for the indexed text. */
  protected final static String TEXT_FIELD_NAME = "text";

  // Name of the field used to look a keyword up by exact match.
  /** Field name used for the indexed text, as a
   *  StringField, for exact lookup. */
  protected final static String EXACT_TEXT_FIELD_NAME = "exacttext";

  // Name of the field used for filtering.
  /** Field name used for the indexed context, as a
   *  StringField and a SortedSetDVField, for filtering. */
  protected final static String CONTEXTS_FIELD_NAME = "contexts";

  /** Analyzer used at search time */
  protected final Analyzer queryAnalyzer;
  /** Analyzer used at index time */
  protected final Analyzer indexAnalyzer;
  final Version matchVersion;

  // Directory the index is written to.
  private final Directory dir;
  // Minimum prefix length. NOTE(review): the article's author describes it as a guard
  // against producing too many terms and running out of memory; confirm the exact
  // semantics against the Lucene documentation.
  final int minPrefixChars;

  // Whether every query term has to match.
  private final boolean allTermsRequired;
  private final boolean highlight;

  // When true, the index is committed at the end of a build.
  private final boolean commitOnBuild;

  /** Used for ongoing NRT additions/updates. */
  private IndexWriter writer;

  /** {@link IndexSearcher} used for lookups. */
  protected SearcherManager searcherMgr;

  ....
}

建立索引

  /**
   * Rebuilds the suggestion index from the entries supplied by {@code iter}.
   *
   * <p>An already open searcher manager and writer are closed first. A new
   * {@link IndexWriter} is then opened on {@code dir} in {@code CREATE} mode and every
   * entry of the iterator is handed to {@code add}. If the method does not reach its
   * end successfully, the writer is rolled back and discarded.
   *
   * @param iter source of the suggestion texts together with their contexts, weights
   *             and (optionally) payloads
   * @throws IOException declared for the index operations performed here
   */
  @Override
  public void build(InputIterator iter) throws IOException {

    // Release the searcher manager left over from a previous build, if any.
    if (searcherMgr != null) {
      searcherMgr.close();
      searcherMgr = null;
    }

    // Likewise close the previous writer before a new one is opened on the same directory.
    if (writer != null) {
      writer.close();
      writer = null;
    }

    // Stays false unless the whole build completes; drives the rollback in the finally block.
    boolean success = false;
    try {
      // First pass: build a temporary normal Lucene index,
      // just indexing the suggestions as they iterate:
      writer = new IndexWriter(dir,
                               getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
      //long t0 = System.nanoTime();

      // TODO: use threads?
      BytesRef text;
      // The iterator signals its end by returning null.
      while ((text = iter.next()) != null) {
        // Only ask for a payload when the iterator says it provides them.
        BytesRef payload;
        if (iter.hasPayloads()) {
          payload = iter.payload();
        } else {
          payload = null;
        }

        add(text, iter.contexts(), iter.weight(), payload);
      }

      //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
      if (commitOnBuild) {
        commit();
      }
      // Open the searcher manager on top of the writer that was just filled.
      searcherMgr = new SearcherManager(writer, null);
      success = true;
    } finally {
      // Build failed part-way: discard the writer's changes and drop the reference.
      if (success == false && writer != null) {
        writer.rollback();
        writer = null;
      }
    }
  }

InputIterator

// Excerpt of Lucene's InputIterator interface; further members are elided (the "..." line).
public interface InputIterator extends BytesRefIterator {

  // Weight of the term.
  /** A term's weight, higher numbers mean better suggestions. */
  public long weight();

  // The payload stores one piece of extra information that is written to the index as
  // bytes; once a search returns, it can be read back from the payload attribute of the
  // LookupResult object.
  /** An arbitrary byte[] to record per suggestion.  See
   *  {@link LookupResult#payload} to retrieve the payload
   *  for each suggestion. */
  public BytesRef payload();

  /** Returns true if the iterator has payloads */
  public boolean hasPayloads();

  /** 
   * A term's contexts context can be used to filter suggestions.
   * May return null, if suggest entries do not have any context
   * */
  public Set<BytesRef> contexts();

  /** Returns true if the iterator has contexts */
  public boolean hasContexts();

  ...
}
package com.whf.suggest;

import java.io.Serializable;

public class Fruit implements Serializable {

    //水果名字
    private String name=null;
    //水果销量
    private Integer num=null;
    //水果产地
    private String regions=null;

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Integer getNum() {
        return num;
    }

    public void setNum(Integer num) {
        this.num = num;
    }

    public String getRegions() {
        return regions;
    }

    public void setRegions(String regions) {
        this.regions = regions;
    }

    public Fruit(String name, Integer num, String regions) {
        super();
        this.name = name;
        this.num = num;
        this.regions = regions;
    }

    public Fruit() {
        super();
    }

}

package com.whf.suggest;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRef;

/**
 * {@link InputIterator} adapter that feeds {@link Fruit} objects to a Lucene suggester.
 *
 * <p>For every fruit the name is the suggestion text, the sales number is the weight,
 * the serialized fruit is the payload and the region is the (single) context.
 * {@link #weight()}, {@link #payload()} and {@link #contexts()} always refer to the fruit
 * most recently returned by {@link #next()}.
 */
public class FruitIterator implements InputIterator {

    /** Charset used to encode the suggestion text and the context. */
    private static final java.nio.charset.Charset CHARSET = java.nio.charset.StandardCharsets.UTF_8;

    /** Source of the fruits to index. */
    private final Iterator<Fruit> iter;

    /** Fruit most recently returned by {@link #next()}; {@code null} before the first call. */
    private Fruit fruit;

    /**
     * @param iter iterator over the fruits to expose as suggestions
     */
    public FruitIterator(Iterator<Fruit> iter) {
        this.iter = iter;
    }

    /**
     * Advances to the next fruit.
     *
     * @return the UTF-8 encoded name of the next fruit, or {@code null} once the
     *         underlying iterator is exhausted
     */
    @Override
    public BytesRef next() throws IOException {
        if (!iter.hasNext()) {
            return null;
        }
        fruit = iter.next();
        return new BytesRef(fruit.getName().getBytes(CHARSET));
    }

    /**
     * Weight of the current fruit; it influences the ordering of the suggestions.
     *
     * @return the fruit's sales number, or {@code 0} when the fruit has none
     */
    @Override
    public long weight() {
        Integer num = current().getNum();
        // A missing sales number would otherwise fail with an NPE during unboxing.
        return num == null ? 0L : num.longValue();
    }

    /**
     * Serializes the current {@link Fruit} so it can be stored as the suggestion payload.
     *
     * @return the Java-serialized form of the current fruit
     * @throws IllegalStateException if the fruit cannot be serialized
     */
    @Override
    public BytesRef payload() {
        Fruit current = current();
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // Closing the object stream flushes it into the buffer before the bytes are read.
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(current);
        } catch (IOException e) {
            // Fail loudly instead of silently indexing an entry without its payload.
            throw new IllegalStateException(
                    "Failed to serialize fruit '" + current.getName() + "' into a payload", e);
        }
        return new BytesRef(buffer.toByteArray());
    }

    @Override
    public boolean hasPayloads() {
        return true;
    }

    /**
     * Exposes the region of the current fruit as its context, typically used to filter
     * suggestions.
     *
     * @return a set holding the UTF-8 encoded region, or an empty set when the fruit has
     *         no region
     */
    @Override
    public Set<BytesRef> contexts() {
        Set<BytesRef> regions = new HashSet<BytesRef>();
        String region = current().getRegions();
        if (region != null) {
            regions.add(new BytesRef(region.getBytes(CHARSET)));
        }
        return regions;
    }

    @Override
    public boolean hasContexts() {
        return true;
    }

    /** Returns the fruit selected by the last {@link #next()} call, failing clearly if there is none. */
    private Fruit current() {
        if (fruit == null) {
            throw new IllegalStateException(
                    "next() must return an entry before its weight, payload or contexts are read");
        }
        return fruit;
    }

}
    package com.whf.suggest;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

/**
 * Demo of Lucene keyword suggestions: builds an in-memory suggestion index from a few
 * {@link Fruit} objects and runs a context-filtered lookup against it.
 *
 * @author Lanxiaowei
 */
public class Main {

    /** Maximum number of suggestions requested per lookup. */
    private static final int MAX_RESULTS = 10;

    /**
     * Looks up suggestions for {@code name} restricted to the given region and prints
     * the key of every result.
     *
     * <p>The contexts act as the filter and {@code name} is the keyword to match; at most
     * {@link #MAX_RESULTS} results are requested. The two boolean arguments passed to the
     * suggester are, in order, whether every term must match and whether the keyword
     * should be highlighted.
     *
     * @param suggester suggester that has already been built
     * @param name      keyword (prefix) to look up
     * @param region    region used as the context filter
     * @throws IOException if the lookup fails
     */
    private static void lookup(AnalyzingInfixSuggester suggester, String name,
            String region) throws IOException {
        HashSet<BytesRef> contexts = new HashSet<BytesRef>();
        contexts.add(new BytesRef(region.getBytes(java.nio.charset.StandardCharsets.UTF_8)));
        // Variant without a context filter: suggester.lookup(name, 10, false, false)
        List<LookupResult> results = suggester.lookup(name, contexts, MAX_RESULTS, true, true);
        System.out.println("-- \"" + name + "\" (" + region + "):");
        for (LookupResult result : results) {
            System.out.println(result.key);
        }
        System.out.println();
    }

    /**
     * Builds the demo index and runs a sample lookup.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        RAMDirectory indexDir = new RAMDirectory();
        StandardAnalyzer analyzer = new StandardAnalyzer();
        // The suggester is Closeable; try-with-resources releases it on every exit path.
        try (AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(indexDir, analyzer)) {

            // Create the Fruit test data.
            List<Fruit> list = new ArrayList<Fruit>();
            list.add(new Fruit("apple", 10, "china"));
            list.add(new Fruit("watermelon", 12, "english"));
            list.add(new Fruit("pear", 13, "usa"));
            list.add(new Fruit("banana", 11, "china"));
            list.add(new Fruit("applely", 15, "china"));
            list.add(new Fruit("peel", 9, "china"));
            list.add(new Fruit("way", 8, "usa"));

            // Build the test index.
            suggester.build(new FruitIterator(list.iterator()));

            // Run the search. Other queries to try:
            //   lookup(suggester, "ap", "english");
            //   lookup(suggester, "b", "china");
            //   lookup(suggester, "w", "usa");
            lookup(suggester, "app", "china");
        } catch (IOException e) {
            System.err.println("Error!");
            // Keep the cause visible instead of hiding it behind the generic message.
            e.printStackTrace();
        }
    }
}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值