Lucene学习二:封装操作接口,实现删除、更新索引

索引文件可以理解为一个数据库,既然是数据库,那么就应该包含增删改查。在此我们先定义两个接口 IndexInterface 和 QueryInterface,分别表示索引相关的操作和查询相关的操作。

IndexInterface代码如下:

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;

/**
 * Write-side operations on the search index: adding, updating and deleting
 * indexed records.
 */
public interface IndexInterface {

  // ---- add / update -------------------------------------------------------

  /**
   * Indexes one record.
   *
   * @param id    business identifier of the record
   * @param title text of the record to index
   * @throws IOException if the index cannot be written
   */
  void create(Long id, String title) throws IOException;

  /**
   * Updates the indexed record with the given identifier.
   *
   * @param id    business identifier of the record
   * @param title new text of the record
   * @throws IOException if the index cannot be written
   */
  void updateOne(Long id, String title) throws IOException;

  // ---- delete -------------------------------------------------------------

  /**
   * Deletes every entry from the index.
   *
   * @throws IOException if the index cannot be written
   */
  void delete() throws IOException;

  /**
   * Deletes the entries identified by a single term.
   *
   * @param term term selecting the entries to delete
   * @throws IOException if the index cannot be written
   */
  void delete(Term term) throws IOException;

  /**
   * Deletes the entries identified by several terms.
   *
   * @param terms terms selecting the entries to delete
   * @throws IOException if the index cannot be written
   */
  void delete(Term[] terms) throws IOException;

  /**
   * Deletes the entries whose {@code field} is hit by {@code key}.
   *
   * @param field name of the field to match against
   * @param key   expression to match in that field
   * @throws IOException    if the index cannot be written
   * @throws ParseException if {@code key} cannot be parsed
   */
  void delete(String field, String key) throws IOException, ParseException;

  /**
   * Deletes the entries selected by several queries.
   *
   * @param querys queries selecting the entries to delete
   * @throws IOException if the index cannot be written
   */
  void delete(Query[] querys) throws IOException;
}

创建索引的时候可以分两种情况:丢弃现有索引后重新创建,或者在现有索引上追加新文档。直接追加时,如果同一条记录被多次写入,索引中就会出现重复文档;下文的 updateOne 方法会介绍如何按 id 更新,从而防止重复添加。

QueryInterface代码:

import java.io.IOException;

import org.apache.lucene.queryparser.classic.ParseException;

/**
 * Read-side operations on the search index.
 */
public interface QueryInterface {

  /**
   * Searches the index.
   *
   * @param key   search expression
   * @param field name of the field to search in
   * @return the search result; its concrete shape is left to the implementation
   * @throws IOException    if the index cannot be read
   * @throws ParseException if {@code key} cannot be parsed
   */
  Object search(String key, String field) throws IOException, ParseException;
}



索引实现类:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.hao.search.service.IndexInterface;

public class IndexService implements IndexInterface {

  private Analyzer analyzer = new IKAnalyzer();
  
  public IndexService(String path) {
    this.path = path;
  }
  
  private String path; // 索引文件目录
  
  public void create(Long id, String title) throws IOException {
    Directory dir = FSDirectory.open(new File(path));
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    
    Document document = new Document();
    document.add(new StringField("id", id.toString(), Field.Store.YES)); // 业务记录id,唯一,例如商品编号
    document.add(new TextField("title", title, Field.Store.YES)); // 待分词搜索字段
    
    indexWriter.addDocument(document); 
    indexWriter.close();

  }
  // 
  public void append(Long id, String title) throws IOException {
    Directory dir = FSDirectory.open(new File(path));
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    
    Document document = new Document();
    document.add(new StringField("id", id.toString(), Field.Store.YES));
    document.add(new TextField("title", title, Field.Store.YES));
    
    indexWriter.addDocument(document);
    indexWriter.close();

  }
  
  public void delete() throws IOException {
    Directory dir = FSDirectory.open(new File(path)); 
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    indexWriter.deleteAll(); // 删除全部索引
    indexWriter.close();
  }

  public void delete(Long id) throws IOException {
    delete(new Term("id", id.toString())); // 这里是按照id进行删除
  }
  
  public void delete(String field, String key) throws IOException, ParseException {
    Directory dir = FSDirectory.open(new File(path)); 
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);  
    Query query = parser.parse(key);
    
    indexWriter.deleteDocuments(query);
    indexWriter.close();
    
  }

  public void delete(Query[] querys) throws IOException {
    // TODO Auto-generated method stub
    
  }

  public void delete(Term term) throws IOException {
    Directory dir = FSDirectory.open(new File(path)); 
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    indexWriter.deleteDocuments(term);
    indexWriter.close();
    
  }

  public void delete(Term[] terms) throws IOException {
    // TODO Auto-generated method stub
    
  }

  public void updateOne(Long id, String title) throws IOException {
    Directory dir = FSDirectory.open(new File(path));
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    
    Document document = new Document();
    document.add(new StringField("id", id.toString(), Field.Store.YES));
    document.add(new TextField("title", title, Field.Store.YES));
    
    indexWriter.updateDocument(new Term("id", id.toString()), document);// 按照id进行更新,若不存在就会新创建,建议使用此方法追加索引
    indexWriter.close();

    
  }

}

StringField 与 TextField 都是 Field 的子类,两者都会被建立索引;区别在于 StringField 不会被分词,而是把整个值作为一个词项,适合图片地址、商品编号这类需要精确匹配的字段;TextField 会经过分词器处理,适合标题、正文等需要全文检索的字段。

查询实现类:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.hao.search.service.QueryInterface;

public class QueryService implements QueryInterface{

  Analyzer analyzer = new IKAnalyzer();
  
  public QueryService(String path) {
    this.path = path;
  }
  
  private String path = "d:/test/lucene";
  public Object search(String key, String field) throws IOException, ParseException {
    
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("d:/test/lucene"))));  
    QueryParser queryParser = new QueryParser(Version.LUCENE_47, field, analyzer);
    queryParser.setDefaultOperator(QueryParser.Operator.AND); // 设置分词结果集之间的关系,这里设置为全包含
    Query query = queryParser.parse(key);
    QueryScorer scorer = new QueryScorer(query);  
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);// 得到得分的片段,就是得到一段包含所查询的关键字的摘要
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
    "<b><font color='red'>", "</font></b>");// 对查询的数据格式化;无参构造器的默认是将关键字加粗
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);// 根据得分和格式化
    highlighter.setTextFragmenter(fragmenter);// 设置成高亮
    
    ScoreDoc after = null;
    TopDocs topDocs = searcher.searchAfter(after, query, 10);
    ScoreDoc[] pageDocs = topDocs.scoreDocs;
    for (ScoreDoc doc : pageDocs) {
      Document d = searcher.doc(doc.doc); 
      try {
        String str = highlighter.getBestFragment(analyzer, field, d.get(field)) ; // 对命中关键词加高亮,此处可以封装成对象列表返回
        System.out.println(str);
      } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
      }
      after = pageDocs[topDocs.scoreDocs.length - 1]; 
      System.out.println("id: "+ d.get(field) +"内容:"+d.get("title"));
    }
    
    return null;
  }

}
测试类

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;

import com.hao.search.service.IndexInterface;
import com.hao.search.service.QueryInterface;
import com.hao.search.serviceImpl.IndexService;
import com.hao.search.serviceImpl.QueryService;

/**
 * Hello world!
 *
 */
public class App {
    /** Directory on disk that holds the demo index. */
    static String path = "d:/test/lucene";

    /**
     * Demo entry point: searches the {@code title} field for "入门".
     * Uncomment {@code create()} on the first run to build the index before
     * querying, or the {@code delete} calls to remove entries.
     */
    public static void main( String[] args ) {
//      delete();
//      create(); // build the index first, then run the query
      query("入门", "title");
//      delete(3L);
    }

    /**
     * Deletes the indexed record with the given id and reports the outcome on
     * standard output.
     *
     * @param id business identifier of the record to remove
     */
    public static void delete(Long id) {
      IndexInterface index = new IndexService(path);
      try {
        index.delete(new Term("id", id.toString()));
        System.out.println("delete success");
      } catch (IOException e1) {
        e1.printStackTrace();
        System.out.println("delete error");
      }

    }

    /**
     * Deletes the whole index and reports the outcome on standard output.
     */
    public static void delete() {
      IndexInterface index = new IndexService(path);
      try {
        index.delete();
        System.out.println("delete success");
      } catch (IOException e1) {
        e1.printStackTrace();
        System.out.println("delete error");
      }

    }

    /**
     * Runs a search and hands back whatever the query service returns.
     *
     * @param id    search expression (despite its name, this is the keyword, not a record id)
     * @param field name of the field to search in
     * @return the value returned by {@link QueryInterface#search}, or
     *         {@code null} when the search failed
     */
    public static Object query(String id, String field) {
      QueryInterface query = new QueryService(path);
      try {
        return query.search(id, field);
      } catch (IOException e) {
        e.printStackTrace();
      } catch (ParseException e) {
        e.printStackTrace();
      }
      return null;
    }

    /**
     * Fills the index with the sample records. Uses {@code updateOne} so that
     * running it repeatedly does not create duplicate entries.
     */
    public static void create() {
      IndexInterface index = new IndexService(path);
      try {
        index.updateOne(1L, "字符串和三元运算符字符串相加优先级的问题");
        index.updateOne(2L, "eclipse下maven更新的意外bug总结");
        index.updateOne(3L, "如何使用搜索技巧来成为一名高效的程序员");
        index.updateOne(4L, "入门级Demo,创建索引和查询高亮显示");
        index.updateOne(5L, "批量爬QQ用户信息,利用QQ查询功能进行获取QQ用户信息");
        index.updateOne(6L, "Python爬虫开发(三):数据存储以及多线程");
        index.updateOne(7L, "Python爬虫开发(一):零基础入门");
        index.updateOne(8L, "Python爬虫开发(二):整站爬虫与Web挖掘");
        index.updateOne(9L, "Python爬虫开发(三-续):快速线程池爬虫");
        index.updateOne(10L, "Python爬虫开发(五):反爬虫措施以及爬虫编写注意事项");
        index.updateOne(11L, "Python爬虫开发(四):动态加载页面的解决方案与爬虫代理");
        index.updateOne(12L, "挖洞经验 | 价值1万美金的谷歌内部主机信息泄露漏洞");
        index.updateOne(13L, "如何确认Google用户的具体电子邮件地址(已提交Google漏洞奖励计划)");
        index.updateOne(14L, "欧洲国家电网的噩梦:攻击太阳能板就能导致大规模停电?");
        index.updateOne(15L, "Java反序列化危机已过,这次来的是.Net反序列化漏洞");
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
}
输出结果:

<b><font color='red'>入门</font></b>级Demo,创建索引和查询高亮显示
id: 入门级Demo,创建索引和查询高亮显示内容:入门级Demo,创建索引和查询高亮显示
Python爬虫开发(一):零基础<b><font color='red'>入门</font></b>
id: Python爬虫开发(一):零基础入门内容:Python爬虫开发(一):零基础入门




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值