Lucene 入门：Hello World

package cn.itcast.lucene.helloword;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;

import cn.itcast.lucene.utils.FileAndDocumentUtils;

public class HelloWorld {
 // 索引文件
 // String filePath = "D:\\javacode\\lucenesDemo\\datasource\\IndexWriter addDocument's a javadoc .txt";

 String filePath = "D:\\javacode\\lucenesDemo\\datasource\\小笑话_总统的房间 Room .txt";
 // 索引库的位置
 String indexPath = "D:\\javacode\\lucenesDemo\\luceneIndex";
 // 默认的分词器 创建索引和查询都用同一个分词器
 // Analyzer analyzer = new StandardAnalyzer();
 Analyzer analyzer = new MMAnalyzer();

 @Test
 public void createIndex() throws CorruptIndexException, IOException {
  // 把文件转成lucene索引库里面的document对象 file ---> document
  Document doc = FileAndDocumentUtils.file2Document(new File(filePath));

  // IndexWriter对索引库进行增删改操作 查询不包括 要对文件进行操作 必须要有io流 索引都要对他进行关闭操作
  // analyzer 代表索引库对应的是那个分词器
  // 增加 true 表示索引库不存在 就创建 false 没有也不创建 但是到时候就会报错
  // MaxFieldLength.LIMITED 表示对一个文件进行索引 只对其前10000个词进行索引 每个字段中有多少个词进行索引
  IndexWriter indexWriter = new IndexWriter(indexPath, analyzer, true, MaxFieldLength.LIMITED);
  // 把文件添加到索引库中
  indexWriter.addDocument(doc);
  // indexWriter.deleteDocuments(query)
  // indexWriter.updateDocument(term, doc)
  // indexWriter.optimize();
  indexWriter.close();
 }

 @Test
 public void search() throws Exception {

  String searchString = "房间";
  // 把要搜索的文本解析为query对象 和hibernate里面的相似
  // 这样才能打印结果
  String[] fields = { "name", "content" };

  Map<String, Float> boosts = new HashMap<String, Float>();
  boosts.put("name", 3f);
  // boosts.put("content", 1.0f); 默认为1.0f
  // QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer,boosts);
  // Multi多重的 Parser解剖器
  QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
  Query query = queryParser.parse(searchString);
  Filter filter = null;
  // 对索引库进行查询操作IndexSearcher
  // indexPath 索引库的位置
  IndexSearcher indexSearcher = new IndexSearcher(indexPath);

  // -------------------------TermQuery查询------------------------------
  /*
   * IndexSearcher indexSearcher = new IndexSearcher(indexPath); Term term = new Term("name", "房间"); Query query = new
   * TermQuery(term); Filter filter = null; TopDocs topDocs = indexSearcher.search(query, filter, 10000); // 10000
   * 一次性在索引库中查询多少个结果 默认值是50
   */

  // -------------RangeQuery----------------------
  // IndexSearcher indexSearcher = new IndexSearcher(indexPath);
  // Term lowerTerm = new Term("size", NumberTools.longToString(0200));
  // Term upperTerm = new Term("size", NumberTools.longToString(1000));
  // Term lowerTerm = new Term("size", "050");
  // Term upperTerm = new Term("size", "500");
  // Query query = new RangeQuery(lowerTerm, upperTerm, false);//false 代表是否包含边界
  // Filter filter = null;
  // TopDocs topDocs = indexSearcher.search(query, filter, 10000);

  // ============WildcardQuery 通配符查询
  // IndexSearcher indexSearcher = new IndexSearcher(indexPath);
  // Term term = new Term("name", "roo?");
  // // Term term = new Term("name", "ro*"); // 前缀查询 PrefixQuery
  // // Term term = new Term("name", "*o*");
  // // Term term = new Term("name", "房*");
  // Query query = new WildcardQuery(term);
  // Filter filter = null;
  // TopDocs topDocs = indexSearcher.search(query, filter, 10000);

  // ==========短语查询 PhraseQuery

  // IndexSearcher indexSearcher = new IndexSearcher(indexPath);
  // PhraseQuery query = new PhraseQuery();
  // // phraseQuery.add(new Term("content", "绅士"), 1); // 1 代表绅士的位置
  // // phraseQuery.add(new Term("content", "饭店"), 4);
  // query.add(new Term("content", "绅士"));
  // query.add(new Term("content", "饭店"));
  // query.setSlop(2);// 设置指定词之间隔了几个词
  // Filter filter = null;
  // TopDocs topDocs = indexSearcher.search(query, filter, 10000);
  // IndexSearcher indexSearcher = new IndexSearcher(indexPath);
  // PhraseQuery query1 = new PhraseQuery();
  // query1.add(new Term("content", "绅士"));
  // query1.add(new Term("content", "饭店"));
  // query1.setSlop(2);
  //
  // // 条件2
  // Term lowerTerm = new Term("size", NumberTools.longToString(500));
  // Term upperTerm = new Term("size", NumberTools.longToString(1000));
  // Query query2 = new RangeQuery(lowerTerm, upperTerm, true);
  //
  // // 组合
  // BooleanQuery query = new BooleanQuery();
  // query.add(query1, Occur.MUST);
  // query.add(query2, Occur.SHOULD);
  // Filter filter = null;

  // 在搜索的时候按照指定的方式排序
  // 排序方式 Sort
  // Sort sort = new Sort();
  // sort.setSort(new SortField("size", true));
  // TopDocs topDocs = indexSearcher.search(query, filter, 10000, sort);

  TopDocs topDocs = indexSearcher.search(query, filter, 10000);

  // ================================准备高亮器
  Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
  Scorer scorer = new QueryScorer(query);
  Highlighter highlighter = new Highlighter(formatter, scorer);
  Fragmenter fragmenter = new SimpleFragmenter(50);// 摘要的前多少个字符
  highlighter.setTextFragmenter(fragmenter);

  // ===================================

  System.out.println("一工有的记录数是:" + topDocs.totalHits);
  for (ScoreDoc scoreDocs : topDocs.scoreDocs) {
   int docSn = scoreDocs.doc;// 文档内部编号
   Document doc = indexSearcher.doc(docSn);// 根据文档编号取出相应的文档
   // ====================把值设置为高亮
   String hc = highlighter.getBestFragment(analyzer, "content", doc.get("content"));
   if (hc == null) {
    // 如果没有找到 就找前50个字符
    // 疑问 如果没找到 我们可以找标题啊
    String content = doc.get("content");
    int index = Math.min(50, content.length());
    hc = content.substring(0, index);
   }
   doc.getField("content").setValue(hc);

   FileAndDocumentUtils.printDocumentInfo(doc);

  }

 }

 @Test
 public void delete() throws Exception {
  String searchString = "document";
  // 吧要搜索的文本解析为query对象 和hibernate里面的相似
  // 这样才能打印结果
  String[] fields = { "name", "content" };
  // Multi多重的 Parser解剖器
  QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
  Query query = queryParser.parse(searchString);
  IndexWriter indexWriter = new IndexWriter(indexPath, analyzer, true, MaxFieldLength.LIMITED);
  indexWriter.deleteDocuments(query);
  indexWriter.close();

 }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值