前言
全文检索是一种比较常见的查询方式,在互联网行业尤为常见。本文以 Lucene 为切入点进行学习。这篇博客只是个人的学习笔记,如有不正确的地方,欢迎指出。
Lucene 检索的思路简单总结
开发代码
package com;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
/**
 * Minimal Lucene walkthrough: builds an index from a single text file and then
 * runs a keyword query against that index.
 *
 * <p>{@link #search()} reads the index directory that {@link #createIndex()} writes,
 * and both use the hard-coded local paths declared below.
 *
 * @author lf
 */
public class LuenceTest {

    /** Index location (a directory). */
    String indexPath = "D:\\pyg_work\\project\\my-test\\src\\main\\resources\\luenceIndexFile";

    /** Analyzer (tokenizer) used both when indexing and when parsing the query. */
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer();

    /** Location of the source file whose content gets indexed. */
    String filePath = "D:\\pyg_work\\project\\my-test\\src\\main\\resources\\myWorlds.txt";

    /**
     * Collects the source file into a {@link Document} and writes it to the index.
     *
     * @throws Exception if the source file cannot be read or the index cannot be written
     */
    @Test
    public void createIndex() throws Exception {
        // 1. file --> doc (collect the source data)
        File file = new File(filePath);
        Document doc = new Document();
        // Field.Index.ANALYZED: index the field and tokenize its value.
        doc.add(new Field("name", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("content", readFile(file), Field.Store.YES, Field.Index.ANALYZED));
        // Field.Index.NOT_ANALYZED: index the field, but do not tokenize its value.
        doc.add(new Field("size", file.length() + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Field.Index.NO: do not index the field (it is still stored, so it can be read back).
        doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NO));

        // 2. Build the index. IndexWriter is the object that maintains the index
        //    (adds, deletes and updates documents).
        boolean createIndex = true; // whether to re-create the index from scratch
        // MaxFieldLength caps how many terms of a field get indexed; UNLIMITED means no cap.
        IndexWriter indexWriter = new IndexWriter(
                indexPath, standardAnalyzer, createIndex, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            indexWriter.addDocument(doc); // add the document to the index
        } finally {
            // Close even when addDocument fails so the writer's resources are not left open.
            indexWriter.close();
        }
    }

    /**
     * Reads a whole text file into a string, terminating every line with {@code "\n"}.
     *
     * @param file the file to read
     * @return the file content, one {@code "\n"} appended after each line
     * @throws Exception if the file cannot be opened or read
     */
    private String readFile(File file) throws Exception {
        // NOTE(review): FileReader decodes with the platform default charset — confirm
        // that this matches the encoding of the source file.
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
            return content.toString();
        } finally {
            // The reader was previously never closed, leaking the file handle.
            reader.close();
        }
    }

    /**
     * Searches the index for a fixed keyword and prints every matching document.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void search() throws Exception {
        // 1. Build the query object.
        String queryString = "world";            // text to search for
        String[] fields = {"name", "content"};   // indexed fields to search in
        QueryParser queryParser = new MultiFieldQueryParser(fields, standardAnalyzer);
        Query query = queryParser.parse(queryString);

        // 2. Run the query against the index at indexPath.
        IndexSearcher indexSearcher = new IndexSearcher(indexPath);
        try {
            Filter filter = null;   // no filter
            int docSize = 100000;   // number of documents requested from one search
            TopDocs topDocs = indexSearcher.search(query, filter, docSize);
            System.out.println("总共有条【"+topDocs.totalHits+"】匹配结果");

            // 3. Print the results.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docIndex = scoreDoc.doc;                 // internal document number
                Document doc = indexSearcher.doc(docIndex);  // load the document by its number
                System.out.println("------------------------");
                System.out.println("name:"+doc.get("name"));
                System.out.println("content:"+doc.get("content"));
                System.out.println("size:"+doc.get("size"));
                System.out.println("path:"+doc.get("path"));
            }
        } finally {
            // The searcher was previously never closed.
            indexSearcher.close();
        }
    }
}