Lucene入门

最新推荐文章于 2024-05-03 21:05:18 发布

各位别卷了

最新推荐文章于 2024-05-03 21:05:18 发布

阅读量637

点赞数 1

分类专栏： Luence

本文链接：https://blog.csdn.net/ALearrring/article/details/90405599

版权

Luence 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

前言

全文检索是一种比较常见的查询方式，在互联网行业尤为常见。本文以 Lucene 为切入点进行学习。这篇博客只是个人的学习笔记，如有不正确的地方，希望读者指出。

Lucene 检索的思路简单总结

在这里插入图片描述

开发代码

package com;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

/**
 * Minimal end-to-end Lucene example driven by two JUnit tests:
 * {@link #createIndex()} turns one text file into a {@link Document} and writes it
 * to an on-disk index, and {@link #search()} queries that index and prints the hits.
 *
 * <p>{@code createIndex()} must have been run at least once before {@code search()},
 * since the latter opens the index directory written by the former.
 *
 * @author lf
 * @date 2019/5/21 11:13
 */
public class LuenceTest {

    String indexPath = "D:\\pyg_work\\project\\my-test\\src\\main\\resources\\luenceIndexFile";// index location (a directory)
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer();// analyzer shared by indexing and query parsing
    String filePath = "D:\\pyg_work\\project\\my-test\\src\\main\\resources\\myWorlds.txt";// source file to be indexed

    /**
     * Collects the source file into a document and (re)creates the index from it.
     *
     * @throws Exception if the source file cannot be read or the index cannot be written
     */
    @Test
    public void createIndex() throws Exception {
        // 1. file --> doc (collect the source data)
        File file = new File(filePath);
        Document doc = new Document();
        // Field.Index.ANALYZED: indexed, and the value is tokenized by the analyzer
        doc.add(new Field("name", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("content", readFile(file), Field.Store.YES, Field.Index.ANALYZED));
        // Field.Index.NOT_ANALYZED: indexed as a single term, not tokenized
        doc.add(new Field("size", file.length() + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Field.Index.NO: stored only, not searchable
        doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NO));

        // 2. Build the index; IndexWriter is the object that maintains the index
        //    (add / delete / update of documents).
        boolean createIndex = true; // true = discard any existing index and start fresh
        // MaxFieldLength limits how many terms of a field get indexed; UNLIMITED means no limit.
        IndexWriter indexWriter = new IndexWriter(indexPath, standardAnalyzer, createIndex, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            indexWriter.addDocument(doc);// add the document to the index
        } finally {
            // Always close, even when addDocument fails: otherwise the writer keeps
            // the index directory's write lock and later runs cannot open it.
            indexWriter.close();
        }
    }

    /**
     * Reads a text file fully into a string.
     *
     * <p>Every line is terminated with {@code "\n"} in the result (including the last
     * one), regardless of the line separator used in the file.
     *
     * @param file the file to read
     * @return the file's text, one {@code "\n"} after each line; empty for an empty file
     * @throws Exception if the file cannot be opened or read
     */
    private String readFile(File file) throws Exception{
        // NOTE(review): FileReader decodes with the platform default charset — confirm
        // that matches the encoding of the source file.
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
            return content.toString();
        } finally {
            reader.close();// release the file handle on both success and failure
        }
    }

    /**
     * Searches the index for a fixed keyword and prints every matching document.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void search() throws Exception{
        // 1. Build the query object
        String queryString = "world";// text to search for
        String[] fields = {"name","content"};// indexed fields to search in
        QueryParser queryParser = new MultiFieldQueryParser(fields, standardAnalyzer);
        Query query = queryParser.parse(queryString);

        // 2. Run the query
        IndexSearcher indexSearcher = new IndexSearcher(indexPath); // opens the index at the given location
        try {
            Filter filter = null; // no filter
            int docSize = 100000; // maximum number of hits to fetch in one query
            TopDocs topDocs = indexSearcher.search(query, filter, docSize);
            System.out.println("总共有条【"+topDocs.totalHits+"】匹配结果");

            // 3. Print the results
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docIndex = scoreDoc.doc; // internal document number
                Document doc = indexSearcher.doc(docIndex);// load the stored document by its number

                System.out.println("------------------------");
                System.out.println("name:"+doc.get("name"));
                System.out.println("content:"+doc.get("content"));
                System.out.println("size:"+doc.get("size"));
                System.out.println("path:"+doc.get("path"));
            }
        } finally {
            indexSearcher.close();// release the index files held by the searcher
        }
    }
}