Lucene——查询索引

贵沫末
已于 2024-08-07 11:51:22 修改
阅读量20
点赞数
分类专栏： lucene 文章标签： lucene java
于 2021-08-25 15:01:18 首次发布
本文链接：https://blog.csdn.net/weixin_42789698/article/details/119911653
版权
lucene 专栏收录该内容
4 篇文章 0 订阅
订阅专栏
查询索引

package com.gykalc.rediscluster.lucene.index;

import com.gykalc.rediscluster.lucene.ikanalyzer.IKAnalyzer6x;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

public class SearchIndex {

    /**
     * 多域查询（多个Field进行查询）
     */
    @Test
    public void searchMulti() throws IOException, ParseException {
        // 第一步：索引文件路径
        Path indexdir = Paths.get("indexdir");
        FSDirectory dir = FSDirectory.open(indexdir);
        // 第二步：搜索对象创建searcher
        // 对接一个输入流
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // 搜索使用的分词器IK
        Analyzer ikAnalyzer6x = new IKAnalyzer6x();
        // 第三步：构建查询条件，指定查询多个域名称
        String[] fields = {"title", "sell_point"};
        // 利用多域对象fields和分词器构造查询条件
        // 查询条件解析器，可以收集查询的环境，生成查询条件对象
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, ikAnalyzer6x);
        // 这条件可以使用查询，只要title和sell_point两个域中有一个包含“平板”这个词项，就能搜索到
        Query query = parser.parse("平板");
        // 打印query对象
        System.out.println(query.toString());
        // 第四步：获取数据，for循环遍历，默认情况下，每个doc返回时
        // 封装到一个topDoc的对象中，底层包装了一个数组，评分
        // doc；循环数组，调用api获取数据
        TopDocs topDocs = searcher.search(query, 10);// 返回所有数据前10条
        // 封装了获取doc的所有条件的docs对象
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc: scoreDocs) {
            // 获取document的文件
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("title"));
            System.out.println(doc.get("sell_point"));
        }
    }

    /**
     * 词项查询（单域查询）
     */
    @Test
    public void termQuery() throws IOException {
        // 第一步：索引文件路径
        Path indexdir = Paths.get("indexdir");
        FSDirectory dir = FSDirectory.open(indexdir);
        // 第二步：搜索对象创建searcher
        // 对接一个输入流
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // 搜索使用的分词器IK
        Analyzer ikAnalyzer6x = new IKAnalyzer6x();
        // 词项查询条件
        Term term = new Term("title", "平板");
        Query query = new TermQuery(term);
        // 打印query对象
        System.out.println(query.toString());
        // 第四步：获取数据，for循环遍历，默认情况下，每个doc返回时
        // 封装到一个topDoc的对象中，底层包装了一个数组，评分
        // doc；循环数组，调用api获取数据
        TopDocs topDocs = searcher.search(query, 10);// 返回所有数据前10条
        // 封装了获取doc的所有条件的docs对象
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc: scoreDocs) {
            // 获取document的文件
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("title"));
            System.out.println(doc.get("sell_point"));
        }
    }

    /**
     * 布尔查询
     */
    @Test
    public void booleanQuery() throws IOException {
        // 第一步：索引文件路径
        Path indexdir = Paths.get("indexdir");
        FSDirectory dir = FSDirectory.open(indexdir);
        // 第二步：搜索对象创建searcher
        // 对接一个输入流
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // 搜索使用的分词器IK
        Analyzer ikAnalyzer6x = new IKAnalyzer6x();
        // 布尔查询条件,创建多个查询的查询条件
        Query query1 = new TermQuery(new Term("title", "达内"));
        Query query2 = new TermQuery(new Term("sell_point", "好用"));
        // 指定逻辑关系
        /**
         * 1．MUST和MUST：取得两个个查询子句的交集。
         * 2．MUST和MUST_NOT：表示查询结果中不能包含MUST_NOT所对应得查询子句的检索结果。
         * 3．SHOULD与MUST_NOT：连用时，功能同MUST和MUST_NOT。
         * 4．SHOULD与MUST连用时，结果为MUST子句的检索结果,但是SHOULD可影响排序。
         * 5．SHOULD与SHOULD：表示“或”关系，最终检索结果为所有检索子句的并集。
         * 6．MUST_NOT和MUST_NOT：无意义，检索无结果。
         */
        BooleanClause bc1 = new BooleanClause(query1, BooleanClause.Occur.SHOULD);
        BooleanClause bc2 = new BooleanClause(query2, BooleanClause.Occur.SHOULD);
        BooleanQuery query = new BooleanQuery.Builder().add(bc1).add(bc2).build();
        System.out.println(query.toString());
        // 第四步：获取数据，for循环遍历，默认情况下，每个doc返回时
        // 封装到一个topDoc的对象中，底层包装了一个数组，评分
        // doc；循环数组，调用api获取数据
        TopDocs topDocs = searcher.search(query, 10);// 返回所有数据前10条
        // 封装了获取doc的所有条件的docs对象
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc: scoreDocs) {
            // 获取document的文件
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("title"));
            System.out.println(doc.get("sell_point"));
        }
    }

    /**
     * 范围查询
     */
    @Test
    public void rangeQuery() throws IOException {
        // 第一步：索引文件路径
        Path indexdir = Paths.get("indexdir");
        FSDirectory dir = FSDirectory.open(indexdir);
        // 第二步：搜索对象创建searcher
        // 对接一个输入流
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // 搜索使用的分词器IK
        Analyzer ikAnalyzer6x = new IKAnalyzer6x();
        // 范围查询
        Query query = DoublePoint.newRangeQuery("price", 2000, 3000);
        System.out.println(query.toString());
        // 第四步：获取数据，for循环遍历，默认情况下，每个doc返回时
        // 封装到一个topDoc的对象中，底层包装了一个数组，评分
        // doc；循环数组，调用api获取数据
        TopDocs topDocs = searcher.search(query, 10);// 返回所有数据前10条
        // 封装了获取doc的所有条件的docs对象
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc: scoreDocs) {
            // 获取document的文件
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("title"));
            System.out.println(doc.get("sell_point"));
            System.out.println(doc.get("price"));
        }
    }

    /**
     * 前缀查询
     */
    @Test
    public void prefixQuery() throws IOException {
        // 第一步：索引文件路径
        Path indexdir = Paths.get("indexdir");
        FSDirectory dir = FSDirectory.open(indexdir);
        // 第二步：搜索对象创建searcher
        // 对接一个输入流
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // 搜索使用的分词器IK
        Analyzer ikAnalyzer6x = new IKAnalyzer6x();
        // 前缀查询
        Term term = new Term("title", "达内");
        Query query = new PrefixQuery(term);
        System.out.println(query.toString());
        // 第四步：获取数据，for循环遍历，默认情况下，每个doc返回时
        // 封装到一个topDoc的对象中，底层包装了一个数组，评分
        // doc；循环数组，调用api获取数据
        TopDocs topDocs = searcher.search(query, 10);// 返回所有数据前10条
        // 封装了获取doc的所有条件的docs对象
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc: scoreDocs) {
            // 获取document的文件
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("title"));
            System.out.println(doc.get("sell_point"));
            System.out.println(doc.get("price"));
        }
    }

    /**
     * 多关键字查询
     */
    @Test
    public void multiTermQuery() throws IOException {
        // 第一步：索引文件路径
        Path indexdir = Paths.get("indexdir");
        FSDirectory dir = FSDirectory.open(indexdir);
        // 第二步：搜索对象创建searcher
        // 对接一个输入流
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // 搜索使用的分词器IK
        Analyzer ikAnalyzer6x = new IKAnalyzer6x();
        // 多关键字查询
        PhraseQuery.Builder builder = new PhraseQuery.Builder();
        // 添加多个关键字
        builder.add(new Term("title", "达内"));
        Query query = builder.build();
        System.out.println(query.toString());
        // 第四步：获取数据，for循环遍历，默认情况下，每个doc返回时
        // 封装到一个topDoc的对象中，底层包装了一个数组，评分
        // doc；循环数组，调用api获取数据
        TopDocs topDocs = searcher.search(query, 10);// 返回所有数据前10条
        // 封装了获取doc的所有条件的docs对象
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc: scoreDocs) {
            // 获取document的文件
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("title"));
            System.out.println(doc.get("sell_point"));
            System.out.println(doc.get("price"));
        }
    }

    @Test
    public void fuzzyQuery() throws IOException {
        // 第一步：索引文件路径
        Path indexdir = Paths.get("indexdir");
        FSDirectory dir = FSDirectory.open(indexdir);
        // 第二步：搜索对象创建searcher
        // 对接一个输入流
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // 搜索使用的分词器IK
        Analyzer ikAnalyzer6x = new IKAnalyzer6x();
        // 模糊查询
        Query query = new FuzzyQuery(new Term("title", "格达"));
        System.out.println(query.toString());
        // 第四步：获取数据，for循环遍历，默认情况下，每个doc返回时
        // 封装到一个topDoc的对象中，底层包装了一个数组，评分
        // doc；循环数组，调用api获取数据
        TopDocs topDocs = searcher.search(query, 10);// 返回所有数据前10条
        // 封装了获取doc的所有条件的docs对象
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc: scoreDocs) {
            // 获取document的文件
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("title"));
            System.out.println(doc.get("sell_point"));
            System.out.println(doc.get("price"));
        }
    }
}