lucene+spring boot 基础入门代码

最新推荐文章于 2024-08-16 15:33:24 发布

Guzhang1216

最新推荐文章于 2024-08-16 15:33:24 发布

阅读量249

点赞数

分类专栏： java 文章标签：学习

本文链接：https://blog.csdn.net/Guzhang1216/article/details/101619502

版权

java 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

依赖

   <dependencies>
        <!-- JUnit unit testing -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- Lucene core library -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- Lucene query parser -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- Lucene's default analyzer (tokenizer) library -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- Lucene search-result highlighting -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>4.10.2</version>
        </dependency>

        <!-- IK analyzer artifact (presumably supplies the IKAnalyzer class used by the examples) -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Compile with Java 1.8 source/target levels and UTF-8 source encoding -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>

扩展配置(词库更新不及时,可以省略,没有也能用)

创建索引

 /**
     * Indexes a single document into the {@code indexDir} directory.
     *
     * <p>The document carries a stored {@code id} added as a {@code StringField} and a
     * stored {@code title} added as a {@code TextField}. The directory and the writer are
     * opened in a try-with-resources statement so they are closed even when indexing
     * fails, and an {@link IOException} is rethrown unchecked (instead of only being
     * printed) so that a failed write makes the test fail.
     */
    @Test
    public void testCreateIndex() {
        // Build the document to index.
        Document document = new Document();

        // "id" is added as a StringField with the string value "12306" and is stored.
        document.add(new StringField("id", "12306", Field.Store.YES));

        // "title" is added as a TextField and is stored.
        TextField textField = new TextField("title", "谷歌地图在全球范围还是有很大市场的", Field.Store.YES);
        // setBoost raises the score of this field; a larger value ranks it higher.
        textField.setBoost(10000);
        document.add(textField);
        // Alternative: a StoredField is stored only and not indexed (cannot be searched):
        // document.add(new StoredField("info","啦啦啦啦"));

        // The writer configuration needs a version and the analyzer (IKAnalyzer here).
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer);

        // Open mode is left at its default (append to or create the index). To rebuild
        // the index on every run instead:
        // indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // FSDirectory.open picks a directory implementation for the current environment.
        try (Directory directory = FSDirectory.open(new File("indexDir"));
             IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
            indexWriter.addDocument(document);
            indexWriter.commit();
        } catch (IOException e) {
            // Surface the failure instead of letting the test pass silently.
            throw new java.io.UncheckedIOException("Failed to write the document to indexDir", e);
        }
    }

批量创建索引

// 批量创建索引
    /**
     * Rebuilds the index in {@code indexDir} from five sample documents added in one batch.
     *
     * <p>Each document gets a stored {@code id} ("1" to "5", added as a {@code StringField})
     * and a stored {@code title} (added as a {@code TextField}). The writer is opened with
     * {@code OpenMode.CREATE}, and both the directory and the writer are closed by
     * try-with-resources even if indexing fails.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testBatchCreateIndex() throws Exception{
        // Sample titles; the document id is the 1-based position in this array.
        String[] titles = {
            "谷歌地图之父跳槽facebook",
            "谷歌地图之父加盟FaceBook",
            "谷歌地图创始人拉斯离开谷歌加盟Facebook",
            "谷歌地图之父跳槽Facebook与Wave项目取消有关",
            "谷歌地图之父拉斯加盟社交网站Facebook"
        };

        // Collect the documents to be indexed.
        Collection<Document> docs = new ArrayList<>();
        for (int i = 0; i < titles.length; i++) {
            Document document = new Document();
            document.add(new StringField("id", String.valueOf(i + 1), Field.Store.YES));
            document.add(new TextField("title", titles[i], Field.Store.YES));
            docs.add(document);
        }

        // Writer configuration using the IK analyzer.
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
        // OpenMode.APPEND adds to the existing index;
        // OpenMode.CREATE clears the existing data first and then writes the new index.
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // The directory is the on-disk location of the index.
        try (Directory directory = FSDirectory.open(new File("indexDir"));
             IndexWriter indexWriter = new IndexWriter(directory, conf)) {
            // Hand the whole batch to the writer, then commit.
            indexWriter.addDocuments(docs);
            indexWriter.commit();
        }
    }

删除索引(更新就是删除+新增)

 /**
     * Deletes from the index in {@code indexDir} every document matched by a query.
     *
     * <p>The previous version passed a {@code null} query to
     * {@code IndexWriter.deleteDocuments}, which cannot select any document. A concrete
     * term query on {@code id = "3"} is used here as the example; replace it with whatever
     * query describes the documents to remove. The directory and writer are closed by
     * try-with-resources even if the delete fails.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void testDeleteIndex() throws IOException {
        // The writer configuration needs a version and the analyzer (IKAnalyzer here).
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer);

        // Open mode is left at its default (append to or create the index). To rebuild
        // the index on every run instead:
        // indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // FSDirectory.open picks a directory implementation for the current environment.
        try (Directory directory = FSDirectory.open(new File("indexDir"));
             IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {

            // Other ways to delete:
            //   indexWriter.deleteAll();                                     // everything
            //   indexWriter.deleteDocuments(new Term("title","谷歌地图"));    // by term
            //   indexWriter.deleteDocuments(new Term("id","3"));             // by term
            // Term-based deletion requires the term to exist in the index and does not
            // work on numeric fields.

            // Query-based deletion: every document the query matches is deleted.
            Query query = new TermQuery(new Term("id", "3"));
            indexWriter.deleteDocuments(query);

            indexWriter.commit();
        }
    }

基础查询

封装的查询方法

/**
     * Runs {@code query} against the index in {@code indexDir} and prints the hit count
     * followed by the score, {@code id} and {@code title} of every hit.
     *
     * <p>The directory and the index reader are opened in a try-with-resources statement so
     * they are closed when the search finishes or fails (previously they were never closed).
     *
     * @param query the search condition to execute
     * @throws Exception if the index cannot be opened or read
     */
    public void commonSearch(Query query) throws Exception {
        try (Directory directory = FSDirectory.open(new File("indexDir"));
             IndexReader indexReader = DirectoryReader.open(directory)) {

            // The searcher executes queries against the opened reader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // The second argument is the maximum number of hits to return; if fewer
            // documents match, only those are returned (no error).
            TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE);

            // totalHits is the number of documents that matched.
            int totalHits = topDocs.totalHits;
            System.out.println("命中文档个数:" + totalHits);

            // Each ScoreDoc carries the document number (doc) and its score.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println("============================================");
                int docID = scoreDoc.doc;
                float score = scoreDoc.score;

                // Load the stored document for this document number.
                Document document = indexSearcher.doc(docID);
                String id = document.get("id");
                String title = document.get("title");

                System.out.println("score:" + score + " id:" + id + " title:" + title);
            }
        }
    }

查询

    /**
     * Parses the text "谷歌地图" against the {@code title} field and prints the hits
     * through {@code commonSearch}.
     *
     * @throws Exception if parsing or searching fails
     */
    @Test
    public void testQueryIndex() throws Exception {
        // The parser's analyzer must be the same one that was used to build the index.
        commonSearch(new QueryParser("title", new IKAnalyzer()).parse("谷歌地图"));
    }


    /*
     * Term query.
     * Note: a Term is the smallest unit of search and is not analyzed any further.
     * Its value must be a string.
     */
    @Test
    public void testTermQuery() throws Exception {
        // Look for the exact term "谷歌" in the title field.
        Term titleTerm = new Term("title", "谷歌");
        commonSearch(new TermQuery(titleTerm));
    }


    /* Wildcard query:
     * 	? stands for exactly one arbitrary character
     * 	* stands for any number of arbitrary characters
     */
    @Test
    public void testWildCardQuery() throws Exception {
        // Pattern "*谷歌": any characters followed by "谷歌", matched against title terms.
        Term pattern = new Term("title", "*谷歌");
        commonSearch(new WildcardQuery(pattern));
    }

    /*
     * Fuzzy query: tolerates typos in the search term, up to a maximum edit distance.
     * Edit distance is the minimum number of changes needed to turn one word into
     * another, e.g. facebool --> facebook needs 1 edit, so the distance is 1.
     */
    @Test
    public void testFuzzyQuery() throws Exception {
        // Without an explicit limit the maximum edit distance is 2, e.g.:
        //   Query query = new FuzzyQuery(new Term("title","fscevool"));
        // An explicit limit may be given, but it must be between 0 and 2.
        // NOTE(review): "facabooc" appears to differ from "facebook" by two substitutions,
        // so a limit of 1 may not match it - confirm the intended limit.
        Term misspelled = new Term("title", "facabooc");
        int maxEdits = 1;
        commonSearch(new FuzzyQuery(misspelled, maxEdits));
    }


    /*
     * Numeric range query.
     */
    @Test
    public void testNumericRangQuery() throws Exception {
        // Arguments: field name, lower bound, upper bound, include lower, include upper.
        // NOTE(review): the indexing tests in this file add "id" as a StringField; confirm
        // that a numeric range query is expected to match such a field.
        long wantedId = 12306L;
        Query idRange = NumericRangeQuery.newLongRange("id", wantedId, wantedId, true, true);
        commonSearch(idRange);
    }


    /*
     * Boolean (combined) query
     *
     * MUST with MUST          &&  intersection
     *
     * SHOULD with SHOULD      ||  union
     *
     * MUST with MUST_NOT      !   exclusion
     */
    @Test
    public void testBooleanQuery() throws Exception {
        // Both clauses are MUST, so only ids inside both ranges ([2,4] and [3,5]) qualify.
        // Range arguments: field name, lower bound, upper bound, include lower, include upper.
        // NOTE(review): the indexing tests in this file add "id" as a StringField; confirm
        // that numeric range clauses are expected to match such a field.
        BooleanQuery combined = new BooleanQuery();
        combined.add(NumericRangeQuery.newLongRange("id", 2L, 4L, true, true), BooleanClause.Occur.MUST);
        combined.add(NumericRangeQuery.newLongRange("id", 3L, 5L, true, true), BooleanClause.Occur.MUST);

        commonSearch(combined);
    }

高级查询

高亮查询

/**
     * Searches the {@code title} field for "谷歌地图" and prints every hit with the matching
     * terms wrapped in {@code <em>...</em>}.
     *
     * <p>Changes from the previous version: the directory and reader are closed by
     * try-with-resources (they were never closed), one analyzer instance is shared by the
     * query parser and the highlighter instead of creating a new one per hit, and the plain
     * title is printed when the highlighter returns no fragment.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void highlightSearch() throws Exception {
        // One analyzer for parsing and highlighting; it must be the same kind of analyzer
        // that was used to build the index.
        IKAnalyzer analyzer = new IKAnalyzer();

        try (Directory directory = FSDirectory.open(new File("indexDir"));
             IndexReader indexReader = DirectoryReader.open(directory)) {

            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            QueryParser queryParser = new QueryParser("title", analyzer);
            Query query = queryParser.parse("谷歌地图");

            // The second argument is the maximum number of hits to return; if fewer
            // documents match, only those are returned (no error).
            TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE);

            // totalHits is the number of documents that matched.
            int totalHits = topDocs.totalHits;
            System.out.println("命中文档个数:" + totalHits);

            // Highlighter setup: matched terms are wrapped in <em></em>.
            Formatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println("============================================");
                int docID = scoreDoc.doc;
                float score = scoreDoc.score;

                // Load the stored document for this document number.
                Document document = indexSearcher.doc(docID);
                String id = document.get("id");
                String result = document.get("title");

                // The stored title is analyzed again and terms matching the query are
                // highlighted.
                String highlighterTitle = highlighter.getBestFragment(analyzer, "title", result);
                if (highlighterTitle == null) {
                    // No fragment was produced; show the unhighlighted title instead.
                    highlighterTitle = result;
                }

                System.out.println("score:" + score + " id:" + id + " title:" + highlighterTitle);
            }
        }
    }

排序查询

  /**
     * Searches the {@code title} field for "谷歌地图" and prints the hits sorted by the
     * {@code id} field, compared as strings.
     *
     * <p>The directory and reader are opened in a try-with-resources statement so they are
     * closed when the search finishes or fails (previously they were never closed).
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void sortSearch() throws Exception {
        try (Directory directory = FSDirectory.open(new File("indexDir"));
             IndexReader indexReader = DirectoryReader.open(directory)) {

            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // The parser's analyzer must be the same one that was used to build the index.
            QueryParser queryParser = new QueryParser("title", new IKAnalyzer());
            Query query = queryParser.parse("谷歌地图");

            // Sort criterion: the "id" field, compared as strings.
            Sort sort = new Sort(new SortField("id", SortField.Type.STRING));

            // The second argument is the maximum number of hits to return; if fewer
            // documents match, only those are returned (no error).
            // NOTE(review): with an explicit sort the per-hit score may not be computed -
            // confirm whether the printed score is meaningful here.
            TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE, sort);

            // totalHits is the number of documents that matched.
            int totalHits = topDocs.totalHits;
            System.out.println("命中文档个数:" + totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println("============================================");
                int docID = scoreDoc.doc;
                float score = scoreDoc.score;

                // Load the stored document for this document number.
                Document document = indexSearcher.doc(docID);
                String id = document.get("id");
                String result = document.get("title");

                System.out.println("score:" + score + " id:" + id + " title:" + result);
            }
        }
    }

分页查询

 /**
     * Searches the {@code title} field for "谷歌地图", sorted by {@code id}, and prints one
     * page of the result.
     *
     * <p>The page number and page size are fixed locals ({@code page}, {@code pageSize}).
     * When the requested page lies beyond the last page a message is printed and nothing
     * else happens. Changes from the previous version: the directory and reader are closed
     * by try-with-resources (they were never closed), the page size is a primitive
     * {@code int}, only the first {@code page * pageSize} hits are requested from the
     * searcher instead of all of them, and the loop end is clamped to the number of hits
     * actually returned.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void pageSearch() throws Exception {
        int page = 5555;
        final int pageSize = 2;
        int start = (page - 1) * pageSize;
        int pageEnd = page * pageSize;

        try (Directory directory = FSDirectory.open(new File("indexDir"));
             IndexReader indexReader = DirectoryReader.open(directory)) {

            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // The parser's analyzer must be the same one that was used to build the index.
            QueryParser queryParser = new QueryParser("title", new IKAnalyzer());
            Query query = queryParser.parse("谷歌地图");

            // Sort criterion: the "id" field, compared as strings.
            Sort sort = new Sort(new SortField("id", SortField.Type.STRING));

            // Hits past the end of the requested page are never printed, so ask only for
            // the first pageEnd hits; totalHits still reports the full match count.
            TopDocs topDocs = indexSearcher.search(query, pageEnd, sort);

            int totalHits = topDocs.totalHits;
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            System.out.println("命中文档个数:" + totalHits);

            // Number of pages, rounding up when the last page is only partly filled.
            int totalPage = (totalHits + pageSize - 1) / pageSize;

            if (page > totalPage) {
                System.out.println("抱歉没有，gun");
                return;
            }

            // The last page may be shorter than pageSize, so stop at the hits available.
            int end = Math.min(pageEnd, scoreDocs.length);

            for (int i = start; i < end; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                int docID = scoreDoc.doc;
                float score = scoreDoc.score;

                // Load the stored document for this document number.
                Document document = indexSearcher.doc(docID);
                String id = document.get("id");
                String result = document.get("title");

                System.out.println("score:" + score + " id:" + id + " title:" + result);
            }
        }
    }