Lucene：基本增删改查（Java方式）

最新推荐文章于 2024-08-12 22:47:50 发布

琦彦

最新推荐文章于 2024-08-12 22:47:50 发布

阅读量6.3k

点赞数 2

分类专栏： Elasticsearch：入门与深入文章标签： Lucene Elasticsearch 添加更新查询

本文链接：https://blog.csdn.net/fly910905/article/details/81456131

版权

Elasticsearch：入门与深入专栏收录该内容

8 篇文章 4 订阅

订阅专栏

Lucene相关Maven依赖


        <!-- lucene的核心 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>6.6.0</version>
        </dependency>

        <!-- lucene的分词器，有标准的英文相关的分词器，没有中文的 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>6.6.0</version>
        </dependency>

        <!-- 查询解析器 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>6.6.0</version>
        </dependency>

        <!-- 各种查询方式 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>6.6.0</version>
        </dependency>

        <!-- 关键字高亮 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>6.6.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-demo</artifactId>
            <version>6.6.0</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>

Lucene测试用例

实体对象

import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Store;

public class Article {

    /**
     * 主键
     */
    private Long id;
    /**
     * 标题
     */
    private String title;
    /**
     * 内容
     */
    private String content;
    /**
     * 作者
     */
    private String author;
    /**
     * 链接
     */
    private String url;

    public Article() {
    }

    public Article(Long id, String title, String content, String author,
                   String url) {
        super();
        this.id = id;
        this.title = title;
        this.content = content;
        this.author = author;
        this.url = url;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * 生成Lucene存储的格式
     */
    public Document toDocument() {
        //Lucene存储的格式（Map装的k,v）
        Document doc = new Document();
        //向文档中添加一个long类型的属性，建立索引
        doc.add(new LongPoint("id", id));
        //在文档中存储
        doc.add(new StoredField("id", id));

        //设置一个文本类型，会对内容进行分词，建立索引，并将内容在文档中存储
        doc.add(new TextField("title", title, Store.YES));
        //设置一个文本类型，会对内容进行分词，建立索引，存在文档中存储 / No代表不存储
        doc.add(new TextField("content", content, Store.YES));

        //StringField，不分词，建立索引，文档中存储
        doc.add(new StringField("author", author, Store.YES));

        //不分词，不建立索引，在文档中存储，
        doc.add(new StoredField("url", url));
        return doc;
    }

    /**
     * 解析Lucene存储的格式
     */
    public static Article parseArticle(Document doc) {
        Long id = Long.parseLong(doc.get("id"));
        String title = doc.get("title");
        String content = doc.get("content");
        String author = doc.get("author");
        String url = doc.get("url");
        Article article = new Article(id, title, content, author, url);
        return article;
    }

    @Override
    public String toString() {
        return "id : " + id + " , title : " + title + " , content : " + content + " , author : " + author + " , url : " + url;
    }


}

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;
import java.nio.file.Paths;


/**
 * @Title: Lucene增删改查
 * @ClassName: cn.edu360.lucene.LuceneCRUDTest.java
 * @Description: https://segmentfault.com/a/1190000010367206
 * https://github.com/DmitryKey/luke/releases
 * @Copyright 2016-2018  - Powered By 研发中心
 * @author: 王延飞
 */
public class LuceneCRUDTest {

    /**
     * @Title: 写入数据
     * @methodName: testCreate
     * @Description:
     * @author: 王延飞
     */
    @Test
    public void testCreate() throws IOException {

        Article article = new Article();
        article.setId(108L);
        article.setAuthor("FLY");
        article.setTitle("学习大数据");
        article.setContent("学数据，像毕老师一样牛！");
        article.setUrl("https://blog.csdn.net/fly910905/article/details/81190382");

        // String indexPath = "D:\\usr\\lucene\\index";
        String indexPath = "D:\\usr\\lucene\\index";
        FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
        //创建一个标准分词器，一个字分一次
        //Analyzer analyzer = new StandardAnalyzer();
        Analyzer analyzer = new IKAnalyzer(true);
        //写入索引的配置，设置了分词器
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        //指定了写入数据目录和配置
        IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig);
        //创建一个文档对象
        Document document = article.toDocument();
        System.out.println(document);
        //通过IndexWriter写入
        indexWriter.addDocument(document);
        indexWriter.close();
    }


    /**
     * @Title: 查询数据
     * @methodName: testSearch
     * @Description:
     * @author: 王延飞
     * @date: 2018-08-05 16:01
     */
    @Test
    public void testSearch() throws IOException, ParseException {


        String indexPath = "D:\\usr\\lucene\\index";
        //Analyzer analyzer = new StandardAnalyzer();
        Analyzer analyzer = new IKAnalyzer(true);
        DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        //索引查询器
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

        String queryStr = "数据";
        //创建一个查询条件解析器
        QueryParser parser = new QueryParser("content", analyzer);
        //对查询条件进行解析
        Query query = parser.parse(queryStr);

        //TermQuery将查询条件当成是一个固定的词
        //Query query = new TermQuery(new Term("url", "http://www.edu360.cn/a10010"));
        //在【索引】中进行查找
        TopDocs topDocs = indexSearcher.search(query, 10);

        //获取到查找到的文文档ID和得分
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            //从索引中查询到文档的ID，
            int doc = scoreDoc.doc;
            //在根据ID到文档中查找文档内容
            Document document = indexSearcher.doc(doc);
            //将文档转换成对应的实体类
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }

        directoryReader.close();
    }

    /**
     * @Title: 删除数据
     * @methodName: testDelete
     * @Description:
     * @author: 王延飞
     * @date: 2018-08-05 16:01
     */
    @Test
    public void testDelete() throws IOException, ParseException {


        String indexPath = "D:\\usr\\lucene\\index";
        Analyzer analyzer = new IKAnalyzer(true);
        FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig);

        //Term词条查找，内容必须完全匹配，不分词
        //indexWriter.deleteDocuments(new Term("content", "学好"));

        // 分词器查找
        //QueryParser parser = new QueryParser("title", analyzer);
        //Query query = parser.parse("大数据老师");

        //LongPoint是建立索引的
        // LongPoint.newRangeQuery()--范围查询
        //Query query = LongPoint.newRangeQuery("id", 99L, 120L);
        // LongPoint.newExactQuery()--精确查询
        Query query = LongPoint.newExactQuery("id", 105L);

        indexWriter.deleteDocuments(query);

        indexWriter.commit();
        indexWriter.close();
    }

    /**
     * @Title: 更新数据
     * @methodName: testUpdate
     * @Description: lucene的update比较特殊，update的代价太高，先删除，然后在插入
     * @author: 王延飞
     */
    @Test
    public void testUpdate() throws IOException, ParseException {


        String indexPath = "D:\\usr\\lucene\\index";
        StandardAnalyzer analyzer = new StandardAnalyzer();
        FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig);


        Article article = new Article();
        article.setId(106L);
        article.setAuthor("老王");
        article.setTitle("学好大数据，要找赵老师");
        article.setContent("迎娶白富美，走上人生巅峰！！！");
        article.setUrl("https://blog.csdn.net/fly910905/article/details/81190382");
        Document document = article.toDocument();

        indexWriter.updateDocument(new Term("author", "老王"), document);

        indexWriter.commit();
        indexWriter.close();
    }

    /**
     * @Title: 查询多个字段
     * @methodName: testMultiField
     * @Description:
     * @author: 王延飞
     */
    @Test
    public void testMultiField() throws IOException, ParseException {


        String indexPath = "D:\\usr\\lucene\\index";
        Analyzer analyzer = new IKAnalyzer(true);
        DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

        String[] fields = {"title", "content"};
        //多字段的查询转换器
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
        Query query = queryParser.parse("老师");

        TopDocs topDocs = indexSearcher.search(query, 10);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;
            Document document = indexSearcher.doc(doc);
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }

        directoryReader.close();
    }


    /**
     * @Title: 查找全部的数据
     * @methodName: testMatchAll
     * @Description:
     * @author: 王延飞
     */
    @Test
    public void testMatchAll() throws IOException, ParseException {


        String indexPath = "D:\\usr\\lucene\\index";
        DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

        Query query = new MatchAllDocsQuery();

        TopDocs topDocs = indexSearcher.search(query, 10);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;
            Document document = indexSearcher.doc(doc);
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }

        directoryReader.close();
    }

    /**
     * @Title: 布尔查询，可以组合多个查询条件
     * @methodName: testBooleanQuery
     * @Description:
     * @author: 王延飞
     */
    @Test
    public void testBooleanQuery() throws Exception {

        String indexPath = "D:\\usr\\lucene\\index";
        DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

        Query query1 = new TermQuery(new Term("title", "老师"));
        Query query2 = new TermQuery(new Term("content", "丁"));
        BooleanClause bc1 = new BooleanClause(query1, BooleanClause.Occur.MUST);
        BooleanClause bc2 = new BooleanClause(query2, BooleanClause.Occur.MUST_NOT);
        BooleanQuery boolQuery = new BooleanQuery.Builder().add(bc1).add(bc2).build();
        System.out.println(boolQuery);

        TopDocs topDocs = indexSearcher.search(boolQuery, 10);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;
            Document document = indexSearcher.doc(doc);
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }

        directoryReader.close();
    }


    /**
     * @Title: QueryParser查询解析
     * @methodName: testQueryParser
     * @Description:
     * @author: 王延飞
     */
    @Test
    public void testQueryParser() throws Exception {

        String indexPath = "D:\\usr\\lucene\\index";
        DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

        //创建一个QueryParser对象。参数1：默认搜索域 参数2：分析器对象。
        QueryParser queryParser = new QueryParser("title", new IKAnalyzer(true));

        //Query query = queryParser.parse("数据");
        Query query = queryParser.parse("title:学好 OR title:学习");
        System.out.println(query);

        TopDocs topDocs = indexSearcher.search(query, 10);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;
            Document document = indexSearcher.doc(doc);
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }

        directoryReader.close();
    }


    /**
     * @Title: 范围查询
     * @methodName: testRangeQuery
     * @Description:
     * @author: 王延飞
     */
    @Test
    public void testRangeQuery() throws Exception {

        String indexPath = "D:\\usr\\lucene\\index";
        DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);


        Query query = LongPoint.newRangeQuery("id", 107L, 108L);

        System.out.println(query);

        TopDocs topDocs = indexSearcher.search(query, 10);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;
            Document document = indexSearcher.doc(doc);
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }

        directoryReader.close();
    }
}