Lucene 全文索引

最新推荐文章于 2024-05-15 11:16:40 发布

炑旊

最新推荐文章于 2024-05-15 11:16:40 发布

阅读量145

点赞数 1

文章标签： lucene

本文链接：https://blog.csdn.net/muyanfang/article/details/120345504

版权

Lucene 全文索引

pom.xml配置
方法使用
实体类(根据具体需求修改)

pom.xml配置

    <!-- Lucene core library -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>7.6.0</version>
    </dependency>
    <!-- Highlighting of matched search keywords -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-highlighter -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-highlighter</artifactId>
        <version>7.6.0</version>
    </dependency>
    <!-- High-performance single-document index for matching against queries; the highlighter needs this jar -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-memory -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-memory</artifactId>
        <version>7.6.0</version>
    </dependency>
    <!-- Query parsers (QueryParser / MultiFieldQueryParser) -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>7.6.0</version>
    </dependency>
    <!-- NOTE(review): nothing in the code shown in this article references lucene-demo; confirm it is needed -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-demo -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-demo</artifactId>
        <version>7.6.0</version>
    </dependency>
    <!-- General-purpose analyzers, suitable for English text -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>7.6.0</version>
    </dependency>
    <!-- Chinese analyzer (word segmentation) -->
    <!-- NOTE(review): the Java code shown in this article only uses StandardAnalyzer; confirm whether smartcn is actually used -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-smartcn -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-smartcn</artifactId>
        <version>7.6.0</version>
    </dependency>
    <!-- Encoding conversion -->
    <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.4</version>
    </dependency>

方法使用

/**
 * Helper around a file-system Lucene index: create, search (with highlighting),
 * update, delete and look up documents built from {@link LuceneManager} entities.
 *
 * <p>Every operation opens its own {@link Directory} and {@link IndexWriter} /
 * {@link IndexReader} and closes them before returning. Text is analyzed with
 * {@link StandardAnalyzer} at both index time and query time.
 *
 * <p>Error handling: failures to open the index (or to parse a query) propagate to the
 * caller; failures of the operation itself are logged and swallowed (best effort).
 */
public class Lucene extends BaseExtController {
    private static final Logger log = Logger.getLogger(Lucene.class);

    /** Index directory path, read from {@code lucenePath} in {@code config.properties} ("" is passed as the fallback). */
    final String filePath = PropKit.use("config.properties").get("lucenePath","");

    /** Page number used by {@link #readerIndex} when none (or a value below 1) is given. */
    private static final int DEFAULT_PAGE_NO = 1;
    /** Page size used by {@link #readerIndex} when none (or a value below 1) is given. */
    private static final int DEFAULT_PAGE_SIZE = 10;

    /**
     * Converts the given entities to Lucene documents and appends them to the index.
     *
     * <p>Blank properties are skipped, so a document only contains the fields that
     * actually have a value. Field handling:
     * <ul>
     *   <li>{@code id}, {@code sid}: indexed as a single exact term and stored;</li>
     *   <li>{@code title}, {@code abs}, {@code desc}, {@code type}, {@code time}:
     *       tokenized, indexed and stored;</li>
     *   <li>{@code img}, {@code tableName}, {@code remarks}: stored only, not searchable.</li>
     * </ul>
     *
     * <p>Created 2021-09-09.
     *
     * @param luceneManagerList entities to index; must not be {@code null}
     * @throws IOException if the index directory or writer cannot be opened or closed
     */
    public void createIndex(List<LuceneManager> luceneManagerList) throws IOException {

        log.info("存储索引数据长度_"+luceneManagerList.size());

        // Build all documents before touching the index.
        List<Document> documents = new ArrayList<>(luceneManagerList.size());
        for (LuceneManager lucene : luceneManagerList) {
            documents.add(toDocument(lucene));
        }

        boolean indexed = false;
        // try-with-resources closes the writer first, then the directory, and never
        // dereferences a writer that failed to open.
        try (Directory directory = openIndexDirectory();
             IndexWriter writer = openIndexWriter(directory)) {
            try {
                for (Document doc : documents) {
                    writer.addDocument(doc);
                }
                indexed = true;
            } catch (Exception e) {
                // Best effort: report the failure instead of aborting the caller.
                log.error("存储索引数据失败", e);
            }
        }

        // Only claim success when every document was handed to the writer and it closed cleanly.
        if (indexed) {
            log.info("存储索引数据成功");
        }
    }

    /**
     * Searches the index and returns one page of hits, with the matched terms in
     * {@code title} and {@code abs} wrapped in {@code <highLight>} tags.
     *
     * @param searchName    comma-separated names of the fields to search; when blank,
     *                      {@code title} and {@code abs} are searched
     * @param searchContent query text in Lucene query-parser syntax
     * @param page_no       1-based page number; {@code null} or a value below 1 means page 1
     * @param page_size     rows per page; {@code null} or a value below 1 means 10
     * @return the requested page, whose {@code totalRow} is the total number of hits;
     *         {@code null} when the search itself fails (the failure is logged).
     *         NOTE(review): the list elements are {@link LuceneManager} instances even though
     *         the declared element type is {@code Record}; callers must treat them accordingly.
     * @throws ParseException if {@code searchContent} cannot be parsed
     * @throws IOException    if the index cannot be opened or closed
     */
    public Page<Record> readerIndex(String searchName, String searchContent, Integer page_no, Integer page_size) throws ParseException, IOException {

        log.info("搜索数据字段____"+searchName+"---"+searchContent);

        // The paging arguments are documented as optional, so never unbox them blindly.
        int pageNo = (page_no == null || page_no < 1) ? DEFAULT_PAGE_NO : page_no;
        int pageSize = (page_size == null || page_size < 1) ? DEFAULT_PAGE_SIZE : page_size;

        String[] fields = StrKit.isBlank(searchName)
                ? new String[]{"title", "abs"}
                : searchName.split(",");

        // One analyzer is shared by the query parser and the highlighter.
        Analyzer analyzer = new StandardAnalyzer();
        Query query = new MultiFieldQueryParser(fields, analyzer).parse(searchContent);

        Formatter formatter = new SimpleHTMLFormatter("<highLight>", "</highLight>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));

        // Fetch exactly the hits up to the end of the requested page (long math avoids int overflow).
        long lastWanted = (long) pageNo * pageSize;
        long firstWanted = lastWanted - pageSize;
        int maxHits = (int) Math.min(lastWanted, Integer.MAX_VALUE);

        com.jfinal.plugin.activerecord.Page<Record> page = null;
        try (Directory directory = openIndexDirectory();
             IndexReader reader = DirectoryReader.open(directory)) {
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                TopDocs topDocs = searcher.search(query, maxHits);

                long totalHits = topDocs.totalHits;
                log.info("通过查询条件总共查询到的数据量为：" + totalHits);

                ScoreDoc[] scoreDocs = topDocs.scoreDocs;

                // Paging metadata is derived from the real hit count, not from the fetched slice.
                int totalRow = (int) Math.min(totalHits, Integer.MAX_VALUE);
                int totalPage = (int) ((totalRow + (long) pageSize - 1) / pageSize);

                List<LuceneManager> dataList = new ArrayList<>();
                int from = (int) Math.min(firstWanted, scoreDocs.length);
                for (int i = from; i < scoreDocs.length; i++) {
                    Document document = searcher.doc(scoreDocs[i].doc);

                    LuceneManager luceneManager = toLuceneManager(document);
                    // Replace the raw title/abstract with their highlighted form when available.
                    luceneManager.setTitle(highlightOrRaw(highlighter, analyzer, "title", document.get("title")));
                    luceneManager.setAbs(highlightOrRaw(highlighter, analyzer, "abs", document.get("abs")));

                    dataList.add(luceneManager);
                }

                // Raw construction is kept on purpose: the list holds LuceneManager, not Record.
                page = new com.jfinal.plugin.activerecord.Page(dataList, pageNo, pageSize, totalPage, totalRow);
            } catch (Exception e) {
                // Best effort: a failed search yields a null page, as before.
                log.error("检索索引数据失败", e);
            }
        }

        return page;
    }

    /**
     * Deletes every document that contains the exact term {@code name:value}.
     *
     * <p>The value is matched as a single, un-analyzed term, so this is reliable for the
     * exact-match fields ({@code id}, {@code sid}); tokenized fields only match when
     * {@code value} equals one of their indexed tokens.
     *
     * @param name  field name
     * @param value exact term value
     * @throws IOException    if the index directory or writer cannot be opened or closed
     * @throws ParseException never thrown by this implementation; kept for compatibility
     */
    public void deleteIndex(String name,String value) throws IOException, ParseException {

        try (Directory directory = openIndexDirectory();
             IndexWriter writer = openIndexWriter(directory)) {
            try {
                writer.deleteDocuments(new Term(name, value));
            } catch (Exception e) {
                log.error("删除索引数据失败", e);
            }
        }
    }

    /**
     * Deletes every document in the index.
     *
     * @throws IOException    if the index directory or writer cannot be opened or closed
     * @throws ParseException never thrown by this implementation; kept for compatibility
     */
    public void deleteAllIndex() throws IOException, ParseException {

        try (Directory directory = openIndexDirectory();
             IndexWriter writer = openIndexWriter(directory)) {
            try {
                writer.deleteAll();
            } catch (Exception e) {
                log.error("清空索引数据失败", e);
            }
        }
    }

    /**
     * Replaces the documents containing the exact term {@code name:oldValue} with a new
     * document; when nothing matches, the new document is simply added.
     *
     * <p>Caveat: the replacement document contains only the single field
     * {@code name = newValue}. All other fields of the replaced documents are lost.
     *
     * @param name     field name used both for the lookup term and for the new field
     * @param newValue value of the field in the replacement document
     * @param oldValue exact term value identifying the documents to replace
     * @throws IOException    if the index directory or writer cannot be opened or closed
     * @throws ParseException never thrown by this implementation; kept for compatibility
     */
    public void updateIndex(String name,String newValue,String oldValue) throws IOException, ParseException {

        try (Directory directory = openIndexDirectory();
             IndexWriter writer = openIndexWriter(directory)) {
            try {
                Document doc = new Document();
                doc.add(new TextField(name, newValue, Field.Store.YES));
                // updateDocument = delete everything matching the term, then add doc.
                writer.updateDocument(new Term(name, oldValue), doc);
            } catch (Exception e) {
                log.error("更新索引数据失败", e);
            }
        }
    }

    /**
     * Looks up at most 100 documents matching {@code name:value}.
     *
     * <p>The condition is run through the query parser, so {@code value} is analyzed and
     * any query-syntax characters in it are interpreted rather than matched literally.
     *
     * @param name  field to search
     * @param value value to search for
     * @return the matching entities; empty when nothing matches or the search itself fails
     * @throws IOException    if the index cannot be opened or closed
     * @throws ParseException if {@code name:value} is not a valid query
     */
    public List<LuceneManager> queryIndex(String name,String value) throws IOException, ParseException {

        List<LuceneManager> dataList = new ArrayList<>();
        Query query = new QueryParser(name, new StandardAnalyzer()).parse(name + ":" + value);

        try (Directory directory = openIndexDirectory();
             IndexReader reader = DirectoryReader.open(directory)) {
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[] scoreDocs = searcher.search(query, 100).scoreDocs;

                if (scoreDocs == null || scoreDocs.length == 0) {
                    log.info("未查询到" + name + "为" + value + "的数据，删除" + name + "为" + value + "的数据成功");
                } else {
                    for (ScoreDoc hit : scoreDocs) {
                        dataList.add(toLuceneManager(searcher.doc(hit.doc)));
                    }
                }
            } catch (Exception e) {
                log.error("查询索引数据失败", e);
            }
        }

        return dataList;
    }

    /** Opens the index directory configured in {@link #filePath}. */
    private Directory openIndexDirectory() throws IOException {
        return FSDirectory.open(FileSystems.getDefault().getPath(filePath));
    }

    /** Opens a writer on {@code directory} with a default configuration and a {@link StandardAnalyzer}. */
    private static IndexWriter openIndexWriter(Directory directory) throws IOException {
        return new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer()));
    }

    /** Builds the Lucene document for one entity, skipping blank properties. */
    private static Document toDocument(LuceneManager lucene) {
        Document document = new Document();
        // Exact-match identifiers: indexed as a single term, stored.
        addKeyword(document, "id", lucene.getId());
        addKeyword(document, "sid", lucene.getSid());
        // Full-text fields: tokenized, indexed, stored.
        addText(document, "title", lucene.getTitle());
        addText(document, "abs", lucene.getAbs());
        addText(document, "desc", lucene.getDesc());
        addText(document, "type", lucene.getType());
        addText(document, "time", lucene.getTime());
        // Payload fields: stored only.
        addStored(document, "img", lucene.getImg());
        addStored(document, "tableName", lucene.getTableName());
        addStored(document, "remarks", lucene.getRemarks());
        return document;
    }

    /** Adds a stored {@link StringField} (single exact term) unless {@code value} is blank. */
    private static void addKeyword(Document document, String name, String value) {
        if (!StrKit.isBlank(value)) {
            document.add(new StringField(name, value, Field.Store.YES));
        }
    }

    /** Adds a stored {@link TextField} (tokenized and indexed) unless {@code value} is blank. */
    private static void addText(Document document, String name, String value) {
        if (!StrKit.isBlank(value)) {
            document.add(new TextField(name, value, Field.Store.YES));
        }
    }

    /** Adds a {@link StoredField} (not indexed) unless {@code value} is blank. */
    private static void addStored(Document document, String name, String value) {
        if (!StrKit.isBlank(value)) {
            document.add(new StoredField(name, value));
        }
    }

    /** Copies the stored fields of a hit into a new entity; absent fields stay {@code null}. */
    private static LuceneManager toLuceneManager(Document document) {
        LuceneManager luceneManager = new LuceneManager();
        luceneManager.setId(document.get("id"));
        luceneManager.setSid(document.get("sid"));
        luceneManager.setTitle(document.get("title"));
        luceneManager.setAbs(document.get("abs"));
        luceneManager.setDesc(document.get("desc"));
        luceneManager.setImg(document.get("img"));
        luceneManager.setTime(document.get("time"));
        luceneManager.setType(document.get("type"));
        luceneManager.setRemarks(document.get("remarks"));
        luceneManager.setTableName(document.get("tableName"));
        return luceneManager;
    }

    /**
     * Returns the best highlighted fragment of {@code raw} for {@code field}, or {@code raw}
     * itself when the field is absent/blank, nothing matches, or highlighting fails.
     */
    private static String highlightOrRaw(Highlighter highlighter, Analyzer analyzer, String field, String raw) {
        if (StrKit.isBlank(raw)) {
            // The field was not stored for this document; there is nothing to highlight.
            return raw;
        }
        try {
            String fragment = highlighter.getBestFragment(analyzer, field, raw);
            return StrKit.notBlank(fragment) ? fragment : raw;
        } catch (Exception e) {
            // A highlighting problem on one field must not discard the whole result page.
            log.warn("高亮处理失败，返回原始内容，字段：" + field, e);
            return raw;
        }
    }

}

实体类(根据具体需求修改)

/**
 * Serializable value holder for one indexed entry. Every property is a plain
 * {@code String} that is {@code null} until its setter is called.
 * Adapt the set of properties to the concrete use case.
 */
public class LuceneManager implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Identifier. */
    private String id;
    /** Random id. */
    private String sid;
    /** Title. */
    private String title;
    /** Article text. */
    private String desc;
    /** Time. */
    private String time;
    /** Image URL. */
    private String img;
    /** Abstract / summary. */
    private String abs;
    /** Type. */
    private String type;
    /** Table name, used to tell source tables apart. */
    private String tableName;
    /** Remarks. */
    private String remarks;

    /** @return the identifier */
    public String getId() { return this.id; }

    /** @param id the identifier */
    public void setId(String id) { this.id = id; }

    /** @return the random id */
    public String getSid() { return this.sid; }

    /** @param sid the random id */
    public void setSid(String sid) { this.sid = sid; }

    /** @return the title */
    public String getTitle() { return this.title; }

    /** @param title the title */
    public void setTitle(String title) { this.title = title; }

    /** @return the article text */
    public String getDesc() { return this.desc; }

    /** @param desc the article text */
    public void setDesc(String desc) { this.desc = desc; }

    /** @return the time */
    public String getTime() { return this.time; }

    /** @param time the time */
    public void setTime(String time) { this.time = time; }

    /** @return the image URL */
    public String getImg() { return this.img; }

    /** @param img the image URL */
    public void setImg(String img) { this.img = img; }

    /** @return the abstract */
    public String getAbs() { return this.abs; }

    /** @param abs the abstract */
    public void setAbs(String abs) { this.abs = abs; }

    /** @return the type */
    public String getType() { return this.type; }

    /** @param type the type */
    public void setType(String type) { this.type = type; }

    /** @return the table name */
    public String getTableName() { return this.tableName; }

    /** @param tableName the table name */
    public void setTableName(String tableName) { this.tableName = tableName; }

    /** @return the remarks */
    public String getRemarks() { return this.remarks; }

    /** @param remarks the remarks */
    public void setRemarks(String remarks) { this.remarks = remarks; }
}

炑旊

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Lucene 全文索引

Lucene 全文索引：pom.xml 配置；方法使用；实体类(根据具体需求修改)。pom.xml 配置：<dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> …
复制链接

扫一扫