lucene检索

最新推荐文章于 2024-04-28 19:45:27 发布

你猜我猜上青天

最新推荐文章于 2024-04-28 19:45:27 发布

阅读量52

点赞数

分类专栏：杂记文章标签：搜索引擎

原文链接：https://blog.csdn.net/jam_fanatic/article/details/82901728

版权

杂记专栏收录该内容

14 篇文章 0 订阅

订阅专栏

https://blog.csdn.net/jam_fanatic/article/details/82901728

package com.aaa;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import com.chenlb.mmseg4j.analysis.MMSegAnalyzer;

/**
 * 基于MMSegAnalyzer分词器,磁盘存储的搜索引擎
 *
 * @author Administrator
 *
 */
public class LuceneMmseg4jUtil {

    // 分词器
    Analyzer analyzer = new MMSegAnalyzer();
    // 存放索引的路径
    String direct = "D:\\index";

    /**
     * 创建索引并进行存储
     * @param title
     * @param content
     * @throws IOException
     */
    public void index(String title, String content) throws IOException {

        Directory directory = FSDirectory.open(Paths.get(direct));
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        // 设置创建索引模式(在原来的索引的基础上创建或新增)
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        /**
         * 添加索引，在之前的索引基础上追加
         * iwConfig.setOpenMode(OpenMode.APPEND);
         * 创建索引，删除之前的索引
         * iwConfig.setOpenMode(OpenMode.CREATE);
         **/
        IndexWriter iwriter = new IndexWriter(directory, iwConfig);
        // 创建一个存储对象
        Document doc = new Document();
        // 添加字段
        doc.add(new TextField("title", title, Field.Store.YES));
        doc.add(new TextField("content", content, Field.Store.YES));
        // 新添加一个doc对象
        iwriter.addDocument(doc);
        // 创建的索引数目
        int numDocs = iwriter.numDocs();
        System.out.println("共索引了: " + numDocs + " 个对象");
        // 提交事务
        iwriter.commit();
        // 关闭事务
        iwriter.close();

    }

    /**
     * 高亮处理
     *
     * @param query
     * @param fieldName
     * @param fieldContent
     * @return
     * @throws IOException
     * @throws InvalidTokenOffsetsException
     */
    public String displayHtmlHighlight(Query query, String fieldName, String fieldContent)
            throws IOException, InvalidTokenOffsetsException {
        // 设置高亮标签,可以自定义,这里我用html将其显示为红色
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        // 评分
        QueryScorer scorer = new QueryScorer(query);
        // 创建Fragmenter
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
        // 高亮分析器
        Highlighter highlight = new Highlighter(formatter, scorer);
        highlight.setTextFragmenter(fragmenter);
        // 调用高亮方法
        String str = highlight.getBestFragment(analyzer, fieldName, fieldContent);
        return str;
    }

    /**
     * 查询方法
     *
     * @param text
     * @return
     * @throws IOException
     * @throws ParseException
     * @throws InvalidTokenOffsetsException
     */
    public List<Map<String, Object>> search(String text)
            throws IOException, ParseException, InvalidTokenOffsetsException {

        // 得到存放索引的位置
        Directory directory = FSDirectory.open(Paths.get(direct));
        DirectoryReader ireader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(ireader);
        // 在content中进行搜索
        QueryParser parser = new QueryParser("content", analyzer);
        // 搜索含有text的内容
        Query query = parser.parse(text);
        // 搜索标题和显示条数(10)
        TopDocs tds = searcher.search(query, 10);

        List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
        Map<String, Object> map = null;

        // 在内容中查获找
        for (ScoreDoc sd : tds.scoreDocs) {
            // 获取title
            String title = searcher.doc(sd.doc).get("title");
            // 获取content
            String content = searcher.doc(sd.doc).get("content");
            // 内容添加高亮
            QueryParser qp = new QueryParser("content", analyzer);
            // 将匹配到的text添加高亮处理
            Query q = qp.parse(text);
            String html_content = displayHtmlHighlight(q, "content", content);

            map = new HashMap<String, Object>();
            map.put("title", title);
            map.put("content", html_content);
            list.add(map);
        }

        return list;
    }

    /**
     * 删除索引方法
     *
     * @param filed
     * @param keyWord
     * @throws IOException
     */
    public void delete(String filed, String keyWord) throws IOException {

        Directory directory = FSDirectory.open(Paths.get(direct));
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        // 在原来的索引的基础上创建或新增
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter iwriter = new IndexWriter(directory, iwConfig);
        // 删除filed中含有keyWord的索引
        iwriter.deleteDocuments(new Term(filed, keyWord));
        // 提交事务
        iwriter.commit();
        // 关闭事务
        iwriter.close();

    }

    @Test
    // 创建索引测试
    public void indexTest() throws IOException {
        LuceneMmseg4jUtil lucene = new LuceneMmseg4jUtil();
        lucene.index("我的简介", "大家好,我叫小铭,我的专业是网络工程");
        lucene.index("我的专业", "我的专业是网络工程");
    }

    @Test
    // 查询测试
    public void searchTest() throws IOException, ParseException, InvalidTokenOffsetsException {
        LuceneMmseg4jUtil lucene = new LuceneMmseg4jUtil();
        List<Map<String, Object>> list = lucene.search("网络工程");
        for(Map<String,Object> map:list) {
            System.out.println(map);
        }
    }

    @Test
    // 删除索引测试
    public void deleteTest() throws IOException {
        LuceneMmseg4jUtil lucene = new LuceneMmseg4jUtil();
        lucene.delete("title", "简介");
    }

    public static void main(String[] args) throws IOException {
        LuceneMmseg4jUtil lucene = new LuceneMmseg4jUtil();
        lucene.index("测试", "大家好,我叫小铭,我的专业是网络工程");
        lucene.index("测试1", "我的专业是网络工程");
    }
}

 <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
<dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>7.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>7.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>7.2.1</version>
        </dependency>
        <dependency>
            <groupId>com.chenlb.mmseg4j</groupId>
            <artifactId>mmseg4j-solr</artifactId>
            <version>2.3.0</version>
        </dependency>

你猜我猜上青天

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene检索

https://blog.csdn.net/jam_fanatic/article/details/82901728package com.aaa;import java.io.IOException;import java.nio.file.Paths;import java.util.ArrayList;import java.util.HashMap;import java.util.List;import java.util.Map;import org.apache.lucen
复制链接

扫一扫