Lucene6.1检索引擎

最新推荐文章于 2024-11-14 20:02:43 发布

Chris-Green

最新推荐文章于 2024-11-14 20:02:43 发布

阅读量172

点赞数

分类专栏： Lucene 文章标签： lucene

Lucene 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

1、导入依赖包：

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>6.1.0</version>
</dependency>

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>6.1.0</version>
</dependency>

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>6.1.0</version>
</dependency>

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queries</artifactId>
    <version>6.1.0</version>
</dependency>

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>6.1.0</version>
</dependency>

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>6.1.0</version>
</dependency>

2、需求：先为 F:/data 目录下的文件建立索引，再检索内容中包含关键字"啊啊啊"的文件

/**
 * Minimal Lucene 6.1 demo: rebuilds an on-disk index from the regular files found directly
 * under {@code F:/data}, then searches the indexed file content for a keyword and prints
 * the name and path of each hit.
 */
public class LuceneTest {
    /** Location of the on-disk index; shared so that indexing and searching always agree. */
    private static final String INDEX_DIR = "F:/index";
    /** Directory whose direct children are indexed. */
    private static final String DATA_DIR = "F:/data";
    /** Field holding the tokenized file content (indexed, not stored). */
    private static final String FIELD_CONTENT = "content";
    /** Stored field holding the file name. */
    private static final String FIELD_FILENAME = "filename";
    /** Stored field holding the absolute file path. */
    private static final String FIELD_FILEPATH = "filepath";
    /** Maximum number of hits fetched per search. */
    private static final int MAX_HITS = 10;

    /**
     * Rebuilds the index and then runs one sample search against it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Index newIndex = new Index();
        newIndex.index();
        Search newSearch = new Search();
        newSearch.search("啊啊啊");
    }

    /** Builds the index from the files in the data directory. */
    static class Index {
        /**
         * Writer used by the most recent {@link #index()} call. It is already closed by the
         * time that call returns; the field is kept only for compatibility with existing code.
         */
        IndexWriter indexWriter = null;

        /**
         * Clears the existing index and adds one document per regular file directly under the
         * data directory. Each document carries the tokenized file content (not stored) plus
         * the stored file name and absolute path.
         *
         * <p>I/O failures are printed to standard error and are not propagated to the caller.
         */
        public void index() {
            // Resources are closed in reverse order: writer (which commits), directory, analyzer.
            try (Analyzer analyzer = new StandardAnalyzer();
                 Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_DIR));
                 IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                indexWriter = writer;
                // Drop whatever a previous run left behind so the index mirrors the data directory.
                writer.deleteAll();

                File[] files = new File(DATA_DIR).listFiles();
                if (files == null) {
                    // listFiles() returns null when the path is missing or is not a directory.
                    System.err.println("Data directory cannot be listed: " + DATA_DIR);
                    files = new File[0];
                }

                for (File file : files) {
                    if (!file.isFile()) {
                        // FileReader cannot open a subdirectory; skipping it keeps one
                        // stray directory from aborting the whole indexing run.
                        continue;
                    }
                    // NOTE(review): FileReader decodes with the platform default charset;
                    // confirm that this matches the encoding of the files being indexed.
                    // Closing the reader here releases the file handle even if addDocument fails.
                    try (FileReader contentReader = new FileReader(file)) {
                        Document document = new Document();
                        // The FieldType constants live on TextField as static members.
                        document.add(new Field(FIELD_CONTENT, contentReader, TextField.TYPE_NOT_STORED));
                        document.add(new Field(FIELD_FILENAME, file.getName(), TextField.TYPE_STORED));
                        document.add(new Field(FIELD_FILEPATH, file.getAbsolutePath(), TextField.TYPE_STORED));
                        writer.addDocument(document);
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Searches the index. The steps are: open the Directory, open an IndexReader on it, wrap
     * the reader in an IndexSearcher, parse the Query, run the search to obtain TopDocs, and
     * resolve each ScoreDoc to its Document to read the stored fields.
     */
    static class Search {
        /**
         * Parses {@code keyWord} as a query against the content field, prints the total hit
         * count, and then prints the stored file name and path of up to {@value #MAX_HITS} hits.
         *
         * <p>Any failure (unreadable index, unparsable query, ...) is printed to standard error
         * and is not propagated to the caller.
         *
         * @param keyWord query text, interpreted using the classic query parser syntax
         */
        public void search(String keyWord) {
            // Resources are closed in reverse order: analyzer, reader, then the directory.
            try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_DIR));
                 DirectoryReader directoryReader = DirectoryReader.open(directory);
                 Analyzer analyzer = new StandardAnalyzer()) {
                IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

                // The first QueryParser argument is the default field searched by the query.
                QueryParser queryParser = new QueryParser(FIELD_CONTENT, analyzer);
                Query query = queryParser.parse(keyWord);

                // Fetch only the top MAX_HITS matches; totalHits still reports every match.
                TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
                System.out.println("查找到的文档总共有：" + topDocs.totalHits);

                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    // Resolve the hit's internal document id to its stored fields.
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    System.out.println(document.get(FIELD_FILENAME) + " " + document.get(FIELD_FILEPATH));
                }
            } catch (Exception e) {
                // Deliberately broad: this demo entry point reports every failure, including
                // query parse errors, instead of throwing to the caller.
                e.printStackTrace();
            }
        }
    }

}