创建 Maven 项目的 pom.xml 文件（引入 JUnit、SLF4J 与 Lucene 4.10.2 依赖）
<!-- Maven POM for the Lucene demo project (groupId com.pactera, artifact pactera-lucene). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.pactera</groupId>
<artifactId>pactera-lucene</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<!-- NOTE(review): junit is normally declared with <scope>test</scope> so it is
     excluded from the runtime classpath — confirm before adding it here. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
<!-- SLF4J binding routing to log4j 1.2. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.4</version>
</dependency>
<!-- Lucene core engine (indexing and search), version 4.10.2. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.2</version>
</dependency>
<!-- Common analyzers, including StandardAnalyzer used by the tests below. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.10.2</version>
</dependency>
<!-- Query parser for building queries from user-entered strings. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.10.2</version>
</dependency>
<!-- IK Analyzer: third-party Chinese word segmenter for Lucene 4.x.
     NOTE(review): this artifact is not in Maven Central — presumably it must be
     installed into a local/private repository; verify availability. -->
<dependency>
<groupId>cn.itcast.lucene.analyzer</groupId>
<artifactId>ik-analyzer</artifactId>
<version>2012-4.x</version>
</dependency>
</dependencies>
</project>
测试代码
/**
 * Tests index creation: writes one document with four field types
 * (IntField, TextField, LongField, StringField) into a disk-based index.
 *
 * @throws IOException if the index directory cannot be opened or written
 */
@Test
public void testIndexWriter() throws IOException {
    // Index directory on local disk.
    Directory directory = FSDirectory.open(new File("d:\\directory"));
    // Standard analyzer — splits Chinese text into single characters (see the
    // TokenStream test below).
    Analyzer analyzer = new StandardAnalyzer();
    // Writer configuration; OpenMode.CREATE overwrites any existing index.
    IndexWriterConfig indexWriterConfig =
            new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    // try-with-resources: the original leaked the writer (and its directory
    // write lock) if addDocument/commit threw before close() was reached.
    try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
        // Build the document to index.
        Document doc = new Document();
        doc.add(new IntField("id", 18, Store.YES));
        doc.add(new TextField("title", "我们都是党的接班人yes or no?", Store.YES));
        doc.add(new LongField("price", 6388L, Store.YES));
        doc.add(new StringField("pic", "www.baidu.com", Store.YES));
        // Add the document and flush the segment to disk.
        indexWriter.addDocument(doc);
        indexWriter.commit();
    }
}
在指定索引目录下查看索引
这些索引文件可以通过两种方式查看
第一种使用工具
用工具打开指定的目录就可以看到是怎么创建索引的
使用标准分词器汉字按单个字全部被拆分了
第二种：用 lucene 提供的 TokenStream 查看
/**
 * Tests the StandardAnalyzer token stream: prints each token of a mixed
 * Chinese/English title together with its start and end offsets.
 *
 * @throws IOException if the analyzer fails to produce the stream
 */
@Test
public void testTokenStream() throws IOException {
    // Standard analyzer — same one used for indexing above.
    Analyzer analyzer = new StandardAnalyzer();
    // try-with-resources closes the stream; the original never called end()
    // or close(), violating the TokenStream workflow contract
    // (addAttribute -> reset -> incrementToken* -> end -> close).
    try (TokenStream tokenStream =
            analyzer.tokenStream("title", "我们都是党的接班人yes or no?")) {
        // Attribute references must be obtained before consuming the stream.
        // Offset attribute: start/end character positions of each token.
        OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
        // Term attribute: the token text itself.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        // Position the stream at the first token.
        tokenStream.reset();
        // Iterate over the token list.
        while (tokenStream.incrementToken()) {
            System.out.println("分词开始位置:" + offsetAttribute.startOffset());
            System.out.println("最小分词单元:" + charTermAttribute);
            System.out.println("分词结束位置:" + offsetAttribute.endOffset());
        }
        // Signal end-of-stream so end-of-input attributes are set correctly.
        tokenStream.end();
    }
}
结果