lucene的简单应用

最新推荐文章于 2018-09-27 21:29:24 发布
july_young
最新推荐文章于 2018-09-27 21:29:24 发布
阅读量129
点赞数
分类专栏： java
java 专栏收录该内容
73 篇文章 0 订阅
订阅专栏
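添加相关包的引用（原文此处的 jar 包截图已缺失，以下依赖根据下方代码的 import 推断）：

lucene-core、lucene-analyzers-common、lucene-analyzers-smartcn、commons-io、junit 以及 IKAnalyzer。其中 Lucene 相关的 jar 包可以在 lucene4.0 的发行包中找到，其余 jar 包需要单独下载。以下是测试代码，准备好代码中用到的 F:\index 和 F:\666 目录后，直接运行看结果。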

package lucene;

import java.io.File;
import java.io.StringReader;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Minimal Lucene 4.0 walkthrough written as JUnit tests: build an index from
 * the files in a folder, run a term query against it, and print the tokens
 * produced by two different analyzers.
 *
 * <p>The tests read and write fixed locations on drive {@code F:}; adjust
 * {@link #INDEX_DIR} and {@link #SOURCE_DIR} before running elsewhere.
 */
public class MyFirstLucene {
    /** On-disk index location shared by {@link #testIndex()} and {@link #testSearch()}. */
    private static final String INDEX_DIR = "F:\\index";

    /** Folder whose regular files are indexed by {@link #testIndex()}. */
    private static final String SOURCE_DIR = "F:\\666";

    /**
     * Builds the index: one document per regular file found directly inside
     * {@link #SOURCE_DIR}, with the fields {@code fileName}, {@code fileSize},
     * {@code filePath} and {@code fileContent}.
     *
     * @throws IllegalStateException if the source folder cannot be listed
     * @throws Exception on any Lucene or I/O failure
     */
    @Test
    public void testIndex() throws Exception {
        // Open the index directory (kept on drive F:, see INDEX_DIR).
        Directory directory = FSDirectory.open(new File(INDEX_DIR));
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            IndexWriter indexWriter = new IndexWriter(directory, config);
            try {
                // listFiles() returns null when the path is missing or is not a
                // directory; fail with a readable message instead of an NPE.
                File[] files = new File(SOURCE_DIR).listFiles();
                if (files == null) {
                    throw new IllegalStateException("Cannot list source folder: " + SOURCE_DIR);
                }
                for (File file : files) {
                    // Sub-directories cannot be read as text; index regular files only.
                    if (!file.isFile()) {
                        continue;
                    }
                    indexWriter.addDocument(toDocument(file));
                }
            } finally {
                // Always release the writer (and its lock on the index), even on failure.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Converts one file into a Lucene document.
     *
     * @param file a regular, readable file
     * @return a document carrying the file's name, size, path and text content
     * @throws Exception if the file cannot be read
     */
    private static Document toDocument(File file) throws Exception {
        Document document = new Document();
        // File name: analyzed and stored.
        document.add(new TextField("fileName", file.getName(), Store.YES));
        // File size in bytes: numeric field, stored.
        document.add(new LongField("fileSize", FileUtils.sizeOf(file), Store.YES));
        // File path: stored only, not searchable.
        document.add(new StoredField("filePath", file.getPath()));
        // File content: analyzed and stored. The single-argument read decodes
        // with the platform default charset, exactly as the original code did.
        document.add(new TextField("fileContent", FileUtils.readFileToString(file), Store.YES));
        return document;
    }

    /**
     * Searches the index for documents whose {@code fileName} contains the
     * term {@code spring} and prints the stored fields of up to 10 hits.
     *
     * @throws Exception on any Lucene or I/O failure
     */
    @Test
    public void testSearch() throws Exception {
        // Step 1: open the directory that holds the index.
        Directory directory = FSDirectory.open(new File(INDEX_DIR));
        try {
            // Step 2: open a reader on that directory.
            IndexReader indexReader = DirectoryReader.open(directory);
            try {
                // Step 3: wrap the reader in a searcher.
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                // Step 4: build a term query. The term is matched as-is (no
                // analysis), so it is given in lower case to match the tokens
                // StandardAnalyzer wrote at index time.
                Query query = new TermQuery(new Term("fileName", "spring"));
                // Step 5: run the query, keeping at most 10 hits.
                TopDocs topDocs = indexSearcher.search(query, 10);
                // Step 6: print the stored fields of every hit.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    System.out.println(document.get("fileName"));
                    System.out.println(document.get("fileContent"));
                    System.out.println(document.get("fileSize"));
                    System.out.println(document.get("filePath"));
                    System.out.println("------------");
                }
            } finally {
                // Step 7: close the reader even if the search failed.
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Prints how {@link StandardAnalyzer} tokenizes a Chinese sentence.
     *
     * @throws Exception on any analysis failure
     */
    @Test
    public void testTokenStream() throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        try {
            printTokens(analyzer, "高富帅可以用二维表结构来逻辑表达实现的数据");
        } finally {
            analyzer.close();
        }
    }

    /**
     * Prints how {@link SmartChineseAnalyzer} tokenizes an English passage.
     *
     * @throws Exception on any analysis failure
     */
    @Test
    public void testTokenStream2() throws Exception {
        Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_40);
        try {
            printTokens(analyzer,
                    "When it comes to a secure website and passwords it is all in your hands to create a password that a hacker simply cannot crack. However, this will require that you be creative and use everything at your fingertips to create the strongest password possible for a secure web site.\n" +
                            "Tip #1 - Use All Spaces\n" +
                            "No matter how many characters are available for your password you should be sure to use every one of them. The more characters available for your password and the more you use makes it that much harder to figure out the combination. Always make use of all characters available for a strong and secure password");
        } finally {
            analyzer.close();
        }
    }

    /**
     * Runs {@code text} through {@code analyzer} and prints, for every token,
     * its start offset, its term text and its end offset.
     *
     * @param analyzer the analyzer under inspection; not closed by this method
     * @param text the text to tokenize
     * @throws Exception on any analysis failure
     */
    private static void printTokens(Analyzer analyzer, String text) throws Exception {
        // The field name is arbitrary here; it only labels the stream.
        TokenStream tokenStream = analyzer.tokenStream("test", new StringReader(text));
        try {
            // Attribute views onto the current token: its text and its offsets.
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
            // Must be called before the first incrementToken().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println("start->" + offsetAttribute.startOffset());
                System.out.println(charTermAttribute);
                System.out.println("end->" + offsetAttribute.endOffset());
            }
            // Signals end-of-stream, as the TokenStream consumer workflow requires.
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }
}
july_young
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
lucene的简单应用

添加相关包的引用：以上jar包可以在lucene4.0中找到。以下是测试代码，直接运行看结果。package lucene;import java.io.File;import java.io.StringReader;import org.apache.commons.io.FileUtils;import org.apache.lucene.analysis.Analyz...
复制链接

扫一扫
专栏目录