lucene

最新推荐文章于 2024-06-05 09:00:00 发布

_Snails_

最新推荐文章于 2024-06-05 09:00:00 发布

阅读量388

点赞数

分类专栏： java 文章标签： lucene

本文链接：https://blog.csdn.net/sima64/article/details/48225679

版权

java 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

Lucene 基本使用:

/**
 * Builds an index containing a single document by using {@link IndexWriter}.
 *
 * <p>The index is stored in the relative directory {@code "dir"}. Both the writer and the
 * directory are closed in {@code finally} blocks so that they are released even when adding
 * the document fails.
 *
 * @throws Exception if the index directory cannot be opened or the document cannot be written
 */
@Test
public void testIndexWriter() throws Exception{
    // Location on disk where the index files are stored.
    File path = new File("dir");
    Directory directory = FSDirectory.open(path);
    try {
        // Lucene version that the analyzer and the writer configuration are matched against.
        Version matchVersion = Version.LUCENE_45;
        // Analyzer: splits field text into the terms that end up in the index.
        Analyzer analyzer = new StandardAnalyzer(matchVersion);
        // Configuration used by the index writer.
        IndexWriterConfig indexConfig = new IndexWriterConfig(matchVersion, analyzer);

        // The writer is the object used to modify the index.
        IndexWriter indexWriter = new IndexWriter(directory, indexConfig);
        try {
            /*
             * Entries in the index follow a fixed structure: the Document.
             * A Document is made of fields, and each field has
             *   1) a name,
             *   2) a value,
             *   3) a flag saying whether the value is stored in the index (Store.YES here).
             */
            Document doc = new Document();
            IndexableField dir = new IntField("dir", 1, Store.YES);
            IndexableField title = new StringField("title", "精通java", Store.YES);
            IndexableField content = new TextField("content", "比较精通java，你呢，你喜欢java吗？", Store.YES);
            doc.add(dir);
            doc.add(title);
            doc.add(content);
            indexWriter.addDocument(doc);
        } finally {
            // Always release the writer, even if addDocument failed.
            indexWriter.close();
        }
    } finally {
        directory.close();
    }
}
/**
 * Searches the index in the relative directory {@code "dir"} by using {@link IndexSearcher}.
 *
 * <p>Runs a term query for {@code "java"} on the {@code content} field, prints the total hit
 * count, and then prints the stored {@code dir} field of each of the (at most 22) returned
 * hits. The reader and the directory are closed in {@code finally} blocks so that they are
 * released even when the search fails.
 *
 * @throws IOException if the index cannot be opened or read
 */
@Test
public void testIndexSearcher() throws IOException{
    // Location on disk where the index files are stored.
    Directory directory = FSDirectory.open(new File("dir"));
    try {
        IndexReader indexReader = DirectoryReader.open(directory);
        try {
            // The searcher looks things up through the reader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // Search condition: documents whose "content" field contains the term "java".
            Query query = new TermQuery(new Term("content","java"));

            // Fetch at most the first 22 hits that match the query.
            TopDocs topDocs = indexSearcher.search(query, 22);
            System.out.println("查询到的记录数："+ topDocs.totalHits);

            // The returned hits are exposed as an array.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for(ScoreDoc scoreDoc : scoreDocs){
                // The doc id plays the role of a page number in a book index ...
                int docID = scoreDoc.doc;
                // ... which is used to load the actual stored document.
                Document document = indexReader.document(docID);
                System.out.println(document.get("dir"));
            }
        } finally {
            // Always release the reader, even if searching or loading a document failed.
            indexReader.close();
        }
    } finally {
        directory.close();
    }
}

删除索引记录:

/**
 * Deletes every indexed document whose field {@code fieldName} contains the exact term
 * {@code fieldValue}.
 *
 * <p>The writer obtained from {@code LuceneUtils.getIndexWriter()} is closed in a
 * {@code finally} block, so it is released even when the delete fails.
 *
 * @param fieldName  name of the field to match
 * @param fieldValue term text that the field must contain
 * @throws IOException if the index cannot be modified or the writer cannot be closed
 */
public void delIndex(String fieldName,String fieldValue) throws IOException{
    IndexWriter indexWriter = LuceneUtils.getIndexWriter();
    try {
        Term term = new Term(fieldName, fieldValue);
        indexWriter.deleteDocuments(term);
    } finally {
        indexWriter.close();
    }
}

更新索引:

/**
 * Replaces the indexed document(s) matching a term with the document built from
 * {@code article}.
 *
 * <p>Updating an index entry means deleting the old entry first and then adding the new one;
 * both steps are delegated to {@code IndexWriter.updateDocument}. The writer obtained from
 * {@code LuceneUtils.getIndexWriter()} is closed in a {@code finally} block, so it is released
 * even when converting the article or updating the index fails.
 *
 * @param fieldName  name of the field that identifies the document(s) to replace
 * @param fieldValue term text that the field must contain
 * @param article    article whose data becomes the new document
 * @throws IOException if the index cannot be modified or the writer cannot be closed
 */
public void update(String fieldName,String fieldValue,Article article) throws IOException{
    IndexWriter indexWriter = LuceneUtils.getIndexWriter();
    try {
        Term term = new Term(fieldName, fieldValue);
        Document doc = ArticleUtils.article2Document(article);
        indexWriter.updateDocument(term, doc);
    } finally {
        indexWriter.close();
    }
}

查询索引并进行分页:

/**
 * Searches the {@code content} and {@code title} fields for {@code keyWords} and returns one
 * page of matching articles.
 *
 * <p>The page consists of the hits ranked {@code start} (inclusive, zero-based) up to
 * {@code start + rows} (exclusive). Fewer than {@code rows} articles are returned when the
 * index does not contain enough hits. An empty list is returned when {@code rows} is not
 * positive or when {@code start + rows} is not positive. A negative {@code start} is treated
 * as {@code 0} for the first index, while the upper bound stays {@code start + rows}.
 *
 * @param keyWords query text, parsed with {@code MultiFieldQueryParser}
 * @param start    zero-based rank of the first hit to return
 * @param rows     maximum number of articles on the page
 * @return the articles of the requested page, in ranking order; never {@code null}
 * @throws Exception if the query text cannot be parsed or the index cannot be read
 */
public List<Article> findIndex(String keyWords,int start,int rows) throws Exception{
    List<Article> articles = new ArrayList<Article>();

    // Number of top hits that have to be collected to reach the end of the requested page.
    int limit = start + rows;
    if (rows <= 0 || limit <= 0) {
        // Nothing to show; also avoids asking the searcher for a non-positive number of hits.
        return articles;
    }

    IndexSearcher indexSearcher = LuceneUtils.getIndexSearcher();
    // The query is evaluated against both the content and the title field.
    String [] fields = {"content","title"};
    /*
     * MultiFieldQueryParser builds the query from free text. According to the original
     * author, with the configured analyzer the keywords are split into single characters,
     * e.g. "恐怖片" becomes:
     *   (content:恐 content:怖 content:片) (title:恐 title:怖 title:片)
     */
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(LuceneUtils.getMatchVersion(), fields, LuceneUtils.getAnalyzer());
    Query query = queryParser.parse(keyWords);

    // Paging does not need every hit, only those up to the end of the requested page.
    TopDocs topDocs = indexSearcher.search(query, limit);
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;

    // The index may hold fewer hits than requested, so stop at whichever bound comes first.
    int resultEnd = Math.min(scoreDocs.length, limit);
    // Start at the page offset so that hits belonging to earlier pages are skipped.
    for(int i = Math.max(start, 0) ; i<resultEnd ; i++){
        int docID = scoreDocs[i].doc;
        Document document = indexSearcher.doc(docID);
        articles.add(ArticleUtils.document2Article(document));
    }
    return articles;
}

查询的几种方法:

/**
 * Walks through several ways of building a Lucene {@code Query}.
 *
 * <p>The same {@code query} variable is reassigned for each example, so only the last value
 * assigned before the {@code testQuery(query)} call (the fuzzy query) is passed to it. The
 * field names and search strings are placeholders.
 *
 * NOTE(review): {@code testQuery} is not defined in this snippet; presumably it runs the
 * query and prints the hits - confirm against the rest of the file.
 * NOTE(review): example 2 parses an empty string against an empty field list; this looks
 * like placeholder input and may be rejected by the parser at runtime - confirm.
 */
public static void main(String[] args) throws Exception {
    // 1. Term query: matches documents containing an exact term in a field.
    Query query = new TermQuery(new Term("title", ""));

    // 2. MultiFieldQueryParser: free-text search; the input string is split up by the analyzer.
    String [] fields = {};
    QueryParser queryParser = 
            new MultiFieldQueryParser(LuceneUtils.getMatchVersion(),fields,LuceneUtils.getAnalyzer());
    query = queryParser.parse("");

    // 3. Match every document in the index.
    query = new MatchAllDocsQuery();

    // 4. Numeric range query. The original author recommends it as a replacement for
    //    filtering, for better performance.
    query = NumericRangeQuery.newIntRange("id", 1, 10, true, false);

    // 5. Wildcard query: '?' stands for exactly one character, '*' for any number of characters.
    query = new WildcardQuery(new Term("author", "司?"));

    /*
     * 6. Fuzzy query, built from
     *      the term to look for, and
     *      the maximum number of edits (0, 1 or 2), i.e. how many characters may be wrong.
     */
    query = new FuzzyQuery(new Term("author", "司马大侠黄"),2);
    testQuery(query);
    // 7. Boolean query: combines several query conditions.
    //    Note: it is only built here; nothing in this method executes it.
    BooleanQuery booleanQuery = new BooleanQuery();
    // Occur.MUST means a document has to satisfy this clause.
    booleanQuery.add(query, Occur.MUST);
}

对查询的结果进行排序:

/**
 * Searches the {@code title} field for "恐怖" and prints the hits sorted by the {@code id}
 * field in descending order.
 *
 * <p>The on-disk index at {@code LuceneConstants.PATH} is first copied into a
 * {@link RAMDirectory}; the search runs against that in-memory copy. All directories and the
 * reader are closed in {@code finally} blocks so that they are released even when the search
 * fails.
 *
 * @throws Exception if the index cannot be read or the query text cannot be parsed
 */
@Test
public void testSort() throws Exception{

    // Load the on-disk index into memory; the disk directory is not needed afterwards.
    Directory dir = FSDirectory.open(new File(LuceneConstants.PATH));
    Directory directory;
    try {
        IOContext context = new IOContext();
        directory = new RAMDirectory(dir,context);
    } finally {
        dir.close();
    }

    try {
        IndexReader indexReader = DirectoryReader.open(directory);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            String [] fields = {"title"};
            QueryParser queryParser = new MultiFieldQueryParser(LuceneUtils.getMatchVersion(), fields, LuceneUtils.getAnalyzer());

            Query query = queryParser.parse("恐怖");

            /*
             * Sort the search result by a chosen field.
             *      true: sort in descending (reversed) order.
             */
            SortField field = new SortField("id", Type.INT,true);
            Sort sort = new Sort(field);
            TopDocs topDocs = indexSearcher.search(query, 100,sort);

            System.out.println(topDocs.totalHits);
            for(ScoreDoc scoreDoc : topDocs.scoreDocs){
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println("id : " + document.get("id"));
            }
        } finally {
            indexReader.close();
        }
    } finally {
        directory.close();
    }

}

索引库的优化:

/**
 * Examples of ways to tune a Lucene index.
 *
 * @author ronnie
 */
public class TestOptimise {
    /**
     * Tunes index writing through the merge policy set on {@code IndexWriterConfig}.
     *
     * <p>Tuning options listed by the original author:
     * <ol>
     *   <li>Configure the {@code IndexWriterConfig}. Since Lucene 4.0 the index is optimised
     *       automatically; only a few settings need to be adjusted.</li>
     *   <li>Exclude stop words: they are dropped by the analyzer and never indexed, which
     *       makes index creation faster.</li>
     *   <li>Partition the index by category.</li>
     * </ol>
     *
     * <p>The directory is closed in a {@code finally} block so that it is released even when
     * the writer cannot be opened.
     *
     * @throws IOException if the index directory cannot be opened or written
     */
    @Test
    public void testOptimise() throws IOException{
        Directory directory = FSDirectory.open(new File(LuceneConstants.PATH));
        try {
            IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.getMatchVersion(), LuceneUtils.getAnalyzer());

            // MergePolicy: defines the rules for merging index segments.
            LogDocMergePolicy mergePolicy = new LogDocMergePolicy();

            /*
             * mergeFactor:
             *      smaller value: less memory used while indexing, faster searches,
             *                     slower indexing.
             *      larger value:  more memory used while indexing, slower searches,
             *                     faster indexing.
             *      "small" means a value in [2,10].
             */
            mergePolicy.setMergeFactor(3);
            config.setMergePolicy(mergePolicy);

            // Opening and closing the writer is all this example does with the configuration.
            IndexWriter indexWriter = new IndexWriter(directory, config);
            indexWriter.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Tuning option 4: keep the index in memory.
     *
     * <p>Copies the on-disk index at {@code LuceneConstants.PATH} into a {@link RAMDirectory}
     * and runs a search on the {@code title} field against the in-memory copy. The result of
     * the search is not used. All directories and the reader are closed in {@code finally}
     * blocks so that they are released even when the search fails.
     *
     * @throws IOException    if the index cannot be read
     * @throws ParseException if the query text cannot be parsed
     */
    @Test
    public void testOptimise2() throws IOException, ParseException{
        // The index as stored on disk.
        Directory dir = FSDirectory.open(new File(LuceneConstants.PATH));
        Directory directory;
        try {
            IOContext context = new IOContext();
            // The index in memory: loaded from the on-disk directory.
            directory = new RAMDirectory(dir, context);
        } finally {
            // The disk directory is no longer needed once its content has been copied.
            dir.close();
        }

        try {
            IndexReader indexReader = DirectoryReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);

                String [] fields = {"title"};
                QueryParser queryParser = new MultiFieldQueryParser(LuceneUtils.getMatchVersion(), fields, LuceneUtils.getAnalyzer());
                Query query = queryParser.parse("恐怖");
                indexSearcher.search(query, 100);
            } finally {
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    }
}