Lucene 入门和简单封装

最新推荐文章于 2024-05-24 09:52:48 发布

wanhf11

最新推荐文章于 2024-05-24 09:52:48 发布

阅读量909

点赞数 1

分类专栏：全文检索

本文链接：https://blog.csdn.net/qq_17612199/article/details/50812000

版权

全文检索专栏收录该内容

8 篇文章 0 订阅

订阅专栏

package com.whf.demo;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Process-wide holder for one shared {@link IndexWriter}, {@link IndexReader} and
 * {@link IndexSearcher}.
 *
 * <p>Lucene vocabulary used throughout this package: a Document is made of several Fields
 * (one attribute each); an Analyzer tokenizes text; an IndexWriter builds the index; a
 * Directory is where the index is stored (FSDirectory or RAMDirectory); a Term is a
 * (field, keyword) pair to look up.
 *
 * <p>The writer is created under {@code writerLock}. The reader and searcher caches are
 * plain volatile fields updated without a lock.
 *
 * @author whf
 */
public class LuceneManager {

    private volatile static LuceneManager singleton = null;
    private volatile static IndexWriter writer = null;
    private volatile static IndexReader reader = null;
    private volatile static IndexSearcher searcher = null;

    private final Lock writerLock = new ReentrantLock();
    private final static Object obj = new Object();
    private static Version version = Version.LUCENE_CURRENT;

    /**
     * Private constructor: instances are only handed out by {@link #getInstance()}.
     */
    private LuceneManager() {
    }

    /**
     * Returns the single LuceneManager instance, creating it on first use while holding the
     * {@code obj} monitor.
     *
     * @return the shared manager
     */
    public static LuceneManager getInstance() {
        if (null == singleton) {
            synchronized (obj) {
                if (null == singleton) {
                    singleton = new LuceneManager();
                }
            }
        }
        return singleton;
    }

    /**
     * Returns the shared IndexWriter, creating it on first use.
     *
     * <p>Once a writer is cached, {@code dir} and {@code config} are only validated, not used.
     *
     * @param dir    directory to open the writer on
     * @param config writer configuration
     * @return the shared writer, or {@code null} if the directory is already locked or the
     *         writer could not be created (the failure is printed to stderr)
     * @throws IllegalArgumentException if {@code dir} or {@code config} is null
     */
    public IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) {
        if (dir == null)
            throw new IllegalArgumentException("Directory can not be null.");
        if (config == null)
            throw new IllegalArgumentException(
                    "IndexWriterConfig can not be null.");
        // Acquire outside the try block so that finally never unlocks a lock we do not hold.
        writerLock.lock();
        try {
            if (writer == null) {
                if (IndexWriter.isLocked(dir)) {
                    throw new LockObtainFailedException(
                            "Directory of index had been locked.");
                }
                writer = new IndexWriter(dir, config);
            }
        } catch (IOException e) {
            // LockObtainFailedException is an IOException; both were handled identically.
            e.printStackTrace();
        } finally {
            writerLock.unlock();
        }
        return writer;
    }

    /**
     * Returns the shared IndexReader, opening it on first use.
     *
     * <p>{@code dir} is only used when no usable reader is cached. A cached reader whose
     * reference count has dropped to zero (it was closed elsewhere) is replaced by a fresh one.
     * When a near-real-time refresh yields a new reader, the superseded reader is NOT closed
     * here because previously returned searchers may still be using it; whoever holds it is
     * responsible for closing it.
     *
     * @param dir             directory to open the reader on
     * @param enableNRTReader whether to refresh the cached reader when the index has changed
     * @return the shared reader, or {@code null} if none is cached and opening failed
     * @throws IllegalArgumentException if {@code dir} is null
     */
    public IndexReader getIndexReader(Directory dir, boolean enableNRTReader) {
        if (dir == null)
            throw new IllegalArgumentException("Directory can not be null.");
        try {
            IndexReader current = reader;
            if (current == null || current.getRefCount() <= 0) {
                reader = DirectoryReader.open(dir);
            } else if (enableNRTReader && current instanceof DirectoryReader) {
                // openIfChanged returns null when nothing changed; keep the current reader
                // in that case instead of wiping the cache.
                DirectoryReader refreshed = DirectoryReader
                        .openIfChanged((DirectoryReader) current);
                if (refreshed != null) {
                    reader = refreshed;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return reader;
    }

    /**
     * Returns the shared IndexReader without near-real-time refresh.
     *
     * @param dir directory to open the reader on
     * @return the shared reader, or {@code null} if it could not be opened
     */
    public IndexReader getIndexReader(Directory dir) {
        return getIndexReader(dir, false);
    }

    /**
     * Returns an IndexSearcher over {@code reader}.
     *
     * <p>The searcher is cached and reused as long as the same reader instance is passed; a
     * different reader gets a new searcher. {@code executor} is only applied when a new
     * searcher is built.
     *
     * @param reader   reader to search
     * @param executor executor for multi-threaded search, or {@code null} for none
     * @return a searcher bound to {@code reader}
     * @throws IllegalArgumentException if {@code reader} is null
     */
    public IndexSearcher getIndexSearcher(IndexReader reader,
            ExecutorService executor) {
        if (reader == null)
            throw new IllegalArgumentException(
                    "The indexReader can not be null.");
        IndexSearcher current = searcher;
        if (current == null || current.getIndexReader() != reader) {
            current = new IndexSearcher(reader, executor);
            searcher = current;
        }
        return current;
    }

    /**
     * Returns an IndexSearcher over {@code reader} without an executor.
     *
     * @param reader reader to search
     * @return a searcher bound to {@code reader}
     */
    public IndexSearcher getIndexSearcher(IndexReader reader) {
        return getIndexSearcher(reader, null);
    }

    /**
     * Creates a single-field QueryParser.
     *
     * <p>Typical use: {@code Query q = parser.parse("keyword")}. Call
     * {@code parser.setDefaultOperator(QueryParser.Operator.AND)} to require every keyword,
     * or {@code Operator.OR} to accept any of them.
     *
     * @param field    default field to search
     * @param analyzer analyzer used to tokenize the query text
     * @return a new parser
     */
    public static QueryParser createQueryParser(String field, Analyzer analyzer) {
        return new QueryParser(field, analyzer);
    }

    /**
     * Creates a QueryParser that searches several fields at once.
     *
     * @param fields   fields to search
     * @param analyzer analyzer used to tokenize the query text
     * @return a new multi-field parser
     */
    public static QueryParser createMultiFieldQueryParser(String[] fields,
            Analyzer analyzer) {
        return new MultiFieldQueryParser(fields, analyzer);
    }

    /**
     * Closes {@code writer}; if it is the cached shared writer, the cache is cleared so the
     * next {@link #getIndexWriter} call creates a fresh one.
     *
     * @param writer writer to close, may be null
     */
    public static void closeIndexWriter(IndexWriter writer) {
        if (writer != null) {
            try {
                writer.close();
                // The parameter shadows the static field, so the field must be named explicitly.
                if (writer == LuceneManager.writer) {
                    LuceneManager.writer = null;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Closes {@code reader}; if it is the cached shared reader (or backs the cached searcher),
     * those caches are cleared.
     *
     * @param reader reader to close, may be null
     */
    public static void closeIndexReader(IndexReader reader) {
        if (reader != null) {
            try {
                reader.close();
                if (reader == LuceneManager.reader) {
                    LuceneManager.reader = null;
                }
                IndexSearcher cached = searcher;
                if (cached != null && cached.getIndexReader() == reader) {
                    searcher = null;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Closes the given reader and then the given writer.
     *
     * @param reader reader to close, may be null
     * @param writer writer to close, may be null
     */
    public static void closeAll(IndexReader reader, IndexWriter writer) {
        closeIndexReader(reader);
        closeIndexWriter(writer);
    }
}

package com.whf.demo;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.whf.pojo.HighlighterParam;
import com.whf.pojo.Page;

public class LuceneUtil {

    private static final LuceneManager manager = LuceneManager.getInstance();
    private static Analyzer analyzer = new IKAnalyzer(true);
    private static Version version = Version.LUCENE_CURRENT;
    private static FSDirectory fsDirectory = null;
    private static RAMDirectory ramDirectory = null;

    /**
     * 打开索引目录
     * 
     * @param luceneDir
     * @return
     * @throws IOException
     */
    public static FSDirectory openFSDirectory(String luceneDir) {
        if (fsDirectory == null)
            try {
                File dir = new File(luceneDir);
                if (!dir.exists())
                    dir.mkdirs();
                fsDirectory = FSDirectory.open(Paths.get(luceneDir));
                // 注意：isLocked方法内部会试图去获取Lock,
                // 如果获取到Lock，会关闭它，否则return false表示索引目录没有被锁.
                // 这也就是为什么unlock方法被从IndexWriter类中移除的原因
                IndexWriter.isLocked(fsDirectory);
            } catch (IOException e) {
                e.printStackTrace();
            }
        return fsDirectory;
    }

    /**
     * 打开内存目录
     * 
     * @param luceneDir
     * @return
     * @throws IOException
     */
    public static RAMDirectory openRAMDirectory() {
        if (ramDirectory == null)
            return new RAMDirectory();
        else
            return ramDirectory;
    }

    /**
     * 关闭索引目录并销毁
     * 
     * @param directory
     * @throws IOException
     */
    public static void closeDirectory(Directory directory) {
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            directory = null;
        }
    }

    /**
     * 关闭IndexReader
     * 
     * @param reader
     */
    public static void closeIndexReader(IndexReader reader) {
        if (reader != null) {
            try {
                reader.close();
                reader = null;
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * 关闭IndexWriter
     * 
     * @param writer
     */
    public static void closeIndexWriter(IndexWriter writer) {
        if (writer != null) {
            try {
                writer.close();
                writer = null;
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * 关闭IndexReader和IndexWriter
     * 
     * @param reader
     * @param writer
     */
    public static void closeAll(IndexReader reader, IndexWriter writer) {
        closeIndexReader(reader);
        closeIndexWriter(writer);
    }

    /**
     * 获取IndexWriter
     * 
     * @param dir
     * @param config
     * @return
     */
    public static IndexWriter getIndexWrtier(Directory dir,
            IndexWriterConfig config) {
        return manager.getIndexWriter(dir, config);
    }

    /**
     * 获取IndexWriter
     * 
     * @param dir
     * @param config
     * @return
     */
    public static IndexWriter getIndexWrtier(String directoryPath,
            IndexWriterConfig config) {
        FSDirectory directory = openFSDirectory(directoryPath);
        return manager.getIndexWriter(directory, config);
    }

    /**
     * 获取IndexReader
     * 
     * @param dir
     * @param enableNRTReader
     *            是否开启NRTReader
     * @return
     */
    public static IndexReader getIndexReader(Directory dir,
            boolean enableNRTReader) {
        return manager.getIndexReader(dir, enableNRTReader);
    }

    /**
     * 获取IndexReader(默认不启用NRTReader)
     * 
     * @param dir
     * @return
     */
    public static IndexReader getIndexReader(Directory dir) {
        return manager.getIndexReader(dir);
    }

    /**
     * 获取IndexSearcher
     * 
     * @param reader
     *            IndexReader对象
     * @param executor
     *            如果你需要开启多线程查询，请提供ExecutorService对象参数
     * @return
     */
    public static IndexSearcher getIndexSearcher(IndexReader reader,
            ExecutorService executor) {
        return manager.getIndexSearcher(reader, executor);
    }

    /**
     * 获取IndexSearcher(不支持多线程查询)
     * 
     * @param reader
     *            IndexReader对象
     * @return
     */
    public static IndexSearcher getIndexSearcher(IndexReader reader) {
        return manager.getIndexSearcher(reader);
    }

    /**
     * 删除索引[注意：请自己关闭IndexWriter对象]
     * 
     * @param writer
     * @param field
     * @param value
     */
    public static void deleteIndex(IndexWriter writer, String field,
            String keyword) {
        try {
            writer.deleteDocuments(new Term[] { new Term(field, keyword) });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 删除索引[注意：请自己关闭IndexWriter对象]
     * 
     * @param writer
     * @param term
     */
    public static void deleteIndexs(IndexWriter writer, Term[] terms) {
        try {
            writer.deleteDocuments(terms);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 删除索引[注意：请自己关闭IndexWriter对象]
     * 
     * @param writer
     * @param field
     * @param value
     */
    public static void deleteIndex(IndexWriter writer, Term term) {
        try {
            writer.deleteDocuments(new Term[] { term });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 批量删除索引[注意：请自己关闭IndexWriter对象]
     * 
     * @param writer
     * @param querys
     */
    public static void deleteIndexs(IndexWriter writer, Query[] querys) {
        try {
            writer.deleteDocuments(querys);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 删除索引[注意：请自己关闭IndexWriter对象]
     * 
     * @param writer
     * @param query
     */
    public static void deleteIndex(IndexWriter writer, Query query) {
        try {
            writer.deleteDocuments(query);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 删除所有索引文档
     * 
     * @param writer
     */
    public static void deleteAllIndex(IndexWriter writer) {
        try {
            writer.deleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Term term = new Term("id","1234567");
     * 先去索引文件里查找id为1234567的Doc,如果有就更新它(如果有多条，最后更新后只有一条)。如果没有就新增.
     * 数据库更新的时候，我们可以只针对某个列来更新，而lucene只能针对一行数据更新。
     * 
     * @param writer
     * @param term
     * @param document
     */
    public static void updateIndex(IndexWriter writer, Term term,
            Document document) {
        try {
            writer.updateDocument(term, document);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 添加索引文档
     * 
     * @param writer
     * @param doc
     */
    public static void addIndex(IndexWriter writer, Document document) {
        updateIndex(writer, null, document);
        // writer.addDocument(document);
    }

    /**
     * 批量添加索引文档
     * 
     * @param writer
     * @param doc
     */
    public static void addIndex(IndexWriter writer, List<Document> documents) {
        try {
            writer.addDocuments(documents);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 将内存的数据写至外存
     * 
     * @param writer
     * @param ramDirectory
     * @param fsramDirectory
     * @param analyzer
     */
    public static void OptimizeRAMToFSDirectory(IndexWriter writer,
            Directory fsDirectory, Analyzer analyzer) {
        try {
            writer.addIndexes(new Directory[] { fsDirectory });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 索引文档查询
     * 
     * @param searcher
     * @param query
     * @param sort
     * @return
     */
    public static List<Document> query(IndexSearcher searcher, Query query,
            Integer rows, Sort sort, Filter filter) {
        TopDocs topDocs = null;
        List<Document> docList = null;
        try {
            if (sort != null) {
                if (filter != null) {
                    topDocs = searcher.search(query, filter,
                            (rows == null ? Integer.MAX_VALUE : rows), sort);
                } else {
                    topDocs = searcher.search(query,
                            (rows == null ? Integer.MAX_VALUE : rows), sort);
                }
            } else {
                if (filter != null) {
                    topDocs = searcher.search(query, filter,
                            (rows == null ? Integer.MAX_VALUE : rows));
                } else {
                    topDocs = searcher.search(query,
                            (rows == null ? Integer.MAX_VALUE : rows));
                }
            }
            ScoreDoc[] scores = topDocs.scoreDocs;
            int length = scores.length;
            if (length <= 0) {
                return Collections.emptyList();
            }
            docList = new ArrayList<Document>();
            for (int i = 0; i < length; i++) {
                System.out.println(scores[i].score + " " + scores[i].doc);
                Document doc = searcher.doc(scores[i].doc);
                docList.add(doc);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return docList;
    }

    /**
     * 返回索引文档的总数[注意：请自己手动关闭IndexReader]
     * 
     * @param reader
     * @return
     */
    public static int getIndexTotalCount(IndexReader reader) {
        return reader.numDocs();
    }

    /**
     * 返回索引文档中最大文档ID[注意：请自己手动关闭IndexReader]
     * 
     * @param reader
     * @return
     */
    public static int getMaxDocId(IndexReader reader) {
        return reader.maxDoc();
    }

    /**
     * 返回已经删除尚未提交的文档总数[注意：请自己手动关闭IndexReader]
     * 
     * @param reader
     * @return
     */
    public static int getDeletedDocNum(IndexReader reader) {
        return getMaxDocId(reader) - getIndexTotalCount(reader);
    }

    /**
     * 根据docId查询索引文档
     * 
     * @param reader
     *            IndexReader对象
     * @param docID
     *            documentId
     * @param fieldsToLoad
     *            需要返回的field
     * @return
     */
    public static Document findDocumentByDocId(IndexReader reader, int docID,
            Set<String> fieldsToLoad) {
        try {
            return reader.document(docID, fieldsToLoad);
        } catch (IOException e) {
            return null;
        }
    }

    /**
     * 根据docId查询索引文档
     * 
     * @param reader
     *            IndexReader对象
     * @param docID
     *            documentId
     * @return
     */
    public static Document findDocumentByDocId(IndexReader reader, int docID) {
        return findDocumentByDocId(reader, docID, null);
    }

    /**
     * @Title: createHighlighter
     * @Description: 创建高亮器
     * @param query
     *            索引查询对象
     * @param prefix
     *            高亮前缀字符串
     * @param stuffix
     *            高亮后缀字符串
     * @param fragmenterLength
     *            摘要最大长度
     * @return
     */
    public static Highlighter createHighlighter(Query query, String prefix,
            String stuffix, int fragmenterLength) {
        Formatter formatter = new SimpleHTMLFormatter((prefix == null || prefix
                .trim().length() == 0) ? "<font color=\"red\">" : prefix,
                (stuffix == null || stuffix.trim().length() == 0) ? "</font>"
                        : stuffix);
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50
                : fragmenterLength);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * @Title: highlight
     * @Description: 生成高亮文本
     * @param document
     *            索引文档对象
     * @param highlighter
     *            高亮器
     * @param analyzer
     *            索引分词器
     * @param field
     *            高亮字段
     * @return
     * @throws IOException
     * @throws InvalidTokenOffsetsException
     */
    public static String highlight(Document document, Highlighter highlighter,
            Analyzer analyzer, String field) throws IOException {
        List<IndexableField> list = document.getFields();
        for (IndexableField fieldable : list) {
            String fieldValue = fieldable.stringValue();
            if (fieldable.name().equals(field)) {
                try {
                    fieldValue = highlighter.getBestFragment(analyzer, field,
                            fieldValue);
                } catch (InvalidTokenOffsetsException e) {
                    fieldValue = fieldable.stringValue();
                }
                return (fieldValue == null || fieldValue.trim().length() == 0) ? fieldable
                        .stringValue() : fieldValue;
            }
        }
        return null;
    }

    /**
     * @Title: searchTotalRecord
     * @Description: 获取符合条件的总记录数
     * @param query
     * @return
     * @throws IOException
     */
    public static int searchTotalRecord(IndexSearcher search, Query query) {
        ScoreDoc[] docs = null;
        try {
            TopDocs topDocs = search.search(query, Integer.MAX_VALUE);
            if (topDocs == null || topDocs.scoreDocs == null
                    || topDocs.scoreDocs.length == 0) {
                return 0;
            }
            docs = topDocs.scoreDocs;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return docs.length;
    }

    /**
     * @Title: pageQuery
     * @Description: Lucene分页查询
     * @param searcher
     * @param query
     * @param page
     * @throws IOException
     */
    public static void pageQuery(IndexSearcher searcher, Directory directory,
            Query query, Page<Document> page) {
        int totalRecord = searchTotalRecord(searcher, query);
        // 设置总记录数
        page.setTotalRecord(totalRecord);
        TopDocs topDocs = null;
        try {
            topDocs = searcher.searchAfter(page.getAfterDoc(), query,
                    page.getPageSize());
        } catch (IOException e) {
            e.printStackTrace();
        }
        List<Document> docList = new ArrayList<Document>();
        ScoreDoc[] docs = topDocs.scoreDocs;
        int index = 0;
        for (ScoreDoc scoreDoc : docs) {
            int docID = scoreDoc.doc;
            Document document = null;
            try {
                document = searcher.doc(docID);
            } catch (IOException e) {
                e.printStackTrace();
            }
            if (index == docs.length - 1) {
                page.setAfterDoc(scoreDoc);
                page.setAfterDocId(docID);
            }
            docList.add(document);
            index++;
        }
        page.setItems(docList);
        closeIndexReader(searcher.getIndexReader());
    }

    /**
     * @Title: pageQuery
     * @Description: 分页查询[如果设置了高亮,则会更新索引文档]
     * @param searcher
     * @param directory
     * @param query
     * @param page
     * @param highlighterParam
     * @param writerConfig
     * @throws IOException
     */
    public static void pageQuery(IndexSearcher searcher, Directory directory,
            Query query, Page<Document> page,
            HighlighterParam highlighterParam, IndexWriterConfig writerConfig)
            throws IOException {
        IndexWriter writer = null;
        // 若未设置高亮
        if (null == highlighterParam || !highlighterParam.isHighlight()) {
            pageQuery(searcher, directory, query, page);
        } else {
            int totalRecord = searchTotalRecord(searcher, query);
            System.out.println("totalRecord:" + totalRecord);
            // 设置总记录数
            page.setTotalRecord(totalRecord);
            TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(), query,
                    page.getPageSize());
            List<Document> docList = new ArrayList<Document>();
            ScoreDoc[] docs = topDocs.scoreDocs;
            int index = 0;
            writer = getIndexWrtier(directory, writerConfig);
            for (ScoreDoc scoreDoc : docs) {
                int docID = scoreDoc.doc;
                Document document = searcher.doc(docID);
                String content = document.get(highlighterParam.getFieldName());
                if (null != content && content.trim().length() > 0) {
                    // 创建高亮器
                    Highlighter highlighter = LuceneUtil.createHighlighter(
                            query, highlighterParam.getPrefix(),
                            highlighterParam.getStuffix(),
                            highlighterParam.getFragmenterLength());
                    String text = highlight(document, highlighter, analyzer,
                            highlighterParam.getFieldName());
                    // 若高亮后跟原始文本不相同，表示高亮成功
                    if (!text.equals(content)) {
                        Document tempdocument = new Document();
                        List<IndexableField> indexableFieldList = document
                                .getFields();
                        if (null != indexableFieldList
                                && indexableFieldList.size() > 0) {
                            for (IndexableField field : indexableFieldList) {
                                if (field.name().equals(
                                        highlighterParam.getFieldName())) {
                                    tempdocument.add(new TextField(
                                            field.name(), text, Store.YES));
                                } else {
                                    tempdocument.add(field);
                                }
                            }
                        }
                        updateIndex(writer,
                                new Term(highlighterParam.getFieldName(),
                                        content), tempdocument);
                        document = tempdocument;
                    }
                }
                if (index == docs.length - 1) {
                    page.setAfterDoc(scoreDoc);
                    page.setAfterDocId(docID);
                }
                docList.add(document);
                index++;
            }
            page.setItems(docList);
        }
        closeIndexReader(searcher.getIndexReader());
        closeIndexWriter(writer);
    }

    /**
     * 创建QueryParser
     * 
     * @return
     */
    public static QueryParser createCustomQueryParser(String field,
            Analyzer analyzer) {
        return manager.createQueryParser(field, analyzer);
    }

    /**
     * 创建多field的QueryParser
     * 
     * @param fields
     * @param analyzer
     * @return
     */
    public static QueryParser createMultiFieldQueryParser(String[] fields,
            Analyzer analyzer) {
        return manager.createMultiFieldQueryParser(fields, analyzer);
    }

    /**
     * 可以在查询的时候获取TermVector
     * 
     * @return
     */
    public static Field storeVectorTextField(String name, String value,
            Store store) {
        FieldType type = new FieldType();
        type.setStored(true);
        type.setStoreTermVectors(true);
        type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        return new Field(name, value, type);
    }

}

package com.whf.pojo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.Query;

/**
 * Custom-score query whose score for a hit is the number of terms enumerated from that
 * document's term vector for a configured field.
 */
public class MyCountQuery extends CustomScoreQuery{

    /** Field whose term vector is counted; null when the single-argument constructor is used. */
    private final String field;

    /**
     * Per-segment score provider. Declared static so it does not keep a hidden reference to
     * the enclosing query; everything it needs is passed in explicitly.
     */
    private static final class MyCountQueryScoreProvider extends CustomScoreProvider{

        private final String field;

        MyCountQueryScoreProvider(LeafReaderContext context, String field) {
            super(context);
            this.field = field;
        }

        /**
         * Counts the terms in the document's term vector for {@code field}.
         *
         * @param arg0 document id within this segment
         * @param arg1 score of the wrapped query (ignored)
         * @param arg2 scores of the value sources (ignored)
         * @return the term count, or 0 when the document has no term vector for the field
         */
        @Override
        public float customScore(int arg0, float arg1, float[] arg2)
                throws IOException {
            Terms tv = context.reader().getTermVector(arg0, field);
            if (tv == null) {
                return 0f;
            }
            int numTerms = 0;
            TermsEnum termsEnum = tv.iterator();
            while (termsEnum.next() != null) {
                numTerms++;
            }
            return (float) numTerms;
        }

    }

    /**
     * Wraps {@code subQuery} without naming a field to count.
     *
     * @param subQuery query selecting the documents to score
     */
    public MyCountQuery(Query subQuery) {
        super(subQuery);
        this.field = null;
    }

    /**
     * Wraps {@code subQuery} and scores hits by the term count of {@code field}.
     *
     * @param subQuery query selecting the documents to score
     * @param field    field whose term vector is counted
     */
    public MyCountQuery(Query subQuery, String field) {
        super(subQuery);
        this.field = field;
    }

    @Override
    protected CustomScoreProvider getCustomScoreProvider(
            LeafReaderContext context) throws IOException {
        return new MyCountQueryScoreProvider(context, this.field);
    }

}

package com.whf.pojo;

/**
 * Parameter object describing whether and how search hits are highlighted.
 *
 * @author Lanxiaowei
 */
public class HighlighterParam {
    /** Whether highlighting is requested. */
    private boolean highlight;
    /** Name of the field to highlight. */
    private String fieldName;
    /** Markup placed before a highlighted match. */
    private String prefix;
    /** Markup placed after a highlighted match. */
    private String stuffix;
    /** Maximum length of the displayed fragment. */
    private int fragmenterLength;

    /** Creates a parameter object with every value left at its Java default. */
    public HighlighterParam() {
    }

    /** Sets the flag, field name and fragment length; prefix and suffix stay null. */
    public HighlighterParam(boolean highlight, String fieldName,
            int fragmenterLength) {
        this(highlight, fieldName, null, null, fragmenterLength);
    }

    /** Sets the flag, field name, prefix and suffix; the fragment length stays 0. */
    public HighlighterParam(boolean highlight, String fieldName, String prefix,
            String stuffix) {
        this(highlight, fieldName, prefix, stuffix, 0);
    }

    /** Sets every property. */
    public HighlighterParam(boolean highlight, String fieldName, String prefix,
            String stuffix, int fragmenterLength) {
        this.highlight = highlight;
        this.fieldName = fieldName;
        this.prefix = prefix;
        this.stuffix = stuffix;
        this.fragmenterLength = fragmenterLength;
    }

    public boolean isHighlight() {
        return this.highlight;
    }

    public void setHighlight(boolean highlight) {
        this.highlight = highlight;
    }

    public String getFieldName() {
        return this.fieldName;
    }

    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    public String getPrefix() {
        return this.prefix;
    }

    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }

    public String getStuffix() {
        return this.stuffix;
    }

    public void setStuffix(String stuffix) {
        this.stuffix = stuffix;
    }

    public int getFragmenterLength() {
        return this.fragmenterLength;
    }

    public void setFragmenterLength(int fragmenterLength) {
        this.fragmenterLength = fragmenterLength;
    }
}

package com.whf.pojo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.Query;

/**
 * Custom-score query that replaces the score of every hit of the wrapped
 * query with the number of entries enumerated from the hit's term vector
 * for a given field.
 */
public class MyCountQuery extends CustomScoreQuery {

    /** Field whose term vector is counted; stays null with the one-argument constructor. */
    private String field = null;

    /**
     * Wraps {@code subQuery} without selecting a field.
     *
     * @param subQuery query whose hits are re-scored
     */
    public MyCountQuery(Query subQuery) {
        super(subQuery);
    }

    /**
     * Wraps {@code subQuery} and counts terms of {@code field}.
     *
     * @param subQuery query whose hits are re-scored
     * @param field    name of the field whose term vector is counted
     */
    public MyCountQuery(Query subQuery, String field) {
        super(subQuery);
        this.field = field;
    }

    /**
     * Supplies the per-segment score provider, handing it this query's field.
     *
     * @param context the segment being scored
     * @return a provider that scores by term-vector entry count
     */
    @Override
    protected CustomScoreProvider getCustomScoreProvider(
            LeafReaderContext context) throws IOException {
        return new MyCountQueryScoreProvider(context, this.field);
    }

    /** Score provider that counts the term-vector entries of one field. */
    private class MyCountQueryScoreProvider extends CustomScoreProvider {

        /** Field whose term vector is read for each document. */
        private String field = null;

        public MyCountQueryScoreProvider(LeafReaderContext context) {
            super(context);
        }

        public MyCountQueryScoreProvider(LeafReaderContext context, String field) {
            super(context);
            this.field = field;
        }

        /**
         * Ignores the sub-query and value-source scores and returns the
         * number of entries in the document's term vector for the field.
         *
         * @param doc           document id passed in by the scorer
         * @param subQueryScore score of the wrapped query (unused)
         * @param valSrcScores  value-source scores (unused)
         * @return the entry count as a float, or 0 when no term vector is returned
         */
        @Override
        public float customScore(int doc, float subQueryScore, float[] valSrcScores)
                throws IOException {
            Terms termVector = context.reader().getTermVector(doc, field);
            if (termVector == null) {
                return 0f;
            }
            int termCount = 0;
            for (TermsEnum cursor = termVector.iterator(); cursor.next() != null;) {
                termCount++;
            }
            return (float) termCount;
        }

    }

}

package com.whf.pojo;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;

/**
 * Paging state for search results: page/record counters, the window of page
 * numbers to render in a pager, and the cursor ({@code afterDoc}) that marks
 * the last hit of the previous page.
 *
 * <p>Several values are derived on read: {@link #getTotalPage()} is always
 * recomputed from the record count and page size, and
 * {@link #getLeftOffset()} / {@link #getRightOffset()} are always recomputed
 * from the group size, so the matching setters are overwritten by the next
 * getter call.
 *
 * @param <T> element type of {@link #getItems()}
 */
public class Page<T> {
    /** Current page number (1-based). */
    private int currentPage;
    /** Number of records shown per page. */
    private int pageSize;
    /** Total number of records. */
    private int totalRecord;
    /** Total number of pages. */
    private int totalPage;
    /** Items of the page; the type parameter T constrains the element type. */
    private Collection<T> items;
    /** Zero-based index of the first page number shown. */
    private int startIndex;
    /** Zero-based index of the last page number shown. */
    private int endIndex;
    /** Maximum number of page numbers shown in one group (e.g. Google shows at most 10). */
    private int groupSize;
    /** Left offset. */
    private int leftOffset = 5;
    /** Right offset. */
    private int rightOffset = 4;
    /** Page numbers currently shown. */
    private String[] pageRange;
    /** Paged documents. */
    private List<Document> docList;
    /** Last ScoreDoc of the previous page. */
    private ScoreDoc afterDoc;
    /** Document id of the last ScoreDoc of the previous page. */
    private int afterDocId;

    /**
     * Recomputes {@code startIndex} and {@code endIndex}, the zero-based bounds
     * of the page-number window. When there are no pages the window is empty
     * ({@code endIndex == startIndex - 1}).
     */
    public void setRangeIndex() {
        int group = getGroupSize();
        int pages = getTotalPage();
        // Also normalizes the stored current page into the valid range.
        int page = getCurrentPage();

        if (group >= pages) {
            // Every page fits in one group (covers 0 and 1 page as well).
            startIndex = 0;
            endIndex = pages - 1;
            return;
        }

        int left = getLeftOffset();
        if (page <= left) {
            // Too close to the beginning to centre: show the first group.
            startIndex = 0;
            endIndex = group - 1;
            return;
        }

        startIndex = page - left - 1;
        // Use the offset derived from the group size, not the raw field, and
        // never run past the last page.
        endIndex = Math.min(page + getRightOffset(), pages) - 1;
    }

    /**
     * Returns the current page, clamped to at least 1 and, when there is at
     * least one page, to at most the total page count. The clamped value is
     * stored back.
     *
     * @return the normalized 1-based page number
     */
    public int getCurrentPage() {
        if (currentPage <= 0) {
            currentPage = 1;
        } else {
            int pages = getTotalPage();
            if (pages > 0 && currentPage > pages) {
                currentPage = pages;
            }
        }
        return currentPage;
    }

    public void setCurrentPage(int currentPage) {
        this.currentPage = currentPage;
    }

    /**
     * @return the page size; a non-positive stored value is replaced by 10
     */
    public int getPageSize() {
        if (pageSize <= 0) {
            pageSize = 10;
        }
        return pageSize;
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }

    public int getTotalRecord() {
        return totalRecord;
    }

    public void setTotalRecord(int totalRecord) {
        this.totalRecord = totalRecord;
    }

    /**
     * Recomputes the page count as the record count divided by the page size,
     * rounded up. A non-positive record count yields 0 pages.
     *
     * @return the total number of pages
     */
    public int getTotalPage() {
        int records = getTotalRecord();
        if (records <= 0) {
            totalPage = 0;
        } else {
            int size = getPageSize();
            totalPage = records / size + (records % size == 0 ? 0 : 1);
        }
        return totalPage;
    }

    /**
     * Stores a page count; note that {@link #getTotalPage()} recomputes and
     * overwrites it.
     */
    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public int getStartIndex() {
        return startIndex;
    }

    public void setStartIndex(int startIndex) {
        this.startIndex = startIndex;
    }

    public int getEndIndex() {
        return endIndex;
    }

    public void setEndIndex(int endIndex) {
        this.endIndex = endIndex;
    }

    /**
     * @return the group size; a non-positive stored value is replaced by 10
     */
    public int getGroupSize() {
        if (groupSize <= 0) {
            groupSize = 10;
        }
        return groupSize;
    }

    public void setGroupSize(int groupSize) {
        this.groupSize = groupSize;
    }

    /**
     * @return half of the group size (integer division); the value is stored back
     */
    public int getLeftOffset() {
        leftOffset = getGroupSize() / 2;
        return leftOffset;
    }

    /**
     * Stores a left offset; note that {@link #getLeftOffset()} recomputes and
     * overwrites it.
     */
    public void setLeftOffset(int leftOffset) {
        this.leftOffset = leftOffset;
    }

    /**
     * @return {@code groupSize / 2 - 1} for an even group size, otherwise
     *         {@code groupSize / 2}; the value is stored back
     */
    public int getRightOffset() {
        int group = getGroupSize();
        rightOffset = (group % 2 == 0) ? (group / 2) - 1 : group / 2;
        return rightOffset;
    }

    /**
     * Stores a right offset; note that {@link #getRightOffset()} recomputes
     * and overwrites it.
     */
    public void setRightOffset(int rightOffset) {
        this.rightOffset = rightOffset;
    }

    /**
     * Centre position of the window (1-based).
     *
     * @return -1 when all pages fit in one group, otherwise left offset + 1
     */
    public int getMiddleOffset() {
        int group = getGroupSize();
        int pages = getTotalPage();
        if (group >= pages) {
            return -1;
        }
        return getLeftOffset() + 1;
    }

    /**
     * Recomputes the window and returns the 1-based page numbers inside it.
     *
     * @return the page numbers as strings; an empty array when there are no pages
     */
    public String[] getPageRange() {
        setRangeIndex();
        int size = endIndex - startIndex + 1;
        if (size <= 0) {
            return new String[0];
        }
        pageRange = new String[size];
        for (int i = 0; i < size; i++) {
            pageRange[i] = String.valueOf(startIndex + i + 1);
        }
        return pageRange;
    }

    public void setPageRange(String[] pageRange) {
        this.pageRange = pageRange;
    }

    public Collection<T> getItems() {
        return items;
    }

    public void setItems(Collection<T> items) {
        this.items = items;
    }

    public List<Document> getDocList() {
        return docList;
    }

    public void setDocList(List<Document> docList) {
        this.docList = docList;
    }

    /**
     * Returns the cursor ScoreDoc. If none has been set yet, one is created
     * from the stored {@code afterDocId} with a score of 1.0.
     *
     * @return the stored or newly created ScoreDoc
     */
    public ScoreDoc getAfterDoc() {
        setAfterDocId(afterDocId);
        return afterDoc;
    }

    public void setAfterDoc(ScoreDoc afterDoc) {
        this.afterDoc = afterDoc;
    }

    public int getAfterDocId() {
        return afterDocId;
    }

    /**
     * Stores the cursor document id. A ScoreDoc is created from it only when
     * no ScoreDoc is held yet; an existing ScoreDoc is left untouched.
     *
     * @param afterDocId document id of the previous page's last hit
     */
    public void setAfterDocId(int afterDocId) {
        this.afterDocId = afterDocId;
        if (null == afterDoc) {
            this.afterDoc = new ScoreDoc(afterDocId, 1.0f);
        }
    }

    public Page() {
    }

    public Page(int currentPage, int pageSize) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
    }

    public Page(int currentPage, int pageSize, Collection<T> items) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.items = items;
    }

    public Page(int currentPage, int pageSize, Collection<T> items,
            int groupSize) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.items = items;
        this.groupSize = groupSize;
    }

    public Page(int currentPage, int pageSize, int groupSize, int afterDocId) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.groupSize = groupSize;
        this.afterDocId = afterDocId;
    }

    /**
     * Small console demo: prints the page-number window for every page of a
     * 201-record result set.
     */
    public static void main(String[] args) {
        Collection<Integer> items = new ArrayList<Integer>();
        int totalRecord = 201;
        for (int i = 0; i < totalRecord; i++) {
            // Autoboxing replaces the deprecated Integer constructor.
            items.add(i);
        }
        Page<Integer> page = new Page<Integer>(1, 10, items, 10);
        page.setTotalRecord(totalRecord);
        int totalPage = page.getTotalPage();
        for (int i = 0; i < totalPage; i++) {
            page.setCurrentPage(i + 1);
            String[] pageRange = page.getPageRange();
            System.out.println("当前第" + page.currentPage + "页");
            for (int j = 0; j < pageRange.length; j++) {
                System.out.print(pageRange[j] + "  ");
            }
            System.out.println("\n");
        }
    }
}

package com.whf.demo;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import com.whf.pojo.MyCountQuery;

public class Main {

    public static final String path = "F://index_data";

    /**
     * 优化：
     * @param args
     * @throws IOException
     * @throws ParseException
     */
    public static void main(String[] args) throws IOException, ParseException {

//      Directory directory = LuceneUtil.openFSDirectory(path);
//      Analyzer analyzer = new StandardAnalyzer();
//      IndexWriterConfig config = new IndexWriterConfig(
//              analyzer);
//      //有效提高效率，下次插入生效，默认16M
//      config.setRAMBufferSizeMB(256);
//      //缓存中文档的数目，不足时写入磁盘
//      config.setMaxBufferedDocs(200);

//      IndexWriter writer = LuceneUtil.getIndexWrtier(directory, config);
//      try {
//          
//          Document doc1 = new Document();
//          doc1.add(LuceneUtil.storeVectorTextField("name", "this is a demo", Store.YES));
//          doc1.add(LuceneUtil.storeVectorTextField("desc", "whf", Store.YES));
//          
//          Document doc2 = new Document();
//          doc2.add(LuceneUtil.storeVectorTextField("name", "this is a test", Store.YES));
//          doc2.add(LuceneUtil.storeVectorTextField("desc", "smx", Store.YES));
//          
            writer.updateDocument(new Term("name", "very"), doc);
//          writer.addDocument(doc1);
//          writer.addDocument(doc2);
//          //强制合并段，提高检索速度
//          writer.forceMerge(5);
//      } catch (Exception e) {
//          e.printStackTrace();
//      } finally {
//          writer.commit();
//          writer.close();
//      }
//      IndexReader reader = LuceneUtil.getIndexReader(directory);
//      IndexSearcher searcher = LuceneUtil.getIndexSearcher(reader);
//      QueryParser parser = new MultiFieldQueryParser(new String[] { "name", "desc" }, analyzer);
//      Query query = new MatchAllDocsQuery();
//      Query myquery=new MyCountQuery(query, "name");
//      TopDocs tops = searcher.search(myquery, 100);
//      System.out.println("*****************");
        System.out.println(tops.totalHits);
//      ScoreDoc scoreDoc[] = tops.scoreDocs;
//      for (int i = 0; i < scoreDoc.length; i++) {
//          System.out.println(scoreDoc[i].doc);
            Document doc = searcher.doc(scoreDoc[i].doc);
            System.out.println(scoreDoc[i].score);
            System.out.println(doc.getFields());
            for(IndexableField field:doc){
                System.out.println(field.name()+" "+field.stringValue());
            }
//      }


    }

    // TermQuery、BooleanQuery、PhraseQuery、PrefixQuery、
    // RangeQuery、MultiTermQuery、FilteredQuery、SpanQuery
}

wanhf11

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Lucene 入门和简单封装

管理package com.whf.demo;import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.Collections;import java.util.List;import java.util.Set;import java.util.concurren
复制链接

扫一扫

专栏目录