本文是Lucene自带示例的精简版,只保留了主要代码,以备日后查看。
对文件夹生成索引
- package zhch.illq.lucene;
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import net.paoding.analysis.analyzer.PaodingAnalyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.DateTools;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- /**
-  * Trimmed-down version of the Lucene indexing demo: indexes every regular file found directly
-  * inside a document directory and writes the index to {@link #INDEX_DIR}.
-  */
- public class LuceneIndex {
-     /** Directory that receives the index files. */
-     static final File INDEX_DIR = new File("d:\\temp\\index");
-
-     /**
-      * Indexes the documents in the document directory; the index is (re)created in
-      * {@link #INDEX_DIR}.
-      *
-      * @param args ignored
-      */
-     public static void main(String[] args) {
-         File docDir = new File("d:\\temp\\neirong");
-         // File.list() returns null when the path is not a directory or cannot be read.
-         // Check this before opening the writer, because the writer is opened with
-         // create=true and would otherwise replace an existing index with an empty one.
-         String[] files = docDir.list();
-         if (files == null) {
-             System.out.println("Document directory '" + docDir.getPath()
-                     + "' does not exist or is not a readable directory");
-             return;
-         }
-         try {
-             // Only ONE IndexWriter may be open on a directory at a time (it holds the
-             // directory's write lock). PaodingAnalyzer is used here for Chinese content;
-             // for non-Chinese content pass new StandardAnalyzer(Version.LUCENE_CURRENT)
-             // instead. The same analyzer should be used when searching the index.
-             IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new PaodingAnalyzer(), true,
-                     IndexWriter.MaxFieldLength.LIMITED);
-             try {
-                 for (String fileStr : files) {
-                     File file = new File(docDir, fileStr);
-                     // Sub-directories are skipped; this demo does not recurse.
-                     if (!file.isDirectory()) {
-                         writer.addDocument(document(file));
-                     }
-                 }
-                 writer.optimize();
-             } finally {
-                 // Always close the writer, even when indexing fails part-way.
-                 writer.close();
-             }
-         } catch (IOException e) {
-             System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
-         }
-     }
-
-     /**
-      * Builds the Lucene document for one file, with the fields {@code path}, {@code modified}
-      * and {@code contents}.
-      *
-      * @param f the file to index
-      * @return the document describing {@code f}
-      * @throws java.io.FileNotFoundException if {@code f} cannot be opened for reading
-      */
-     public static Document document(File f) throws java.io.FileNotFoundException {
-         Document doc = new Document();
-         // path: stored and indexed (searchable) but not tokenized
-         doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
-         // last-modified time at minute resolution: stored and indexed but not tokenized
-         doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
-                 Field.Store.YES, Field.Index.NOT_ANALYZED));
-         // contents: supplied as a Reader, so the text is analyzed but not stored.
-         // NOTE: FileReader decodes the file with the platform default charset.
-         doc.add(new Field("contents", new FileReader(f)));
-         return doc;
-     }
- }
对索引进行查询
- package zhch.illq.lucene;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import net.paoding.analysis.analyzer.PaodingAnalyzer;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Searcher;
- import org.apache.lucene.search.TopScoreDocCollector;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- /** Simple command-line based search demo, trimmed down from the Lucene search demo. */
- public class LuceneSearch {
-     /**
-      * Reads queries from standard input, one per line, and prints the top hits for each
-      * until an empty line or end of input is reached.
-      *
-      * @param args ignored
-      * @throws Exception if the index cannot be opened or read, or a query cannot be parsed
-      */
-     public static void main(String[] args) throws Exception {
-         String index = "d:\\temp\\index";
-         String field = "contents";
-         // Always null in this trimmed demo, so queries are always read interactively.
-         String queries = null;
-         boolean raw = false;
-         // number of hits to display
-         int hitsPerPage = 10;
-         // only searching, so open the index read-only (second argument true)
-         IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true);
-         try {
-             Searcher searcher = new IndexSearcher(reader);
-             // PaodingAnalyzer is used for Chinese content; for non-Chinese content use
-             // new StandardAnalyzer(Version.LUCENE_CURRENT) instead. This should match the
-             // analyzer that was used to build the index.
-             Analyzer analyzer = new PaodingAnalyzer();
-             BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
-             QueryParser parser = new QueryParser(field, analyzer);
-             while (true) {
-                 if (queries == null) { // prompt the user
-                     System.out.println("Enter query: ");
-                 }
-                 String line = in.readLine();
-                 if (line == null) { // end of input
-                     break;
-                 }
-                 line = line.trim();
-                 if (line.length() == 0) { // an empty line ends the session
-                     break;
-                 }
-                 Query query = parser.parse(line);
-                 System.out.println("Searching for: " + query.toString(field));
-                 doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null);
-             }
-         } finally {
-             // Close the reader even when reading input, parsing or searching fails.
-             reader.close();
-         }
-     }
-
-     /**
-      * Runs the query and prints the total hit count followed by the path and modification
-      * time of at most {@code hitsPerPage} top-scoring documents.
-      *
-      * @param in unused in this trimmed demo (kept for signature compatibility)
-      * @param searcher the searcher to run the query against
-      * @param query the parsed query
-      * @param hitsPerPage the maximum number of hits to collect and print
-      * @param raw unused in this trimmed demo
-      * @param interactive unused in this trimmed demo
-      * @throws IOException if searching or loading a document fails
-      */
-     public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage, boolean raw,
-             boolean interactive) throws IOException {
-         TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, false);
-         searcher.search(query, collector);
-         ScoreDoc[] hits = collector.topDocs().scoreDocs;
-         int numTotalHits = collector.getTotalHits();
-         System.out.println(numTotalHits + " total matching documents");
-         int start = 0;
-         int end = Math.min(hits.length, start + hitsPerPage);
-         for (int i = start; i < end; i++) {
-             Document doc = searcher.doc(hits[i].doc);
-             String path = doc.get("path");
-             if (path != null) {
-                 System.out.println((i + 1) + ". " + path);
-                 System.out.println(" modified: " + doc.get("modified"));
-             } else {
-                 System.out.println((i + 1) + ". " + "No path for this document");
-             }
-         }
-     }
- }