Lucene 初步

最新推荐文章于 2024-11-10 16:26:55 发布

goodsun00

最新推荐文章于 2024-11-10 16:26:55 发布

阅读量387

点赞数

文章标签： lucene import string query file null

本文链接：https://blog.csdn.net/goodsun00/article/details/6322066

版权

本文示例采用 Lucene 2.4 版本。

package com.hxs;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
 * Builds a Lucene index from the {@code .txt} files located directly inside a
 * directory.
 *
 * <p>Each matching file is read as GBK text and added to the index as one
 * document with two fields: {@code path} (stored, not indexed) and
 * {@code body} (stored, tokenized, with term vectors carrying positions and
 * offsets).
 *
 * @author lighter (2006-8-7)
 */
public class TextFileIndexer {
    /**
     * Indexes every {@code .txt} file in the source directory and prints the
     * elapsed time in milliseconds.
     *
     * @param args unused
     * @throws Exception if the source directory cannot be listed, a file
     *         cannot be read, or the index cannot be written
     */
    public static void main(String[] args) throws Exception {
        // Directory whose .txt files are indexed.
        File fileDir = new File("E://opt");

        // Directory the index files are written to (here the same directory
        // as the source files).
        File indexDir = new File("E://opt");

        // listFiles() returns null when the path is not a directory or an I/O
        // error occurs; fail with a clear message instead of a later
        // NullPointerException.
        File[] textFiles = fileDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in directory: "
                    + fileDir.getPath());
        }

        Analyzer luceneAnalyzer = new StandardAnalyzer();
        // The 'true' flag asks Lucene to create the index (see the
        // IndexWriter documentation for its effect on an existing index).
        IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,
                true);
        long startTime = System.currentTimeMillis();

        try {
            // Add one document per text file.
            for (int i = 0; i < textFiles.length; i++) {
                File textFile = textFiles[i];
                if (!textFile.isFile()
                        || !textFile.getName().endsWith(".txt")) {
                    continue;
                }

                String canonicalPath = textFile.getCanonicalPath();
                System.out.println("File" + canonicalPath + "正在被索引 .");
                String body = FileReaderAll(canonicalPath, "GBK");
                System.out.println(body);

                Document document = new Document();
                Field pathField = new Field("path", textFile.getPath(),
                        Field.Store.YES, Field.Index.NO);
                Field bodyField = new Field("body", body, Field.Store.YES,
                        Field.Index.TOKENIZED,
                        Field.TermVector.WITH_POSITIONS_OFFSETS);
                document.add(pathField);
                document.add(bodyField);
                indexWriter.addDocument(document);
            }
            // Optimize the index once all documents have been added.
            indexWriter.optimize();
        } finally {
            // Always close the writer, even when reading or indexing fails.
            indexWriter.close();
        }

        // Report how long indexing took.
        long endTime = System.currentTimeMillis();
        System.out
                .println(" 这花费了 "
                        + (endTime - startTime)
                        + " 毫秒来把文档增加到索引里面去! "
                        + fileDir.getPath());
    }

    /**
     * Reads a whole text file into a single string.
     *
     * <p>The file is read line by line and the lines are concatenated without
     * their line terminators, so the last word of one line is directly
     * followed by the first word of the next.
     *
     * @param FileName path of the file to read
     * @param charset name of the character encoding used to decode the file
     * @return the concatenated lines, or an empty string for an empty file
     * @throws IOException if the file cannot be opened or read, or the
     *         charset is not supported
     */
    public static String FileReaderAll(String FileName, String charset)
            throws IOException {
        FileInputStream in = new FileInputStream(FileName);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(in, charset));
            // StringBuilder avoids the quadratic cost of repeated String
            // concatenation on large files.
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            return content.toString();
        } finally {
            // Closing the underlying stream releases the file handle even if
            // decoding or reading failed part-way.
            in.close();
        }
    }
}

package com.hxs;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

/**
 * Runs a fixed query against the {@code body} field of the Lucene index at
 * {@code E://opt} and prints the number of hits when there is at least one.
 */
public class TestQuery {
    /**
     * Parses the query string, searches the index and reports the hit count.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String queryString = "人民";
        IndexSearcher searcher = new IndexSearcher("E://opt");

        try {
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser qp = new QueryParser("body", analyzer);
            // Let a ParseException propagate: swallowing it would leave the
            // query null and make the search fail with an unrelated error.
            Query query = qp.parse(queryString);

            Hits hits = searcher.search(query);
            if (hits.length() > 0) {
                System.out.println(" 找到: " + hits.length() + " 个结果! ");
            }
        } finally {
            // Release the index files held by the searcher.
            searcher.close();
        }
    }

}