Lucene全文检索样例(解决大文本建索引)

建索引:

package  com.pccw;   
  
 import  java.io.BufferedReader;   
 import  java.io.File;   
 import  java.io.FileInputStream;   
 import  java.io.IOException;   
 import  java.io.InputStreamReader;   
 import  java.util.Date;   
  
 import  org.apache.lucene.analysis.Analyzer;   
 import  org.apache.lucene.analysis.standard.StandardAnalyzer;   
 import  org.apache.lucene.document.Document;   
 import  org.apache.lucene.document.Field;   
 import  org.apache.lucene.index.IndexWriter;   
  
 /**
  * Builds a Lucene full-text index over the {@code .txt} files of one directory.
  *
  * <p>Every matching file is read completely into memory (decoded as GBK) and
  * stored as one document with two fields: the file path (stored, not indexed)
  * and the file body (stored, tokenized, with term vectors).
  *
  * @author Shane in PCCW
  */
 public class TextFileIndexer {

     /**
      * Indexes every regular {@code .txt} file directly inside {@code c:\s}
      * into a freshly created index at {@code c:\index}, then prints the
      * elapsed time in milliseconds.
      *
      * @param args unused
      * @throws Exception if the source directory cannot be listed, a file
      *                   cannot be read, or the index cannot be written
      */
     public static void main(String[] args) throws Exception {
         /* Directory whose files are indexed: the "s" folder on drive C. */
         File fileDir = new File("c:\\s");

         /* Directory that receives the index files. */
         File indexDir = new File("c:\\index");

         // listFiles() returns null when the path is missing or not a
         // directory; fail with a clear message before the existing index is
         // overwritten instead of hitting a NullPointerException later.
         File[] textFiles = fileDir.listFiles();
         if (textFiles == null) {
             throw new IOException("Cannot list directory: " + fileDir.getPath());
         }

         Analyzer luceneAnalyzer = new StandardAnalyzer();
         // The third argument (true) creates a new index, replacing any old one.
         IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
         long startTime = System.currentTimeMillis();
         try {
             // Raise the per-field length limit (very important for large
             // texts): with the writer's default limit the tail of a big
             // document would not be indexed.
             indexWriter.setMaxFieldLength(99999999);

             // Add one document per text file to the index.
             for (int i = 0; i < textFiles.length; i++) {
                 File textFile = textFiles[i];
                 if (textFile.isFile() && textFile.getName().endsWith(".txt")) {
                     String canonicalPath = textFile.getCanonicalPath();
                     System.out.println(" File  " + canonicalPath + " 正在被索引. ");
                     // The charset name must not carry padding spaces,
                     // otherwise the JDK rejects it as unsupported.
                     String temp = FileReaderAll(canonicalPath, "GBK");
                     System.out.println(temp);

                     Document document = new Document();
                     Field pathField = new Field(" path ", textFile.getPath(),
                             Field.Store.YES, Field.Index.NO);
                     Field bodyField = new Field(" body ", temp, Field.Store.YES,
                             Field.Index.TOKENIZED,
                             Field.TermVector.WITH_POSITIONS_OFFSETS);
                     document.add(pathField);
                     document.add(bodyField);
                     indexWriter.addDocument(document);
                 }
             }
             // optimize() optimizes the index once all documents are added.
             indexWriter.optimize();
         } finally {
             // Always close the writer, even when indexing fails part-way.
             indexWriter.close();
         }

         // Report how long indexing took.
         long endTime = System.currentTimeMillis();
         System.out.println(" 这花费了 "
                 + (endTime - startTime)
                 + "  毫秒来把文档增加到索引里面去! "
                 + fileDir.getPath());
     }

     /**
      * Reads a whole text file into a string.
      *
      * <p>Lines are read with {@link BufferedReader#readLine()} and each one is
      * re-terminated with a single {@code '\n'}, so the result always ends
      * with a newline unless the file is empty.
      *
      * @param FileName path of the file to read
      * @param charset  name of the charset used to decode the file
      * @return the file content with normalized line terminators
      * @throws IOException if the file cannot be opened or read, or the
      *                     charset name is not supported
      */
     public static String FileReaderAll(String FileName, String charset)
             throws IOException {
         FileInputStream in = new FileInputStream(FileName);
         try {
             BufferedReader reader =
                     new BufferedReader(new InputStreamReader(in, charset));
             // StringBuilder keeps this linear in the file size; repeated
             // String concatenation would copy the whole text on every line.
             StringBuilder content = new StringBuilder();
             String line;
             while ((line = reader.readLine()) != null) {
                 content.append(line).append('\n');
             }
             return content.toString();
         } finally {
             // Closing the underlying stream releases the file handle even if
             // the charset is rejected or reading fails.
             in.close();
         }
     }
 }

查询:

package  com.pccw;   
  
 import  java.io.IOException;   
  
 import  org.apache.lucene.analysis.Analyzer;   
 import  org.apache.lucene.analysis.standard.StandardAnalyzer;   
 import  org.apache.lucene.queryParser.ParseException;   
 import  org.apache.lucene.queryParser.QueryParser;   
 import  org.apache.lucene.search.Hits;   
 import  org.apache.lucene.search.IndexSearcher;   
 import  org.apache.lucene.search.Query;   
  
 /**
  * Runs one fixed query against the index stored at {@code c:\index} and
  * prints the number of matching documents when there is at least one.
  */
 public class TestQuery {

     /**
      * Parses the query string against the {@code " body "} field, searches
      * the index and reports the hit count.
      *
      * @param args unused
      * @throws IOException    if the index cannot be opened or searched
      * @throws ParseException if the query string cannot be parsed
      */
     public static void main(String[] args) throws IOException, ParseException {
         String queryString = "中华";

         // Parse before opening the index. A parse failure now propagates to
         // the caller instead of being swallowed and leaving the query null.
         Analyzer analyzer = new StandardAnalyzer();
         // The field name (including its padding) must match the name the
         // documents were indexed under.
         QueryParser qp = new QueryParser(" body ", analyzer);
         Query query = qp.parse(queryString);

         // Same index directory the indexer writes to; no padding spaces.
         IndexSearcher searcher = new IndexSearcher("c:\\index");
         try {
             Hits hits = searcher.search(query);
             if (hits.length() > 0) {
                 System.out.println(" 找到: " + hits.length() + "  个结果! ");
             }
         } finally {
             // Release the index files held by the searcher.
             searcher.close();
         }
     }

 }

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值