Lucene检索文件(txt,jsp,html)

Lucene可检索的文件包括txt、jsp、html格式(如果是word和pdf格式,需要先进行格式转换)

建立索引文件的代码如下:

import org.apache.lucene.index.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.document.*;
import java.io.*;
/**
 * Builds a fresh Lucene index in the "myindex" directory from every regular
 * file found directly inside the "mydoc" directory.
 *
 * <p>Each indexed document carries three fields: "path" (the file path),
 * "modified" (the last-modified timestamp encoded by {@code DateField}) and
 * "contents" (the file text, streamed through a {@code Reader}).
 *
 * @author Eric Zhang
 */
public class IndexFiles {
  /**
   * Entry point: (re)creates the index and adds one document per file.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    try {
      File docDir = new File("mydoc");
      // File.list() returns null when the path is missing or not a directory;
      // check before touching the index so an existing index is not wiped.
      String[] names = docDir.list();
      if (names == null) {
        System.out.println("Cannot list directory: " + docDir.getPath());
        return;
      }

      // Third argument 'true' asks IndexWriter to create/overwrite the index.
      IndexWriter writer = new IndexWriter("myindex", new StandardAnalyzer(), true);
      int added = 0;
      try {
        for (int i = 0; i < names.length; i++) {
          File file = new File(docDir, names[i]);
          // Sub-directories cannot be opened as a stream; skip them instead
          // of aborting the whole indexing run.
          if (!file.isFile()) {
            continue;
          }

          Document doc = new Document();
          doc.add(Field.Text("path", file.getPath()));
          doc.add(Field.Keyword("modified", DateField.timeToString(file.lastModified())));

          // NOTE(review): the reader decodes with the platform default charset;
          // confirm this matches the encoding of the files in "mydoc".
          Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
          try {
            doc.add(Field.Text("contents", reader));
            writer.addDocument(doc);
          } finally {
            // Guarantees the file handle is released even if addDocument fails.
            reader.close();
          }

          added++;
          System.out.println("Added : " + doc.get("path"));
        }
        writer.optimize();
      } finally {
        // Always release the writer (and its index lock), even on failure.
        writer.close();
      }
      System.out.println("Has Added Total: " + added);
    } catch (Exception e) {
      System.out.println(e);
    }
  }
}

检索索引的Java代码如下:

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.document.*;
//import com.augmentum.hrms.*;
import java.util.Date;
/**
 * Searches the Lucene index stored in the "myindex" directory for the term
 * "数据库" in the "contents" field and prints the "path" and "modified" fields
 * of every hit.
 *
 * @author Eric Zhang
 */
public class SearchFile {
  /**
   * Entry point: parses the fixed query, runs it and prints each hit.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    Analyzer analyzer = new StandardAnalyzer();
    try {
      Query query = QueryParser.parse("数据库", "contents", analyzer);
      System.out.println("Searching for : " + query.toString("contents"));

      Searcher searcher = new IndexSearcher("myindex");
      try {
        Hits hits = searcher.search(query);
        int total = hits.length();
        // Hits are read while the searcher is still open; documents are
        // fetched from the index on demand.
        for (int i = 0; i < total; i++) {
          Document doc = hits.doc(i);
          String path = doc.get("path");
          System.out.println("Find: " + i + ": " + path);
          System.out.println("Find: " + doc.get("modified"));
          System.out.println("Find: " + path);
        }
        System.out.println("Find Total: " + total);
      } finally {
        // Release the index files held by the searcher, even on failure.
        searcher.close();
      }
    } catch (Exception e) {
      System.out.println(e);
    }
  }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值