基于 Java 的全文检索接口 Lucene 2.4 测试（一）

最新推荐文章于 2024-07-09 06:12:17 发布

lwlsoftware

最新推荐文章于 2024-07-09 06:12:17 发布

阅读量678

点赞数

分类专栏：全文检索-搜索引擎-lucene 文章标签：全文检索 lucene java 测试 file string

本文链接：https://blog.csdn.net/lwlsoftware/article/details/3679993

版权

全文检索-搜索引擎-lucene 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

 
 Indexer.java
package lucene.main.test;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
public class Indexer{
 /*
  * @param args
  * @throws Exception
  * @author rush 
  */
 public static void main(String[] args) throws Exception{
  File indexDir=new File("D://lucene//temp//Text_Index_Path");//存放索引目录  
  File dataDir=new File("D://lucene//temp//TextPath//");//被索引目录 
  
  long start=new Date().getTime();
  int numIndexed=index(indexDir,dataDir);
  long end=new Date().getTime();
  
  System.out.println("Indexing "+numIndexed+" file took "+(end-start)+" milliseconds");
  System.out.println(numIndexed);
  
  
 }
 private static int index(File indexDir, File dataDir) throws IOException {
  // TODO Auto-generated method stub 
  if(!dataDir.exists()||!dataDir.isDirectory())
  {
   throw new IOException(dataDir+"does not exit or is not a directory");
  }
  
  IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true,MaxFieldLength.LIMITED);
  writer.setUseCompoundFile(false);//默认创建复合式索引,false:多文件索引 true 复合式索引 
  indexDirectory(writer,dataDir);
  
  int numIndexed=writer.maxDoc();//返回在指定索引中 
  writer.optimize();//优化 
  writer.close();
  return numIndexed;
 }
 private static void indexDirectory(IndexWriter writer, File dir) throws IOException {
  // TODO Auto-generated method stub 
  File[] files=dir.listFiles();//返回抽象路径名数组，这些路径名表示此抽象路径名表示的目录中的文件和目录。 
  
  for(int i=0;i<files.length;i++)
  {
   File file=files[i];
   if(file.isDirectory())
   {
    indexDirectory(writer,file);
   }else if(file.getName().endsWith(".txt")){
    indexFile(writer,file);
   }
  }
  
 }
 private static void indexFile(IndexWriter writer, File file) throws IOException {
  // TODO Auto-generated method stub 
  if(file.isHidden()||!file.canRead()||!file.exists())
  {
   return;
  }
  //getCanonicalPath()返回文件路径名的规范形式 
  System.out.println("Indexing "+file.getCanonicalPath());
  
  Document doc=new Document();
  
  Field contents=new Field("contents",new FileReader(file));
  Field filename=new Field("filename",file.getCanonicalPath(),Field.Store.YES,Field.Index.ANALYZED,Field.TermVector.NO);
  doc.add(contents);
  doc.add(filename);
  writer.addDocument(doc);
 }
}
Searcher.java
package lucene.main.test;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/*
 * @param args
 * @throws Exception
 * @author rush 
 */
public class Searcher {
  public static void main(String[] args) throws Exception{
   File indexDir=new File("D://lucene//temp//Text_Index_Path");
   String keywords="love";
   
   if(!indexDir.exists()||!indexDir.isDirectory())
   {
    throw new Exception(indexDir+"does not exits or is not a directory.");
   }
   
   search(indexDir,keywords);
  }
private static void search(File indexDir, String keywords) throws IOException, ParseException {
 // TODO Auto-generated method stub 
 Directory fsDir=FSDirectory.getDirectory(indexDir);//获取索引目录 
 IndexSearcher searcher=new IndexSearcher(fsDir);//创建索引查找器 
 //创建解析器(查询字符串的解析),指定域名和分析器 
 QueryParser qp=new QueryParser("contents",new StandardAnalyzer());
 //指定查找关键字 
 Query query=qp.parse(keywords);
 //指定返回的结果集数 
 int hitsPerPage=100;
 TopDocCollector collector=new TopDocCollector(hitsPerPage);
 long start=new Date().getTime();
 //开始查找，传入查找的关键字参数和返回结果数参数 
 searcher.search(query,collector);
 ScoreDoc[] hits=collector.topDocs().scoreDocs;
 long end=new Date().getTime();
 System.out.println("Found "+hits.length+" documents in "+(end-start)+" milliseconds that matched query '"+keywords+"':");
 for(int i=0;i<hits.length;i++){
  int docId=hits[i].doc;
  Document doc=searcher.doc(docId);
  System.out.println(docId+":"+doc.get("filename"));
 }
}
}
 

lwlsoftware

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
基于 Java 的全文检索接口 Lucene 2.4 测试（一）

Indexer.javapackage lucene.main.test;import java.io.File;import java.io.FileReader;import java.io.IOException;import java.util.Date;import org.apache.lucene.analysis.standard.S
复制链接

扫一扫