- Indexer.java
- package lucene.main.test;
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.Date;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriter.MaxFieldLength;
- public class Indexer{
- /*
- * @param args
- * @throws Exception
- * @author rush
- */
- public static void main(String[] args) throws Exception{
- File indexDir=new File("D://lucene//temp//Text_Index_Path");//存放索引目录
- File dataDir=new File("D://lucene//temp//TextPath//");//被索引目录
- long start=new Date().getTime();
- int numIndexed=index(indexDir,dataDir);
- long end=new Date().getTime();
- System.out.println("Indexing "+numIndexed+" file took "+(end-start)+" milliseconds");
- System.out.println(numIndexed);
- }
- private static int index(File indexDir, File dataDir) throws IOException {
- // TODO Auto-generated method stub
- if(!dataDir.exists()||!dataDir.isDirectory())
- {
- throw new IOException(dataDir+"does not exit or is not a directory");
- }
- IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true,MaxFieldLength.LIMITED);
- writer.setUseCompoundFile(false);//默认创建复合式索引,false:多文件索引 true 复合式索引
- indexDirectory(writer,dataDir);
- int numIndexed=writer.maxDoc();//返回在指定索引中
- writer.optimize();//优化
- writer.close();
- return numIndexed;
- }
- private static void indexDirectory(IndexWriter writer, File dir) throws IOException {
- // TODO Auto-generated method stub
- File[] files=dir.listFiles();//返回抽象路径名数组,这些路径名表示此抽象路径名表示的目录中的文件和目录。
- for(int i=0;i<files.length;i++)
- {
- File file=files[i];
- if(file.isDirectory())
- {
- indexDirectory(writer,file);
- }else if(file.getName().endsWith(".txt")){
- indexFile(writer,file);
- }
- }
- }
- private static void indexFile(IndexWriter writer, File file) throws IOException {
- // TODO Auto-generated method stub
- if(file.isHidden()||!file.canRead()||!file.exists())
- {
- return;
- }
- //getCanonicalPath()返回文件路径名的规范形式
- System.out.println("Indexing "+file.getCanonicalPath());
- Document doc=new Document();
- Field contents=new Field("contents",new FileReader(file));
- Field filename=new Field("filename",file.getCanonicalPath(),Field.Store.YES,Field.Index.ANALYZED,Field.TermVector.NO);
- doc.add(contents);
- doc.add(filename);
- writer.addDocument(doc);
- }
- }
- Searcher.java
- package lucene.main.test;
- import java.io.File;
- import java.io.IOException;
- import java.util.Date;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocCollector;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- /*
- * @param args
- * @throws Exception
- * @author rush
- */
- public class Searcher {
- public static void main(String[] args) throws Exception{
- File indexDir=new File("D://lucene//temp//Text_Index_Path");
- String keywords="love";
- if(!indexDir.exists()||!indexDir.isDirectory())
- {
- throw new Exception(indexDir+"does not exits or is not a directory.");
- }
- search(indexDir,keywords);
- }
- private static void search(File indexDir, String keywords) throws IOException, ParseException {
- // TODO Auto-generated method stub
- Directory fsDir=FSDirectory.getDirectory(indexDir);//获取索引目录
- IndexSearcher searcher=new IndexSearcher(fsDir);//创建索引查找器
- //创建解析器(查询字符串的解析),指定域名和分析器
- QueryParser qp=new QueryParser("contents",new StandardAnalyzer());
- //指定查找关键字
- Query query=qp.parse(keywords);
- //指定返回的结果集数
- int hitsPerPage=100;
- TopDocCollector collector=new TopDocCollector(hitsPerPage);
- long start=new Date().getTime();
- //开始查找,传入查找的关键字参数和返回结果数参数
- searcher.search(query,collector);
- ScoreDoc[] hits=collector.topDocs().scoreDocs;
- long end=new Date().getTime();
- System.out.println("Found "+hits.length+" documents in "+(end-start)+" milliseconds that matched query '"+keywords+"':");
- for(int i=0;i<hits.length;i++){
- int docId=hits[i].doc;
- Document doc=searcher.doc(docId);
- System.out.println(docId+":"+doc.get("filename"));
- }
- }
- }