1.lucene创建索引和搜索,主要用到以下几个类:IndexWriter、Document、Analyzer(创建索引);IndexSearcher、QueryParser、Query、TopDocs(搜索)。
2.通过FSDirectory和RAMDirectory的并用,可以提高速度。先把磁盘上的索引文件载入内存,然后在内存操作,免去了IO操作,可以提高效率,最后退出时,要把内存操作的结果保存在磁盘上。
3.fsIndexWriter.optimize();优化索引文件,把多个cfs文件合并成一个
4.建立索引和进行搜索时应该使用同一个分词器。
简单的例子(代码如下):
package utils;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
 * Builds a Lucene index from the {@code .java} and {@code .txt} files found
 * under a data directory (searched recursively).
 *
 * <p>Each indexed file becomes one {@link Document} with three fields:
 * {@code path} (canonical path, stored, not tokenized), {@code contents}
 * (fed from a {@link Reader} over the file) and {@code name} (file name,
 * stored, not tokenized).
 */
public class Indexer {

    /**
     * Indexes every eligible file under {@code dataDir} into {@code indexDir}.
     *
     * @param indexDir directory in which the index files are written
     * @param dataDir  root directory whose files are to be indexed
     * @return the document count reported by the writer after indexing
     * @throws IOException if {@code dataDir} does not exist or is not a
     *                     directory, or if reading/writing fails
     */
    public int index(String indexDir, String dataDir) throws IOException {
        File indexDirFile = new File(indexDir);
        File dataDirFile = new File(dataDir);
        // Propagate the real count; the previous version discarded it and
        // always returned 0.
        return index(indexDirFile, dataDirFile);
    }

    /**
     * Opens an {@link IndexWriter} on {@code indexDirFile}, indexes the tree
     * rooted at {@code dataDirFile}, optimizes the index and closes the writer.
     *
     * @return the writer's document count, read before optimize/close
     * @throws IOException if {@code dataDirFile} is missing or not a directory,
     *                     or if indexing fails
     */
    private int index(File indexDirFile, File dataDirFile) throws IOException {
        if (!dataDirFile.exists() || !dataDirFile.isDirectory()) {
            throw new IOException(dataDirFile + " does not exist or is not a directory");
        }
        // NOTE(review): the third argument is the writer's "create" flag; with
        // true an existing index at this location is presumably replaced --
        // confirm against the Lucene version in use.
        IndexWriter writer = new IndexWriter(indexDirFile, new StandardAnalyzer(), true);
        try {
            writer.setUseCompoundFile(false);
            indexDirectory(writer, dataDirFile);

            int numIndexed = writer.docCount();
            writer.optimize();
            return numIndexed;
        } finally {
            // Always release the writer, even when indexing throws part-way.
            writer.close();
        }
    }

    /**
     * Recursively walks {@code dataDirFile}, indexing each file whose name
     * ends with {@code .java} or {@code .txt}.
     */
    private void indexDirectory(IndexWriter writer, File dataDirFile) throws IOException {
        File[] files = dataDirFile.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an
            // I/O error occurs (e.g. an unreadable sub-directory); skip it
            // instead of failing with a NullPointerException.
            return;
        }
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(writer, f);
            } else if (f.getName().endsWith(".java") || f.getName().endsWith(".txt")) {
                // File types that should be indexed.
                indexFile(writer, f);
            }
        }
    }

    /**
     * Adds a single file to the index. Hidden, missing or unreadable files
     * are silently skipped.
     */
    private void indexFile(IndexWriter writer, File f) throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing" + f.getCanonicalPath());
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset.
        Reader txtReader = new FileReader(f);
        try {
            doc.add(new Field("path", f.getCanonicalPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.add(new Field("contents", txtReader));
            doc.add(new Field("name", f.getName(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            writer.addDocument(doc);
        } finally {
            // The reader is only needed while addDocument consumes it; close it
            // here so the file handle is released even if addDocument throws.
            txtReader.close();
        }
    }

}
调用的代码如下:
1 String filesRepoDir = "C:/workspace-2.0";//需要被索引的目录 2 String indexDir = "C:/apache-tomcat-6.0.18/webapps/index";//存放索引的目录 3 Indexer indexer= new Indexer(); 4 indexer.index(indexDir, filesRepoDir);