package org.apache.lucene.demo; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.IndexWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Date; /** *//** Index all text files under a directory. */ publicclass IndexFiles ...{ private IndexFiles() ...{} staticfinal File INDEX_DIR =new File("index"); /** *//** Index all text files under a directory. */ publicstaticvoid main(String[] args) ...{ String usage ="java org.apache.lucene.demo.IndexFiles <root_directory>"; if (args.length ==0) ...{ System.err.println("Usage: "+ usage); System.exit(1); } if (INDEX_DIR.exists()) ...{ System.out.println("Cannot save index to '"+INDEX_DIR+"' directory, please delete it first"); System.exit(1); } final File docDir =new File(args[0]); if (!docDir.exists() ||!docDir.canRead()) ...{ System.out.println("Document directory '"+docDir.getAbsolutePath()+"' does not exist or is not readable, please check the path"); System.exit(1); } Date start =new Date(); try...{ IndexWriter writer =new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true); // step 1 System.out.println("Indexing to directory '"+INDEX_DIR+"'..."); indexDocs(writer, docDir); // look here System.out.println("Optimizing..."); writer.optimize(); writer.close(); Date end =new Date(); System.out.println(end.getTime() - start.getTime() +" total milliseconds"); }catch (IOException e) ...{ System.out.println(" caught a "+ e.getClass() + " with message: "+ e.getMessage()); } } staticvoid indexDocs(IndexWriter writer, File file) throws IOException ...{ // do not try to index files that cannot be read if (file.canRead()) ...{ if (file.isDirectory()) ...{ String[] files = file.list(); // an IO error could occur if (files !=null) ...{ for (int i =0; i < files.length; i++) ...{ indexDocs(writer, new File(file, files[i])); } } }else...{ System.out.println("adding "+ file); try...{ writer.addDocument(FileDocument.Document(file)); //step 2 
} // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help catch (FileNotFoundException fnfe) ...{ ; } } } } }
As someone said, Lucene is an API. The key steps here are: 1. Create an IndexWriter: IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true); 2. Add a file to the index: writer.addDocument(FileDocument.Document(file));
Searching an index
Main step: Hits hits = searcher.search(query); [step 6]. Helper classes: 1. IndexReader: steps 1 and 2 create the Searcher object [an easier way: IndexSearcher searcher = new IndexSearcher(dirStr);]
2. QueryParser: steps 3, 4 and 5 create the query using the parse() method.
The Hits class carries the search result information.
package org.apache.lucene.demo; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.FilterIndexReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Date; /** *//** Simple command-line based search demo. */ publicclass SearchFiles ...{ /** *//** Use the norms from one field for all fields. Norms are read into memory, * using a byte of memory per document per searched field. This can cause * search of large collections with a large number of fields to run out of * memory. If all of the fields contain only a single token, then the norms * are all identical, then single norm vector may be shared. */ privatestaticclass OneNormsReader extends FilterIndexReader ...{ private String field; public OneNormsReader(IndexReader in, String field) ...{ super(in); this.field = field; } publicbyte[] norms(String field) throws IOException ...{ return in.norms(this.field); } } private SearchFiles() ...{} /** *//** Simple command-line based search demo. 
*/ publicstaticvoid main(String[] args) throws Exception ...{ String usage = "Usage: java org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]"; if (args.length >0&& ("-h".equals(args[0]) ||"-help".equals(args[0]))) ...{ System.out.println(usage); System.exit(0); } String index ="index"; String field ="contents"; String queries =null; int repeat =0; boolean raw =false; String normsField =null; for (int i =0; i < args.length; i++) ...{ if ("-index".equals(args[i])) ...{ index = args[i+1]; i++; }elseif ("-field".equals(args[i])) ...{ field = args[i+1]; i++; }elseif ("-queries".equals(args[i])) ...{ queries = args[i+1]; i++; }elseif ("-repeat".equals(args[i])) ...{ repeat = Integer.parseInt(args[i+1]); i++; }elseif ("-raw".equals(args[i])) ...{ raw =true; }elseif ("-norms".equals(args[i])) ...{ normsField = args[i+1]; i++; } } IndexReader reader = IndexReader.open(index); //step 1 if (normsField !=null) reader =new OneNormsReader(reader, normsField); Searcher searcher =new IndexSearcher(reader); //step 2 Analyzer analyzer =new StandardAnalyzer(); //step 3 BufferedReader in =null; if (queries !=null) ...{ in =new BufferedReader(new FileReader(queries)); }else...{ in =new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser =new QueryParser(field, analyzer); //step 4 while (true) ...{ if (queries ==null) // prompt the user System.out.println("Enter query: "); String line = in.readLine(); if (line ==null|| line.length() ==-1) break; line = line.trim(); if (line.length() ==0) break; Query query = parser.parse(line); //step 5 System.out.println("Searching for: "+ query.toString(field)); Hits hits = searcher.search(query); // step 6 key issue if (repeat >0) ...{ // repeat & time as benchmark Date start =new Date(); for (int i =0; i < repeat; i++) ...{ hits = searcher.search(query); } Date end =new Date(); System.out.println("Time: "+(end.getTime()-start.getTime())+"ms"); } 
System.out.println(hits.length() +" total matching documents"); finalint HITS_PER_PAGE =10; for (int start =0; start < hits.length(); start += HITS_PER_PAGE) ...{ int end = Math.min(hits.length(), start + HITS_PER_PAGE); for (int i = start; i < end; i++) ...{ if (raw) ...{ // output raw format System.out.println("doc="+hits.id(i)+" score="+hits.score(i)); continue; } Document doc = hits.doc(i); String path = doc.get("path"); if (path !=null) ...{ System.out.println((i+1) +". "+ path); String title = doc.get("title"); if (title !=null) ...{ System.out.println(" Title: "+ doc.get("title")); } }else...{ System.out.println((i+1) +". "+"No path for this document"); } } if (queries !=null) // non-interactive break; if (hits.length() > end) ...{ System.out.println("more (y/n) ? "); line = in.readLine(); if (line.length() ==0|| line.charAt(0) =='n') break; } } } reader.close(); } }
Removing Documents from an index
main step: IndexReader.deleteDocument(id)
String indexDir = ...;
Directory dir = FSDirectory.getDirectory(indexDir);
IndexReader reader = IndexReader.open(dir);
int id = ...;
reader.deleteDocument(id);
reader.close();