Lucene in 5 minutes
from LuceneTutorial.com
Now updated for Lucene 4.0!
Lucene makes it easy to add full-text search capability to your application. In fact, it's so easy, I'm going to show you how in 5 minutes!
1. Index
For this simple case, we're going to create an in-memory index from some strings.
// The analyzer tokenizes text; the same analyzer should be used for
// indexing and searching.
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

// Build the index in memory and add four sample books (title, isbn).
Directory index = new RAMDirectory();
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
IndexWriter w = new IndexWriter(index, config);
addDoc(w, "Lucene in Action", "193398817");
addDoc(w, "Lucene for Dummies", "55320055Z");
addDoc(w, "Managing Gigabytes", "55063554A");
addDoc(w, "The Art of Computer Science", "9900333X");
w.close();
addDoc() is what actually adds documents to the index:
Document doc = new Document ( ) ;
doc. add ( new TextField ( "title", title, Field. Store. YES ) ) ;
doc. add ( new StringField ( "isbn", isbn, Field. Store. YES ) ) ;
w. addDocument (doc ) ;
}
Note the use of TextField for content we want tokenized, and StringField for id fields and the like, which we don't want tokenized.
2. Query
We take the query string from the first command-line argument (defaulting to "lucene"), parse it, and build a Lucene Query out of it.
// Use the first command-line argument as the query, or "lucene" if none is given.
String querystr = args.length > 0 ? args[0] : "lucene";
// The "title" arg specifies the default field to use
// when no field is explicitly specified in the query.
Query q = new QueryParser(Version.LUCENE_40, "title", analyzer).parse(querystr);
3. Search
Using the Query we create an IndexSearcher to search the index. Then a TopScoreDocCollector is instantiated to collect the top 10 scoring hits.
// Maximum number of hits the collector keeps.
int hitsPerPage = 10;
// Open the reader through DirectoryReader, matching the complete program below.
IndexReader reader = DirectoryReader.open(index);
IndexSearcher searcher = new IndexSearcher(reader);
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
searcher.search(q, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
4. Display
Now that we have results from our search, we display the results to the user.
// Print each hit as "<rank>. <isbn><TAB><title>", with ranks starting at 1.
int rank = 0;
for (ScoreDoc hit : hits) {
  rank++;
  Document d = searcher.doc(hit.doc);
  String isbn = d.get("isbn");
  String title = d.get("title");
  System.out.println(rank + ". " + isbn + "\t" + title);
}
Here's the app in its entirety. Download HelloLucene.java
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene example: builds an in-memory index of four book titles,
 * runs one query against the "title" field, and prints the matching
 * documents to standard output.
 */
public class HelloLucene {

  /**
   * Indexes the sample books, searches them, and prints the hits.
   *
   * @param args optional; {@code args[0]} is the query string, defaulting to
   *             {@code "lucene"} when no argument is given
   * @throws IOException    if indexing, opening the reader, or searching fails
   * @throws ParseException if the query string cannot be parsed
   */
  public static void main(String[] args) throws IOException, ParseException {
    // 0. Specify the analyzer for tokenizing text.
    //    The same analyzer should be used for indexing and searching.
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

    // 1. create the index
    Directory index = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    IndexWriter w = new IndexWriter(index, config);
    try {
      addDoc(w, "Lucene in Action", "193398817");
      addDoc(w, "Lucene for Dummies", "55320055Z");
      addDoc(w, "Managing Gigabytes", "55063554A");
      addDoc(w, "The Art of Computer Science", "9900333X");
    } finally {
      // Close the writer even if adding a document throws.
      w.close();
    }

    // 2. query
    String querystr = args.length > 0 ? args[0] : "lucene";
    // The "title" arg specifies the default field to use
    // when no field is explicitly specified in the query.
    Query q = new QueryParser(Version.LUCENE_40, "title", analyzer).parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
      searcher.search(q, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;

      // 4. display results
      System.out.println("Found " + hits.length + " hits.");
      for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
      }
    } finally {
      // The reader can only be closed when there is no need to access
      // the documents any more; closing here also covers a failed search.
      reader.close();
    }
  }

  /**
   * Adds one book to the index as a document with a "title" and an "isbn" field.
   *
   * @param w     writer for the index the document is added to
   * @param title book title; stored and indexed as a TextField
   * @param isbn  book identifier; stored and indexed as a StringField
   * @throws IOException if the writer fails to add the document
   */
  private static void addDoc(IndexWriter w, String title, String isbn) throws IOException {
    Document doc = new Document();
    doc.add(new TextField("title", title, Field.Store.YES));
    // use a string field for isbn because we don't want it tokenized
    doc.add(new StringField("isbn", isbn, Field.Store.YES));
    w.addDocument(doc);
  }
}