Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包。它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便地在目标系统中实现全文检索的功能,或者以此为基础建立起完整的全文检索引擎。
以上介绍来自百度百科。
全文索引工具一般都由以下三部分组成:
1,索引部分
2,分词部分
3,搜索部分
下面进入Lucene的学习,先用Lucene创建索引,再对索引进行搜索。
创建工程,并引入jar包lucene-core-3.6.2.jar。
HelloLucene.java
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import java.io.Reader;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.LockObtainFailedException;
- import org.apache.lucene.store.RAMDirectory;
- import org.apache.lucene.util.Version;
- public class HelloLucene {
- /**
- * 建立索引
- */
- public void index(){
- IndexWriter writer = null;
- try {
- //1,创建词典
- // Directory directory = new RAMDirectory();//存储到内存
- Directory directory = FSDirectory.open(new File("D:\\lucene\\index"));
- //2,创建IndexWriter索引笔
- IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
- writer = new IndexWriter(directory, config);
- //3,创建Document对象
- Document doc = null;
- File file = new File("d:/lucene/file");
- for(File f : file.listFiles()){
- doc = new Document();
- //4,为Document添加Field
- doc.add(new Field("content",new FileReader(f)));
- doc.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));
- doc.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));
- //5,通过IndexWriter添加文档到索引中
- writer.addDocument(doc);
- }
- } catch (CorruptIndexException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (LockObtainFailedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }finally{
- if(writer!=null){
- try {
- writer.close();
- } catch (CorruptIndexException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
- }
- /**
- * 搜索
- */
- public void search(){
- IndexReader reader = null;
- try {
- //1,创建Directory
- Directory directory = FSDirectory.open(new File("d:/lucene/index"));
- //2,创建IndexReader
- reader = IndexReader.open(directory);
- //3,根据IndexReader创建IndexSearcher
- IndexSearcher searcher = new IndexSearcher(reader);
- //4,创建搜索的Query
- QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
- Query query = parser.parse("Apache License");
- //5,根据searcher搜索并返回TopDocs
- TopDocs tds = searcher.search(query, 10);
- //6,根据TopDocs获取ScoreDoc对象
- ScoreDoc[] sds = tds.scoreDocs;
- //7,根据searcher和ScoreDoc对象获取具体的Document对象
- for(ScoreDoc sd:sds){
- Document doc = searcher.doc(sd.doc);
- //8,根据Document对象获取需要的值
- System.out.println(doc.get("filename")+":"+doc.get("path"));
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (ParseException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }finally{
- //9,关闭reader
- if(reader!=null){
- try {
- reader.close();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
- }
- }
测试
- import static org.junit.Assert.*;
- import org.junit.BeforeClass;
- import org.junit.Test;
- public class TestCase {
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- }
- @Test
- public void testIndex() {
- new HelloLucene().index();
- }
- @Test
- public void testSearch(){
- new HelloLucene().search();
- }
- }
本文出自 “Kenan_ITBlog” 博客,请务必保留此出处http://soukenan.blog.51cto.com/5130995/1119421