Lucene主要分为三大块:
1、创建索引
2、分词
3、读取并查询索引
前提:由于本人目前看的是3.5的视频材料,所以里面有的写法可能还是3.5的写法,但我能保证demo能跑通,能运行,能明白意思。
在lucene3.5里,lucene的主要类都在core这一个jar包里;但从4.0开始被拆分成了多个jar(例如本例用到的StandardAnalyzer在lucene-analyzers-common里,QueryParser在lucene-queryparser里),需要根据项目需要,自己逐个添加jar包。
所用jar: lucene 4.10.2
教学视频:lucene 3.5
依赖的jar包:
入门代码1:
package com.test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene 4.x demo: {@link #index()} builds an on-disk index from a folder of text
 * files, and {@link #search()} runs a fixed query against that index and prints the hits.
 */
public class HelloLucene2 {

    /** Folder on disk where the index is stored. */
    private static final String INDEX_DIR = "D:/lucene-file/index01";

    /** Folder whose regular files are indexed. */
    private static final String SOURCE_DIR = "D:/lucene-file/exampletxt";

    /**
     * Indexes every regular file directly inside {@code SOURCE_DIR}.
     *
     * <p>Each file becomes one document with three fields: {@code content} (tokenized from the
     * file's text, not stored), {@code filename} and {@code path} (both stored verbatim and
     * not analyzed).
     *
     * <p>NOTE(review): the writer is opened with the default open mode, so calling this method
     * repeatedly appears to add the same files again - confirm, and consider
     * {@code IndexWriterConfig.OpenMode.CREATE} if a fresh index is wanted on every run.
     *
     * @throws FileNotFoundException if {@code SOURCE_DIR} is missing or is not a directory
     * @throws Exception if the index cannot be opened or written
     */
    @SuppressWarnings("deprecation")
    public void index() throws Exception {
        // Fail with a clear message instead of a NullPointerException: listFiles() returns
        // null when the path does not exist or is not a directory.
        File sourceDir = new File(SOURCE_DIR);
        File[] files = sourceDir.listFiles();
        if (files == null) {
            throw new FileNotFoundException(
                    "Source folder is missing or not a directory: " + sourceDir.getAbsolutePath());
        }

        // Writer configuration: which analyzer tokenizes the text.
        IndexWriterConfig config = new IndexWriterConfig(
                Version.LATEST, new StandardAnalyzer(Version.LATEST));

        // The directory says where the index lives (on disk here; a RAMDirectory would keep it
        // in memory). The writer is the object that actually writes index entries.
        // try-with-resources closes the writer first and then the directory, even when
        // indexing fails part-way, so neither is left open.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter writer = new IndexWriter(directory, config)) {
            for (File file : files) {
                // Skip sub-directories: opening a FileReader on one would throw.
                if (!file.isFile()) {
                    continue;
                }
                // Closing the reader here guarantees it is released if addDocument throws;
                // closing an already-closed reader has no effect.
                try (FileReader content = new FileReader(file)) {
                    // A document is like one record; each field is like one column of it.
                    Document document = new Document();
                    document.add(new Field("content", content));
                    document.add(new Field("filename", file.getName(), Field.Store.YES,
                            Field.Index.NOT_ANALYZED));
                    document.add(new Field("path", file.getAbsolutePath(),
                            Field.Store.YES, Field.Index.NOT_ANALYZED));
                    writer.addDocument(document);
                }
            }
        }
    }

    /**
     * Searches the {@code content} field of the index in {@code INDEX_DIR} for the term
     * {@code java} and prints the stored {@code filename} and {@code path} of up to ten hits,
     * followed by the number of hits printed.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @SuppressWarnings("deprecation")
    public void search() throws Exception {
        // Build the query first. The parser's second argument is the default field to search;
        // the query below matches documents whose content contains "java".
        QueryParser parser = new QueryParser(
                Version.LATEST, "content", new StandardAnalyzer(Version.LATEST));
        Query query = parser.parse("java");

        // Open the index location and a reader over it; both are closed on every exit path.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // TopDocs is comparable to a database result set; 10 is the maximum hit count.
            TopDocs topDocs = searcher.search(query, 10);
            ScoreDoc[] hits = topDocs.scoreDocs;

            // Each ScoreDoc carries a document id; the searcher resolves it to the stored
            // document, from which the stored field values are read.
            for (ScoreDoc hit : hits) {
                Document d = searcher.doc(hit.doc);
                System.out.println(d.get("filename"));
                System.out.println(d.get("path"));
                System.out.println("---------");
            }
            System.out.println("length"+hits.length);
        }
    }
}
test类:
package com.junittest;
import static org.junit.Assert.*;
import org.junit.Test;
import com.test.HelloLucene2;
/**
 * JUnit driver for {@link HelloLucene2}.
 */
public class TestLucene {

    // A standalone testindex() case that only called index() is disabled in this file;
    // testsearch() below builds the index itself before searching.

    /**
     * Builds the index and then runs the demo search against it. There are no assertions:
     * the test passes as long as neither call throws.
     */
    @Test
    public void testsearch() throws Exception {
        final HelloLucene2 demo = new HelloLucene2();
        demo.index();
        demo.search();
    }
}