本例子基于 Maven 构建。
首先创建一个名为 Lucene01 的 Maven 项目,
然后在 pom.xml 的 <dependencies> 节点中加入以下依赖:
<!-- Lucene 6.6.0 artifacts used by this example; paste inside the <dependencies> element of pom.xml. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>6.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>6.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>6.6.0</version>
</dependency>
在 D:\lucene\data 目录下准备一些待索引的文本文件。
然后创建一个用于建立索引的类 Indexer:
package com.gcx.lucene;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each regular file becomes one document with three fields: {@code contents}
 * (tokenized from the file's text, not stored), {@code fileName} and
 * {@code fullPath} (both tokenized and stored). An instance holds an open
 * {@link IndexWriter} and must be closed when indexing is finished.
 */
public class Indexer implements AutoCloseable {

    /** Directory that holds the index files; closed together with the writer. */
    private final Directory directory;

    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /**
     * Opens (or creates) the index stored in the given folder.
     *
     * @param indexDir path of the folder the index is written to
     * @throws Exception if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        try {
            // Standard analyzer (tokenizer) with its default configuration.
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            writer = new IndexWriter(dir, iwc);
        } catch (Exception e) {
            // Do not leave the directory open when the writer could not be created.
            try {
                dir.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        directory = dir;
    }

    /**
     * Indexes every regular file located directly inside {@code dataDir}.
     * Sub-directories are skipped (they cannot be opened as a text stream).
     *
     * @param dataDir directory containing the files to index
     * @return the value of {@code writer.maxDoc()} after all files were added
     * @throws IOException if {@code dataDir} cannot be listed or a file cannot be indexed
     */
    public int index(String dataDir) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list data directory: " + dataDir);
        }
        for (File f : files) {
            if (f.isFile()) {
                indexFile(f);
            }
        }
        // Number of documents known to the writer.
        return writer.maxDoc();
    }

    /**
     * Indexes a single file.
     *
     * @param f file to index
     * @throws IOException if the file cannot be read or added to the index
     */
    private void indexFile(File f) throws IOException {
        System.out.println("索引文件:" + f.getCanonicalPath());
        Document document = getDocument(f);
        // Add the document to the index.
        writer.addDocument(document);
    }

    /**
     * Builds the document for a file and populates its fields (a document is
     * comparable to one record in a database table).
     *
     * @param f file to convert
     * @return a document with the {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws IOException if the file cannot be opened or its canonical path cannot be resolved
     */
    private Document getDocument(File f) throws IOException {
        Document doc = new Document();
        // NOTE(review): FileReader(File) decodes with the platform default charset — confirm
        // this matches the encoding of the data files.
        doc.add(new TextField("contents", new FileReader(f)));
        // Store the file name and full path in the index so they can be read back from hits.
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws IOException if closing fails
     */
    @Override
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes the files under {@code D:\lucene\data} into {@code D:\lucene} and
     * prints the document count and the elapsed time.
     */
    public static void main(String[] args) {
        String indexDir = "D:\\lucene";
        String dataDir = "D:\\lucene\\data";
        int num = 0;
        long start = System.currentTimeMillis();
        // try-with-resources only closes the indexer if it was actually constructed,
        // so a failing constructor no longer causes a NullPointerException on close.
        try (Indexer indexer = new Indexer(indexDir)) {
            num = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("索引多少个文件:" + num);
        System.out.println("花费了" + (end - start) + "毫秒");
    }
}
运行后:
此时在 D:\lucene 目录下会多出 5 个索引文件,后续可以用索引查看工具(例如 Luke)查看这五个文件的内容。
接下来创建一个 Searcher 类,对刚才生成的索引做一次简单查询:
package com.gcx.lucene;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import sun.applet.Main;
/**
 * Runs a query against the {@code contents} field of an existing Lucene index
 * and prints the stored {@code fullPath} of every hit.
 */
public class Searcher {

    /**
     * Searches the index in {@code indexDir} for {@code q} and prints the search
     * time followed by the {@code fullPath} field of up to 10 matching documents.
     *
     * @param indexDir folder containing the index
     * @param q query string in the classic query parser syntax
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if {@code q} cannot be parsed
     */
    public static void search(String indexDir, String q) throws IOException, ParseException {
        // try-with-resources closes the analyzer, reader and directory even when
        // parsing or searching throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             // Reader over the index stored in the directory.
             DirectoryReader reader = DirectoryReader.open(dir);
             // Standard analyzer (tokenizer) used to analyze the query text.
             Analyzer analyzer = new StandardAnalyzer()) {
            // Searcher over the index.
            IndexSearcher is = new IndexSearcher(reader);
            // Parser that targets the "contents" field by default.
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            // Fetch at most 10 hits.
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("花费了" + (end - start) + "毫秒");

            // Walk the scored hits: sd.doc is the document id, which is then
            // used to load the stored document.
            for (ScoreDoc sd : hits.scoreDocs) {
                Document doc = is.doc(sd.doc);
                System.out.println(doc.get("fullPath"));
            }
        }
    }

    /**
     * Searches the index in {@code D:\lucene} for the text {@code LICENSE-2.0}.
     */
    public static void main(String[] args) {
        String indexDir = "D:\\lucene";
        String q = "LICENSE-2.0";
        try {
            search(indexDir, q);
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }
}
运行结果: