1.lucene适用范围
文本检索、网站信息检索、数据库搜索
2.lucene的组件
【1】.document对象表示被索引的文档,IndexWriter将文档add到index中
【2】用户query时,通过indexSearcher搜索lucene的index,同时计算term weight和score,之后返回结果
3.API调用方法:
- 创建索引
- 创建indexWriter写入index,包括参数:1)INDEX_DIR,索引文件位置;2)Analyzer,文档词法分析
- 创建待索引的文档:new Document,把field放进去,再调用indexWriter的addDocument即可
- 搜索
- 创建QueryParser,用它解析(parse)查询语句,把得到的查询语法树放到query中
- 用indexSearcher调用search方法搜索语法树,得到结果
package com.triple.one.indexer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Minimal Lucene 3.x file indexer: walks one directory (non-recursively) and
 * adds every accepted file to an index as a {@link Document} with three
 * fields: the tokenized file contents, the file name and the canonical path.
 *
 * Created by tripleone on 18/2/28.
 */
public class Indexer {

    /** Name of the field that is fed from the file's contents through a Reader. */
    private static final String CONTENT_FIELD = "文档";

    private final IndexWriter writer;

    /**
     * Opens an index writer on {@code indexDir}.
     *
     * <p>The writer is created with {@code create == true}, which per the
     * Lucene 3.x {@code IndexWriter} API starts a fresh index and replaces any
     * index already stored in that directory.
     *
     * @param indexDir directory in which the index files are stored
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            writer = new IndexWriter(dir, analyzer, true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
        } catch (IOException e) {
            // Do not leak the already opened directory when the writer cannot be created.
            dir.close();
            throw e;
        }
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes the regular, visible, readable files directly inside
     * {@code dataDir}. Sub-directories are skipped, not descended into.
     * A failure on one file is reported on stderr and does not stop the run.
     *
     * @param dataDir directory whose files are indexed
     * @param filter  optional filter; {@code null} accepts every file
     * @return the value of {@code writer.numDocs()} after the run
     * @throws IOException if {@code dataDir} cannot be listed, or if reading
     *                     the document count from the writer fails
     */
    public int index(String dataDir, FileFilter filter)
            throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // File.listFiles() returns null (not an empty array) when the path
            // is not a directory or an I/O error occurs.
            throw new IOException("Cannot list files in directory: " + dataDir);
        }
        for (File f : files) {
            if (!f.isDirectory()
                    && !f.isHidden()
                    && f.exists()
                    && f.canRead()
                    && (filter == null || filter.accept(f))) {
                try {
                    indexFile(f);
                } catch (Exception e) {
                    // Best effort: report which file failed and keep going.
                    System.err.println("Failed to index " + f + ": " + e);
                    e.printStackTrace();
                }
            }
        }
        return writer.numDocs();
    }

    /** Accepts only files whose name ends with ".txt" (case-insensitive). */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File path) {
            // Only .txt files are indexed.
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param f file to index
     * @return a document holding the content reader, the file name and the
     *         canonical path
     * @throws Exception if the canonical path cannot be resolved or the file
     *                   cannot be opened
     */
    protected Document getDocument(File f) throws Exception {
        // Resolve the path before opening the reader, so that a failure here
        // cannot leave an open FileReader behind.
        String canonicalPath = f.getCanonicalPath();
        Document doc = new Document();
        doc.add(new Field(CONTENT_FIELD, new FileReader(f)));
        doc.add(new Field("文件名", f.getName(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("路径", canonicalPath,
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Adds a single file to the index and releases its content reader.
     *
     * @param f file to index
     * @throws Exception if building or adding the document fails
     */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        try {
            writer.addDocument(doc);
        } finally {
            // addDocument consumes the reader synchronously; closing it here
            // also covers the case where addDocument throws. Closing an
            // already closed reader is harmless.
            Field content = doc.getField(CONTENT_FIELD);
            if (content != null && content.readerValue() != null) {
                content.readerValue().close();
            }
        }
    }

    /**
     * Demo entry point: indexes the .txt files of {@code <user.dir>/test2}
     * into {@code <user.dir>/test1} and prints the document count and the
     * elapsed time.
     */
    public static void main(String[] args) throws IOException {
        System.out.println(System.getProperty("user.dir"));
        String indexDir = System.getProperty("user.dir") + "/test1"; // where the index is written
        String dataDir = System.getProperty("user.dir") + "/test2";  // directory whose files are indexed
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed = 0;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            indexer.close();
        }
        long end = System.currentTimeMillis();
        // The count is the number of indexed files, not "folders found".
        System.out.println("索引了 " + numIndexed + " 个文件,共花费 "
                + (end - start) + " 毫秒");
    }
}
依赖包:
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers</artifactId>
<version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queries -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queries -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>3.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-highlighter -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>3.3.0</version>
</dependency>
test2里共有6个以txt结尾的文件