使用的是 Apache Lucene 7.0 和 JDK 8。
1.遍历文件夹建立索引
package com.kyd.demo.standardAnaly;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Demo that walks a directory tree and builds an on-disk Lucene index of the
 * text files found in it.
 *
 * <p>Each indexed file becomes one document with three fields:
 * {@code lastModifiedTime} (a {@link LongPoint}), {@code path} (a stored
 * {@link StringField}) and {@code contents} (a {@link TextField} fed from the
 * file decoded as UTF-8).
 */
public class FileIndexDemo {

    /** Index location used when none is supplied on the command line. */
    private static final String DEFAULT_INDEX_DIR = "file_index";

    /** Directory scanned when none is supplied on the command line. */
    private static final String DEFAULT_SCAN_DIR = "D:\\360安全浏览器下载";

    /** Lower-case file extensions (including the leading dot) treated as text. */
    private static final List<String> TEXT_EXTENSIONS = new ArrayList<>();

    static {
        TEXT_EXTENSIONS.add(".txt");
        TEXT_EXTENSIONS.add(".java");
        TEXT_EXTENSIONS.add(".html");
        TEXT_EXTENSIONS.add(".htm");
        TEXT_EXTENSIONS.add(".css");
        TEXT_EXTENSIONS.add(".js");
        TEXT_EXTENSIONS.add(".xml");
        TEXT_EXTENSIONS.add(".properties");
        TEXT_EXTENSIONS.add(".tld");
    }

    /**
     * Builds (or rebuilds) the index.
     *
     * @param args optional: {@code args[0]} is the file or directory to scan,
     *             {@code args[1]} is the index directory; the built-in defaults
     *             are used for any argument that is missing
     */
    public static void main(String[] args) {
        String scanDir = args.length > 0 ? args[0] : DEFAULT_SCAN_DIR;
        String indexDir = args.length > 1 ? args[1] : DEFAULT_INDEX_DIR;

        // 1. Choose where the index is stored (here: on disk).
        // 2. Choose the analyzer (here: Lucene's standard analyzer).
        try (Directory directory = FSDirectory.open(Paths.get(indexDir));
                Analyzer analyzer = new StandardAnalyzer()) {
            // 3. Configure the writer; CREATE replaces any existing index.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriterConfig.setOpenMode(OpenMode.CREATE);

            // 4. Open the writer. try-with-resources guarantees it is closed
            //    (releasing the index write lock) before the directory is.
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
                // 5. Walk the tree and add one document per text file.
                indexDocs(indexWriter, scanDir);
                indexWriter.commit();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes {@code scanDir}: every regular file below it when it is a
     * directory, otherwise the single path itself.
     *
     * @param indexWriter writer the documents are added to
     * @param scanDir     file or directory to index
     * @throws IOException if reading a file or writing to the index fails
     */
    private static void indexDocs(IndexWriter indexWriter, String scanDir) throws IOException {
        Path root = Paths.get(scanDir);
        if (!Files.isDirectory(root)) {
            indexDoc(indexWriter, root);
            return;
        }
        Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                System.out.println(file.toString());
                // Follows links, so a link to a file is indexed while a link
                // to a directory (which cannot be opened as a stream) is skipped.
                if (Files.isRegularFile(file)) {
                    indexDoc(indexWriter, file);
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
                // The default implementation rethrows, which would abort the
                // whole walk because of a single unreadable entry.
                System.err.println("Skipping unreadable entry " + file + ": " + exc);
                return FileVisitResult.CONTINUE;
            }
        });
    }

    /**
     * Adds one document for {@code filePath} if it looks like a text file;
     * other files are silently ignored.
     *
     * @param indexWriter writer the document is added to
     * @param filePath    file to index
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    private static void indexDoc(IndexWriter indexWriter, Path filePath) throws IOException {
        if (!isTextFile(filePath)) {
            return;
        }
        // The stream has to stay open until addDocument returns, because the
        // contents field hands Lucene a Reader rather than a String.
        try (InputStream stream = Files.newInputStream(filePath)) {
            Document document = new Document();

            IndexableField lastModifiedTime =
                    new LongPoint("lastModifiedTime", Files.getLastModifiedTime(filePath).toMillis());
            document.add(lastModifiedTime);

            IndexableField path = new StringField("path", filePath.toString(), Store.YES);
            document.add(path);

            IndexableField content = new TextField("contents",
                    new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)));
            document.add(content);

            indexWriter.addDocument(document);
        }
    }

    /**
     * Decides whether a file should be indexed as text.
     *
     * <p>Only the last path element is inspected, so dots in parent directory
     * names do not affect the result. A name without any dot is treated as
     * text; otherwise the extension is compared case-insensitively against
     * {@link #TEXT_EXTENSIONS}.
     *
     * @param scanDir path of the file to test
     * @return {@code true} if the file should be indexed
     */
    private static boolean isTextFile(Path scanDir) {
        Path fileName = scanDir.getFileName();
        // getFileName() is null for a path with no elements (e.g. a root).
        String name = fileName == null ? scanDir.toString() : fileName.toString();
        int dot = name.lastIndexOf('.');
        if (dot == -1) {
            return true;
        }
        return TEXT_EXTENSIONS.contains(name.substring(dot).toLowerCase(Locale.ROOT));
    }
}
2.查询案例
package com.kyd.demo.standardAnaly;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
/**
 * Demo that runs a query against the {@code contents} field of an on-disk
 * Lucene index and prints the stored {@code path} of each hit.
 */
public class SearchDemo {

    /** Index location used when none is supplied on the command line. */
    private static final String DEFAULT_INDEX_DIR = "file_index";

    /** Query text used when none is supplied on the command line. */
    private static final String DEFAULT_QUERY = "Lucene";

    /** Maximum number of hits fetched and printed. */
    private static final int MAX_HITS = 100;

    /**
     * Runs the search and prints the results to standard output.
     *
     * @param args optional: {@code args[0]} is the query text in classic
     *             query-parser syntax, {@code args[1]} is the index directory;
     *             the built-in defaults are used for any argument that is missing
     */
    public static void main(String[] args) {
        String queryText = args.length > 0 ? args[0] : DEFAULT_QUERY;
        String indexDir = args.length > 1 ? args[1] : DEFAULT_INDEX_DIR;

        // 1. Open the index. The directory, the reader and the analyzer all
        //    hold resources, so they are closed via try-with-resources.
        try (FSDirectory directory = FSDirectory.open(Paths.get(indexDir));
                IndexReader indexReader = DirectoryReader.open(directory);
                // 3. Create the analyzer used to parse the query text.
                Analyzer analyzer = new StandardAnalyzer()) {
            // 2. Create the searcher on top of the reader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // 4. Build the query against the "contents" field.
            QueryParser queryParser = new QueryParser("contents", analyzer);
            Query query = queryParser.parse(queryText);

            // 5. Execute the query, fetching at most MAX_HITS top hits.
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);

            // 6. Print the total hit count and the stored path of each hit.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            System.out.println("一共匹配到:" + topDocs.totalHits + "条记录");
            for (int i = 0; i < scoreDocs.length; i++) {
                Document document = indexSearcher.doc(scoreDocs[i].doc);
                System.out.println(i + ":" + document.get("path"));
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }
}