Lucene 简单的创建索引和搜索功能的实现
需要加载以下几个包
package com.cn.shupu.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class IndexManager {
private static String content = "";
private static List<File> filelist = new ArrayList<File>();
public static void createIndex(String sourcePath, String indexPath) {
//sourcePath是要索引文件的路径;indexPath是存放生成索引文件的路径
File fileDir = new File(sourcePath);
/**/ /* 这里放索引文件的位置 */
File indexDir = new File(indexPath);
Date date1 = new Date();
List<File> fileList = getFileList(sourcePath);
for (File file : fileList) {
content = "";
// 获取文件后缀
String type = file.getName().substring(file.getName().lastIndexOf(".") + 1);
if ("txt".equalsIgnoreCase(type)) {
String filePath = file.getAbsolutePath();
int index = filePath.lastIndexOf(File.separator);
filePath = filePath.substring(0, index);
if (filePath.contains("txt")) {
content += txt2String(file);
}
}
Directory dir;
try {
dir = FSDirectory.open(Paths.get(indexPath));// 这里的路径为保存索引的路径
// 创建一个保存索引的文件路径
Analyzer analyzer = new SmartChineseAnalyzer();// 创建一个分词器
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);// 创建一个索引生成器
IndexWriter writer = new IndexWriter(dir, iwc);
Document document = new Document();
document.add(new TextField("filename", file.getName(), Store.YES));
document.add(new TextField("content", content, Store.YES));
document.add(new TextField("path", file.getPath(), Store.YES));
writer.addDocument(document);
writer.commit();
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
content = "";
}
Date date2 = new Date();
System.out.println("创建索引-----耗时:" + (date2.getTime() - date1.getTime()) + "ms\n");
}
/**
* 121 * 读取txt文件的内容 122 * @param file 想要读取的文件对象 123 * @return 返回文件内容 124
*/
public static String txt2String(File file) {
String result = "";
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "GBK"));
String s = null;
while ((s = br.readLine()) != null) {// 使用readLine方法,一次读一行
result = result + "\n" + s;
}
br.close();
} catch (Exception e) {
e.printStackTrace();
}
return result;
}
/**
* 过滤目录下的文件
*
* @param dirPath
* 想要获取文件的目录
* @return 返回文件list
*/
public static List<File> getFileList(String dirPath) {
File[] files = new File(dirPath).listFiles();
for (File file : files) {
if (file.isDirectory()) {
getFileList(file.getAbsolutePath());
} else if (file.isFile()) {
if (isTxtFile(file.getName())) {
filelist.add(file);
}
}
}
return filelist;
}
/**
* 236 * 判断是否为目标文件,目前支持txt xls doc格式 237 * @param fileName 文件名称 238
* * @return 如果是文件类型满足过滤条件,返回true;否则返回false 239
*/
public static boolean isTxtFile(String fileName) {
if (fileName.lastIndexOf(".txt") > 0) {
return true;
} else {
return false;
}
}
/**
* 187 * 查找索引,返回符合条件的文件 188 * @param text 查找的字符串 189 * @return 符合条件Map集合 190
*/
public static Map<String, String> searchIndex(String text, String indexPath) {
if (text == null)
return null;
Date date1 = new Date();
Map<String, String> rs = new HashMap<String, String>();
Directory directory;
try {
directory = FSDirectory.open(Paths.get(indexPath));
Analyzer analyzer = new SmartChineseAnalyzer();
DirectoryReader ireader = DirectoryReader.open(directory);
IndexSearcher isearcher = new IndexSearcher(ireader);
QueryParser parser = new QueryParser("content", analyzer);
Query query = parser.parse(text);
TopDocs results = isearcher.search(query, 100);
ScoreDoc[] hits = results.scoreDocs;
for (int i = 0; i < hits.length; i++) {
Document hitDoc = isearcher.doc(hits[i].doc);
String content = hitDoc.get("content");
// 文件路径
String path = hitDoc.get("path");
// 查询字段高亮操作 其中300为字符长度可以自动修改的;
String s = displayHtmlHighlight(query, analyzer, "content", content, 180);
// System.out.println("____________________________");
//
// System.out.println("高亮:---------" + s);
// System.out.println(hitDoc.get("filename"));
// // System.out.println(hitDoc.get("content"));
// System.out.println(hitDoc.get("path"));
// System.out.println("____________________________");
rs.put(path, s);
}
ireader.close();
directory.close();
} catch (Exception e) {
e.printStackTrace();
}
Date date2 = new Date();
System.out.println("查看索引-----耗时:" + (date2.getTime() - date1.getTime()) + "ms\n");
return rs;
}
/**
* 获取高亮显示结果的html代码
*
* @param query
* 查询
* @param analyzer
* 分词器
* @param fieldName
* 域名
* @param fieldContent
* 域内容
* @param fragmentSize
* 结果的长度(不含html标签长度)
* @return 结果(一段html代码)
* @throws IOException
* @throws InvalidTokenOffsetsException
*/
static String displayHtmlHighlight(Query query, Analyzer analyzer, String fieldName, String fieldContent,
int fragmentSize) throws IOException, InvalidTokenOffsetsException {
// 创建一个高亮器
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color='red'>", "</font>"),
new QueryScorer(query));
Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
highlighter.setTextFragmenter(fragmenter);
return highlighter.getBestFragment(analyzer, fieldName, fieldContent);
}
}