交换实体类:
/**
 * Transfer object carrying one document into, and search hits out of, the Lucene index.
 *
 * <p>The {@code String} constants at the top are the Lucene field names under which the
 * corresponding values are indexed.
 */
public class LuceneWrapper implements Serializable{
    private static final long serialVersionUID = 6707156570710732532L;

    /** Index field name for {@link #getId()}. */
    public static final String ID = "id";
    /** Index field name for {@link #getTitle()}. */
    public static final String TITLE = "title";
    /** Index field name for {@link #getCreationDate()}. */
    public static final String CREATIONDATE = "creationDate";
    /** Index field name for {@link #getDocType()}. */
    public static final String DOCTYPE = "docType";
    /**
     * Index field name for the text extracted from the main body file. The spelling is the
     * name actually written to the index, so it must not be "corrected" casually.
     */
    public static final String ZHENGWEN = "zw_centent";
    /** Index field name for {@link #getUrl()}. */
    public static final String URL = "url";
    /** Index field name for {@link #getWenhao()}. */
    public static final String WENHAO = "wenhao";
    /** Index field name for the text extracted from the attachment files. */
    public static final String FUJIAN = "fj_content";

    private String id;
    /**
     * Document type. (The original comment only said "030"; its meaning is not evident
     * from this file.)
     */
    private String docType;
    /**
     * Creation date, formatted as yyyy-MM-dd.
     */
    private String creationDate;
    /**
     * Title.
     */
    private String title;
    /**
     * Document number ("wenhao").
     */
    private String wenhao;
    /**
     * Link address.
     */
    private String url;
    /**
     * Main body file ("zhengwen").
     */
    private Attachment zhengwen;
    /**
     * Attachment files; {@code null} until set.
     */
    private Set<Attachment> attachments;

    public LuceneWrapper(){
    }

    /** @return the document id */
    public String getId() {
        return id;
    }

    /** @param id the document id */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the document type */
    public String getDocType() {
        return docType;
    }

    /** @param docType the document type */
    public void setDocType(String docType) {
        this.docType = docType;
    }

    /** @return the creation date as a yyyy-MM-dd string */
    public String getCreationDate() {
        return creationDate;
    }

    /** @param creationDate the creation date as a yyyy-MM-dd string */
    public void setCreationDate(String creationDate) {
        this.creationDate = creationDate;
    }

    /** @return the title */
    public String getTitle() {
        return title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the document number */
    public String getWenhao() {
        return wenhao;
    }

    /** @param wenhao the document number */
    public void setWenhao(String wenhao) {
        this.wenhao = wenhao;
    }

    /** @return the link address */
    public String getUrl() {
        return url;
    }

    /** @param url the link address */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the main body file, or {@code null} if none was set */
    public Attachment getZhengwen() {
        return zhengwen;
    }

    /** @param zhengwen the main body file */
    public void setZhengwen(Attachment zhengwen) {
        this.zhengwen = zhengwen;
    }

    /** @return the attachment files, or {@code null} if none were set */
    public Set<Attachment> getAttachments() {
        return attachments;
    }

    /** @param attachments the attachment files */
    public void setAttachments(Set<Attachment> attachments) {
        this.attachments = attachments;
    }
}
/**
 * Operations for maintaining and searching the full-text index of {@link LuceneWrapper}
 * documents.
 */
public interface LuceneManager{

    /** Index directory path declared by this interface. */
    String INDEX_DIR ="g:\\ztzx\\";

    /**
     * Adds the given document to the index.
     *
     * @param entity the document to index
     */
    void createIndex(LuceneWrapper entity) throws RuntimeException;

    /**
     * Replaces the indexed document for the given entity.
     *
     * @param entity the document carrying the new content
     */
    void updateIndex(LuceneWrapper entity) throws RuntimeException;

    /**
     * Removes a document from the index.
     *
     * @param entityId id of the document to remove
     */
    void deleteIndex(String entityId) throws RuntimeException;

    /**
     * Searches the index and returns one page of results.
     *
     * @param pageNo      page number
     * @param pageSize    number of results per page
     * @param queryString the search expression
     * @return the requested page
     */
    Page searchPage(int pageNo, int pageSize,String queryString) throws RuntimeException;
}
/**
 * {@link LuceneManager} implementation that stores documents in a file-system Lucene index
 * located at {@code OaConstants.INDEX_DIR}, using the Paoding analyzer for tokenization and
 * Aspose.Words to extract text from word/wps files.
 *
 * <p>All operations are best-effort: failures are logged and not rethrown, and
 * {@link #searchPage(int, int, String)} returns {@code null} when the search fails.
 */
@Service("luc49Manager")
public class Lucene49ManagerImpl implements LuceneManager {
    private Logger logger = LoggerFactory.getLogger(getClass());
    public static Version LUCENE_VERSION = Version.LUCENE_46;
    static {
        com.aspose.words.Document.setLicence("sj_laokai");
    }

    /**
     * Adds a new document for {@code entity} to the index.
     *
     * @param entity the document to index; its body and attachment files are read from disk
     */
    @Override
    public void createIndex(LuceneWrapper entity) throws RuntimeException {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            // Extract the file contents before opening the writer so the index is not held
            // open while the word files are being parsed.
            Document doc = buildDocument(entity);
            directory = FSDirectory.open(new File(OaConstants.INDEX_DIR));
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(LUCENE_VERSION, new PaodingAnalyzer()));
            indexWriter.addDocument(doc);
            logger.debug("===添加索引成功==={}", entity.getTitle());
        } catch (Exception e) {
            logger.error("===添加索引失败===", e);
        } finally {
            closeQuietly(indexWriter);
            closeQuietly(directory);
        }
    }

    /**
     * Replaces the indexed document whose id equals {@code entity.getId()} with a freshly
     * built one.
     *
     * @param entity the document carrying the new content
     */
    @Override
    public void updateIndex(LuceneWrapper entity) throws RuntimeException {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            Document doc = buildDocument(entity);
            directory = FSDirectory.open(new File(OaConstants.INDEX_DIR));
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(LUCENE_VERSION, new PaodingAnalyzer()));
            indexWriter.updateDocument(new Term(LuceneWrapper.ID, entity.getId()), doc);
            indexWriter.commit();
            logger.debug("===更新索引成功==={}", entity.getTitle());
        } catch (Exception e) {
            logger.error("===更新索引失败===", e);
        } finally {
            closeQuietly(indexWriter);
            closeQuietly(directory);
        }
    }

    /**
     * Deletes every indexed document whose id field equals {@code entityId}.
     *
     * @param entityId id of the document to remove
     */
    @Override
    public void deleteIndex(String entityId) throws RuntimeException {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(new File(OaConstants.INDEX_DIR));
            Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(LUCENE_VERSION, analyzer));
            indexWriter.deleteDocuments(new Term(LuceneWrapper.ID, entityId));
            indexWriter.commit();
            logger.debug("===删除索引成功===");
        } catch (Exception e) {
            logger.error("===删除索引失败===", e);
        } finally {
            closeQuietly(indexWriter);
            closeQuietly(directory);
        }
    }

    /**
     * Searches title, document number, body text and attachment text for
     * {@code queryString} and returns the requested page of hits, with matches in the title
     * and document number wrapped in highlighting markup.
     *
     * @param pageNo      1-based page number; values below 1 are treated as 1
     * @param pageSize    hits per page; values below 1 are treated as 1
     * @param queryString the query expression, parsed by {@code MultiFieldQueryParser}
     * @return the page of hits, or {@code null} if the search failed
     */
    @Override
    public Page searchPage(int pageNo, int pageSize, String queryString) throws RuntimeException {
        if (pageNo < 1) {
            pageNo = 1;
        }
        if (pageSize < 1) {
            pageSize = 1;
        }
        Directory directory = null;
        IndexReader reader = null;
        try {
            directory = FSDirectory.open(new File(OaConstants.INDEX_DIR));
            reader = DirectoryReader.open(directory);
            IndexSearcher search = new IndexSearcher(reader);
            Analyzer analyzer = new PaodingAnalyzer();
            // The same expression is matched against all four fields; a hit in any one of
            // them is enough (SHOULD).
            String[] queryConditions = { queryString, queryString, queryString, queryString };
            String[] fields = { LuceneWrapper.TITLE, LuceneWrapper.WENHAO, LuceneWrapper.ZHENGWEN, LuceneWrapper.FUJIAN };
            BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD,
                    BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
            Query query = MultiFieldQueryParser.parse(LUCENE_VERSION, queryConditions, fields, flags, analyzer);
            // NOTE(review): the original comment asked for newest-first, but reverse=false
            // sorts ascending by creation date. Behavior is kept as it was; confirm intent.
            Sort sort = new Sort(new SortField(LuceneWrapper.CREATIONDATE, SortField.Type.STRING, false));
            // At most 10000 hits are collected; the page is cut out of that window.
            TopDocs topdocs = search.search(query, 10000, sort);
            Scorer score = new QueryScorer(query);
            ScoreDoc[] scores = topdocs.scoreDocs;

            // Clamp the page window to the available hits. long arithmetic avoids int
            // overflow for very large page numbers.
            long firstIndex = (long) (pageNo - 1) * pageSize;
            int from = (int) Math.min(firstIndex, (long) scores.length);
            int to = (int) Math.min(firstIndex + pageSize, (long) scores.length);

            List<LuceneWrapper> list = new ArrayList<LuceneWrapper>();
            for (int i = from; i < to; i++) {
                Document doc = search.doc(scores[i].doc);
                LuceneWrapper lw = new LuceneWrapper();
                lw.setId(doc.get(LuceneWrapper.ID));
                lw.setTitle(highlightOrRaw(LuceneWrapper.TITLE, doc.get(LuceneWrapper.TITLE), queryString, score));
                lw.setCreationDate(doc.get(LuceneWrapper.CREATIONDATE));
                lw.setDocType(doc.get(LuceneWrapper.DOCTYPE));
                lw.setWenhao(highlightOrRaw(LuceneWrapper.WENHAO, doc.get(LuceneWrapper.WENHAO), queryString, score));
                lw.setUrl(doc.get(LuceneWrapper.URL));
                list.add(lw);
            }
            return new Page(list, pageNo, pageSize, scores.length);
        } catch (Exception e) {
            logger.error("===检索索引失败===", e);
            return null;
        } finally {
            closeQuietly(reader);
            closeQuietly(directory);
        }
    }

    /**
     * Wraps the query matches found in {@code content} in red {@code <font>} tags.
     *
     * <p>NOTE(review): the stored text is not HTML-escaped before the tags are added;
     * confirm that callers render it safely.
     *
     * @param fieldName   index field the content belongs to, used for tokenization
     * @param content     the stored field value to highlight
     * @param queryString the query expression; not used by this method
     * @param score       scorer built from the parsed query
     * @return the highlighted content, or {@code null} if {@code content} is null or empty
     *         or the highlighter produced no fragment
     * @throws IOException                  if tokenizing the content fails
     * @throws InvalidTokenOffsetsException if the token offsets do not fit the content
     */
    public String highLighter(String fieldName, String content, String queryString, Scorer score)
            throws IOException, InvalidTokenOffsetsException {
        if (content == null || content.length() == 0) {
            return null;
        }
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        // Fragment size equals the content length so the whole value comes back as a
        // single fragment instead of an excerpt.
        Fragmenter fragmenter = new SimpleFragmenter(content.length());
        Highlighter highlighter = new Highlighter(formatter, score);
        highlighter.setTextFragmenter(fragmenter);
        TokenStream tokenStream = new PaodingAnalyzer().tokenStream(fieldName, new StringReader(content));
        return highlighter.getBestFragment(tokenStream, content);
    }

    /** Returns the highlighted form of {@code raw}, or {@code raw} itself when highlighting yields nothing. */
    private String highlightOrRaw(String fieldName, String raw, String queryString, Scorer score)
            throws IOException, InvalidTokenOffsetsException {
        String highlighted = highLighter(fieldName, raw, queryString, score);
        return StringUtils.isBlank(highlighted) ? raw : highlighted;
    }

    /** Builds the Lucene document for {@code entity}, reading body and attachment text from disk. */
    private Document buildDocument(LuceneWrapper entity) throws Exception {
        String bodyText = "";
        if (entity.getZhengwen() != null) {
            bodyText = extractText(entity.getZhengwen().getFilePath(), "正文");
        }

        // Concatenate the text of every readable attachment so all of them are searchable.
        StringBuilder attachmentText = new StringBuilder();
        if (entity.getAttachments() != null) {
            for (Attachment a : entity.getAttachments()) {
                if (a == null) {
                    continue;
                }
                String text = extractText(a.getFilePath(), "附件");
                if (text.length() > 0) {
                    if (attachmentText.length() > 0) {
                        attachmentText.append('\n');
                    }
                    attachmentText.append(text);
                }
            }
        }

        Document doc = new Document();
        doc.add(new TextField(LuceneWrapper.ZHENGWEN, bodyText, Store.YES));
        doc.add(new TextField(LuceneWrapper.FUJIAN, attachmentText.toString(), Store.YES));
        // The id is left unguarded on purpose: a document without an id could never be
        // updated or deleted, so a null id fails here and is logged by the caller.
        doc.add(new StringField(LuceneWrapper.ID, entity.getId(), Store.YES));
        doc.add(new StringField(LuceneWrapper.DOCTYPE, nullToEmpty(entity.getDocType()), Store.YES));
        doc.add(new StringField(LuceneWrapper.CREATIONDATE, nullToEmpty(entity.getCreationDate()), Store.YES));
        doc.add(new TextField(LuceneWrapper.TITLE, nullToEmpty(entity.getTitle()), Store.YES));
        doc.add(new TextField(LuceneWrapper.WENHAO, nullToEmpty(entity.getWenhao()), Store.YES));
        doc.add(new StringField(LuceneWrapper.URL, nullToEmpty(entity.getUrl()), Store.YES));
        doc.add(new DoubleField("version", 1.0, Store.YES));
        return doc;
    }

    /**
     * Reads the text of a doc/docx/wps file below the attachment base path. Returns an empty
     * string, after logging, when the file is missing or has another extension.
     */
    private String extractText(String relativePath, String label) throws Exception {
        String filePath = AttachmentUtils.getFileBasePath() + relativePath;
        File file = new File(filePath);
        if (!file.isFile()) {
            logger.error("===" + label + "创建索引文件不存在==={}", filePath);
            return "";
        }
        String fileExt = FilenameUtils.getExtension(file.getName()).toLowerCase();
        boolean supported = "doc".equals(fileExt) || "docx".equals(fileExt) || "wps".equals(fileExt);
        if (!supported) {
            logger.error("===" + label + "文件创建索引格式只支持word和wps==={}", filePath);
            return "";
        }
        String text = new com.aspose.words.Document(filePath).getText();
        return nullToEmpty(text);
    }

    /** Maps {@code null} to the empty string so optional values can still be indexed. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Closes {@code resource} if it was opened; a failure to close is logged, not thrown. */
    private void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.warn("===关闭索引资源失败===", e);
        }
    }
}
/**
 * Manual integration checks for the Lucene index manager. Only {@link #testRun()} is
 * annotated as a test; the other methods are helpers to be invoked from it as needed.
 * They print to standard output and contain no assertions.
 */
public class Lucene49ManagerTest extends CommSpringJunitTest {
    static {
        com.aspose.words.Document.setLicence("sj_laokai");
    }
    @Autowired
    private WfEntityManager wfManager;
    @Autowired
    private LuceneManager luc49Manager;
    @Autowired
    private PropertyManager prManager;

    /** Entry point run by the test framework; currently exercises {@link #testLucene()} only. */
    @Test
    public void testRun(){
        //testReadTitle();
        testLucene();
    }

    /** Prints the text Aspose.Words extracts from one sample word file and one sample wps file. */
    public void testReadTitle() {
        try {
            printDocumentText("===word===", "g:\\tmp\\word1.doc");
            printDocumentText("===wps===", "g:\\tmp\\wps1.wps");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints {@code prefix} followed by the text of the file at {@code filePath}, if that file exists. */
    private void printDocumentText(String prefix, String filePath) throws Exception {
        if (new File(filePath).isFile()) {
            com.aspose.words.Document zwdoc = new com.aspose.words.Document(filePath);
            System.out.println(prefix + zwdoc.getText());
        }
    }

    /** Loads one workflow entity, writes it to the index via updateIndex and runs a sample search. */
    public void testLucene() {
        CommConstant.PROPERTY_MAP = prManager.init();
        WfEntity entity = wfManager.get("8a8a8985501157d10150115ac49e0000");
        LuceneWrapper lw = new LuceneWrapper();
        lw.setId(entity.getId());
        lw.setZhengwen(entity.getZhengwen());
        // NOTE(review): attachments are never set on the wrapper here, so the manager
        // receives a wrapper whose attachment set has not been populated.
        lw.setTitle("关于国庆放假的通知");
        lw.setCreationDate(CommUtils.FormatDateToString(entity.getCreationDate(), "yyyy-MM-dd"));
        lw.setDocType("收文");
        lw.setWenhao("中央财政【2015】第20号");
        lw.setUrl("urlurlurlurlurlurlurlurlurlurl");
        // luc49Manager.createIndex(lw);
        luc49Manager.updateIndex(lw);
        luc49Manager.searchPage(1, 20, "国庆");
    }

    /** Queries the id field for a fixed keyword and prints the stored fields of the first 100 hits. */
    public void testQuery() {
        Directory directory = null;
        IndexReader reader = null;
        try {
            directory = FSDirectory.open(new File(LuceneManager.INDEX_DIR));
            reader = DirectoryReader.open(directory);
            IndexSearcher search = new IndexSearcher(reader);
            String queryStr = "8a8a";
            QueryParser queryParser = new QueryParser(Lucene49ManagerImpl.LUCENE_VERSION, "id", new PaodingAnalyzer());
            Query query = queryParser.parse(queryStr);
            TopDocs topdocs = search.search(query, 100);
            System.out.println("查询结果总数---" + topdocs.totalHits);
            ScoreDoc[] scores = topdocs.scoreDocs;
            for (int i = 0; i < scores.length; i++) {
                int num = scores[i].doc;
                Document document = search.doc(num);
                // The body text is stored under LuceneWrapper.ZHENGWEN, not "zhengwen".
                System.out.println("内容====" + document.get(LuceneWrapper.ZHENGWEN));
                System.out.println("标题====" + document.get("title"));
                System.out.println("版本====" + document.get("version"));
                // No "score" field is indexed; the relevance score comes from the hit itself.
                System.out.println("评分====" + scores[i].score);
                System.out.println("id--" + num + "---scors--" + scores[i].score + "---index--" + scores[i].shardIndex);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(reader);
            closeQuietly(directory);
        }
    }

    /** Prints every field name in the index followed by all terms indexed for that field. */
    public void testAllIndex() {
        Directory indexDirectory = null;
        IndexReader indexReader = null;
        try {
            indexDirectory = FSDirectory.open(new File(LuceneManager.INDEX_DIR));
            indexReader = DirectoryReader.open(indexDirectory);
            Fields fields = MultiFields.getFields(indexReader);
            if (fields == null) {
                // Nothing to list when the reader exposes no fields.
                return;
            }
            for (String field : fields) {
                System.out.println("field : " + field);
                Terms terms = fields.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnums = terms.iterator(null);
                BytesRef byteRef = null;
                while ((byteRef = termsEnums.next()) != null) {
                    // Term bytes are UTF-8; decode them as such rather than with the
                    // platform default charset.
                    System.out.println("term is : " + byteRef.utf8ToString());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexReader);
            closeQuietly(indexDirectory);
        }
    }

    /** Closes {@code resource} if it was opened, printing any failure instead of throwing it. */
    private void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}