Lucene 3 搜索引擎:索引建立、搜索、排序、分页、高亮显示,使用 IKAnalyzer 分词

package com.zjr.service.impl;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

import com.zjr.model.User;

public class UserIndexService {

/** Logger for this service. */
private final Log logger = LogFactory.getLog(UserIndexService.class);
/** File-system location of the Lucene index (hard-coded path). */
private final String dirPath = "d:/temp/user";

// NOTE(review): the fields below are package-private and are reassigned by the
// methods of this class on each call, so an instance does not look safe to
// share between threads - confirm before reusing one instance concurrently.

/** Analyzer handed to the IndexWriter and used to tokenize text for highlighting. */
Analyzer analyzer = new IKAnalyzer();
/** Index directory handle; assigned by init() and deleIndex(). */
Directory directory = null;
/** Writer assigned by createIndex(). */
IndexWriter writer = null;
/** Searcher assigned by search(), getPageQuery() and getPageQuery2(). */
IndexSearcher indexSearcher = null;

/**
 * Makes sure the index directory {@code dirPath} exists, creating it (and any
 * missing parent directories) when necessary.
 *
 * If the path is still not a writable directory afterwards, an error is
 * logged; no exception is thrown.
 */
private void confirmDirs() {
    File indexDir = new File(dirPath);
    if (!indexDir.exists()) {
        indexDir.mkdirs();
    }
    // Report the problem unconditionally: the previous code only logged the
    // error when DEBUG logging was enabled, which hid it in normal operation.
    // isDirectory() is false both for a missing path and for a plain file.
    if (!indexDir.isDirectory() || !indexDir.canWrite()) {
        logger.error("索引文件目录创建失败或不可写入!");
    }
}

/**
 * Prepares the index directory on disk and opens it as the shared
 * {@code directory} field.
 *
 * Failures are logged (with the full exception) and leave {@code directory}
 * unchanged; no exception is propagated to the caller.
 */
public void init() {
    confirmDirs();
    try {
        directory = FSDirectory.open(new File(dirPath));
    } catch (Exception e) {
        // Log unconditionally and pass the exception itself: the previous code
        // logged only when DEBUG was enabled and printed e.getCause(), which is
        // frequently null, under a message about unlocking the index.
        logger.error("打开索引目录失败: " + dirPath, e);
    }
}

/**
 * Rebuilds the index from scratch with one document per user.
 *
 * The writer is opened in "create" mode, so any existing index content in the
 * directory is replaced. I/O failures are logged and not propagated.
 *
 * @param userList users to index
 */
public void createIndex(List<User> userList) {
    init();
    if (directory == null) {
        // init() already logged why the directory could not be opened.
        logger.error("index directory is not available, index not created: " + dirPath);
        return;
    }
    IndexWriter indexWriter = null;
    try {
        // Arguments: where the index is stored, the analyzer to tokenize with,
        // create=true (build a brand-new index rather than append), and the
        // maximum number of terms indexed per field.
        indexWriter = new IndexWriter(directory, analyzer, true,
                IndexWriter.MaxFieldLength.LIMITED);
        writer = indexWriter; // keep the shared field in sync, as before
        indexWriter.setMergeFactor(500);
        indexWriter.setMaxBufferedDocs(155);
        indexWriter.setMaxFieldLength(Integer.MAX_VALUE);
        writeIndex(indexWriter, userList);
        indexWriter.optimize();
    } catch (IOException e) {
        logger.error("failed to build index in " + dirPath, e);
    } finally {
        // Always close the writer: an unclosed writer keeps the index write
        // lock, which makes every later attempt to open a writer fail.
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                logger.error("failed to close index writer for " + dirPath, e);
            }
        }
    }
}

/**
 * Searches the "userInfo" and "parameter1" fields for the keyword and returns
 * up to 20 best-scoring users with highlighted text.
 *
 * @param keyword text to search for
 * @return the matching users, or {@code null} when the index directory does
 *         not exist or the search fails
 */
public List<User> search(String keyword) {
    File indexFile = new File(dirPath);
    if (!indexFile.exists()) {
        return null;
    }
    Directory dir = null;
    IndexSearcher searcher = null;
    try {
        dir = FSDirectory.open(indexFile);
        searcher = new IndexSearcher(dir);
        // hitsToQuery() loads the stored documents through this shared field.
        indexSearcher = searcher;
        searcher.setSimilarity(new IKSimilarity());

        // Multi-field, single-condition query. IKQueryParser offers other
        // forms as well: parse(field, keyword) for a single field, and
        // parseMultiField overloads taking a BooleanClause.Occur[] (MUST = and,
        // MUST_NOT = not, SHOULD = or) and/or one query string per field.
        String[] fields = new String[] { "userInfo", "parameter1" };
        Query query = IKQueryParser.parseMultiField(fields, keyword);

        // Fetch the 20 highest-scoring hits.
        TopDocs topDocs = searcher.search(query, 20);
        return hitsToQuery(topDocs.scoreDocs, query);
    } catch (IOException e) {
        logger.error("search failed for keyword: " + keyword, e);
        return null;
    } finally {
        // Release the searcher and directory on every path, including
        // failures; closing an already-closed searcher is harmless.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                logger.warn("failed to close index searcher", e);
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException e) {
                logger.warn("failed to close index directory", e);
            }
        }
    }
}

/**
 * Converts search hits into {@code User} objects, loading each stored
 * document through the shared {@code indexSearcher} and highlighting the
 * query terms in "userInfo" and "parameter1".
 *
 * The shared searcher is closed before this method returns, on success and on
 * failure alike.
 *
 * @param hits  hits returned by the searcher
 * @param query query that produced the hits; drives the highlighting
 * @return the converted users, or {@code null} if reading or highlighting fails
 */
private List<User> hitsToQuery(ScoreDoc[] hits, Query query) {
    List<User> list = new ArrayList<User>();
    try {
        // One highlighter serves every hit and both fields.
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<font color=\"red\">", "</font>"),
                new QueryScorer(query));

        for (int i = 0; i < hits.length; i++) {
            User u = new User();
            Document doc = indexSearcher.doc(hits[i].doc);
            u.setUserId(Integer.parseInt(doc.get("userId")));
            u.setUserName(doc.get("userName"));
            u.setUserAge(Integer.parseInt(doc.get("userAge")));

            // A field is only stored when the bean property was non-null, so
            // the stored text may be missing; skip highlighting in that case
            // instead of failing on new StringReader(null).
            String rawInfo = doc.get("userInfo");
            String markedInfo = rawInfo == null ? null
                    : highlighter.getBestFragment(
                            analyzer.tokenStream("text", new StringReader(rawInfo)), rawInfo);
            // Fall back to the plain stored text when nothing was highlighted.
            u.setUserInfo(markedInfo != null ? markedInfo : rawInfo);

            String rawP1 = doc.get("parameter1");
            String markedP1 = rawP1 == null ? null
                    : highlighter.getBestFragment(
                            analyzer.tokenStream("text1", new StringReader(rawP1)), rawP1);
            u.setParameter1(markedP1 != null ? markedP1 : rawP1);

            u.setParameter2(doc.get("parameter2"));
            u.setParameter3(doc.get("parameter3"));
            u.setParameter4(doc.get("parameter4"));
            list.add(u);
        }
        return list;
    } catch (IOException e) {
        // Also covers CorruptIndexException, which is an IOException.
        logger.error("failed to read search hits from the index", e);
    } catch (InvalidTokenOffsetsException e) {
        logger.error("failed to highlight search hits", e);
    } finally {
        // Close on every path so a failure does not leak the open index files.
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (IOException e) {
                logger.warn("failed to close index searcher", e);
            }
        }
    }
    return null;
}

/**
 * Adds one document per user to the given writer.
 *
 * A {@code null} or empty list is a no-op and {@code null} elements are
 * skipped. Indexing stops at the first I/O failure, which is logged rather
 * than propagated.
 *
 * @param writer   open index writer to add the documents to
 * @param userList users to index; may be {@code null} or empty
 */
public void writeIndex(IndexWriter writer, List<User> userList) {
    if (userList == null || userList.isEmpty()) {
        return;
    }
    try {
        for (User u : userList) {
            if (u == null) {
                continue; // nothing to index for a missing entry
            }
            writer.addDocument(getDoc(u));
        }
    } catch (IOException e) {
        logger.error("failed to add user documents to the index", e);
    }
}

/**
 * Builds the Lucene document for one user.
 *
 * Every field is stored. The identifier-like fields are indexed as a single
 * untokenized term; the free-text fields are run through the analyzer.
 *
 * @param user the user to convert
 * @return the populated document
 */
private Document getDoc(User user) {
    // Index.NOT_ANALYZED: indexed, but not tokenized.
    final String[] exactMatchFields = { "userId", "userName", "userAge" };
    // Index.ANALYZED: tokenized and indexed.
    final String[] analyzedFields = { "userInfo", "parameter1", "parameter2",
            "parameter3", "parameter4" };

    System.out.println("用户ID 为" + user.getUserId() + " 索引被创建");

    Document document = new Document();
    for (String fieldName : exactMatchFields) {
        addField2Doc(document, user, fieldName, Store.YES, Index.NOT_ANALYZED);
    }
    for (String fieldName : analyzedFields) {
        addField2Doc(document, user, fieldName, Store.YES, Index.ANALYZED);
    }
    return document;
}

/**
 * Reads the bean property {@code name} as a string and, when it is non-null,
 * adds it to the document as a field of the same name with term vectors
 * (positions and offsets).
 *
 * Reflection failures while reading the property are logged and the field is
 * left out.
 *
 * @param doc   document to add the field to
 * @param bean  object the property is read from
 * @param name  property name, also used as the field name
 * @param store whether the value is stored in the index
 * @param index how the value is indexed
 */
private void addField2Doc(Document doc, Object bean, String name, Store store,
        Index index) {
    String text = null;
    try {
        text = BeanUtils.getProperty(bean, name);
    } catch (IllegalAccessException e) {
        logger.error("get bean property error", e);
    } catch (InvocationTargetException e) {
        logger.error("get bean property error", e);
    } catch (NoSuchMethodException e) {
        logger.error("get bean property error", e);
    }

    // Nothing to index when the property is null or could not be read.
    if (text == null) {
        return;
    }
    Field field = new Field(name, text, store, index,
            Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(field);
}

/**
 * Paged search ordered by relevance (no explicit sort), with highlighting.
 *
 * The keyword must match "userInfo" (MUST) and may match "parameter1"
 * (SHOULD). A page number or page size below 1 yields an empty page.
 *
 * @param pageNo   1-based page number
 * @param pageSize number of results per page
 * @param keyword  text to search for
 * @return the requested page together with the total hit count, or
 *         {@code null} when the index directory does not exist or the search
 *         fails
 */
public PageBean getPageQuery(int pageNo, int pageSize, String keyword) {
    // Raw List kept on purpose: the parameter type of PageBean.setList is not
    // visible from this file.
    List result = new ArrayList();
    File indexFile = new File(dirPath);
    if (!indexFile.exists()) {
        return null;
    }
    Directory dir = null;
    IndexSearcher searcher = null;
    try {
        dir = FSDirectory.open(indexFile);
        searcher = new IndexSearcher(dir);
        indexSearcher = searcher; // keep the shared field in sync, as before
        searcher.setSimilarity(new IKSimilarity());

        String[] fields = new String[] { "userInfo", "parameter1" };
        BooleanClause.Occur[] flags = new BooleanClause.Occur[] {
                BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };
        Query query = IKQueryParser.parseMultiField(fields, keyword, flags);

        // Collect only as many hits as are needed to reach the end of the
        // requested page (at least 1, at most the number of documents) rather
        // than one slot per document in the index.
        long lastWanted = (long) pageNo * pageSize;
        int numHits = (int) Math.max(1L, Math.min(lastWanted, (long) searcher.maxDoc()));
        TopScoreDocCollector topCollector = TopScoreDocCollector.create(numHits, true);
        searcher.search(query, topCollector);

        // Hits belonging to the requested page.
        long start = ((long) pageNo - 1) * pageSize;
        ScoreDoc[] docs = (pageSize > 0 && start >= 0 && start < numHits)
                ? topCollector.topDocs((int) start, pageSize).scoreDocs
                : new ScoreDoc[0];

        // Highlighting setup, shared by every hit and both fields.
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<font color=\"red\">", "</font>"),
                new QueryScorer(query));

        for (ScoreDoc scdoc : docs) {
            User u = new User();
            Document doc = searcher.doc(scdoc.doc);

            u.setUserId(Integer.parseInt(doc.get("userId")));
            u.setUserName(doc.get("userName"));
            u.setUserAge(Integer.parseInt(doc.get("userAge")));

            // Stored text may be absent (null bean properties are not
            // indexed); skip highlighting then instead of failing.
            String rawInfo = doc.get("userInfo");
            String markedInfo = rawInfo == null ? null
                    : highlighter.getBestFragment(
                            analyzer.tokenStream("text", new StringReader(rawInfo)), rawInfo);
            // Fall back to the plain stored text when nothing was highlighted.
            u.setUserInfo(markedInfo != null ? markedInfo : rawInfo);

            String rawP1 = doc.get("parameter1");
            String markedP1 = rawP1 == null ? null
                    : highlighter.getBestFragment(
                            analyzer.tokenStream("text1", new StringReader(rawP1)), rawP1);
            u.setParameter1(markedP1 != null ? markedP1 : rawP1);

            u.setParameter2(doc.get("parameter2"));
            u.setParameter3(doc.get("parameter3"));
            u.setParameter4(doc.get("parameter4"));
            result.add(u);
        }

        PageBean pb = new PageBean();
        pb.setCurrentPage(pageNo); // current page
        pb.setPageSize(pageSize);
        pb.setAllRow(topCollector.getTotalHits()); // total number of matching documents
        pb.setList(result);
        return pb;
    } catch (IOException e) {
        logger.error("paged search failed for keyword: " + keyword, e);
    } catch (InvalidTokenOffsetsException e) {
        logger.error("highlighting failed for keyword: " + keyword, e);
    } finally {
        // The searcher was previously never closed, leaking open index files
        // on every call.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                logger.warn("failed to close index searcher", e);
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException e) {
                logger.warn("failed to close index directory", e);
            }
        }
    }

    return null;
}

/**
 * Paged search with explicit sorting and highlighting.
 *
 * The keyword must match "userInfo" (MUST) and may match "parameter1"
 * (SHOULD). Results are ordered by userId ascending, then userAge descending.
 * A page that lies partly or wholly beyond the available hits returns only
 * the hits that exist.
 *
 * @param pageNo   1-based page number
 * @param pageSize number of results per page
 * @param keyword  text to search for
 * @return the requested page together with the total hit count, or
 *         {@code null} when the index directory does not exist, nothing
 *         matches, or the search fails
 */
public PageBean getPageQuery2(int pageNo, int pageSize, String keyword) {
    // Raw List kept on purpose: the parameter type of PageBean.setList is not
    // visible from this file.
    List result = new ArrayList();
    File indexFile = new File(dirPath);
    if (!indexFile.exists()) {
        return null;
    }
    Directory dir = null;
    IndexSearcher searcher = null;
    try {
        dir = FSDirectory.open(indexFile);
        searcher = new IndexSearcher(dir);
        indexSearcher = searcher; // keep the shared field in sync, as before
        searcher.setSimilarity(new IKSimilarity());

        String[] fields = new String[] { "userInfo", "parameter1" };
        BooleanClause.Occur[] flags = new BooleanClause.Occur[] {
                BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD };
        Query query = IKQueryParser.parseMultiField(fields, keyword, flags);

        // Multi-field sort; earlier entries take precedence.
        // Third SortField argument: false = ascending, true = descending.
        SortField byUserId = new SortField("userId", SortField.INT, false);
        SortField byUserAgeDesc = new SortField("userAge", SortField.INT, true);
        Sort sort = new Sort(new SortField[] { byUserId, byUserAgeDesc });

        // Ask for exactly enough hits to reach the end of the requested page
        // (at least 1, at most the number of documents). The former fixed
        // limit of 50 made later pages unreachable.
        long lastWanted = (long) pageNo * pageSize;
        int numHits = (int) Math.max(1L, Math.min(lastWanted, (long) searcher.maxDoc()));
        TopDocs topDocs = searcher.search(query, null, numHits, sort);

        if (topDocs.totalHits == 0) {
            return null; // unchanged contract: no match yields null
        }

        // Highlighting setup, shared by every hit and both fields.
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<font color=\"red\">", "</font>"),
                new QueryScorer(query));

        // Clamp the page window to the hits actually returned; indexing
        // scoreDocs up to pageNo * pageSize unconditionally threw
        // ArrayIndexOutOfBoundsException on a short last page.
        ScoreDoc[] hits = topDocs.scoreDocs;
        long from = ((long) pageNo - 1) * pageSize;
        if (pageSize > 0 && from >= 0 && from < hits.length) {
            int end = (int) Math.min(from + pageSize, (long) hits.length);
            for (int i = (int) from; i < end; i++) {
                User u = new User();
                Document doc = searcher.doc(hits[i].doc);
                u.setUserId(Integer.parseInt(doc.get("userId")));
                u.setUserName(doc.get("userName"));
                u.setUserAge(Integer.parseInt(doc.get("userAge")));

                // Stored text may be absent (null bean properties are not
                // indexed); skip highlighting then instead of failing.
                String rawInfo = doc.get("userInfo");
                String markedInfo = rawInfo == null ? null
                        : highlighter.getBestFragment(
                                analyzer.tokenStream("text", new StringReader(rawInfo)), rawInfo);
                // Fall back to the plain stored text when nothing was highlighted.
                u.setUserInfo(markedInfo != null ? markedInfo : rawInfo);

                String rawP1 = doc.get("parameter1");
                String markedP1 = rawP1 == null ? null
                        : highlighter.getBestFragment(
                                analyzer.tokenStream("text1", new StringReader(rawP1)), rawP1);
                u.setParameter1(markedP1 != null ? markedP1 : rawP1);

                u.setParameter2(doc.get("parameter2"));
                u.setParameter3(doc.get("parameter3"));
                u.setParameter4(doc.get("parameter4"));
                result.add(u);
            }
        }

        PageBean pb = new PageBean();
        pb.setCurrentPage(pageNo); // current page
        pb.setPageSize(pageSize);
        pb.setAllRow(topDocs.totalHits); // total number of matching documents
        pb.setList(result);
        return pb;
    } catch (IOException e) {
        logger.error("sorted paged search failed for keyword: " + keyword, e);
    } catch (InvalidTokenOffsetsException e) {
        logger.error("highlighting failed for keyword: " + keyword, e);
    } finally {
        // The searcher was previously never closed, leaking open index files
        // on every call.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                logger.warn("failed to close index searcher", e);
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException e) {
                logger.warn("failed to close index directory", e);
            }
        }
    }

    return null;
}

/**
 * Deletes every indexed document whose "userId" field equals the given id.
 *
 * A {@code null} id is ignored. I/O failures are logged and not propagated.
 *
 * @param userId value of the "userId" field identifying the documents to delete
 */
public void deleIndex(String userId) {
    if (userId == null) {
        return; // there is no term to match
    }
    IndexReader reader = null;
    try {
        directory = FSDirectory.open(new File(dirPath));
        // readOnly=false: deleting through a reader requires a writable reader.
        reader = IndexReader.open(directory, false);
        reader.deleteDocuments(new Term("userId", userId));
    } catch (IOException e) {
        logger.error("failed to delete index entries for userId: " + userId, e);
    } finally {
        // Always close the reader: closing releases the write lock it takes
        // for the deletion, which otherwise blocks later index updates.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                logger.error("failed to close index reader after delete", e);
            }
        }
    }
}

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值