以前写过一个 Lucene 3.5 版本的案例。但是现在要用 Maven，并且要用新版本（4.6.1）。研究了下，稍微跟老版本有所不同。记录下来初期的一段代码供大家参考。能完成一般的：建立索引、修改索引、中文分词、删除索引、关键字查询、高亮显示关键字查询结果。
原创地址:https://blog.csdn.net/yjflinchong/article/details/109601765
数据对象
import java.util.List;
/**
 * Value object for a single Lucene document.
 *
 * <p>The {@code public static final} constants are the index field names shared
 * by the indexing and query code; the instance fields carry the stored values.
 */
public class LucenceVo {

    /** Index field name: unique document id. */
    public static final String ID = "id";
    /** Index field name: document title. */
    public static final String TITLE = "title";
    /** Index field name: document type/category. */
    public static final String TYPE = "type";
    /** Index field name: document body text. */
    public static final String DESCRIPTION = "description";

    private String id;
    private String type;
    private String title;
    private String description;

    public String getId() { return id; }

    public void setId(String id) { this.id = id; }

    public String getType() { return type; }

    public void setType(String type) { this.type = type; }

    public String getTitle() { return title; }

    public void setTitle(String title) { this.title = title; }

    public String getDescription() { return description; }

    public void setDescription(String description) { this.description = description; }
}
工具类：基于 Lucene 的建立索引、修改索引、删除索引、查询、高亮查询，使用 IK 中文分词器。
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 4.6 helper: create/update/delete index entries, plain search,
 * highlighted search, and a Chinese word-segmentation demo (IKAnalyzer).
 *
 * <p>Obtain the singleton via {@link #instance()}. The index lives in the
 * {@code lucence_index} directory on the classpath.
 */
public class MyLuceneUtil {
    /** Directory on disk that holds the Lucene index files. */
    private static File indexDir;
    /** True when the index directory already contains files (see ctor fix). */
    private static boolean havapath;
    /** Shared analyzer (IK Chinese segmenter), used for indexing and querying. */
    private static Analyzer analyzer;
    /** Query parser bound to the description field as the default field. */
    private static QueryParser parser;
    /** Max fragment length (chars) produced by the highlighter. */
    private int textmaxlength = 200;
    private static String prefixHTML = "<font color='red'>";
    private static String suffixHTML = "</font>";
    private static MyLuceneUtil lucence = null;

    /** Initializes analyzer, index directory and query parser. */
    private MyLuceneUtil() {
        System.out.println("lucence初始化开始....");
        // analyzer = new StandardAnalyzer(Version.LUCENE_46);
        analyzer = new IKAnalyzer();
        indexDir = new File(this.getClass().getClassLoader().getResource("lucence_index").getFile());
        String[] entries = indexDir.list();
        // BUG FIX: the original tested "length >= 0", which is always true even
        // for an empty directory; also guard against list() returning null.
        havapath = (entries != null && entries.length > 0);
        parser = new QueryParser(Version.LUCENE_46, LucenceVo.DESCRIPTION, analyzer);
    }

    /**
     * Returns the process-wide singleton, creating it on first use.
     * Synchronized so concurrent first calls cannot build two instances.
     */
    public static synchronized MyLuceneUtil instance() {
        if (lucence == null) {
            lucence = new MyLuceneUtil();
        }
        return lucence;
    }

    /**
     * Runs {@code querystr} against the index and returns a page of results.
     *
     * @param querystr query text, parsed with the shared {@link QueryParser}
     * @param start    zero-based offset of the first hit to return
     * @param size     maximum number of hits to return
     * @return matched documents mapped to {@link LucenceVo}; empty on parse failure
     * @throws IOException if the index cannot be opened or read
     */
    public List<LucenceVo> searchList(String querystr, int start, int size) throws IOException {
        int hitsPerPage = size + start;
        List<LucenceVo> volist = new ArrayList<LucenceVo>();
        Directory index = FSDirectory.open(indexDir);
        IndexReader reader = DirectoryReader.open(index);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
            try {
                searcher.search(parser.parse(querystr), collector);
            } catch (Exception e) {
                // Parse failures leave the collector empty; an empty list is returned.
                e.printStackTrace();
            }
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            int end = Math.min(hitsPerPage, hits.length);
            for (int i = start; i < end; ++i) {
                Document d = searcher.doc(hits[i].doc);
                LucenceVo info = new LucenceVo();
                info.setId(d.get(LucenceVo.ID));
                info.setTitle(d.get(LucenceVo.TITLE));
                info.setType(d.get(LucenceVo.TYPE));
                info.setDescription(d.get(LucenceVo.DESCRIPTION));
                volist.add(info);
            }
        } finally {
            // FIX: the original leaked the reader on exception and never closed
            // the Directory at all.
            reader.close();
            index.close();
        }
        return volist;
    }

    /**
     * Demo: segments {@code text} with the shared analyzer and prints each token.
     *
     * @param text input text to tokenize
     * @throws IOException on token-stream failure
     */
    public static void cutWordsDemo(String text) throws IOException {
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        try {
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(charTermAttribute.toString());
            }
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }

    /**
     * Adds a single document to the index.
     *
     * @param vo document to index
     * @throws IOException if the index writer cannot be opened or written
     */
    public void addIndex(LucenceVo vo) throws IOException {
        IndexWriter writer = openIndexWriter();
        try {
            writer.addDocument(builderDocument(vo));
        } finally {
            writer.close();
        }
    }

    /**
     * Updates (or inserts) the document whose id field matches {@code vo.getId()}.
     *
     * @param vo replacement document
     * @throws IOException if the index writer cannot be opened or written
     */
    public void updateIndex(LucenceVo vo) throws IOException {
        IndexWriter writer = openIndexWriter();
        try {
            writer.updateDocument(new Term(LucenceVo.ID, vo.getId()), builderDocument(vo));
        } finally {
            writer.close();
        }
    }

    /**
     * Deletes the document whose id field matches {@code vo.getId()}.
     * (The original Javadoc said "update" — copy/paste error.)
     *
     * @param vo document whose id identifies the entry to delete
     * @throws IOException if the index writer cannot be opened or written
     */
    public void deleteIndex(LucenceVo vo) throws IOException {
        IndexWriter writer = openIndexWriter();
        try {
            writer.deleteDocuments(new Term(LucenceVo.ID, vo.getId()));
        } finally {
            writer.close();
        }
    }

    /**
     * Deletes every file in the index directory except .java/.class files.
     * No-op when the directory cannot be listed.
     */
    public void deleteAllFile() {
        File[] files = indexDir.listFiles();
        if (files == null) {
            // FIX: listFiles() returns null for a non-directory / IO error;
            // the original would NPE here.
            return;
        }
        for (int i = 0; i < files.length; i++) {
            String path = files[i].getAbsolutePath();
            if (!path.endsWith(".java") && !path.endsWith(".class")) {
                files[i].delete();
            }
        }
    }

    /** Opens an IndexWriter in CREATE_OR_APPEND mode over the index directory. */
    private IndexWriter openIndexWriter() throws IOException {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        // Append to an existing index, or create it if absent.
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        return new IndexWriter(FSDirectory.open(indexDir), indexWriterConfig);
    }

    /**
     * Maps a {@link LucenceVo} to a Lucene {@link Document}.
     * Id is a StringField (not tokenized, exact-match for update/delete);
     * the rest are analyzed TextFields. All fields are stored.
     */
    private Document builderDocument(LucenceVo lucenceVo) {
        Document document = new Document();
        document.add(new StringField(LucenceVo.ID, lucenceVo.getId(), Field.Store.YES));
        document.add(new TextField(LucenceVo.TITLE, lucenceVo.getTitle(), Field.Store.YES));
        document.add(new TextField(LucenceVo.DESCRIPTION, lucenceVo.getDescription(), Field.Store.YES));
        document.add(new TextField(LucenceVo.TYPE, lucenceVo.getType(), Field.Store.YES));
        return document;
    }

    /**
     * Searches the index and highlights query terms in the title with
     * {@code <font color='red'>} markup.
     *
     * @param keyword query text
     * @param start   zero-based offset of the first hit to return
     * @param size    maximum number of hits to return
     * @return matched documents with highlighted titles; empty list when the
     *         index directory is empty or any error occurs
     */
    public List<LucenceVo> searchIndexHighlighter(String keyword, int start, int size) {
        List<LucenceVo> list = new ArrayList<LucenceVo>();
        if (!havapath) {
            return list;
        }
        IndexReader reader = null;
        try {
            // FIX: use DirectoryReader.open (IndexReader.open is gone in 4.x)
            // and close the reader in finally (the original leaked it).
            reader = DirectoryReader.open(FSDirectory.open(indexDir));
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = parser.parse(keyword);
            TopDocs topDocs = searcher.search(query, (size + start));
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            int end = Math.min(size + start, topDocs.totalHits);
            // Highlighter setup is query-dependent only — hoisted out of the loop.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(prefixHTML, suffixHTML);
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(textmaxlength));
            for (int i = start; i < end; i++) {
                Document doc = searcher.doc(scoreDocs[i].doc);
                LucenceVo info = new LucenceVo();
                String TITLE = highlighter.getBestFragment(analyzer, LucenceVo.TITLE, doc.get(LucenceVo.TITLE));
                info.setDescription(doc.get(LucenceVo.DESCRIPTION));
                if (TITLE == null) {
                    // No query term in the title: fall back to the plain title.
                    TITLE = doc.get(LucenceVo.TITLE);
                }
                // Merge adjacent highlight spans ("</font><font...>" pairs).
                TITLE = TITLE.replaceAll(suffixHTML + prefixHTML, "");
                info.setId(doc.get(LucenceVo.ID));
                info.setTitle(TITLE);
                list.add(info);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Best-effort close; nothing useful to do here.
                }
            }
        }
        return list;
    }

    /** Writes ten demo documents into the index (name kept for compatibility). */
    public void testwirte() {
        for (int i = 0; i < 10; i++) {
            LucenceVo vo = new LucenceVo();
            vo.setId("myid" + i);
            vo.setTitle("中国今天新闻" + i);
            vo.setDescription("中国今天新闻" + i + "。新闻关键字备注" + i);
            vo.setType("新闻");
            try {
                this.updateIndex(vo);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Manual smoke test: run a query and print title/id of each hit. */
    public static void main(String[] args) {
        MyLuceneUtil lu = MyLuceneUtil.instance();
        try {
            // lu.testwirte();
            List<LucenceVo> list = lu.searchList("中国3 关键字", 0, 5);
            // List<LucenceVo> list = lu.searchIndexHighlighter("关键字", 0, 5);
            for (int i = 0; i < list.size(); i++) {
                System.out.println(list.get(i).getTitle() + "===" + list.get(i).getId());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Maven 配置文件（pom.xml 依赖片段）
<!-- Lucene 4.6.1 full-text search engine dependencies -->
<!-- Core index/search engine -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.6.1</version>
</dependency>
<!-- Standard analyzers (StandardAnalyzer etc.) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.6.1</version>
</dependency>
<!-- QueryParser for free-text query strings -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-codecs</artifactId>
<version>4.6.1</version>
</dependency>
<!-- Result-fragment highlighting (Highlighter, SimpleHTMLFormatter) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-memory</artifactId>
<version>4.6.1</version>
</dependency>
<!-- IK Analyzer: Chinese word segmentation for Lucene -->
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>