package org.action;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Lucene 4.7 demo: builds a full-text index over {@link Fileindex} beans via
 * reflection, searches it across all fields with hit highlighting, and deletes
 * documents by id.
 *
 * <p>NOTE(review): class name {@code lol} violates Java naming conventions but
 * is kept unchanged because external callers may reference it.
 */
public class lol {

    /**
     * Rebuilds the index from scratch: deletes every existing document, then adds
     * one Lucene {@link Document} per item, with one stored text field per bean
     * property (discovered reflectively through the getters).
     *
     * @param analyzer  the analyzer used to tokenize field values
     * @param directory the index directory to write into
     * @param items     the beans to index; every property value must be a String
     * @return true on success, false if any step threw
     */
    public boolean buildIndexer(Analyzer analyzer, Directory directory, List<Fileindex> items) {
        IndexWriter iwriter = null;
        try {
            // Writer needs the target directory plus the analyzer.
            iwriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_47, analyzer));
            // Drop all existing documents so the index mirrors `items` exactly.
            iwriter.deleteAll();
            for (Fileindex item : items) {
                Document doc = new Document();
                // Map every declared bean field to a stored, analyzed text field.
                for (java.lang.reflect.Field field : item.getClass().getDeclaredFields()) {
                    String fieldName = field.getName();
                    String getMethodName = "get" + toFirstLetterUpperCase(fieldName);
                    Object value = item.getClass().getMethod(getMethodName).invoke(item);
                    // TextField.TYPE_STORED keeps the full original value retrievable.
                    doc.add(new Field(fieldName, (String) value, TextField.TYPE_STORED));
                }
                iwriter.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            // Guard against NPE: the IndexWriter constructor itself may have thrown.
            if (iwriter != null) {
                try {
                    iwriter.close();
                } catch (IOException e) {
                    // Closing failed after the work succeeded; log instead of swallowing.
                    e.printStackTrace();
                }
            }
        }
        return true;
    }

    /**
     * Searches every {@link Fileindex} field for the given keywords (OR-combined),
     * returning up to 100 hits with matched terms wrapped in a red highlight span.
     *
     * <p>NOTE(review): this method closes the caller's {@code directory} in its
     * finally block — callers (see {@code main}) must reopen the directory before
     * reusing it. Kept for backward compatibility.
     *
     * @param analyzer  the analyzer used to parse the query strings
     * @param directory the index directory to search (closed on return!)
     * @param keyword   the search terms; any term may match any field
     * @return the matching beans with highlighted field values, or null on error
     */
    public List<Fileindex> searchIndexer(Analyzer analyzer, Directory directory, String[] keyword) {
        DirectoryReader ireader = null;
        List<Fileindex> result = new ArrayList<Fileindex>();
        try {
            ireader = DirectoryReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // Collect every bean property name so all fields are searched.
            java.lang.reflect.Field[] fields = Fileindex.class.getDeclaredFields();
            String[] multiFields = new String[fields.length];
            for (int i = 0; i < fields.length; i++) {
                multiFields[i] = fields[i].getName();
            }
            // One SHOULD (OR) clause per field, sized to the actual field count
            // instead of a hard-coded 5-element array.
            BooleanClause.Occur[] flags = new BooleanClause.Occur[multiFields.length];
            for (int i = 0; i < flags.length; i++) {
                flags[i] = BooleanClause.Occur.SHOULD;
            }
            // OR-combine one multi-field query per keyword.
            BooleanQuery bQuery = new BooleanQuery();
            for (String word : keyword) {
                // parse(...) is static on MultiFieldQueryParser; call it as such
                // (the original routed it through an unused instance).
                Query query = MultiFieldQueryParser.parse(Version.LUCENE_47, word, multiFields, flags, analyzer);
                bQuery.add(query, BooleanClause.Occur.SHOULD);
            }
            // Take the top 100 hits.
            TopDocs top = isearcher.search(bQuery, null, 100);
            for (ScoreDoc sd : top.scoreDocs) {
                Document doc = isearcher.doc(sd.doc);
                Fileindex item = new Fileindex();
                // Copy each stored field back into a bean, highlighting matches.
                for (java.lang.reflect.Field field : Fileindex.class.getDeclaredFields()) {
                    String fieldName = field.getName();
                    String stored = doc.get(fieldName);
                    QueryScorer scorer = new QueryScorer(bQuery, fieldName);
                    // Wrap matched terms in a red <span>.
                    SimpleHTMLFormatter formatter =
                            new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");
                    Highlighter highlighter = new Highlighter(formatter, scorer);
                    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
                    TokenStream token = TokenSources.getAnyTokenStream(
                            isearcher.getIndexReader(), sd.doc, fieldName,
                            new StandardAnalyzer(Version.LUCENE_47));
                    String highlighted = highlighter.getBestFragment(token, stored);
                    // Fields without a match keep their plain stored value.
                    if (highlighted == null) {
                        highlighted = stored;
                    }
                    String setMethodName = "set" + toFirstLetterUpperCase(fieldName);
                    item.getClass().getMethod(setMethodName, field.getType()).invoke(item, highlighted);
                }
                result.add(item);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            try {
                // Guard against NPE: DirectoryReader.open may have thrown.
                if (ireader != null) {
                    ireader.close();
                }
                // Preserved original behavior: the caller's directory is closed here.
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return result;
    }

    /**
     * Deletes all documents whose "id" field equals {@code id} and commits.
     *
     * @param analyzer  the analyzer for the writer configuration
     * @param directory the index directory to modify
     * @param id        the id value to delete
     * @throws IOException if the writer cannot be opened, commit, or close
     */
    public void deleteIndex(Analyzer analyzer, Directory directory, String id) throws IOException {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
        try {
            indexWriter.deleteDocuments(new Term("id", id));
            indexWriter.commit();
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Upper-cases the first letter of {@code str}, leaving the rest unchanged.
     * Used to turn a bean property name into its getter/setter suffix.
     *
     * <p>Fix: the original returned single-character strings unchanged
     * ({@code length() < 2}); they are now capitalized too.
     *
     * @param str the input, may be null or empty (returned as-is)
     * @return the capitalized string
     */
    public static String toFirstLetterUpperCase(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        return str.substring(0, 1).toUpperCase() + str.substring(1);
    }

    /**
     * Demo driver: indexes two sample beans on disk, searches them, deletes one
     * by id, then searches again to show the deletion took effect.
     */
    public static void main(String[] args) {
        lol demo = new lol();
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        // Sample data to index.
        List<Fileindex> items = new ArrayList<Fileindex>();
        items.add(new Fileindex("4","我","啊","搜索" ,"我是中国人"));
        items.add(new Fileindex("5", "坏","哈哈","吼吼","我叫何瑞"));
        // On-disk index directory (use RAMDirectory for an in-memory index instead).
        File file = new File("d:/lucene");
        Directory directory = null;
        try {
            directory = FSDirectory.open(file);
        } catch (IOException e) {
            e.printStackTrace();
        }
        demo.buildIndexer(analyzer, directory, items);
        // First search: both documents should match.
        String[] keyword = {"我是","何瑞"};
        List<Fileindex> result = demo.searchIndexer(analyzer, directory, keyword);
        for (Fileindex item : result) {
            System.out.println(item.toString());
        }
        // searchIndexer closed `directory`, so reopen before the delete.
        Directory directory2 = null;
        try {
            directory2 = FSDirectory.open(file);
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            demo.deleteIndex(analyzer, directory2, "5");
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Second search: the document with id "5" is gone.
        List<Fileindex> result2 = demo.searchIndexer(analyzer, directory2, keyword);
        for (Fileindex item : result2) {
            System.out.println(item.toString());
        }
    }
}
package org.action;
/**
 * Plain bean describing one indexed file record. Each property is a String so
 * the indexer can discover and copy them generically via reflection.
 */
public class Fileindex {
    private String id;
    private String title;
    private String titlekeyword;
    private String filekeyword;
    private String content;

    /** No-arg constructor for reflective population via the setters. */
    public Fileindex() {
    }

    /** Creates a fully-populated record. */
    public Fileindex(String id, String title, String titlekeyword, String filekeyword, String content) {
        this.id = id;
        this.title = title;
        this.titlekeyword = titlekeyword;
        this.filekeyword = filekeyword;
        this.content = content;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getTitlekeyword() {
        return titlekeyword;
    }

    public void setTitlekeyword(String titlekeyword) {
        this.titlekeyword = titlekeyword;
    }

    public String getFilekeyword() {
        return filekeyword;
    }

    public void setFilekeyword(String filekeyword) {
        this.filekeyword = filekeyword;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    /** Renders every property in a single bracketed, comma-separated line. */
    public String toString() {
        return String.format("[id=%s,title=%s,titlekeyword=%s,filekeyword=%s,content=%s]",
                id, title, titlekeyword, filekeyword, content);
    }
}
这样存在一个问题：搜索“中国人”中的“中”字时没有结果，因为单字没有被加入索引。
//*********************************
//再对当前指针位置的字符进行单字匹配
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成词
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme);
//同时也是词前缀
if(singleCharHit.isPrefix()){
//前缀匹配则放入hit列表
this.tmpHits.add(singleCharHit);
}
}else if(singleCharHit.isPrefix()){//首字为词前缀
//前缀匹配则放入hit列表
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme);
this.tmpHits.add(singleCharHit);
}
所以就稍微改了一下源码CJKSegmenter.java