/**
 * Small demonstration of indexing, deleting, updating and searching with
 * Lucene, including result highlighting.
 *
 * <p>Written against:
 * <ul>
 *   <li>lucene-core-3.3.0</li>
 *   <li>lucene-highlighter-3.3.0</li>
 * </ul>
 *
 * <p>Every method opens the index directory itself and releases everything it
 * opened before returning, whether it returns normally or throws.
 */
public class LuceneIndex {
    /** Analyzer shared by indexing, query parsing and highlighting. */
    private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33);

    /** Filesystem location of the index files. */
    private final String indexPath = "/home/zhanghc/luence/index/";

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    /** Fragment size handed to {@code SimpleFragmenter} when highlighting. */
    private static final int FRAGMENT_SIZE = 100;

    /**
     * Adds the sample documents to the index, creating the index if it does
     * not exist yet and appending to it otherwise.
     *
     * @return {@code true} once all documents were added and the writer was closed
     * @throws IOException if the index cannot be opened or written
     */
    public boolean createIndex() throws IOException {
        String[] contents = {"在泽州县彤康食品有限公司", "屠宰加工项目建成投产", "比如肉制品均来自双汇", "雨润等大型肉类生产商"};
        String[] names = {"泽州", "屠宰", "肉制品", "肉类生产商"};

        Directory directory = FSDirectory.open(new File(indexPath));
        try {
            IndexWriter writer = openWriter(directory);
            boolean done = false;
            try {
                // Iterate over the data itself rather than a hard-coded count.
                for (int i = 0; i < names.length; i++) {
                    writer.addDocument(buildDocument(String.valueOf(i), names[i], contents[i]));
                }
                writer.optimize();
                writer.close();
                done = true;
            } finally {
                if (!done) {
                    // Do not leave the writer (and its lock) open after a failure.
                    rollbackQuietly(writer);
                }
            }
        } finally {
            directory.close();
        }
        return true;
    }

    /**
     * Deletes the document(s) whose {@code id} term equals {@code "0"} and
     * prints the writer's document count afterwards.
     *
     * <p>{@code IndexWriter.deleteDocuments} is called with a {@code Term}
     * here; a parsed {@code Query} could be passed instead.
     *
     * @throws IOException if the index cannot be opened or written
     * @throws ParseException declared for callers; not thrown by the current body
     */
    public void deletes() throws IOException, ParseException {
        Directory directory = FSDirectory.open(new File(indexPath));
        try {
            IndexWriter writer = openWriter(directory);
            boolean done = false;
            try {
                writer.deleteDocuments(new Term("id", "0"));
                writer.optimize();
                System.out.println(writer.numDocs());
                writer.close();
                done = true;
            } finally {
                if (!done) {
                    rollbackQuietly(writer);
                }
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Replaces the documents with ids {@code 0} and {@code 1} by new sample
     * documents and prints the writer's document count.
     *
     * @throws IOException if the index cannot be opened or written
     */
    public void updates() throws IOException {
        String[] contents = {"索引的删除,纠结了很久,看到一篇总结不错的文章,转载过来好好学习", "即使在不关闭IndexReader的情况下"};
        String[] names = {"索引", "关闭"};

        Directory directory = FSDirectory.open(new File(indexPath));
        try {
            IndexWriter writer = openWriter(directory);
            boolean done = false;
            try {
                for (int i = 0; i < names.length; i++) {
                    String id = String.valueOf(i);
                    // updateDocument first deletes the document(s) containing the term
                    // and then adds the new document; the delete and add are atomic as
                    // seen by a reader on the same index.
                    writer.updateDocument(new Term("id", id), buildDocument(id, names[i], contents[i]));
                }
                System.out.println(writer.numDocs());
                writer.optimize();
                writer.close();
                done = true;
            } finally {
                if (!done) {
                    rollbackQuietly(writer);
                }
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Single-field search: parses {@code val} against the {@code name} field
     * and prints the hit count followed by the highlighted name of each hit.
     *
     * @param val query string in QueryParser syntax
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if the index cannot be read
     * @throws ParseException if {@code val} cannot be parsed
     */
    public void search(String val) throws CorruptIndexException, IOException, ParseException {
        final String field = "name";
        Directory directory = FSDirectory.open(new File(indexPath));
        try {
            IndexSearcher searcher = new IndexSearcher(directory, true);
            try {
                Query query = new QueryParser(Version.LUCENE_33, field, analyzer).parse(val);
                Highlighter highlighter = newHighlighter(query);
                TopDocs result = searcher.search(query, MAX_HITS);
                System.out.println("搜索结果,搜索条数为:" + result.totalHits);
                for (ScoreDoc hit : result.scoreDocs) {
                    Document document = searcher.doc(hit.doc);
                    try {
                        System.out.println(highlight(highlighter, field, document.get(field)));
                    } catch (InvalidTokenOffsetsException e) {
                        // Report the failing hit and continue with the next one.
                        e.printStackTrace();
                    }
                }
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Multi-field search: looks for {@code val} in the {@code name} and
     * {@code content} fields, both as optional (SHOULD) clauses, and prints
     * the highlighted name and content of each hit.
     *
     * <p>Occur values: MUST = the clause has to match, MUST_NOT = the clause
     * must not match, SHOULD = the clause may or may not match.
     *
     * @param val query string in QueryParser syntax
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if the index cannot be read
     * @throws ParseException if {@code val} cannot be parsed
     */
    public void mutilSearch(String val) throws CorruptIndexException, IOException, ParseException {
        Directory directory = FSDirectory.open(new File(indexPath));
        try {
            IndexSearcher searcher = new IndexSearcher(directory, true);
            try {
                BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
                final String[] fields = {"name", "content"};
                Query query = MultiFieldQueryParser.parse(Version.LUCENE_33, val, fields, clauses, analyzer);
                printNameAndContentHits(searcher, query, newHighlighter(query));
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Multi-field search built by hand: a hit must match {@code val} in the
     * {@code content} field and must not match it in the {@code name} field.
     * Prints the highlighted name and content of each hit.
     *
     * @param val query string in QueryParser syntax
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if the index cannot be read
     * @throws ParseException if {@code val} cannot be parsed
     */
    public void mutilSearch_2(String val) throws CorruptIndexException, IOException, ParseException {
        Directory directory = FSDirectory.open(new File(indexPath));
        try {
            IndexSearcher searcher = new IndexSearcher(directory, true);
            try {
                Query excludedInName = new QueryParser(Version.LUCENE_33, "name", analyzer).parse(val);
                Query requiredInContent = new QueryParser(Version.LUCENE_33, "content", analyzer).parse(val);

                BooleanQuery booleanQuery = new BooleanQuery();
                booleanQuery.add(excludedInName, Occur.MUST_NOT);
                booleanQuery.add(requiredInContent, Occur.MUST);

                // Score highlights with the clause every hit has to satisfy, not with
                // the excluded one.
                printNameAndContentHits(searcher, booleanQuery, newHighlighter(requiredInContent));
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Opens a writer on {@code directory} in CREATE_OR_APPEND mode using the shared analyzer. */
    private IndexWriter openWriter(Directory directory) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33, analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return new IndexWriter(directory, config);
    }

    /** Rolls back {@code writer} after a failure without hiding the exception already in flight. */
    private static void rollbackQuietly(IndexWriter writer) {
        try {
            writer.rollback();
        } catch (IOException ignored) {
            // The caller is already propagating the failure that led here; a
            // secondary rollback error must not replace it.
        }
    }

    /** Builds a document with a non-analyzed {@code id} and analyzed {@code name} and {@code content}, all stored. */
    private static Document buildDocument(String id, String name, String content) {
        Document document = new Document();
        document.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
        document.add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        document.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        return document;
    }

    /** Creates a highlighter that wraps matches of {@code query} in {@code <font><strong>} tags. */
    private static Highlighter newHighlighter(Query query) {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font><strong>", "</strong></font>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
        return highlighter;
    }

    /** Returns the best highlighted fragment of {@code text}, or {@code text} itself when the highlighter yields none. */
    private String highlight(Highlighter highlighter, String field, String text)
            throws IOException, InvalidTokenOffsetsException {
        TokenStream tokens = analyzer.tokenStream(field, new StringReader(text));
        String fragment = highlighter.getBestFragment(tokens, text);
        return fragment != null ? fragment : text;
    }

    /** Runs {@code query} and prints the hit count, then the highlighted name and content of each hit. */
    private void printNameAndContentHits(IndexSearcher searcher, Query query, Highlighter highlighter)
            throws IOException {
        TopDocs result = searcher.search(query, MAX_HITS);
        System.out.println("搜索结果,搜索条数为:" + result.totalHits);
        for (ScoreDoc hit : result.scoreDocs) {
            Document document = searcher.doc(hit.doc);
            try {
                String name = highlight(highlighter, "name", document.get("name"));
                String content = highlight(highlighter, "content", document.get("content"));
                System.out.println(name);
                System.out.println(content);
                System.out.println("---------------");
            } catch (InvalidTokenOffsetsException e) {
                // Report the failing hit and continue with the next one.
                e.printStackTrace();
            }
        }
    }
}