package test;
import java.io.File;
import java.io.StringReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class Test {

    /** Shared logger for all static demo methods. */
    private static final Log logger = LogFactory.getLog(Test.class);

    public static void main(String[] args) throws Exception {
        //write();
        //read();
        analyKeyWord("北京 全文索引");
    }

    /**
     * Builds a small two-document index under d:/indexdata2/ using the IK analyzer.
     * Demonstrates StringField (not tokenized) vs TextField (tokenized) storage.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void write() throws Exception {
        File dir = new File("d:/indexdata2/");
        Analyzer analyzer = new IKAnalyzer();
        FSDirectory directory = FSDirectory.open(dir);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        IndexWriter writer = new IndexWriter(directory, iwc);
        try {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));
            doc.add(new TextField("title", "用lucene实现在一个上海", Field.Store.YES));
            doc.add(new TextField("content", "用lucene实现在一个(或者多个)字段中查找多个关键字", Field.Store.YES));
            doc.add(new StringField("cityCode", "32057", Field.Store.YES));
            writer.addDocument(doc);
            //writer.deleteDocuments(new Term("title", "用lucene实现在一个上海"+i));
            //writer.updateDocument(new Term("title", "用lucene实现在一个上海"+i), doc);

            // NOTE(review): this document reuses id "1" — presumably intentional for the
            // demo, but use distinct ids if you ever delete/update by the "id" field.
            Document doc1 = new Document();
            doc1.add(new StringField("id", "1", Field.Store.YES));
            doc1.add(new TextField("title", "开源的免费北京", Field.Store.YES));
            doc1.add(new TextField("content", "Lucene是非常优秀的成熟的开源的的纯java语言的全文索引检索工具包", Field.Store.YES));
            doc1.add(new StringField("cityCode", "32057", Field.Store.YES));
            writer.addDocument(doc1);
            //writer.forceMerge(1);
        } catch (Exception ex) {
            logger.error(ex.getMessage(), ex);
        } finally {
            writer.close();
            // FIX: the FSDirectory was previously never closed (resource leak).
            directory.close();
        }
    }

    /**
     * Searches the index built by {@link #write()}: combines an exact cityCode
     * term filter with a multi-field keyword query, then prints each hit's title
     * with the matched terms highlighted in red HTML font tags.
     *
     * @throws Exception on index access or query-parse failure
     */
    public static void read() throws Exception {
        File dir = new File("d:/indexdata2/");
        FSDirectory directory = FSDirectory.open(dir);
        IndexReader reader = DirectoryReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            //MultiReader alternative:
            /*File dir2 = new File("d:/indexdata2/");
            FSDirectory directory2 = FSDirectory.open(dir2);
            IndexReader reader2 = DirectoryReader.open(directory2);
            MultiReader mr = new MultiReader(reader,reader2);
            IndexSearcher searcher = new IndexSearcher(mr);
            mr.close();*/

            // Tokenize the query keywords with the same analyzer used at index time.
            Analyzer analyzer = new IKAnalyzer();
            String[] fields = {"title", "content"};
            BooleanQuery query = new BooleanQuery();
            TermQuery cityQuery = new TermQuery(new Term("cityCode", "32057"));
            MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer);
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
            Query keyQuery = parser.parse("北京 全文索引");
            //BooleanClause.Occur[] flags = new BooleanClause.Occur[] {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
            //Query keyQuery = MultiFieldQueryParser.parse(Version.LUCENE_40, keyword, fields, flags, analyzer);
            query.add(cityQuery, BooleanClause.Occur.MUST);
            query.add(keyQuery, BooleanClause.Occur.MUST);
            //Per-field BooleanQuery alternative:
            /*BooleanQuery boolquery = new BooleanQuery();
            QueryParser parser = new QueryParser(Version.LUCENE_40, "title",analyzer);
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);
            boolquery.add(parser.parse("北京 全文检索"),BooleanClause.Occur.SHOULD);
            QueryParser parser2 = new QueryParser(Version.LUCENE_40, "content",analyzer);
            parser2.setDefaultOperator(QueryParser.AND_OPERATOR);
            boolquery.add(parser2.parse("北京 全文检索"),BooleanClause.Occur.SHOULD);
            BooleanQuery query2 = new BooleanQuery();
            query2.add(cityQuery, BooleanClause.Occur.MUST);
            query.add(boolquery, BooleanClause.Occur.MUST);*/
            //Sorting alternative:
            //Sort sort = new Sort(new SortField[]{SortField.FIELD_SCORE, new SortField("pubDate", SortField.Type.STRING, true)});

            // FIX: the highlighter is query-dependent but loop-invariant — build it
            // once instead of once per hit as the original did.
            SimpleHTMLFormatter format = new SimpleHTMLFormatter("<font color=red>", "</font>");
            Highlighter highlighter = new Highlighter(format, new QueryScorer(query));
            // Fragment snippets around matched keywords, 150 chars per fragment.
            Fragmenter fragmenter = new SimpleFragmenter(150);
            highlighter.setTextFragmenter(fragmenter);

            TopDocs results = searcher.search(query, 10);
            ScoreDoc[] hits = results.scoreDocs;
            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                String title = doc.get("title");
                TokenStream titleTokens = analyzer.tokenStream("title", new StringReader(title));
                try {
                    // getBestFragment returns null when the query matched no term
                    // in this field; fall back to the raw stored title.
                    String highlighted = highlighter.getBestFragment(titleTokens, title);
                    System.out.println(highlighted != null ? highlighted : title);
                } finally {
                    // FIX: the per-hit token stream was previously never closed.
                    titleTokens.close();
                }
            }
            System.out.println("共检索出符合条件的Document " + hits.length + " 个。");
        } finally {
            reader.close();
            // FIX: the FSDirectory was previously never closed (resource leak).
            directory.close();
        }
    }

    /**
     * Prints the tokens the IK analyzer produces for the given text,
     * separated by single spaces.
     *
     * @param keyWord raw text to tokenize
     * @throws Exception if tokenization fails
     */
    public static void analyKeyWord(String keyWord) throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        StringReader strReader = new StringReader(keyWord);
        // FIX: the first argument of tokenStream() is the FIELD NAME, not the
        // text. The original passed keyWord itself, which only worked because
        // IK ignores the field name.
        TokenStream ts = analyzer.tokenStream("content", strReader);
        // Acquire the attribute once; it is updated in place on each increment.
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        StringBuilder builder = new StringBuilder();
        try {
            // FIX: the Lucene 4.x TokenStream contract requires reset() before
            // the first incrementToken(), and end() after the last.
            ts.reset();
            while (ts.incrementToken()) {
                builder.append(ta.toString()).append(" ");
            }
            ts.end();
        } finally {
            // FIX: the token stream was previously never closed.
            ts.close();
        }
        System.out.println(builder.toString());
    }
}
// lucene 4.0 small example
// (originally published 2019-02-26 21:22:17 — blog-scrape footer, kept as a comment so the file compiles)