本示例演示 Lucene 3.5 的查询,并对查询结果做了一些处理:
1、支持前导通配符(后缀匹配),如 *国 可以搜索到中国、美国等以"国"字结尾的词,*:* 可以查询所有索引:
parser.setAllowLeadingWildcard(true);
2、含通配符的查询词不会被自动转换为小写,即保留原有大小写(此时通配符查询区分大小写):
// 有通配符时不转换大小写
parser.setLowercaseExpandedTerms(false);
3、结果进行多字段排序,详见代码排序部分;
4、结果高亮显示,详见代码高亮部分。
package cn.test.gxg.engine.query;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

- /**
- * 创建索引并查询示例
- *
- * @createTime: Feb 22, 2010 3:02:28 PM
- * @author: <a href="mailto:leader1212@sina.com.cn">天涯 </a>
- * @version: 0.1
- * @lastVersion: 0.1
- * @updateTime:
- * @updateAuthor: <a href="mailto:leader1212@sina.com.cn">天涯 </a>
- * @changesSum:
- *
- */
- public class QueryTest {
- public static void main(String[] args) {
- //索引目录 D:\workspace\code\java\TestLucene3\index\txt\test
- String INDNEX_PATH = "D:\\workspace\\code\\java\\TestLucene3\\index\\txt\\test";
- createIndex(INDNEX_PATH);
- search(INDNEX_PATH);
- }
- public static void createIndex (String indexPath) {
- // 获取中文分词器,查询的时候也要用一样的分词器。不然会导致查询结果不准确
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
- // 建立索引
- IndexWriter writer;
- NumericField nField = null;
- try {
- writer = new IndexWriter(FSDirectory.open(new File(indexPath)),
- analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
- Document doc = new Document();
- Field field = null;
- for(int i =0; i <10; i++) {
- doc = new Document();
- field = new Field("Code", "feinnocdb_App_info"+i, Field.Store.YES,
- Field.Index.ANALYZED);
- doc.add(field);
- nField = new NumericField("Id", Store.YES, true).setIntValue(i%3);
- doc.add(nField);
- field = new Field("Name", "国家名字-" + i, Field.Store.YES,
- Field.Index.ANALYZED);
- doc.add(nField);
- field = new Field("Content", "中国中华人民共和国—" + i, Field.Store.YES,
- Field.Index.ANALYZED);
- doc.add(field);
- nField = new NumericField("Type", Store.YES, true).setIntValue((i%10));
- doc.add(nField);
- nField = new NumericField("Price", Store.YES, true).setFloatValue((i%3));
- doc.add(nField);
- nField = new NumericField("Sex", Store.YES, true).setIntValue((i%2));
- doc.add(nField);
- writer.addDocument(doc);
- }
- writer.close();
- System.out.println("Indexed success!");
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- public static void search(String indexPath) {
- //获取Lucene标准分词器,可以使用其他分词器,前提是创建索引的时候也使用相同的分词器
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
- //建立索引
- try {
- IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
- QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "Content", analyzer);
- Query query = null;
- String q = "Content:国";
- try {
- query = parser.parse(q);
- // 支持后缀匹配,如*国 则可以搜索中国、美国等以国字结尾的词,*:*可以查询所有索引。
- parser.setAllowLeadingWildcard(true);
- // 有通配符时不转换大小写
- parser.setLowercaseExpandedTerms(false);
- } catch (ParseException e) {
- e.printStackTrace();
- }
- // 多字段排序,设置在前面的会优先排序
- SortField[] sortFields = new SortField[2];
- SortField sortField = new SortField("Id", SortField.INT, true);
- SortField FIELD_SEX = new SortField("Sex", SortField.INT, false);
- sortFields[0] = sortField;
- sortFields[1] = FIELD_SEX;
- Sort sort = new Sort(sortFields);
- // 单字段排序
- /*
- SortField sortField = new SortField("Id", SortField.INT, true);
- Sort sort = new Sort(sortField);
- */
- Searcher searcher = new IndexSearcher(reader);
- // 如果不需要排序则使用注释掉的代码查询
- // TopDocs topDocs = searcher.search(query, 100);
- TopDocs topDocs = searcher.search(query, null, 1000, sort);
- System.out.println("查询语句为:" + query.toString());
- System.out.println("查询到数据条数为:" + topDocs.totalHits);
- if (topDocs.totalHits != 0) {
- // 用作高亮显示的Query语句。绝大多数情况都是使用查询的Query语句。
- // 这里为了演示,所以不那样做
- Query hilightQuery = null;
- try {
- hilightQuery = parser.parse("Content:中");
- } catch (ParseException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- // 设置需要高亮的字段值
- String[] highlightCol = {"Content", "Name"};
- Highlighter highlighter = null;
- // 关键字高亮显示设置
- // 设定高亮显示的格式,也就是对高亮显示的词组加上前缀后缀
- SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(" <FONT COLOR='RED'>", " </FONT>");
- highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(hilightQuery));
- //设置每次返回的字符数
- highlighter.setTextFragmenter(new SimpleFragmenter(1000));
- // 遍历查询的索引,得到具体索引值。
- for(ScoreDoc sd : topDocs.scoreDocs) {
- Document document = searcher.doc(sd.doc);
- for (Fieldable fa : document.getFields()) {
- String value = document.get(fa.name());
- for (String col : highlightCol) {
- if(fa.name().equals(col)) {
- //设置高显内容
- TokenStream tokenStream = analyzer.tokenStream("Content",new StringReader(value));
- value = highlighter.getBestFragment(tokenStream, value);
- }
- }
- System.out.print(fa.name() + ":" + value + " ");
- }
- System.out.println();
- }
- }
- reader.close();
- } catch (CorruptIndexException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (LockObtainFailedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (InvalidTokenOffsetsException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }