QueryParser.jj 文件全面解析

最新推荐文章于 2024-05-15 09:36:11 发布

2哥的成长记录

最新推荐文章于 2024-05-15 09:36:11 发布

阅读量6.6k

点赞数

分类专栏： javacc 文章标签： query string wildcard deprecated character exception

本文链接：https://blog.csdn.net/zyb243380456/article/details/7225620

版权

QueryParser.jj文件详细解析Lucene的查询语法，包括AND、OR、NOT操作符，通配符查询，模糊查询，范围查询等，并讨论了查询字段的关联和修饰符，以及日期处理和国际化设置。

摘要由CSDN通过智能技术生成

/**

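 * 0. Overview

 *  The parser that JavaCC generates from this grammar builds different Query objects depending on the patterns found in the query string.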

 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

options {
  STATIC=false;
  JAVA_UNICODE_ESCAPE=true;
  USER_CHAR_STREAM=true;
}

PARSER_BEGIN(QueryParser)

package org.apache.lucene.queryParser;

import java.io.IOException;
import java.io.StringReader;
import java.text.Collator;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.VirtualMethod;

/**
 * This class is generated by JavaCC.  The most important method is
 * {@link #parse(String)}.
 *
 * The syntax for query strings is as follows:
 * A Query is a series of clauses.
 * A clause may be prefixed by:
 * <ul>
 * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
 * that the clause is required or prohibited respectively; or
 * <li> a term followed by a colon, indicating the field to be searched.
 * This enables one to construct queries which search multiple fields.
 * </ul>
 *
 * A clause may be either:
 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses.  Note that this may be used
 * with a <code>+</code>/<code>-</code> prefix to require any of a set of
 * terms.
 * </ul>
 *
 * Thus, in BNF, the query grammar is:
 * <pre>
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
 * </pre>
 *
 * <p>
 * Examples of appropriately formatted queries can be found in the <a
 * href="../../../../../../queryparsersyntax.html">query syntax
 * documentation</a>.
 * </p>
 *
 * <p>
 * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g.
 * <tt>date:[6/1/2005 TO 6/4/2005]</tt> produces a range query that searches
 * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format
 * of the accepted input depends on {@link #setLocale(Locale) the locale}.
 * By default a date is converted into a search term using the deprecated
 * {@link DateField} for compatibility reasons.
 * To use the new {@link DateTools} to convert dates, a
 * {@link org.apache.lucene.document.DateTools.Resolution} has to be set.
 * </p>
 * <p>
 * The date resolution that shall be used for RangeQueries can be set
 * using {@link #setDateResolution(DateTools.Resolution)}
 * or {@link #setDateResolution(String, DateTools.Resolution)}. The former
 * sets the default date resolution for all fields, whereas the latter can
 * be used to set field specific date resolutions. Field specific date
 * resolutions take, if set, precedence over the default date resolution.
 * </p>
 * <p>
 * If you use neither {@link DateField} nor {@link DateTools} in your
 * index, you can create your own
 * query parser that inherits QueryParser and overwrites
 * {@link #getRangeQuery(String, String, String, boolean)} to
 * use a different method for date conversion.
 * </p>
 *
 * <p>Note that QueryParser is <em>not</em> thread-safe.</p> 
 * 
 * <p><b>NOTE</b>: there is a new QueryParser in contrib, which matches
 * the same syntax as this class, but is more modular,
 * enabling substantial customization to how a query is created.
 *
 * <a name="version"/>
 * <p><b>NOTE</b>: You must specify the required {@link Version}
 * compatibility when creating QueryParser:
 * <ul>
 *    <li> As of 2.9, {@link #setEnablePositionIncrements} is true by
 *         default.
 *    <li> As of 3.1, {@link #setAutoGeneratePhraseQueries} is false by
 *         default.
 * </ul>
 */
public class QueryParser {
  
  // Conjunction codes: how one query clause is joined to another.
  private static final int CONJ_NONE   = 0; // no explicit conjunction; clauses are joined in the default way
  private static final int CONJ_AND    = 1; // the two clauses are joined with AND
  private static final int CONJ_OR     = 2; // the two clauses are joined with OR

  // Modifier codes: the importance prefix written in front of a clause.
  private static final int MOD_NONE    = 0; // the clause has no modifier in front of it
  private static final int MOD_NOT     = 10; // '-' prefix, e.g. -f:some : results must NOT satisfy this clause
  private static final int MOD_REQ     = 11; // '+' prefix, e.g. +f:some : results MUST satisfy this clause

  // Convenience aliases: callers choosing the default operator can reference these
  // members directly instead of going through the nested Operator enum.
  /** Alternative form of QueryParser.Operator.AND */
  public static final Operator AND_OPERATOR = Operator.AND;
  /** Alternative form of QueryParser.Operator.OR */
  public static final Operator OR_OPERATOR = Operator.OR;

  /** Operator the parser uses to combine query clauses by default; initially OR. */
  private Operator operator = OR_OPERATOR;

  boolean lowercaseExpandedTerms = true; // lower-case the term text being parsed
  MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
  boolean allowLeadingWildcard = false; // whether the first character of a query term may be the wildcard '*'
  // Initial value only: the constructor overwrites this according to matchVersion.
  boolean enablePositionIncrements = true;

  Analyzer analyzer; // analyzer used by the parser to tokenize query text
  String field; // name of the default query field
  int phraseSlop = 0; // slop (allowed gap) applied to phrase queries; NOTE(review): original note also mentions fuzzy queries — confirm
  float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; // minimum similarity for fuzzy queries
  int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; // prefix length used for fuzzy queries; NOTE(review): exact meaning defined by FuzzyQuery — confirm
  Locale locale = Locale.getDefault(); // locale used for internationalization (e.g. date parsing)

  /** Default date resolution used when handling dates in query strings; null until one is set. */
  DateTools.Resolution dateResolution = null;
  // maps field names to date resolutions
  Map<String,DateTools.Resolution> fieldToDateResolution = null;

  // The collator to use when determining range inclusion,
  // for use when constructing RangeQuerys.
  Collator rangeCollator = null;

  /**
   * Handle for the two-argument {@code getFieldQuery(String, String)} signature.
   * @deprecated remove when getFieldQuery is removed
   */
  @Deprecated
  private static final VirtualMethod<QueryParser> getFieldQueryMethod =
    new VirtualMethod<QueryParser>(QueryParser.class, "getFieldQuery", String.class, String.class);
  /**
   * Handle for the three-argument {@code getFieldQuery(String, String, boolean)} signature.
   * @deprecated remove when getFieldQuery is removed
   */
  @Deprecated
  private static final VirtualMethod<QueryParser> getFieldQueryWithQuotedMethod =
    new VirtualMethod<QueryParser>(QueryParser.class, "getFieldQuery", String.class, String.class, boolean.class);
  /**
   * Computed once per instance from the runtime class: true when comparing the
   * implementation distance of the three-argument form against the two-argument
   * form yields a non-negative result.
   * NOTE(review): presumably this means a subclass overriding only the old
   * two-argument form yields false — confirm against VirtualMethod's documentation.
   * @deprecated remove when getFieldQuery is removed
   */
  @Deprecated
  private final boolean hasNewAPI = 
    VirtualMethod.compareImplementationDistance(getClass(), 
        getFieldQueryWithQuotedMethod, getFieldQueryMethod) >= 0; // its ok for both to be overridden

  // Initialised by the constructor through setAutoGeneratePhraseQueries(...):
  // false for Version.LUCENE_31 and later, true for earlier versions.
  // NOTE(review): assumes that setter writes this field — its body is not visible here.
  private boolean autoGeneratePhraseQueries;

  /** The default operator for parsing queries. 
   * Use {@link QueryParser#setDefaultOperator} to change it.
   */
  static public enum Operator { OR, AND }

  /**
   * Creates a query parser.
   *
   * @param matchVersion  Lucene version to match; see the <a href="#version">version note</a>
   *                      in the class documentation
   * @param f  the default field for query terms
   * @param a  the analyzer used to find terms in the query text
   */
  public QueryParser(Version matchVersion, String f, Analyzer a) {
    // Start on an empty character stream; parse(String) re-initializes it per query.
    this(new FastCharStream(new StringReader("")));
    field = f;
    analyzer = a;
    // Position increments are enabled from 2.9 onwards, disabled before that.
    enablePositionIncrements = matchVersion.onOrAfter(Version.LUCENE_29);
    // Automatic phrase-query generation is off from 3.1 onwards, on before that.
    setAutoGeneratePhraseQueries(!matchVersion.onOrAfter(Version.LUCENE_31));
  }

  /**
   * Entry point of the grammar: parses a query string and returns the
   * resulting {@link Query}.
   *
   * @param query  the query string to parse
   * @return the query produced by the grammar, or the result of
   *         {@code newBooleanQuery(false)} when the grammar produces {@code null}
   * @throws ParseException if parsing fails; the message includes the original
   *         query string and the underlying error is kept as the cause
   */
  public Query parse(String query) throws ParseException {
    // Re-initialize the parser so the token manager reads from the new query string.
    ReInit(new FastCharStream(new StringReader(query)));
    // Shared prefix of every failure message, so the offending query is always reported.
    final String context = "Cannot parse '" + query + "': ";
    try {
      // Hand over to the generated grammar, starting from the default field.
      Query parsed = TopLevelQuery(field);
      if (parsed == null) {
        return newBooleanQuery(false);
      }
      return parsed;
    }
    catch (ParseException syntaxError) {
      ParseException wrapped = new ParseException(context + syntaxError.getMessage());
      wrapped.initCause(syntaxError);
      throw wrapped;
    }
    catch (TokenMgrError lexicalError) {
      ParseException wrapped = new ParseException(context + lexicalError.getMessage());
      wrapped.initCause(lexicalError);
      throw wrapped;
    }
    catch (BooleanQuery.TooManyClauses overflow) {
      ParseException wrapped = new ParseException(context + "too many boolean clauses");
      wrapped.initCause(overflow);
      throw wrapped;
    }
  }

   /**
   * @返回当前语法分析器所使用的分词器
   */
  public A

最低0.47元/天解锁文章

2哥的成长记录

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
QueryParser.jj 文件全面解析

/** *0.概要 * javacc是根据特定模式的字符窜来生成不同的query查询对象 * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work
复制链接

扫一扫

专栏目录