【HIVE】Hook

有一篇文章专门讲 Hook:《【HIVE】Hook(钩子)函数从入门到放弃》。

HookRunner

HookRunner 持有各类 hook 的列表(queryHooks、saHooks、driverRunHooks、preExecHooks、postExecHooks、onFailureHooks)。
这些 hook 都在 initialize 方法里根据配置项加载并注册。

/**
 * Fills the runner's hook lists; only the first call does any work.
 *
 * <p>Each list is filled from the result of {@code loadHooksFromConf} for one
 * configuration variable (presumably the hook classes named by that variable
 * -- confirm against {@code loadHooksFromConf}). When
 * {@code HIVE_SERVER2_METRICS_ENABLED} is true, a {@code MetricsQueryLifeTimeHook}
 * is appended to the query life-time hooks as well.
 */
public void initialize() {
    if (!initialized) {
      // The flag is raised before any loading happens, so a failure while
      // loading does not cause a later call to retry.
      initialized = true;

      queryHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.HIVE_QUERY_LIFETIME_HOOKS, QueryLifeTimeHook.class));
      saHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class));
      driverRunHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, HiveDriverRunHook.class));

      // Pre-exec, post-exec and on-failure hooks share one hook type.
      preExecHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.PREEXECHOOKS, ExecuteWithHookContext.class));
      postExecHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.POSTEXECHOOKS, ExecuteWithHookContext.class));
      onFailureHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.ONFAILUREHOOKS, ExecuteWithHookContext.class));

      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED)) {
        queryHooks.add(new MetricsQueryLifeTimeHook());
      }
    }
  }

QueryLifeTimeHook

定义了四个操作。

/**
 * A type of hook whose callbacks bracket the two phases of a query:
 * compilation ({@link #beforeCompile} / {@link #afterCompile}) and
 * execution ({@link #beforeExecution} / {@link #afterExecution}).
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface QueryLifeTimeHook extends Hook {

  /**
   * Invoked before a query enters the compilation phase.
   *
   * @param ctx the context for the hook
   */
  void beforeCompile(QueryLifeTimeHookContext ctx);

  /**
   * Invoked after a query compilation. Note: if 'hasError' is true,
   * the query won't enter the following execution phase.
   *
   * @param ctx the context for the hook
   * @param hasError whether any error occurred during compilation.
   */
  void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError);

  /**
   * Invoked before a query enters the execution phase.
   *
   * @param ctx the context for the hook
   */
  void beforeExecution(QueryLifeTimeHookContext ctx);

  /**
   * Invoked after a query finishes its execution.
   *
   * @param ctx the context for the hook
   * @param hasError whether any error occurred during query execution.
   */
  void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError);

}

HiveSemanticAnalyzerHook

定义了两个操作。

/**
 * A hook with two callbacks that surround Hive's semantic analysis of a
 * statement: {@link #preAnalyze} runs before it and {@link #postAnalyze}
 * runs after it.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface HiveSemanticAnalyzerHook extends Hook {

  /**
   * Called before Hive runs its own semantic analysis on a statement.
   *
   * <p>An implementation may inspect the statement AST and veto execution by
   * throwing a {@link SemanticException}. It may also augment or rewrite the
   * AST, provided the result is equivalent to a tree Hive's own parser could
   * have returned directly.
   *
   * @param context context information for semantic analysis
   * @param ast AST being analyzed and optionally rewritten
   * @return the replacement AST; typically the original AST unless the entire
   *     tree had to be replaced; must not be null
   * @throws SemanticException to prevent the statement from executing
   */
  ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast)
      throws SemanticException;

  /**
   * Called after Hive has run its own semantic analysis (including
   * optimization) on a statement.
   *
   * <p>Hive invokes this on the same hook object that received
   * {@link #preAnalyze}, so an implementation can keep state between the two
   * calls.
   *
   * @param context context information for semantic analysis
   * @param rootTasks root tasks produced by semantic analysis; the hook is
   *     free to modify this list or its contents
   * @throws SemanticException declared by the hook contract
   */
  void postAnalyze(HiveSemanticAnalyzerHookContext context,
      List<Task<? extends Serializable>> rootTasks) throws SemanticException;
}

ReExecDriver

ReExecDriver 增加了一个 SemanticAnalyzerHook。

/**
 * Wraps a freshly created core {@code Driver} and hands it to every
 * re-execution plugin.
 *
 * <p>The core driver is built from the same query state, user name and query
 * info, with {@code null} as its fourth constructor argument. A
 * {@code HandleReOptimizationExplain} semantic-analyzer hook is registered on
 * it before the plugins are initialized.
 *
 * @param queryState state of the query; also stored on this driver
 * @param userName user name forwarded to the core driver
 * @param queryInfo query info forwarded to the core driver
 * @param plugins plugins to initialize, in list order, against the core driver
 */
public ReExecDriver(QueryState queryState, String userName, QueryInfo queryInfo,
      ArrayList<IReExecutionPlugin> plugins) {
    this.queryState = queryState;
    this.plugins = plugins;

    coreDriver = new Driver(queryState, userName, queryInfo, null);
    coreDriver.getHookRunner().addSemanticAnalyzerHook(new HandleReOptimizationExplain());

    // Plugins see the core driver only after the explain hook is in place.
    for (IReExecutionPlugin plugin : plugins) {
      plugin.initialize(coreDriver);
    }
  }

DriverFactory

/**
 * Creates the driver for a query, wrapping it in a {@code ReExecDriver} when
 * query re-execution is enabled.
 *
 * <p>If {@code HIVE_QUERY_REEXECUTION_ENABLED} is false a plain {@code Driver}
 * is returned. Otherwise {@code HIVE_QUERY_REEXECUTION_STRATEGIES} is read as
 * a comma-separated list of plugin names. Names are matched
 * case-insensitively, surrounding whitespace is ignored, and empty entries
 * are skipped.
 *
 * @param queryState state of the query; its configuration supplies both settings
 * @param userName user name forwarded to the created driver
 * @param queryInfo query info forwarded to the created driver
 * @return a {@code Driver}, or a {@code ReExecDriver} holding one plugin per
 *     configured strategy
 * @throws RuntimeException if a configured strategy name is not recognized
 */
public static IDriver newDriver(QueryState queryState, String userName, QueryInfo queryInfo) {
    boolean enabled = queryState.getConf().getBoolVar(ConfVars.HIVE_QUERY_REEXECUTION_ENABLED);
    if (!enabled) {
      return new Driver(queryState, userName, queryInfo);
    }

    String strategies = queryState.getConf().getVar(ConfVars.HIVE_QUERY_REEXECUTION_STRATEGIES);
    // Lower-case with Locale.ROOT: under a Turkish default locale the plain
    // toLowerCase() maps 'I' to a dotless 'ı', so "REOPTIMIZE" would never
    // match "reoptimize".
    strategies = Strings.nullToEmpty(strategies).trim().toLowerCase(java.util.Locale.ROOT);
    ArrayList<IReExecutionPlugin> plugins = new ArrayList<>();
    for (String entry : strategies.split(",")) {
      // Trim once and use the trimmed name for the lookup too, so that
      // "overlay, reoptimize" is accepted instead of failing on " reoptimize".
      String name = entry.trim();
      if (name.isEmpty()) {
        continue;
      }
      plugins.add(buildReExecPlugin(name));
    }

    return new ReExecDriver(queryState, userName, queryInfo, plugins);
  }
 HIVE_QUERY_REEXECUTION_ENABLED("hive.query.reexecution.enabled", true,
        "Enable query reexecutions"),
    HIVE_QUERY_REEXECUTION_STRATEGIES("hive.query.reexecution.strategies", "overlay,reoptimize",
        "comma separated list of plugin can be used:\n"
            + "  overlay: hiveconf subtree 'reexec.overlay' is used as an overlay in case of an execution errors out\n"
            + "  reoptimize: collects operator statistics during execution and recompile the query after a failure"),

DriverFactory.buildReExecPlugin

 /**
  * Maps a re-execution strategy name to a new plugin instance.
  *
  * @param name strategy name; compared exactly (case-sensitive) against
  *     {@code "overlay"} and {@code "reoptimize"}
  * @return a new {@code ReExecutionOverlayPlugin} or {@code ReOptimizePlugin}
  * @throws RuntimeException if {@code name} is neither of the known strategies
  */
 private static IReExecutionPlugin buildReExecPlugin(String name) throws RuntimeException {
    switch (name) {
      case "overlay":
        return new ReExecutionOverlayPlugin();
      case "reoptimize":
        return new ReOptimizePlugin();
      default:
        // Include the configuration key so the user knows which setting to fix.
        throw new RuntimeException(
            "Unknown re-execution plugin: " + name + " (" + ConfVars.HIVE_QUERY_REEXECUTION_STRATEGIES.varname + ")");
    }
  }

ReExecutionOverlayPlugin

ReExecutionOverlayPlugin 增加了一个 OnFailureHook。

 /**
  * Attaches this plugin to the given driver.
  *
  * <p>Stores the driver, registers a new {@code LocalHook} as an on-failure
  * hook on the driver's hook runner, and stores the {@code "reexec.overlay"}
  * subtree of the driver's configuration.
  *
  * @param driver the driver this plugin is attached to
  */
 @Override
  public void initialize(Driver driver) {
    this.driver = driver;
    driver.getHookRunner().addOnFailureHook(new LocalHook());
    subtree = driver.getConf().subtree("reexec.overlay");
  }

ReOptimizePlugin

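ReOptimizePlugin 注册了三次 hook:两个 OnFailureHook(一个 LocalHook 和一个 OperatorStatsReaderHook),以及一个 PostHook(同一个 OperatorStatsReaderHook 实例)。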

/**
 * Attaches this plugin to the given driver.
 *
 * <p>Registers a new {@code LocalHook} as an on-failure hook. It then
 * registers one {@code OperatorStatsReaderHook} instance as both an
 * on-failure hook and a post hook. Whether that hook collects on success
 * follows {@code HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS}.
 * Finally the driver's stats source is set from its own configuration.
 *
 * @param driver the driver this plugin is attached to
 */
@Override
  public void initialize(Driver driver) {
    coreDriver = driver;

    // Registration order is kept: LocalHook first, then the stats reader.
    driver.getHookRunner().addOnFailureHook(new LocalHook());
    statsReaderHook = new OperatorStatsReaderHook();
    driver.getHookRunner().addOnFailureHook(statsReaderHook);
    driver.getHookRunner().addPostHook(statsReaderHook);

    alwaysCollectStats = driver.getConf().getBoolVar(ConfVars.HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS);
    statsReaderHook.setCollectOnSuccess(alwaysCollectStats);

    driver.setStatsSource(StatsSources.getStatsSource(driver.getConf()));
  }
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
apache-atlas-2.1.0-hive-hook.tar.gz是Apache Atlas项目中的一个软件包。Apache Atlas是一个开源的数据治理和元数据框架,用于收集、集成、索引和搜索数据资产。它提供了一个统一的视图来管理企业中的所有数据资产,包括表、列、模式、实体和关系等。而apache-atlas-2.1.0-hive-hook.tar.gz是Atlas项目为了与Hive集成而提供的一个插件。 Hive是一个构建在Hadoop之上的数据仓库基础设施工具,用于处理大规模的结构化数据。它提供了类似于SQL的查询和分析功能,可以将数据批量导入、导出和查询。通过与Apache Atlas的集成,可以实现对Hive中数据资产的元数据管理和治理。 在实际的应用中,apache-atlas-2.1.0-hive-hook.tar.gz可以被部署到Hive的服务器上,并与Hive的插件机制进行集成。通过配置Hive的元数据存储URL、用户名和密码等信息,Atlas可以自动从Hive中提取元数据,并将其索引到Atlas的元数据仓库中。这样,用户可以在Atlas的界面中浏览和搜索Hive中的表、列和关系,并进行数据资产的管理和治理。 此外,apache-atlas-2.1.0-hive-hook.tar.gz还提供了一些其他功能,如基于分类标签的权限控制、数据血缘追踪、数据脱敏等。通过这些功能,用户可以更好地理解和管理Hive中的数据资产,提高数据治理的效率和质量。 总之,apache-atlas-2.1.0-hive-hook.tar.gz是Apache Atlas项目中用于与Hive集成的插件,通过它可以实现对Hive中数据资产的元数据管理和数据治理。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值