有一篇讲 Hook 的文章可以参考:《【HIVE】Hook(钩子)函数从入门到放弃》。
HookRunner
HookRunner 集中管理所有类型的 hook:它为每一类 hook 维护一个列表。
在 initialize 方法里,从配置中加载所有的 hook 并加入对应的列表。
/**
 * Fills every hook list from the configuration.
 *
 * <p>Only the first call does any work: the {@code initialized} flag is set before
 * loading starts, and any later call returns without touching the lists.
 */
public void initialize() {
  if (!initialized) {
    // Set the flag before loading, so a second call never repeats the work below.
    initialized = true;

    queryHooks.addAll(
        loadHooksFromConf(HiveConf.ConfVars.HIVE_QUERY_LIFETIME_HOOKS, QueryLifeTimeHook.class));
    saHooks.addAll(
        loadHooksFromConf(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class));
    driverRunHooks.addAll(
        loadHooksFromConf(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, HiveDriverRunHook.class));
    preExecHooks.addAll(
        loadHooksFromConf(HiveConf.ConfVars.PREEXECHOOKS, ExecuteWithHookContext.class));
    postExecHooks.addAll(
        loadHooksFromConf(HiveConf.ConfVars.POSTEXECHOOKS, ExecuteWithHookContext.class));
    onFailureHooks.addAll(
        loadHooksFromConf(HiveConf.ConfVars.ONFAILUREHOOKS, ExecuteWithHookContext.class));

    // The metrics hook is not loaded from a hook list; it is appended when the
    // HIVE_SERVER2_METRICS_ENABLED flag is on.
    final boolean metricsOn = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED);
    if (metricsOn) {
      queryHooks.add(new MetricsQueryLifeTimeHook());
    }
  }
}
QueryLifeTimeHook
定义了四个操作。
/**
* A type of hook whose callbacks bracket the two phases of a query:
* compilation ({@link #beforeCompile} / {@link #afterCompile}) and
* execution ({@link #beforeExecution} / {@link #afterExecution}).
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface QueryLifeTimeHook extends Hook {
/**
* Invoked before a query enters the compilation phase.
*
* @param ctx the context for the hook
*/
void beforeCompile(QueryLifeTimeHookContext ctx);
/**
* Invoked after query compilation. Note: if {@code hasError} is true,
* the query will not enter the following execution phase.
*
* @param ctx the context for the hook
* @param hasError whether any error occurred during compilation
*/
void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError);
/**
* Invoked before a query enters the execution phase.
*
* @param ctx the context for the hook
*/
void beforeExecution(QueryLifeTimeHookContext ctx);
/**
* Invoked after a query finishes its execution.
*
* @param ctx the context for the hook
* @param hasError whether any error occurred during query execution
*/
void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError);
}
HiveSemanticAnalyzerHook
定义了两个操作。
/**
* A type of hook with two callbacks around Hive's semantic analysis of a
* statement: {@link #preAnalyze} runs before it and {@link #postAnalyze}
* runs after it. Both are called on the same hook object, so an
* implementation may keep state between the two calls.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface HiveSemanticAnalyzerHook extends Hook {
/**
* Invoked before Hive performs its own semantic analysis on
* a statement. The implementation may inspect the statement AST and
* prevent its execution by throwing a SemanticException.
* Optionally, it may also augment/rewrite the AST, but must produce
* a form equivalent to one which could have
* been returned directly from Hive's own parser.
*
* @param context context information for semantic analysis
*
* @param ast AST being analyzed and optionally rewritten
*
* @return replacement AST (typically the same as the original AST unless the
* entire tree had to be replaced; must not be null)
*
* @throws SemanticException to prevent execution of the statement
*/
public ASTNode preAnalyze(
HiveSemanticAnalyzerHookContext context,
ASTNode ast) throws SemanticException;
/**
* Invoked after Hive performs its own semantic analysis on a
* statement (including optimization).
* Hive calls postAnalyze on the same hook object
* as preAnalyze, so the hook can maintain state across the calls.
*
* @param context context information for semantic analysis
* @param rootTasks root tasks produced by semantic analysis;
* the hook is free to modify this list or its contents
*
* @throws SemanticException declared by the signature.
* NOTE(review): the conditions under which an implementation should
* throw here are not stated in this file; confirm against callers.
*/
public void postAnalyze(
HiveSemanticAnalyzerHookContext context,
List<Task<? extends Serializable>> rootTasks) throws SemanticException;
}
ReExecDriver
ReExecDriver 增加了一个 SemanticAnalyzerHook。
/**
 * Builds a re-execution driver around a freshly created core {@code Driver}.
 *
 * <p>The core driver gets a {@code HandleReOptimizationExplain} semantic analyzer hook,
 * and every supplied plugin is then initialized against that core driver, in list order.
 *
 * @param queryState state of the query; stored and passed to the core driver
 * @param userName   passed through to the core driver
 * @param queryInfo  passed through to the core driver
 * @param plugins    re-execution plugins to attach; stored and initialized here
 */
public ReExecDriver(QueryState queryState, String userName, QueryInfo queryInfo,
    ArrayList<IReExecutionPlugin> plugins) {
  this.queryState = queryState;
  this.coreDriver = new Driver(queryState, userName, queryInfo, null);
  this.coreDriver.getHookRunner().addSemanticAnalyzerHook(new HandleReOptimizationExplain());

  this.plugins = plugins;
  // Each plugin hooks itself into the core driver.
  plugins.forEach(plugin -> plugin.initialize(coreDriver));
}
DriverFactory
/**
 * Creates the driver for a query.
 *
 * <p>When {@code HIVE_QUERY_REEXECUTION_ENABLED} is off, a plain {@code Driver} is returned.
 * Otherwise the comma-separated {@code HIVE_QUERY_REEXECUTION_STRATEGIES} value is parsed
 * into plugins and a {@code ReExecDriver} carrying them is returned.
 *
 * <p>Strategy names are matched case-insensitively. Whitespace around each name is ignored,
 * and empty entries (e.g. from a trailing comma) are skipped.
 *
 * @param queryState state of the query; also the source of the configuration read here
 * @param userName   passed through to the created driver
 * @param queryInfo  passed through to the created driver
 * @return a {@code Driver} or a {@code ReExecDriver}, depending on configuration
 * @throws RuntimeException if a configured strategy name is not a known plugin
 */
public static IDriver newDriver(QueryState queryState, String userName, QueryInfo queryInfo) {
  boolean enabled = queryState.getConf().getBoolVar(ConfVars.HIVE_QUERY_REEXECUTION_ENABLED);
  if (!enabled) {
    return new Driver(queryState, userName, queryInfo);
  }

  String strategies = queryState.getConf().getVar(ConfVars.HIVE_QUERY_REEXECUTION_STRATEGIES);
  // Locale.ROOT keeps the lower-casing independent of the JVM default locale
  // (e.g. under a Turkish locale "REOPTIMIZE" would otherwise not become "reoptimize").
  strategies = Strings.nullToEmpty(strategies).trim().toLowerCase(java.util.Locale.ROOT);

  ArrayList<IReExecutionPlugin> plugins = new ArrayList<>();
  for (String rawName : strategies.split(",")) {
    // Trim once and use the trimmed name for BOTH the emptiness check and the lookup;
    // otherwise "overlay, reoptimize" would fail on the padded " reoptimize".
    String name = rawName.trim();
    if (name.isEmpty()) {
      continue;
    }
    plugins.add(buildReExecPlugin(name));
  }
  return new ReExecDriver(queryState, userName, queryInfo, plugins);
}
// Entry for the "hive.query.reexecution.enabled" setting, described as enabling query re-execution.
// NOTE(review): the second argument (true) is presumably the default value; confirm against the enum constructor.
HIVE_QUERY_REEXECUTION_ENABLED("hive.query.reexecution.enabled", true,
"Enable query reexecutions"),
// Entry for the "hive.query.reexecution.strategies" setting: a comma-separated list of
// re-execution plugin names. The description below names two plugins, "overlay" and "reoptimize".
// NOTE(review): "overlay,reoptimize" is presumably the default value; confirm against the enum constructor.
HIVE_QUERY_REEXECUTION_STRATEGIES("hive.query.reexecution.strategies", "overlay,reoptimize",
"comma separated list of plugin can be used:\n"
+ " overlay: hiveconf subtree 'reexec.overlay' is used as an overlay in case of an execution errors out\n"
+ " reoptimize: collects operator statistics during execution and recompile the query after a failure"),
DriverFactory.buildReExecPlugin
/**
 * Maps a strategy name to a new plugin instance.
 *
 * @param name strategy name; {@code "overlay"} and {@code "reoptimize"} are recognized
 *             (exact, case-sensitive match)
 * @return a new {@code ReExecutionOverlayPlugin} or {@code ReOptimizePlugin}
 * @throws RuntimeException if {@code name} is not one of the recognized names
 */
private static IReExecutionPlugin buildReExecPlugin(String name) throws RuntimeException {
  switch (name) {
    case "overlay":
      return new ReExecutionOverlayPlugin();
    case "reoptimize":
      return new ReOptimizePlugin();
    default:
      // Include the config key in the message so the user knows which setting to fix.
      String message =
          "Unknown re-execution plugin: " + name + " (" + ConfVars.HIVE_QUERY_REEXECUTION_STRATEGIES.varname + ")";
      throw new RuntimeException(message);
  }
}
ReExecutionOverlayPlugin
ReExecutionOverlayPlugin adds one OnFailureHook (a LocalHook) in its initialize method.
/**
 * Attaches this plugin to the given driver.
 *
 * <p>Stores the driver, registers a {@code LocalHook} as an on-failure hook, and
 * keeps the {@code "reexec.overlay"} subtree of the driver's configuration.
 *
 * @param driver the driver this plugin hooks into
 */
@Override
public void initialize(Driver driver) {
  this.driver = driver;
  driver.getHookRunner().addOnFailureHook(new LocalHook());
  // Keep only the overlay subtree of the driver's configuration.
  this.subtree = driver.getConf().subtree("reexec.overlay");
}
ReOptimizePlugin
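ReOptimizePlugin registers hooks in its initialize method. Three registrations are visible below: a LocalHook and an OperatorStatsReaderHook as OnFailure hooks, plus the same OperatorStatsReaderHook as a post-execution hook.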
/**
 * Attaches this plugin to the given driver.
 *
 * <p>Registers a {@code LocalHook} as an on-failure hook, and a single
 * {@code OperatorStatsReaderHook} as both an on-failure hook and a post hook.
 * The stats reader's collect-on-success flag is taken from
 * {@code HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS}. Finally, the driver's
 * stats source is set from its own configuration.
 *
 * @param driver the driver this plugin hooks into; also stored as {@code coreDriver}
 */
@Override
public void initialize(Driver driver) {
  this.coreDriver = driver;

  // On-failure hooks: the local hook first, then the stats reader.
  driver.getHookRunner().addOnFailureHook(new LocalHook());
  this.statsReaderHook = new OperatorStatsReaderHook();
  driver.getHookRunner().addOnFailureHook(this.statsReaderHook);
  // The same stats reader instance is also registered as a post hook.
  driver.getHookRunner().addPostHook(this.statsReaderHook);

  this.alwaysCollectStats =
      driver.getConf().getBoolVar(ConfVars.HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS);
  this.statsReaderHook.setCollectOnSuccess(this.alwaysCollectStats);

  driver.setStatsSource(StatsSources.getStatsSource(driver.getConf()));
}