Beeline 创建连接的流程
Beeline 使用标准 JDBC 连接数据库,如果连接 Hive,则 Driver 是 org.apache.hive.jdbc.HiveDriver
。通过 HiveDriver 创建连接,生成一个 HiveConnection 对象,代码如下:
@Override
public Connection connect(String url, Properties info) throws SQLException {
  // JDBC driver contract: return null (not an exception) when the URL is not
  // one this driver handles, so DriverManager can try the next driver.
  if (!acceptsURL(url)) {
    return null;
  }
  return new HiveConnection(url, info);
}
HiveConnection
HiveConnection 的构造方法里,调用以下几个方法。
openTransport();
// set up the client
client = new TCLIService.Client(new TBinaryProtocol(transport));
// open client session
openSession();
executeInitSql();
HiveConnection#openSession
在 openSession 方法里,调用了 client.OpenSession 方法。
private void openSession() throws SQLException {
// Omit some lines
TOpenSessionResp openResp = client.OpenSession(openReq);
// Omit some lines
Beeline 执行语句的流程
HiveConnection#createStatement
@Override
public Statement createStatement() throws SQLException {
  // A statement can only be handed out while this connection is still open.
  if (!isClosed) {
    return new HiveStatement(this, client, sessHandle, fetchSize);
  }
  throw new SQLException("Can't create Statement, connection is closed");
}
HiveStatement#execute
执行一条 SQL 语句。
@Override
public boolean execute(String sql) throws SQLException {
  // Submit the statement asynchronously, then block until the server reports
  // a terminal operation state.
  runAsyncOnServer(sql);
  TGetOperationStatusResp status = waitForOperationToComplete();

  // The query should be completed by now; report false when neither the final
  // status nor the operation handle advertises a result set (e.g. DML/DDL).
  boolean producesResults = status.isHasResultSet() || stmtHandle.isHasResultSet();
  if (!producesResults) {
    return false;
  }

  HiveQueryResultSet.Builder resultSetBuilder = new HiveQueryResultSet.Builder(this)
      .setClient(client)
      .setSessionHandle(sessHandle)
      .setStmtHandle(stmtHandle)
      .setMaxRows(maxRows)
      .setFetchSize(fetchSize)
      .setScrollable(isScrollableResultset);
  resultSet = resultSetBuilder.build();
  return true;
}
HiveStatement#runAsyncOnServer
runAsyncOnServer 通过 client.ExecuteStatement
请求 HiveServer2,并且得到操作句柄(OperationHandle)。
// Sends the SQL to HiveServer2 via client.ExecuteStatement and stores the
// returned operation handle in stmtHandle for later polling/fetching.
private void runAsyncOnServer(String sql) throws SQLException {
  checkConnection("execute");
  reInitState();

  // Build the Thrift request bound to the current session.
  TExecuteStatementReq execReq = new TExecuteStatementReq(sessHandle, sql);
  execReq.setConfOverlay(sessConf);
  execReq.setQueryTimeout(queryTimeout);
  // Run asynchronously whenever possible. Currently only a SQLOperation can
  // run asynchronously in a background operation thread; compilation may run
  // either way, while execution always runs asynchronously.
  execReq.setRunAsync(true);

  try {
    TExecuteStatementResp execResp = client.ExecuteStatement(execReq);
    Utils.verifySuccessWithInfo(execResp.getStatus());
    stmtHandle = execResp.getOperationHandle();
    isExecuteStatementFailed = false;
  } catch (SQLException eS) {
    // Re-throw SQL errors untouched so callers see the original SQLState/code.
    isExecuteStatementFailed = true;
    isLogBeingGenerated = false;
    throw eS;
  } catch (Exception ex) {
    // Anything else (typically a transport failure) is surfaced as a
    // connection exception, SQLState 08S01.
    isExecuteStatementFailed = true;
    isLogBeingGenerated = false;
    throw new SQLException(ex.toString(), "08S01", ex);
  }
}
HiveStatement#waitForOperationToComplete
waitForOperationToComplete 不断通过 GetOperationStatus 接口轮询,查询操作是否已结束。
/**
 * Polls the server for the status of the in-flight operation until it reaches
 * a terminal state (finished, closed, cancelled, timed out, error, or unknown).
 *
 * @return the last TGetOperationStatusResp received (the terminal one); may be
 *         null only if isOperationComplete was already true on entry
 * @throws SQLException if the query was cancelled (SQLState "01000"), ended in
 *         ERROR_STATE (server-provided state/code), is unknown ("HY000"), or a
 *         transport failure occurred ("08S01")
 * @throws SQLTimeoutException if the operation reached TIMEDOUT_STATE
 */
TGetOperationStatusResp waitForOperationToComplete() throws SQLException {
  TGetOperationStatusReq statusReq = new TGetOperationStatusReq(stmtHandle);
  // Only ask the server for progress updates when someone is listening.
  boolean shouldGetProgressUpdate = inPlaceUpdateStream != InPlaceUpdateStream.NO_OP;
  statusReq.setGetProgressUpdate(shouldGetProgressUpdate);
  if (!shouldGetProgressUpdate) {
    // Progress bar is completed if there is nothing we want to request
    // in the first place.
    inPlaceUpdateStream.getEventNotifier().progressBarCompleted();
  }
  TGetOperationStatusResp statusResp = null;
  // Poll on the operation status, till the operation is complete
  while (!isOperationComplete) {
    try {
      /**
       * For an async SQLOperation, GetOperationStatus will use the long polling approach It will
       * essentially return after the HIVE_SERVER2_LONG_POLLING_TIMEOUT (a server config) expires
       */
      statusResp = client.GetOperationStatus(statusReq);
      inPlaceUpdateStream.update(statusResp.getProgressUpdateResponse());
      Utils.verifySuccessWithInfo(statusResp.getStatus());
      if (statusResp.isSetOperationState()) {
        switch (statusResp.getOperationState()) {
        case CLOSED_STATE:
        case FINISHED_STATE:
          // Terminal success states: stop polling.
          isOperationComplete = true;
          isLogBeingGenerated = false;
          break;
        case CANCELED_STATE:
          // 01000 -> warning
          String errMsg = statusResp.getErrorMessage();
          if (errMsg != null && !errMsg.isEmpty()) {
            throw new SQLException("Query was cancelled. " + errMsg, "01000");
          } else {
            throw new SQLException("Query was cancelled", "01000");
          }
        case TIMEDOUT_STATE:
          throw new SQLTimeoutException("Query timed out after " + queryTimeout + " seconds");
        case ERROR_STATE:
          // Get the error details from the underlying exception
          throw new SQLException(statusResp.getErrorMessage(), statusResp.getSqlState(),
              statusResp.getErrorCode());
        case UKNOWN_STATE:
          // NOTE: "UKNOWN_STATE" is the (misspelled) constant defined in
          // Hive's Thrift IDL — not a typo in this file.
          throw new SQLException("Unknown query", "HY000");
        case INITIALIZED_STATE:
        case PENDING_STATE:
        case RUNNING_STATE:
          // Not terminal yet: loop around and poll again.
          break;
        }
      }
    } catch (SQLException e) {
      isLogBeingGenerated = false;
      throw e;
    } catch (Exception e) {
      // Wrap transport/Thrift failures as a connection exception (08S01).
      isLogBeingGenerated = false;
      throw new SQLException(e.toString(), "08S01", e);
    }
  }
  /*
  we set progress bar to be completed when hive query execution has completed
  */
  inPlaceUpdateStream.getEventNotifier().progressBarCompleted();
  return statusResp;
}
HiveServer 端的处理
启动
HiveServer2#init
在 HiveServer2 的 init 方法里,先创建 cliService,再根据 thrift 服务是 HTTP 还是二进制,创建不同的包装类。这两个包装类都继承 ThriftCLIService,所以所有的请求都会调用 ThriftCLIService 中同名的方法。
cliService = new CLIService(this);
// Omit some lines
if (isHTTPTransportMode(hiveConf)) {
thriftCLIService = new ThriftHttpCLIService(cliService, oomHook);
} else {
thriftCLIService = new ThriftBinaryCLIService(cliService, oomHook);
}
@Override
public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException {
  LOG.info("Client protocol version: " + req.getClient_protocol());
  TOpenSessionResp resp = new TOpenSessionResp();
  try {
    SessionHandle sessionHandle = getSessionHandle(req, resp);
    resp.setSessionHandle(sessionHandle.toTSessionHandle());

    // Set the updated fetch size from the server into the configuration map
    // for the client: prefer the session-level conf, falling back to the
    // server-wide default when no session conf is available.
    HiveConf sessionConf = cliService.getSessionConf(sessionHandle);
    int fetchSize;
    if (sessionConf != null) {
      fetchSize = sessionConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE);
    } else {
      fetchSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE);
    }
    Map<String, String> configurationMap = new HashMap<String, String>();
    configurationMap.put(
        HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE.varname,
        Integer.toString(fetchSize));
    resp.setConfiguration(configurationMap);
    resp.setStatus(OK_STATUS);

    // Attach the session to the per-connection server context (when present)
    // so it can be tracked against this transport.
    ThriftCLIServerContext context = (ThriftCLIServerContext) currentServerContext.get();
    if (context != null) {
      context.setSessionHandle(sessionHandle);
    }
  } catch (Exception e) {
    LOG.warn("Error opening session: ", e);
    resp.setStatus(HiveSQLException.toTStatus(e));
  }
  return resp;
}