Beeline 和 Hive Server 2 交互源代码分析

Beeline 创建连接的流程

Beeline 使用标准 JDBC 连接数据库,如果连接的是 Hive,则 Driver 是 org.apache.hive.jdbc.HiveDriver。通过 HiveDriver 创建连接时,会生成一个 HiveConnection 对象,代码如下:

  /**
   * Opens a connection for the given JDBC URL.
   *
   * @param url  the JDBC URL to connect to
   * @param info connection properties handed to the new connection
   * @return a new {@code HiveConnection}, or {@code null} when {@code acceptsURL(url)} is false
   * @throws SQLException if creating the connection fails
   */
  @Override
  public Connection connect(String url, Properties info) throws SQLException {
    // URLs this driver does not accept yield null rather than an exception.
    if (!acceptsURL(url)) {
      return null;
    }
    return new HiveConnection(url, info);
  }

HiveConnection

HiveConnection 的构造方法里,调用以下几个方法。

// Excerpt from the HiveConnection constructor: the calls below run in this order.
// The transport is opened first; the client created next is built on top of `transport`.
openTransport();
// set up the client
client = new TCLIService.Client(new TBinaryProtocol(transport));
// open client session
openSession();
// Called only after the session has been opened.
// NOTE(review): presumably runs user-configured initialization SQL - confirm against executeInitSql().
executeInitSql();

HiveConnection#openSession

在 openSession 方法里,调用了 client.OpenSession 方法

private void openSession() throws SQLException {
    // Omit some lines
   TOpenSessionResp openResp = client.OpenSession(openReq);
   // Omit some lines

Beeline 执行语句的流程

HiveConnection#createStatement

/**
 * Creates a statement that shares this connection's client, session handle and fetch size.
 *
 * @return a new {@code HiveStatement} bound to this connection
 * @throws SQLException if the connection is already closed
 */
@Override
public Statement createStatement() throws SQLException {
  if (!isClosed) {
    return new HiveStatement(this, client, sessHandle, fetchSize);
  }
  // A closed connection can no longer hand out statements.
  throw new SQLException("Can't create Statement, connection is closed");
}

HiveStatement#execute

执行一条 SQL 语句。

  /**
   * Submits the SQL text to the server, waits for the operation to complete and, when a
   * result set is reported, builds the {@code HiveQueryResultSet} for it.
   *
   * @param sql the statement text to run
   * @return {@code true} if a result set was created, {@code false} otherwise
   * @throws SQLException if submitting the statement or waiting for it fails
   */
  @Override
  public boolean execute(String sql) throws SQLException {
    runAsyncOnServer(sql);
    TGetOperationStatusResp finalStatus = waitForOperationToComplete();

    // The query should be completed by now. Either the final status or the
    // statement handle may report that a result set exists.
    boolean producesResultSet = finalStatus.isHasResultSet() || stmtHandle.isHasResultSet();
    if (!producesResultSet) {
      return false;
    }

    HiveQueryResultSet.Builder resultSetBuilder = new HiveQueryResultSet.Builder(this);
    resultSet = resultSetBuilder
        .setClient(client)
        .setSessionHandle(sessHandle)
        .setStmtHandle(stmtHandle)
        .setMaxRows(maxRows)
        .setFetchSize(fetchSize)
        .setScrollable(isScrollableResultset)
        .build();
    return true;
  }

HiveStatement#runAsyncOnServer

runAsyncOnServer 通过 client.ExecuteStatement 向 HiveServer2 发送请求,并把返回的操作句柄(OperationHandle)保存到 stmtHandle 中。

/**
 * Sends the SQL text to the server as an asynchronous ExecuteStatement request and
 * stores the returned operation handle in {@code stmtHandle}.
 *
 * @param sql the statement text to submit
 * @throws SQLException if the connection check or the status verification fails (rethrown
 *     as-is), or wrapping any other exception with SQLState {@code 08S01}
 */
private void runAsyncOnServer(String sql) throws SQLException {
  checkConnection("execute");
  reInitState();

  TExecuteStatementReq request = new TExecuteStatementReq(sessHandle, sql);
  // Run asynchronously whenever possible. Currently only a SQLOperation can be run
  // asynchronously, in a background operation thread. Compilation can run asynchronously
  // or synchronously, and execution runs asynchronously.
  request.setRunAsync(true);
  request.setConfOverlay(sessConf);
  request.setQueryTimeout(queryTimeout);

  try {
    TExecuteStatementResp response = client.ExecuteStatement(request);
    Utils.verifySuccessWithInfo(response.getStatus());
    stmtHandle = response.getOperationHandle();
    isExecuteStatementFailed = false;
  } catch (Exception failure) {
    // Every failure path records the failed submission and stops log generation.
    isExecuteStatementFailed = true;
    isLogBeingGenerated = false;
    if (failure instanceof SQLException) {
      // SQLExceptions propagate unchanged.
      throw (SQLException) failure;
    }
    throw new SQLException(failure.toString(), "08S01", failure);
  }
}

HiveStatement#waitForOperationToComplete

waitForOperationToComplete 不断调用 GetOperationStatus 接口轮询操作状态,直到操作结束。

/**
 * Polls the server with GetOperationStatus for {@code stmtHandle} until
 * {@code isOperationComplete} becomes true, then returns the last status response.
 *
 * Progress updates are requested only when {@code inPlaceUpdateStream} is not
 * {@code InPlaceUpdateStream.NO_OP}. Every exception path clears
 * {@code isLogBeingGenerated} before the exception propagates.
 *
 * @return the last response received; {@code null} if {@code isOperationComplete} was
 *     already true on entry, because the loop body then never runs
 * @throws SQLTimeoutException if the server reports TIMEDOUT_STATE
 * @throws SQLException if the operation is cancelled (SQLState 01000), reports an error
 *     state, reports the unknown state (SQLState HY000), or any other exception occurs
 *     while polling (wrapped with SQLState 08S01)
 */
TGetOperationStatusResp waitForOperationToComplete() throws SQLException {
    TGetOperationStatusReq statusReq = new TGetOperationStatusReq(stmtHandle);
    // Ask for progress information only when there is a real stream to render it.
    boolean shouldGetProgressUpdate = inPlaceUpdateStream != InPlaceUpdateStream.NO_OP;
    statusReq.setGetProgressUpdate(shouldGetProgressUpdate);
    if (!shouldGetProgressUpdate) {
      /**
       * progress bar is completed if there is nothing we want to request in the first place.
       */
      inPlaceUpdateStream.getEventNotifier().progressBarCompleted();
    }
    // Stays null unless at least one poll is performed.
    TGetOperationStatusResp statusResp = null;

    // Poll on the operation status, till the operation is complete
    while (!isOperationComplete) {
      try {
        /**
         * For an async SQLOperation, GetOperationStatus will use the long polling approach It will
         * essentially return after the HIVE_SERVER2_LONG_POLLING_TIMEOUT (a server config) expires
         */
        statusResp = client.GetOperationStatus(statusReq);
        // The progress stream is updated before the response status is verified.
        inPlaceUpdateStream.update(statusResp.getProgressUpdateResponse());
        Utils.verifySuccessWithInfo(statusResp.getStatus());
        if (statusResp.isSetOperationState()) {
          switch (statusResp.getOperationState()) {
          // Terminal success states: end the loop and stop log generation.
          case CLOSED_STATE:
          case FINISHED_STATE:
            isOperationComplete = true;
            isLogBeingGenerated = false;
            break;
          case CANCELED_STATE:
            // 01000 -> warning
            String errMsg = statusResp.getErrorMessage();
            // Append the server-supplied message only when one is present.
            if (errMsg != null && !errMsg.isEmpty()) {
              throw new SQLException("Query was cancelled. " + errMsg, "01000");
            } else {
              throw new SQLException("Query was cancelled", "01000");
            }
          case TIMEDOUT_STATE:
            throw new SQLTimeoutException("Query timed out after " + queryTimeout + " seconds");
          case ERROR_STATE:
            // Get the error details from the underlying exception
            throw new SQLException(statusResp.getErrorMessage(), statusResp.getSqlState(),
                statusResp.getErrorCode());
          case UKNOWN_STATE:
            throw new SQLException("Unknown query", "HY000");
          // Still in progress: fall out of the switch and poll again.
          case INITIALIZED_STATE:
          case PENDING_STATE:
          case RUNNING_STATE:
            break;
          }
        }
      } catch (SQLException e) {
        // SQLExceptions (including those thrown by the switch above) propagate unchanged.
        isLogBeingGenerated = false;
        throw e;
      } catch (Exception e) {
        // Any other failure is wrapped, keeping the original exception as the cause.
        isLogBeingGenerated = false;
        throw new SQLException(e.toString(), "08S01", e);
      }
    }

    /*
      we set progress bar to be completed when hive query execution has completed
    */
    inPlaceUpdateStream.getEventNotifier().progressBarCompleted();
    return statusResp;
  }

HiveServer 端的处理

启动

HiveServer2#init

在 HiveServer2 的 init 方法里,先创建 cliService,再根据 thrift 服务是 HTTP 模式还是二进制模式,创建不同的包装类。这两个包装类都继承 ThriftCLIService,所以所有的请求最终都调用 ThriftCLIService 的同名方法。下面先给出 init 的相关代码,再以 ThriftCLIService 的 OpenSession 方法为例。

   // Excerpt from HiveServer2#init: the CLI service is created first, then passed to
   // whichever Thrift front end is constructed below.
   cliService = new CLIService(this);
    // Omit some lines
    // Pick the Thrift front end by transport mode: HTTP wrapper when
    // isHTTPTransportMode(hiveConf) is true, binary wrapper otherwise.
    if (isHTTPTransportMode(hiveConf)) {
      thriftCLIService = new ThriftHttpCLIService(cliService, oomHook);
    } else {
      thriftCLIService = new ThriftBinaryCLIService(cliService, oomHook);
    }

/**
 * Handles an OpenSession request: obtains a session handle, reports the default result-set
 * fetch size back to the client, and records the handle on the current server context.
 *
 * Failures are not propagated from the try block; they are logged and converted into the
 * status of the returned response.
 *
 * @param req the open-session request
 * @return the response carrying either the session handle and configuration with
 *     {@code OK_STATUS}, or an error status derived from the caught exception
 * @throws TException declared by the interface
 */
@Override
public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException {
  LOG.info("Client protocol version: " + req.getClient_protocol());
  TOpenSessionResp response = new TOpenSessionResp();
  try {
    SessionHandle handle = getSessionHandle(req, response);
    response.setSessionHandle(handle.toTSessionHandle());

    // Set the updated fetch size from the server into the configuration map for the client.
    // The session-level configuration wins; the server-wide one is read only as a fallback.
    HiveConf.ConfVars fetchSizeVar =
        HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE;
    HiveConf sessionConf = cliService.getSessionConf(handle);
    int fetchSize;
    if (sessionConf != null) {
      fetchSize = sessionConf.getIntVar(fetchSizeVar);
    } else {
      fetchSize = hiveConf.getIntVar(fetchSizeVar);
    }
    Map<String, String> clientConfig = new HashMap<String, String>();
    clientConfig.put(fetchSizeVar.varname, Integer.toString(fetchSize));
    response.setConfiguration(clientConfig);
    response.setStatus(OK_STATUS);

    // Attach the new session to the current server context when one is available.
    ThriftCLIServerContext serverContext =
        (ThriftCLIServerContext) currentServerContext.get();
    if (serverContext != null) {
      serverContext.setSessionHandle(handle);
    }
  } catch (Exception e) {
    LOG.warn("Error opening session: ", e);
    response.setStatus(HiveSQLException.toTStatus(e));
  }
  return response;
}
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值