hive 删除大分区表(多级分区)时遇到超时报错;
删除表报错
metastore.ObjectStore: Falling back to ORM path due to direct SQL failure (this is not an error): Timeout when executing method: drop_table_with_environment_context; 605198ms exceeds 600000ms at org.apache.hadoop.hive.metastore.Deadline.newMetaException(Deadline.java:178) at org.apache.hadoop.hive.metastore.Deadline.check(Deadline.java:168) at org.apache.hadoop.hive.metastore.Deadline.checkTimeout(Deadline.java:147) at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsFromPartitionIds(MetaStoreDirectSql.java:529) at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.access$800(MetaStoreDirectSql.java:92) at org.apache.hadoop.hive.metastore.MetaStoreDirectSql$2.run(MetaStoreDirectSql.java:492) at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.runBatched(MetaStoreDirectSql.java:1819) at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsViaSqlFilterInternal(MetaStoreDirectSql.java:490) at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitions(MetaStoreDirectSql.java:405) at org.apache.hadoop.hive.metastore.ObjectStore$2.getSqlResult(ObjectStore.java:2112);
Caused by: Timeout when executing method: drop_table_with_environment_context; 605198ms exceeds 600000ms at org.apache.hadoop.hive.metastore.Deadline.check(Deadline.java:164)
2022-04-08T06:36:04,979 ERROR [e2facd60-8054-493b-8470-c69444defa91 main] metastore.RetryingHMSHandler: Error happens in method drop_table_with_environment_context: MetaException(message:Timeout when executing method: drop_table_with_environment_context; 613553ms exceeds 600000ms)
at org.apache.hadoop.hive.metastore.Deadline.newMetaException(Deadline.java:178)
at org.apache.hadoop.hive.metastore.Deadline.check(Deadline.java:168)
at org.apache.hadoop.hive.metastore.Deadline.checkTimeout(Deadline.java:147)
at org.apache.hadoop.hive.metastore.ObjectStore.convertToParts(ObjectStore.java:2207)
at org.apache.hadoop.hive.metastore.ObjectStore.convertToParts(ObjectStore.java:2194)
at org.apache.hadoop.hive.metastore.ObjectStore.access$200(ObjectStore.java:182)
at org.apache.hadoop.hive.metastore.ObjectStore$2.getJdoResult(ObjectStore.java:2119)
at org.apache.hadoop.hive.metastore.ObjectStore$2.getJdoResult(ObjectStore.java:2108)
at org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.run(ObjectStore.java:2974)
at org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsInternal(ObjectStore.java:2124)
at org.apache.hadoop.hive.metastore.ObjectStore.getPartitions(ObjectStore.java:2102)
解决方案
hive-site.xml 新增
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>7200s</value>
</property>
问题排查
通过源码查看,发现是 MetaStoreDirectSql
执行底层 SQL 操作时,内部 Deadline.checkTimeout();
下面代码为对应被调用代码:
// The SQL executed against the metastore backing database; note the multiple
// outer joins — with very large partition counts this query is what makes
// drop_table_with_environment_context exceed the deadline.
// Get most of the fields for the IDs provided.
// Assume db and table names are the same for all partition, as provided in arguments.
String queryText =
"select \"PARTITIONS\".\"PART_ID\", \"SDS\".\"SD_ID\", \"SDS\".\"CD_ID\","
+ " \"SERDES\".\"SERDE_ID\", \"PARTITIONS\".\"CREATE_TIME\","
+ " \"PARTITIONS\".\"LAST_ACCESS_TIME\", \"SDS\".\"INPUT_FORMAT\", \"SDS\".\"IS_COMPRESSED\","
+ " \"SDS\".\"IS_STOREDASSUBDIRECTORIES\", \"SDS\".\"LOCATION\", \"SDS\".\"NUM_BUCKETS\","
+ " \"SDS\".\"OUTPUT_FORMAT\", \"SERDES\".\"NAME\", \"SERDES\".\"SLIB\" "
+ "from \"PARTITIONS\""
+ " left outer join \"SDS\" on \"PARTITIONS\".\"SD_ID\" = \"SDS\".\"SD_ID\" "
+ " left outer join \"SERDES\" on \"SDS\".\"SERDE_ID\" = \"SERDES\".\"SERDE_ID\" "
+ "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc";
/**
 * Checks the deadline registered on the current thread.
 *
 * @throws MetaException if no Deadline is registered on this thread, or if the
 *         registered deadline has already elapsed (see {@code check()}).
 */
public static void checkTimeout() throws MetaException {
  Deadline current = getCurrentDeadline();
  if (current == null) {
    // No deadline was registered for this thread; callers must register one first.
    throw newMetaException(new DeadlineException("The threadlocal Deadline is null," +
        " please register it first."));
  }
  current.check();
}
// Sentinel value meaning startTimer() has not been called yet.
private static final long NO_DEADLINE = Long.MIN_VALUE;

/**
 * Verifies the timer was started and that the elapsed time has not exceeded
 * the configured timeout; any violation is surfaced as a MetaException.
 *
 * @throws MetaException if startTimer() was never called, or if the elapsed
 *         time exceeds {@code timeoutNanos}.
 */
private void check() throws MetaException{
  try {
    if (startTime == NO_DEADLINE) {
      throw new DeadlineException("Should execute startTimer() method before " +
          "checkTimeout. Error happens in method: " + method);
    }
    // timeoutNanos is initialized in RawStoreProxy via Deadline.registerIfNot(socketTimeout);
    // socketTimeout is the server-side "hive.metastore.client.socket.timeout" setting,
    // which defaults to "600s".
    final long elapsed = System.nanoTime() - startTime;
    if (elapsed > timeoutNanos) {
      throw new DeadlineException("Timeout when executing method: " + method + "; "
          + (elapsed / 1000000L) + "ms exceeds " + (timeoutNanos / 1000000L) + "ms");
    }
  } catch (DeadlineException e) {
    // Wrap so callers only ever see MetaException, preserving the cause.
    throw newMetaException(e);
  }
}