1.创建临时函数:
通过自定义函数实现。临时函数仅对当前会话有效:每次使用前需先执行下面前两行注册语句(ADD JAR 与 CREATE TEMPORARY FUNCTION),然后才能调用该函数。
-- Register the UDF jar for the current session, then create a session-scoped function.
ADD JAR hdfs://nameservice1/data/jar/max-partition.jar;
-- NOTE: a space is required between the class-name literal and USING JAR.
CREATE TEMPORARY FUNCTION maxPart AS 'com.mingyang.MaxPartition' USING JAR 'hdfs://nameservice1/data/jar/max-partition.jar';
-- Usage: two-argument form takes (database, table).
select maxPart('dw','dwd_actor_exhibition');
-- Three-argument form: third argument presumably selects a specific partition key — confirm against the UDF implementation.
select maxPart('dw','dwd_actor_exhibition',database_name);
JAR 中的关键代码——在 Hive 元数据库(metastore,MySQL)上拼接查询最大分区的 SQL:
/**
 * Builds the SQL statement that looks up the latest partition value (dt) of a
 * Hive table by querying the Hive metastore tables (hive.PARTITIONS / TBLS / DBS).
 *
 * @param dbName   Hive database name to filter on (required, must be non-null)
 * @param tbName   Hive table name to filter on (required, must be non-null)
 * @param partName optional partition-name filter; when null or empty the
 *                 condition is omitted (matches the old StringUtils.isEmpty check)
 * @return the complete SELECT statement as a single string
 */
public String getSelSql(String dbName, String tbName, String partName) {
    // Inner query: one row per (db, table, partition key), keeping the max of the
    // last 10 characters of PART_NAME as dt.
    // NOTE(review): RIGHT(PART_NAME, 10) assumes a "key=yyyy-MM-dd" layout, and
    // substr(..., 15) assumes a fixed-width key prefix — TODO confirm both against
    // the actual PART_NAME format in this metastore.
    String mainSql = "SELECT dt from (SELECT\n" +
            "\tc.NAME AS db_name,\n" +
            "\tb.TBL_NAME AS tbl_name,\n" +
            "\tsubstr( SUBSTRING_INDEX( a.PART_NAME, \"/\", 1 ), 15 ) AS part_name,\n" +
            "\tmax(RIGHT ( a.PART_NAME, 10 )) AS dt \n" +
            "FROM\n" +
            "\thive.`PARTITIONS` a\n" +
            "\tLEFT JOIN hive.TBLS b ON a.TBL_ID = b.TBL_ID\n" +
            "\tLEFT JOIN hive.DBS c ON b.DB_ID = c.DB_ID \n" +
            "GROUP BY c.NAME,b.TBL_NAME,substr(SUBSTRING_INDEX( a.PART_NAME, \"/\", 1 ),15)) t";
    StringBuilder sb = new StringBuilder(mainSql);
    // Example of the generated filter:
    //   where t.db_name = 'dw' and t.tbl_name = 'app_company_contact'
    // NOTE(security): values are interpolated into the SQL text; double any embedded
    // single quotes so a quote inside an identifier cannot break out of the literal.
    sb.append(" where t.db_name ='").append(escapeSqlLiteral(dbName)).append('\'');
    sb.append(" and t.tbl_name ='").append(escapeSqlLiteral(tbName)).append('\'');
    if (partName != null && !partName.isEmpty()) {
        sb.append(" and t.part_name = '").append(escapeSqlLiteral(partName)).append('\'');
    }
    return sb.toString();
}

/** Doubles single quotes so the value is safe inside a SQL single-quoted literal. */
private static String escapeSqlLiteral(String value) {
    return value.replace("'", "''");
}
2. 创建永久函数:
1)本地打包上传到服务器
2)在hdfs上创建目录
hadoop fs -mkdir /user/lib/
3)将jar包从服务器上传到hdfs的指定目录,例如/user/lib/目录
hadoop fs -put /data/jar/max-partition.jar /user/lib/
4)设置文件权限(a+x 为所有用户添加执行权限,确保 Hive 各节点可以访问该 jar)
hadoop fs -chmod a+x /user/lib/max-partition.jar
5)创建永久自定义函数(若同名函数已存在,先执行 drop function maxPart; 删除旧版本)
-- Permanent function: registered in the metastore, visible to all sessions.
-- NOTE: a space is required between the class-name literal and USING JAR; both
-- jars must exist on HDFS with readable permissions (see steps above).
CREATE FUNCTION maxPart AS 'com.mingyang.GetMaxPartitionUDF' USING JAR 'hdfs://nameservice1/user/lib/max-partition.jar', JAR 'hdfs://nameservice1/user/lib/common_udf.jar';
-- Refresh the function registry on already-running sessions / HiveServer2 instances.
reload functions; -- current Hive syntax (plural)
reload function;  -- legacy singular form; kept so the note works on older Hive versions
效率提高效果验证: