1. A business requirement calls for getting the latest partition of a table. This example uses partition values in the 20191011 format; the 2019-10-11 format also seems to be supported. Usage:
select maxpt('default.table1');
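In a real query the returned value is typically compared against the partition column directly. A minimal sketch, assuming the partition column is named pt:
-- pt is an assumed partition column name; replace it with the actual one
select * from default.table1 where pt = maxpt('default.table1');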
2. The pom.xml dependencies used are as follows:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hive.version>3.1.0</hive.version>
    <hadoop.version>3.1.0</hadoop.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
        <exclusions>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>${hive.version}</version>
        <scope>provided</scope>
    </dependency>
</dependencies>
3. The implementation code:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public final class MaxPtProd extends UDF {

    public Text evaluate(final Text s) {
        if (s == null) {
            return null;
        }
        // Strip quotes and '=' so both maxpt('db.table') and maxpt("db.table") work
        String newStr = s.toString().replace("'", "").replace("\"", "").replace("=", "");
        String dbName = newStr.split("\\.")[0];
        String tableName = newStr.split("\\.")[1];
        // Build the HDFS path of the table under the managed-table warehouse directory
        String fileName = "/warehouse/tablespace/managed/hive/" + dbName + ".db/" + tableName;
        String maxPartition = null;
        try {
            maxPartition = getFileList(fileName);
        } catch (Exception e) {
            System.out.println("Failed to get the latest partition: " + e.getMessage());
        }
        return maxPartition == null ? null : new Text(maxPartition);
    }

    // HA configuration reference: https://www.jianshu.com/p/ac75432d811b
    public static String getFileList(String path) throws Exception {
        String res = null;
        Configuration conf = new Configuration(false);
        // HDFS HA settings for the target cluster (nameservice and NameNode RPC addresses)
        String nameservices = "gtdata";
        String[] namenodesAddr = {"data02:8020", "data03:8020"};
        String[] namenodes = {"nn1", "nn2"};
        conf.set("fs.defaultFS", "hdfs://" + nameservices);
        conf.set("dfs.nameservices", nameservices);
        conf.set("dfs.ha.namenodes." + nameservices, namenodes[0] + "," + namenodes[1]);
        conf.set("dfs.namenode.rpc-address." + nameservices + "." + namenodes[0], namenodesAddr[0]);
        conf.set("dfs.namenode.rpc-address." + nameservices + "." + namenodes[1], namenodesAddr[1]);
        conf.set("dfs.client.failover.proxy.provider." + nameservices,
                "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
        FileSystem hdfs = FileSystem.get(URI.create(path), conf);
        // List the partition directories of the table, e.g. .../table1/pt=20191011
        FileStatus[] fs = hdfs.listStatus(new Path(path));
        Path[] listPath = FileUtil.stat2Paths(fs);
        List<Integer> list = new ArrayList<>();
        for (Path p : listPath) {
            String s = p.toString();
            // The last 8 characters of the directory name are the partition date, e.g. 20191011
            String number = s.substring(s.length() - 8);
            try {
                list.add(Integer.parseInt(number));
            } catch (Exception e) {
                System.out.println("Skipping non-date directory: " + number);
            }
        }
        if (!list.isEmpty()) {
            res = Collections.max(list).toString();
        }
        return res;
    }
}
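After packaging the project into a jar (for example with mvn clean package) and uploading it, the UDF can be registered in Hive along these lines; the jar path below is only a placeholder, and the class name matches the default-package class above:
-- the jar location is a placeholder, point it at wherever the built jar actually lives
add jar hdfs:///path/to/maxpt-udf.jar;
create temporary function maxpt as 'MaxPtProd';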
4. Alternatively, the latest partition can be obtained from the Hive metastore MySQL database by querying the TBLS and PARTITIONS tables.
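A sketch of such a metastore query, assuming the standard metastore schema (PART_NAME holds values such as pt=20191011, and DBS is joined in to filter by database name):
-- run against the Hive metastore MySQL database; table and column names follow the standard metastore schema
SELECT MAX(p.PART_NAME) AS latest_partition
FROM PARTITIONS p
JOIN TBLS t ON p.TBL_ID = t.TBL_ID
JOIN DBS d ON t.DB_ID = d.DB_ID
WHERE d.NAME = 'default' AND t.TBL_NAME = 'table1';
Taking MAX of the string PART_NAME works for the zero-padded 20191011 format because it sorts lexicographically in date order.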