[Hive] A UDF for Getting the Latest Partition of a Hive Table

In an offline data warehouse you often hit the case where a fact table only needs to join against the latest partition of a dimension table. A UDF that returns a table's latest partition handles this neatly; the code below finds it by listing the table's partition directories on HDFS.
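The lookup relies on Hive's standard warehouse layout, in which every partition of a table is a subdirectory named key=value under the table's directory. For the example table used later, the layout might look like this (the dt key and its values are made up for illustration):

/user/hive/warehouse-3.1.1/temp.db/temp_partition1_tb/dt=2021-06-01
/user/hive/warehouse-3.1.1/temp.db/temp_partition1_tb/dt=2021-06-02
/user/hive/warehouse-3.1.1/temp.db/temp_partition1_tb/dt=2021-06-03

Splitting each directory name on "=" and taking the lexicographic maximum of the values yields 2021-06-03 as the latest partition.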

First, the Maven setup. The relevant pom.xml fragments:

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <repository>
            <id>aliyun</id>
            <url>https://maven.aliyun.com/repository/public</url>
        </repository>
        <repository>
            <id>jboss</id>
            <url>http://repository.jboss.com/nexus/content/groups/public</url>
        </repository>
    </repositories>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <hadoop.version>2.7.6</hadoop.version>
        <hive.version>2.1.1</hive.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
    </dependencies>
The UDF implementation (the package name matches the class registered in Hive below):

package com.fuyun.udf;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class GetNewestPartition extends UDF {

    /**
     * Takes a table name of the form "db.table" and returns the table's
     * latest partition value, found by listing its directory on HDFS.
     */
    public Text evaluate(Text tableName) {
        String[] parts = tableName.toString().split("\\.");
        String dbName = parts[0];
        String tblName = parts[1];
        // Build the table's warehouse path,
        // e.g. /user/hive/warehouse-3.1.1/temp.db/temp_partition1_tb
        String tablePath = "/user/hive/warehouse-3.1.1/" + dbName + ".db/" + tblName;
        String newestPartition = null;
        try {
            newestPartition = getFileList(tablePath);
        } catch (Exception e) {
            System.out.println("Failed to fetch the latest partition: " + e.getMessage());
        }
        return newestPartition == null ? null : new Text(newestPartition);
    }

    public static String getFileList(String path) throws Exception {
        String res = null;

        Configuration conf = new Configuration(false);
        // NameNode address; adjust to your cluster
        conf.set("fs.defaultFS", "hdfs://192.168.235.66:8020/");
        FileSystem hdfs = FileSystem.get(URI.create(path), conf);
        FileStatus[] fileStatuses = hdfs.listStatus(new Path(path));
        Path[] listPath = FileUtil.stat2Paths(fileStatuses);

        // Each partition is a subdirectory named key=value; collect the values
        List<String> list = new ArrayList<>();
        for (Path p : listPath) {
            String s = p.toString();
            if (!s.contains("=")) {
                continue; // skip entries that are not key=value partition directories
            }
            list.add(s.split("=")[1]);
        }
        if (!list.isEmpty()) {
            // Partition values sort lexicographically, so max() is the latest
            res = Collections.max(list);
        }
        return res;
    }
}

Then package the project with Maven and upload the jar to HDFS:

hdfs dfs -put hive-UDF-1.0.0-1.0-SNAPSHOT.jar /user/hive/udf/

Log in to the Hive client and create the function from the jar on HDFS:

create function getnewest_partition as 'com.fuyun.udf.GetNewestPartition' using jar 'hdfs:/user/hive/udf/hive-UDF-1.0.0-1.0-SNAPSHOT.jar';

select getnewest_partition('temp.temp_partition1_tb');
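With the function registered, the fact-to-dimension join from the introduction no longer needs a hardcoded partition value. A minimal sketch; fact_tb, dim_tb, the temp database, and the join and partition columns are hypothetical names standing in for a real schema:

select f.*
from fact_tb f
join dim_tb d
  on f.dim_id = d.id
 and d.dt = getnewest_partition('temp.dim_tb');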
