hive 自定义函数在GROUP BY中异常不抛出，但丢失数据

最新推荐文章于 2023-11-28 17:05:53 发布

houzhizhen

最新推荐文章于 2023-11-28 17:05:53 发布

阅读量1.6k

点赞数

分类专栏： hive

本文链接：https://blog.csdn.net/houzhizhen/article/details/39896689

版权

hive 专栏收录该内容

154 篇文章 15 订阅

订阅专栏

在 Hive 中使用不同的 GROUP BY 条件查询时，得到的数据不一致。排查了一天，才发现是自定义函数（UDF）出了问题：自定义函数抛出异常时，任务仍然会执行成功，但会丢失一部分数据。

原UDF

/**
 * Hive UDF that reads a {@code dr=<start>_<end>} parameter out of a string and
 * reports the start value together with the distance between the two values.
 *
 * <p>This is the unguarded variant: a {@code null} or malformed input makes
 * {@link #evaluate(String)} throw instead of returning a fallback value.
 */
public class Drag extends UDF {

   /** Finds the "dr=" marker that introduces the parameter. */
   private static final Pattern pattern = Pattern.compile("dr=");

   public Drag() {
   }

   /**
    * Converts the {@code dr} parameter of {@code key} into {@code "<start>_<diff>"}.
    *
    * <p>Each of start and end is expected to look like {@code a:b}; the diff is
    * {@code (end.a - start.a) * 60 + (end.b - start.b)}. For example
    * {@code "dr=28:10_28:24"} yields {@code "28:10_14"}.
    *
    * @param key text that may contain {@code dr=<start>_<end>}, optionally followed by {@code &...}
    * @return {@code "<start>_<diff>"}, or {@code "-_-"} when {@code key} has no {@code dr=} marker
    * @throws NullPointerException if {@code key} is {@code null}
    * @throws ArrayIndexOutOfBoundsException if an expected {@code _} or {@code :} separated part is missing
    * @throws NumberFormatException if one of the numeric parts is not an integer
    */
   public String evaluate(String key) {
       // Guard clause: without the marker there is nothing to parse.
       if (!pattern.matcher(key).find()) {
           return "-_-";
       }

       // Text after the first "dr=" and before the next "&".
       String range = key.split("dr=")[1].split("&")[0];

       String[] bounds = range.split("_");
       String stime = bounds[0];
       String etime = bounds[1];

       String[] startParts = stime.split(":");
       String[] endParts = etime.split(":");

       // Parse in the order end/start of the first field, then end/start of the second field.
       int endMajor = Integer.parseInt(endParts[0]);
       int startMajor = Integer.parseInt(startParts[0]);
       int endMinor = Integer.parseInt(endParts[1]);
       int startMinor = Integer.parseInt(startParts[1]);

       int a = (endMajor - startMajor) * 60 + (endMinor - startMinor);
       return stime + "_" + a;
   }

   /** Small manual check: one well-formed input followed by a {@code null} input. */
   public static void main(String[] args) {
       Drag decoder = new Drag();
       System.out.println(decoder.evaluate("dr=28:10_28:24"));
       System.out.println(decoder.evaluate(null));
   }

}

改成下面这样（在 evaluate 中对异常输入返回默认值 "-_-"，而不是让异常抛出）就好了。

package drag;

import java.io.PrintStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that reads a {@code dr=<start>_<end>} parameter out of a string and
 * reports the start value together with the distance between the two values.
 *
 * <p>Any input that cannot be parsed ({@code null}, no {@code dr=} marker, missing
 * parts, non-numeric fields) produces the fallback value {@code "-_-"} rather than
 * an exception.
 */
public class Drag extends UDF {
   /** Finds the "dr=" marker that introduces the parameter. */
   private static final Pattern pattern = Pattern.compile("dr=");

   /** Fallback result for input that is missing or cannot be parsed. */
   private static final String INVALID = "-_-";

   public Drag() {
   }

   /**
    * Converts the {@code dr} parameter of {@code key} into {@code "<start>_<diff>"}.
    *
    * <p>Each of start and end is expected to look like {@code a:b}; the diff is
    * {@code (end.a - start.a) * 60 + (end.b - start.b)}. For example
    * {@code "dr=28:10_28:24"} yields {@code "28:10_14"}.
    *
    * @param key text that may contain {@code dr=<start>_<end>}, optionally followed
    *            by {@code &...}; may be {@code null}
    * @return {@code "<start>_<diff>"}, or {@code "-_-"} when {@code key} is {@code null},
    *         has no {@code dr=} marker, or is malformed
    */
   public String evaluate(String key) {
       // Handle the expected "nothing to parse" cases explicitly instead of via exceptions.
       if (key == null || !pattern.matcher(key).find()) {
           return INVALID;
       }
       try {
           // Text after the first "dr=" and before the next "&".
           String range = key.split("dr=")[1].split("&")[0];

           String[] bounds = range.split("_");
           String stime = bounds[0];
           String etime = bounds[1];

           String[] startParts = stime.split(":");
           String[] endParts = etime.split(":");

           int a = (Integer.parseInt(endParts[0]) - Integer.parseInt(startParts[0])) * 60
                   + (Integer.parseInt(endParts[1]) - Integer.parseInt(startParts[1]));
           return stime + "_" + a;
       } catch (RuntimeException ignored) {
           // Malformed input (missing part -> ArrayIndexOutOfBoundsException, non-numeric
           // field -> NumberFormatException) maps to the fallback value. JVM Errors such as
           // OutOfMemoryError are deliberately NOT caught so that real failures stay visible.
           return INVALID;
       }
   }

   /** Small manual check: one well-formed input followed by a {@code null} input. */
   public static void main(String[] args) {
       Drag decoder = new Drag();
       System.out.println(decoder.evaluate("dr=28:10_28:24"));
       System.out.println(decoder.evaluate(null));
   }

}