在 Hive 中使用不同的 GROUP BY 条件,统计出来的数据不一致。排查了一天,才发现是自定义函数(UDF)出了问题:如果自定义函数在执行时抛出异常,任务仍然会显示成功,但结果中会丢失一部分数据。
原UDF
/**
 * Original version of the UDF, kept as the "before" example.
 *
 * <p>It extracts the value that follows {@code dr=} (up to the next
 * {@code &}), expects it to look like {@code a:b_c:d}, and returns the first
 * half followed by {@code _} and the difference
 * {@code (c - a) * 60 + (d - b)}.
 *
 * <p>Nothing in this version catches exceptions: a {@code null} argument or a
 * value that does not match the expected layout makes {@link #evaluate}
 * throw.
 */
public class Drag extends UDF {
    /** Detects whether the input contains the {@code dr=} marker at all. */
    private static final Pattern pattern = Pattern.compile("dr=");

    public Drag() {
    }

    /**
     * Converts the {@code dr=} parameter found in {@code key}.
     *
     * @param key raw string that may contain {@code dr=a:b_c:d}
     * @return {@code "a:b_<diff>"} when the marker is present, otherwise
     *         {@code "-_-"}
     * @throws NullPointerException if {@code key} is {@code null}
     * @throws ArrayIndexOutOfBoundsException if an expected part is missing
     * @throws NumberFormatException if a time component is not an integer
     */
    public String evaluate(String key) {
        // Guard clause: no marker means there is nothing to parse.
        if (!pattern.matcher(key).find()) {
            return "-_-";
        }

        // Text after the first "dr=" and before the next '&'.
        String range = key.split("dr=")[1].split("&")[0];

        String[] bounds = range.split("_");
        String stime = bounds[0];
        String etime = bounds[1];

        String[] endParts = etime.split(":");
        String[] startParts = stime.split(":");

        // Parse in the same order as the arithmetic reads them.
        int majorDiff = Integer.parseInt(endParts[0]) - Integer.parseInt(startParts[0]);
        int minorDiff = Integer.parseInt(endParts[1]) - Integer.parseInt(startParts[1]);
        int a = majorDiff * 60 + minorDiff;

        return stime + "_" + a;
    }

    /** Small manual check; the second call fails because of the null argument. */
    public static void main(String args[]) {
        Drag decoder = new Drag();
        System.out.println(decoder.evaluate("dr=28:10_28:24"));
        System.out.println(decoder.evaluate(null));
    }
}
改成以下写法(在 evaluate 中捕获异常,出错时返回默认值 "-_-")之后,问题就解决了。
package drag;
import java.io.PrintStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.UDF;
public class Drag extends UDF {
private static final Pattern pattern = Pattern.compile("dr=");
public Drag() {
}
public String evaluate(String key) {
try {
if (pattern.matcher(key).find()) {
key = key.split("dr=")[1].split("&")[0];
String stime = key.split("_")[0];
String etime = key.split("_")[1];
int a = (Integer.parseInt(etime.split(":")[0]) - Integer
.parseInt(stime.split(":")[0]))
* 60
+ (Integer.parseInt(etime.split(":")[1]) - Integer
.parseInt(stime.split(":")[1]));
return (new StringBuilder(String.valueOf(stime))).append("_")
.append(a).toString();
} else {
return "-_-";
}
} catch (Throwable t) {
return "-_-";
}
}
public static void main(String args[]) {
Drag decoder = new Drag();
System.out.println(decoder.evaluate("dr=28:10_28:24"));
System.out.println(decoder.evaluate(null));
}
}