Hive UDAF examples
Kurt — sample kurtosis aggregate (uses commons-math3 Kurtosis)
import org.apache.commons.math3.stat.descriptive.moment.Kurtosis;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import java.util.*;
public class Kurt extends UDAF {

    // Kurtosis#evaluate(double[]) is stateless, so one shared instance is safe to reuse.
    private static final Kurtosis kurtosis = new Kurtosis();

    /**
     * Evaluator for BIGINT columns: buffers every non-null value and computes
     * the sample kurtosis in terminate().
     */
    public static class KurtEvaluatorLong implements UDAFEvaluator {

        // Instance (NOT static) buffer: a static field would be shared by every
        // evaluator instance in the JVM and corrupt concurrent aggregations.
        private List<LongWritable> state;

        public KurtEvaluatorLong() {
            super();
            state = new ArrayList<>();
            init();
        }

        /** Resets the aggregation buffer; called by Hive before evaluator reuse. */
        public void init() {
            state.clear();
        }

        /** Accumulates one input row; null rows are skipped. */
        public boolean iterate(LongWritable o) {
            if (o != null) {
                state.add(o);
            }
            return true;
        }

        /** Partial state handed to merge(); null signals "no rows seen". */
        public List<LongWritable> terminatePartial() {
            return state.isEmpty() ? null : state;
        }

        /** Merges a partial state; guards against the null produced above. */
        public boolean merge(List<LongWritable> o) {
            if (o != null && !o.isEmpty()) {
                state.addAll(o);
            }
            return true;
        }

        /** @return kurtosis of all buffered values, or null when no rows matched. */
        public Double terminate() {
            if (state.isEmpty()) {
                return null;
            }
            double[] values = new double[state.size()];
            for (int i = 0; i < values.length; i++) {
                // Direct numeric access instead of the toString()/parseDouble round-trip.
                values[i] = state.get(i).get();
            }
            return kurtosis.evaluate(values);
        }
    }

    /** Evaluator for DOUBLE columns; mirrors KurtEvaluatorLong. */
    public static class KurtEvaluatorDouble implements UDAFEvaluator {

        // Instance buffer — see KurtEvaluatorLong for why static is wrong here.
        private List<DoubleWritable> state;

        public KurtEvaluatorDouble() {
            super();
            state = new ArrayList<>();
            init();
        }

        /** Resets the aggregation buffer; called by Hive before evaluator reuse. */
        public void init() {
            state.clear();
        }

        /** Accumulates one input row; null rows are skipped. */
        public boolean iterate(DoubleWritable o) {
            if (o != null) {
                state.add(o);
            }
            return true;
        }

        /** Partial state handed to merge(); null signals "no rows seen". */
        public List<DoubleWritable> terminatePartial() {
            return state.isEmpty() ? null : state;
        }

        /** Merges a partial state; guards against the null produced above. */
        public boolean merge(List<DoubleWritable> o) {
            if (o != null && !o.isEmpty()) {
                state.addAll(o);
            }
            return true;
        }

        /** @return kurtosis of all buffered values, or null when no rows matched. */
        public Double terminate() {
            if (state.isEmpty()) {
                return null;
            }
            double[] values = new double[state.size()];
            for (int i = 0; i < values.length; i++) {
                values[i] = state.get(i).get();
            }
            return kurtosis.evaluate(values);
        }
    }
}
Mode — statistical mode aggregate (returns all values tied for the highest count)
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import java.util.*;
public class Mode extends UDAF {

    /**
     * Evaluator for BIGINT columns: counts occurrences per value and returns
     * every value tied for the highest count.
     */
    public static class ModeEvaluatorLong implements UDAFEvaluator {

        // value -> occurrence count
        Map<Long, Long> stats;
        // result buffer: every value sharing the maximum count
        List<Long> list;
        // highest count seen so far during terminate()
        Long maxNum;

        public ModeEvaluatorLong() {
            super();
            stats = new HashMap<>();
            list = new ArrayList<>();
            init();
        }

        /** Resets all aggregation state; called by Hive before evaluator reuse. */
        public void init() {
            stats.clear();
            list.clear();
            maxNum = 0L; // autoboxing instead of the deprecated new Long(0)
        }

        /** Counts one input row; null rows are skipped. */
        public boolean iterate(LongWritable o) {
            if (o != null) {
                stats.merge(o.get(), 1L, Long::sum);
            }
            return true;
        }

        /** Partial state handed to merge(); null signals "no rows seen". */
        public Map<Long, Long> terminatePartial() {
            return stats.isEmpty() ? null : stats;
        }

        /** Merges partial counts; guards against the null produced above. */
        public boolean merge(Map<Long, Long> o) {
            if (o != null) {
                for (Map.Entry<Long, Long> e : o.entrySet()) {
                    stats.merge(e.getKey(), e.getValue(), Long::sum);
                }
            }
            return true;
        }

        /** @return all values sharing the maximum count, or null when no rows matched. */
        public List<Long> terminate() {
            if (stats.isEmpty()) {
                return null;
            }
            for (Map.Entry<Long, Long> e : stats.entrySet()) {
                Long sv = e.getValue();
                if (sv > maxNum) {
                    maxNum = sv;
                    list.clear();
                    list.add(e.getKey());
                } else if (sv.equals(maxNum)) {
                    // Fixed: '==' on boxed Longs is reference comparison and
                    // silently drops ties once counts exceed the Long cache (127).
                    list.add(e.getKey());
                }
            }
            return list;
        }
    }

    /** Evaluator for DOUBLE columns; mirrors ModeEvaluatorLong. */
    public static class ModeEvaluatorDouble implements UDAFEvaluator {

        // value -> occurrence count
        Map<Double, Long> stats;
        // result buffer: every value sharing the maximum count
        List<Double> list;
        // highest count seen so far during terminate()
        Long maxNum;

        public ModeEvaluatorDouble() {
            super();
            stats = new HashMap<>();
            list = new ArrayList<>();
            init();
        }

        /** Resets all aggregation state; called by Hive before evaluator reuse. */
        public void init() {
            stats.clear();
            list.clear();
            maxNum = 0L;
        }

        /** Counts one input row; null rows are skipped. */
        public boolean iterate(DoubleWritable o) {
            if (o != null) {
                stats.merge(o.get(), 1L, Long::sum);
            }
            return true;
        }

        /** Partial state handed to merge(); null signals "no rows seen". */
        public Map<Double, Long> terminatePartial() {
            return stats.isEmpty() ? null : stats;
        }

        /** Merges partial counts; guards against the null produced above. */
        public boolean merge(Map<Double, Long> o) {
            if (o != null) {
                for (Map.Entry<Double, Long> e : o.entrySet()) {
                    stats.merge(e.getKey(), e.getValue(), Long::sum);
                }
            }
            return true;
        }

        /** @return all values sharing the maximum count, or null when no rows matched. */
        public List<Double> terminate() {
            if (stats.isEmpty()) {
                return null;
            }
            for (Map.Entry<Double, Long> e : stats.entrySet()) {
                Long sv = e.getValue();
                if (sv > maxNum) {
                    maxNum = sv;
                    list.clear();
                    list.add(e.getKey());
                } else if (sv.equals(maxNum)) {
                    // equals(), not '==': boxed-Long identity fails for counts > 127.
                    list.add(e.getKey());
                }
            }
            return list;
        }
    }
}
Skew — sample skewness aggregate (uses commons-math3 Skewness)
import org.apache.commons.math3.stat.descriptive.moment.Skewness;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import java.util.ArrayList;
import java.util.List;
public class Skew extends UDAF {

    // Skewness#evaluate(double[]) is stateless, so one shared instance is safe to reuse.
    private static final Skewness skewness = new Skewness();

    /**
     * Evaluator for BIGINT columns: buffers every non-null value and computes
     * the sample skewness in terminate().
     */
    public static class SkewEvaluatorLong implements UDAFEvaluator {

        // Instance (NOT static) buffer: a static field would be shared by every
        // evaluator instance in the JVM and corrupt concurrent aggregations.
        private List<LongWritable> state;

        public SkewEvaluatorLong() {
            super();
            state = new ArrayList<>();
            init();
        }

        /** Resets the aggregation buffer; called by Hive before evaluator reuse. */
        public void init() {
            state.clear();
        }

        /** Accumulates one input row; null rows are skipped. */
        public boolean iterate(LongWritable o) {
            if (o != null) {
                state.add(o);
            }
            return true;
        }

        /** Partial state handed to merge(); null signals "no rows seen". */
        public List<LongWritable> terminatePartial() {
            return state.isEmpty() ? null : state;
        }

        /** Merges a partial state; guards against the null produced above. */
        public boolean merge(List<LongWritable> o) {
            if (o != null && !o.isEmpty()) {
                state.addAll(o);
            }
            return true;
        }

        /** @return skewness of all buffered values, or null when no rows matched. */
        public Double terminate() {
            if (state.isEmpty()) {
                return null;
            }
            double[] values = new double[state.size()];
            for (int i = 0; i < values.length; i++) {
                // Direct numeric access instead of the toString()/parseDouble round-trip.
                values[i] = state.get(i).get();
            }
            return skewness.evaluate(values);
        }
    }

    /** Evaluator for DOUBLE columns; mirrors SkewEvaluatorLong. */
    public static class SkewEvaluatorDouble implements UDAFEvaluator {

        // Instance buffer — see SkewEvaluatorLong for why static is wrong here.
        private List<DoubleWritable> state;

        public SkewEvaluatorDouble() {
            super();
            state = new ArrayList<>();
            init();
        }

        /** Resets the aggregation buffer; called by Hive before evaluator reuse. */
        public void init() {
            state.clear();
        }

        /** Accumulates one input row; null rows are skipped. */
        public boolean iterate(DoubleWritable o) {
            if (o != null) {
                state.add(o);
            }
            return true;
        }

        /** Partial state handed to merge(); null signals "no rows seen". */
        public List<DoubleWritable> terminatePartial() {
            return state.isEmpty() ? null : state;
        }

        /** Merges a partial state; guards against the null produced above. */
        public boolean merge(List<DoubleWritable> o) {
            if (o != null && !o.isEmpty()) {
                state.addAll(o);
            }
            return true;
        }

        /** @return skewness of all buffered values, or null when no rows matched. */
        public Double terminate() {
            if (state.isEmpty()) {
                return null;
            }
            double[] values = new double[state.size()];
            for (int i = 0; i < values.length; i++) {
                values[i] = state.get(i).get();
            }
            return skewness.evaluate(values);
        }
    }
}
Partial dependencies (Maven pom.xml excerpt)
<!-- Build settings: UTF-8 sources, Java 8 bytecode, Hadoop 3.0.0 / Hive 2.1.1 -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<hadoop.version>3.0.0</hadoop.version>
<hive.version>2.1.1</hive.version>
</properties>
<dependencies>
<!-- hive-exec provides UDAF/UDAFEvaluator; log4j bridges excluded to avoid
     duplicate SLF4J bindings with slf4j-simple below -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Hadoop client APIs (Writable types); logging transitive deps excluded -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Test-only -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Single SLF4J binding kept on the compile classpath -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
<scope>compile</scope>
</dependency>
</dependencies>