环境:阿里云MaxCompute
阿里云MaxCompute的UDAF函数和hive的UDAF函数还是有所差别的。
需求:将多个已用逗号拼接好的字符串再次拼接,并对拼接后的结果去重。比如“北京,上海,天津”和“江西,湖北,湖南,北京”这两个字符串重新拼接后的结果应该为:北京,上海,天津,江西,湖北,湖南
这里我用到了阿里云的MaxCompute Studio插件。关于该插件的下载、使用以及函数的发布,大家可以参考官网:https://helpcdn.aliyun.com/document_detail/27811.html?spm=a2c4g.11186623.6.601.64131f854SbjQa
import com.aliyun.odps.io.Text;
import com.aliyun.odps.io.Writable;
import com.aliyun.odps.udf.ExecutionContext;
import com.aliyun.odps.udf.UDFException;
import com.aliyun.odps.udf.Aggregator;
import com.aliyun.odps.udf.annotation.Resolve;
import java.util.*;
// TODO define input and output types, e.g. "double->double".
/**
 * MaxCompute UDAF that re-aggregates comma-separated strings: every input
 * string is split on {@code ","}, the resulting tokens are de-duplicated, and
 * the distinct tokens are joined back with {@code ","}.
 *
 * <p>Tokens are emitted in the order they are first seen in the aggregation
 * buffer. Empty tokens (from empty inputs or doubled separators) are dropped.
 *
 * @author Liam
 * @date 2021-02-26 17:05:08
 */
@Resolve({"string->string"})
public class HscodeStrMerge extends Aggregator {

    /** Delimiter used both inside the input strings and in the output. */
    private static final String SEPARATOR = ",";

    /** Reusable holder for the value returned by {@link #terminate(Writable)}. */
    private final Text result = new Text();

    @Override
    public void setup(ExecutionContext ctx) throws UDFException {
        // No initialisation required.
    }

    /**
     * Creates the aggregation buffer.
     *
     * @return an empty {@link Text} that accumulates the comma-joined inputs
     */
    @Override
    public Writable newBuffer() {
        return new Text();
    }

    /**
     * Folds the arguments of one input row into the buffer (within-partition
     * aggregation).
     *
     * @param buffer aggregation buffer created by {@link #newBuffer()}
     * @param args   arguments passed to the UDAF in SQL; the array itself is
     *               not null, but an element may be null when the
     *               corresponding input value is NULL
     * @throws UDFException declared by the overridden method; not thrown here
     */
    @Override
    public void iterate(Writable buffer, Writable[] args) throws UDFException {
        Text buf = (Text) buffer;
        for (Writable arg : args) {
            if (arg != null) {
                append(buf, arg.toString());
            }
        }
    }

    /**
     * Folds a partial aggregation result into the buffer (cross-partition
     * aggregation).
     *
     * @param buffer  aggregation buffer that receives the partial result
     * @param partial partial result produced by another buffer; ignored when
     *                null or empty
     * @throws UDFException declared by the overridden method; not thrown here
     */
    @Override
    public void merge(Writable buffer, Writable partial) throws UDFException {
        if (partial != null) {
            append((Text) buffer, partial.toString());
        }
    }

    /**
     * Produces the final result: the distinct, non-empty tokens of the buffer
     * joined with {@code ","}.
     *
     * @param buffer aggregation buffer holding the comma-joined inputs
     * @return the de-duplicated string, or {@code null} when the buffer
     *         contains no non-empty token
     * @throws UDFException declared by the overridden method; not thrown here
     */
    @Override
    public Writable terminate(Writable buffer) throws UDFException {
        // LinkedHashSet removes duplicates while keeping first-seen order.
        Set<String> distinct = new LinkedHashSet<>();
        for (String token : buffer.toString().split(SEPARATOR)) {
            // split() only strips trailing empty strings; leading and interior
            // ones (e.g. from "a,,b" or "") must be filtered out explicitly.
            if (!token.isEmpty()) {
                distinct.add(token);
            }
        }
        if (distinct.isEmpty()) {
            return null;
        }
        result.set(String.join(SEPARATOR, distinct));
        return result;
    }

    @Override
    public void close() throws UDFException {
        // Nothing to release.
    }

    /**
     * Appends {@code value} to {@code buf}, inserting the separator only
     * between two non-empty parts so the buffer never gains leading, trailing
     * or doubled separators of its own.
     */
    private static void append(Text buf, String value) {
        if (value.isEmpty()) {
            return;
        }
        String current = buf.toString();
        buf.set(current.isEmpty() ? value : current + SEPARATOR + value);
    }
}
更具体的实现可以参考:https://blog.csdn.net/beautiful_huang/article/details/107026903