UDTF
UDTF对应一行输入,通过forward输出多行
eg:
//TODO define input and output types,e.g.,"string,string->string,bigint".
@Resolve({"string,bigint->string,double"})
public class byeFellows extends UDTF
{
@Override
public void process(Object[] args) throws UDFException
{
String a=(String)args[0];
Long b=(Long)args[1];
int cnt=1;
char[] c=a.toCharArray();
//拆分空格分隔的多值列
for(int i=0;i<a.length(),++i)
{
if(c[i]==' ')
{
cnt=cnt+1;
}
}
if(cnt==0)cnt=1;
//如果是多个空格隔开的多值列
for(String t:a.split("\\s+"))
{
forward(t,(double)b/cnt);
}
}
}
UDAF
UDAF做聚合操作,多行输入输出一条统计信息
eg:计算变异系数
//实现writable的接口,实现读写方法
private static class AvgBuffer implements Writable
{
private double sum=0;
private long count=0;
private double sum2=0;
@Override
{
out.writableDouble(sum);
out.writableDouble(sum2);
out.writableLong(count);
}
@Override
public void readFields(DataInput in)throws IOException
{
sum=in.readDouble();
sum2=in.readDouble();
count=in.readLong();
}
@Override
public Writable newBuffer()
{
return new AvgBuffer();
}
}
/**
 * Folds one input row into the running statistics of the current slice.
 * A null first argument is skipped and leaves the buffer untouched.
 *
 * @param buffer the AvgBuffer being accumulated into
 * @param args   input columns; args[0] is the value as a DoubleWritable
 * @throws UDFException declared by the aggregator contract
 */
@Override
public void iterate(Writable buffer, Writable[] args) throws UDFException
{
    DoubleWritable value = (DoubleWritable) args[0];
    AvgBuffer stats = (AvgBuffer) buffer;
    if (value == null)
    {
        return;
    }

    stats.count = stats.count + 1;
    stats.sum = stats.sum + value.get();
    stats.sum2 = stats.sum2 + Math.pow(value.get(), 2);
}
/**
 * Combines the partial result of one slice into the accumulated buffer by
 * adding its sum, sum of squares and count field by field.
 *
 * @param buffer  the AvgBuffer receiving the merged totals
 * @param partial the AvgBuffer produced for another slice
 * @throws UDFException declared by the aggregator contract
 */
@Override
public void merge(Writable buffer, Writable partial) throws UDFException
{
    AvgBuffer target = (AvgBuffer) buffer;
    AvgBuffer other = (AvgBuffer) partial;

    target.count = target.count + other.count;
    target.sum = target.sum + other.sum;
    target.sum2 = target.sum2 + other.sum2;
}
// Output holder returned by terminate(); the same instance is overwritten on every call.
private DoubleWritable ret = new DoubleWritable();

/**
 * Turns the merged statistics into the final result: the coefficient of
 * variation, i.e. sample standard deviation divided by the mean.
 *
 * Results by row count:
 *   0 rows  -> 0
 *   1 row   -> the single value's sum
 *              NOTE(review): a one-sample deviation is undefined; kept
 *              as-is for compatibility -- confirm this is intended.
 *   n rows  -> sqrt((sum2 - sum * mean) / (n - 1)) / mean
 *
 * A mean of zero still produces Infinity or NaN from the final division.
 *
 * @param buffer the fully merged AvgBuffer
 * @return the shared DoubleWritable holding the result
 * @throws UDFException declared by the aggregator contract
 */
@Override
public Writable terminate(Writable buffer) throws UDFException
{
    AvgBuffer buf = (AvgBuffer) buffer;
    if (buf.count == 0)
    {
        ret.set(0);
        return ret;
    }
    if (buf.count == 1)
    {
        ret.set(buf.sum);
        return ret;
    }

    double mean = buf.sum / buf.count;
    // For (near-)constant data the subtraction can round slightly below
    // zero, which would make Math.sqrt return NaN; clamp at zero.
    double squaredDeviations = Math.max(0.0, buf.sum2 - buf.sum * mean);
    ret.set(Math.sqrt(squaredDeviations / (buf.count - 1)) / mean);
    return ret;
}