一个Hive UDAF 实现相邻去重

内置的两个聚合函数(UDAF)

  1. collect_list():将分组内多行的值聚合为一个数组(保留重复值),可配合 concat_ws() 拼接为一行字符串
  2. collect_set():将分组内多行的值聚合为一个数组并去除重复值,可配合 concat_ws() 拼接为一行字符串

多行字符串拼接为一行并相邻去重UDAF:Concat()

concat_udaf.jar

package com.tcc.udaf;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * Hive UDAF that joins the non-null string values of a group into a single
 * delimiter-separated string and collapses runs of adjacent equal values
 * (for example {@code a,a,b,a} becomes {@code a,b,a}).
 *
 * <p>Usage: {@code Concat(value, delimiter)}. A null or empty delimiter falls
 * back to {@code ","}.
 */
public class Concat extends UDAF
{
  /**
   * Evaluator holding the aggregation state for one group. The methods
   * {@code init}, {@code iterate}, {@code terminatePartial}, {@code merge}
   * and {@code terminate} form the aggregation life cycle.
   */
  public static class ConcatUDAFEvaluator
    implements UDAFEvaluator
  {
    /** Delimiter used when the caller passes a null or empty one. */
    private static final String DEFAULT_DELIMITER = ",";

    /** Accumulated state; stays null until the first non-null value or partial arrives. */
    private PartialResult partial;

    /** Resets the evaluator so it can be reused for another group. */
    public void init()
    {
      this.partial = null;
    }

    /**
     * Appends one input value to the accumulated string.
     *
     * @param value the value to append; null values are ignored
     * @param deli  the delimiter; only the delimiter seen with the first
     *              non-null value is kept, later ones are ignored
     * @return always true
     */
    public boolean iterate(String value, String deli)
    {
      if (value == null) {
        return true;
      }
      if (this.partial == null) {
        this.partial = new PartialResult();
        this.partial.result = "";
        this.partial.delimiter =
            (deli == null || deli.isEmpty()) ? DEFAULT_DELIMITER : deli;
      }

      // An empty accumulated string is treated as "nothing accumulated yet",
      // so no delimiter is written in front of the next value.
      if (this.partial.result.length() > 0) {
        this.partial.result = this.partial.result.concat(this.partial.delimiter);
      }
      this.partial.result = this.partial.result.concat(value);

      return true;
    }

    /**
     * Returns the partial aggregation state.
     *
     * @return the current state, or null when no non-null value was seen
     */
    public PartialResult terminatePartial()
    {
      return this.partial;
    }

    /**
     * Merges another partial state into this evaluator. De-duplication is
     * deferred to {@link #terminate()}, so equal values that meet at the
     * boundary of two partials are still collapsed.
     *
     * @param other the partial state to merge; null is ignored
     * @return always true
     */
    public boolean merge(PartialResult other)
    {
      if (other == null) {
        return true;
      }
      if (this.partial == null) {
        this.partial = new PartialResult();
        this.partial.result = other.result;
        this.partial.delimiter = other.delimiter;
      } else {
        if (this.partial.result.length() > 0) {
          this.partial.result = this.partial.result.concat(this.partial.delimiter);
        }
        this.partial.result = this.partial.result.concat(other.result);
      }
      return true;
    }

    /**
     * Produces the final result: the accumulated values with every run of
     * adjacent equal values reduced to a single occurrence.
     *
     * @return the de-duplicated, delimiter-joined string, or null when the
     *         group contained no non-null value
     */
    public String terminate()
    {
      // No non-null input was ever seen: return null instead of throwing a
      // NullPointerException.
      if (this.partial == null) {
        return null;
      }

      String joined = this.partial.result;
      String delimiter = this.partial.delimiter;
      if (joined.indexOf(delimiter) == -1) {
        return joined;
      }

      // The delimiter is user data, not a regular expression: quote it so
      // that delimiters such as "|" or "." split literally. Trailing empty
      // tokens are dropped by split(), as before.
      String[] tokens = joined.split(java.util.regex.Pattern.quote(delimiter));

      StringBuilder deduped = new StringBuilder(joined.length());
      String previous = null;
      for (String token : tokens) {
        if (previous != null && token.equals(previous)) {
          continue; // same as the value just emitted: skip the duplicate
        }
        if (previous != null) {
          deduped.append(delimiter);
        }
        deduped.append(token);
        previous = token;
      }
      return deduped.toString();
    }

    /** Partial aggregation state returned by terminatePartial() and consumed by merge(). */
    public static class PartialResult
    {
      /** Values accumulated so far, joined by {@link #delimiter}. */
      String result;
      /** Delimiter placed between values. */
      String delimiter;
    }
  }
}

使用:

add jar concat_udaf.jar;
create temporary function Concat as 'com.tcc.udaf.Concat';
select a,concat(b,',') from concat_test group by a;

more:

  1. http://hugh-wangp.iteye.com/blog/1472371
  2. http://www.cnblogs.com/ggjucheng/archive/2013/02/01/2888051.html
  3. http://www.linuxidc.com/Linux/2013-04/82879.htm
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值