Mapreduce实例-分组排重(group by distinct)

需要实现以下几个类。完整代码较多,这里只列出主要部分;可根据排重数据的特征判断是否需要添加 combiner 来提速。

 

/**
 * Comparator registered as the job's grouping comparator.
 *
 * <p>The raw (byte-level) comparison looks only at a fixed-size prefix of each
 * serialized key; the object-level comparison orders keys by their
 * {@code toString()} representation.
 */
public class GroupComparator implements RawComparator<MyBinaryKey> {

 /**
  * Number of leading bytes compared by the raw comparison: one long plus two ints.
  * NOTE(review): presumably this covers channelid, scope and type as laid out by the
  * key's serialization - confirm the raw buffer has no extra prefix before these fields.
  */
 private static final int GROUP_PREFIX_BYTES = Long.SIZE / 8 + Integer.SIZE / 8 * 2;

 /**
  * Orders two deserialized keys by comparing their string forms.
  *
  * @param o1 left-hand key
  * @param o2 right-hand key
  * @return the result of {@code String.compareTo} on the two {@code toString()} values
  */
 @Override
 public int compare(MyBinaryKey o1, MyBinaryKey o2) {
  String left = o1.toString();
  String right = o2.toString();
  return left.compareTo(right);
 }

 /**
  * Orders two serialized keys by their first {@link #GROUP_PREFIX_BYTES} bytes.
  *
  * <p>The supplied lengths {@code l1} and {@code l2} are not consulted; only the
  * fixed-size prefix starting at {@code s1} / {@code s2} takes part in the comparison.
  *
  * @param b1 buffer holding the first key
  * @param s1 start offset of the first key in {@code b1}
  * @param l1 serialized length of the first key (unused)
  * @param b2 buffer holding the second key
  * @param s2 start offset of the second key in {@code b2}
  * @param l2 serialized length of the second key (unused)
  * @return the result of {@code WritableComparator.compareBytes} on the two prefixes
  */
 @Override
 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  return WritableComparator.compareBytes(
    b1, s1, GROUP_PREFIX_BYTES,
    b2, s2, GROUP_PREFIX_BYTES);
 }

}

public abstract class UVBinaryKey  extends BinaryComparable implements WritableComparable<BinaryComparable>{
 //根据需要添加属性,get set方法
  @Override
 public void readFields(DataInput in) throws IOException {
        this.channelid = in.readLong();
        this.scope = in.readInt();
        this.type = in.readInt();
        int attrlen = in.readInt();
        attr = new byte[attrlen];
        in.readFully(attr);
        this.vv = in.readInt();
        this.vedioends = in.readInt();
        this.playtime = in.readInt();
} 

@Override
 public byte[] getBytes() {
      ByteArrayOutputStream buf = new ByteArrayOutputStream();
      DataOutput out = new DataOutputStream(buf);
      try {
             out.writeLong(this.channelid);
             out.writeInt(this.scope);
             out.writeInt(this.type);
             out.writeInt(this.attr.length);//需要添加比较属性长度,避免比较时属性长度不等且前面相同是匹配为相同
             out.write(this.attr);
             out.writeInt(this.vv);
             out.writeInt(this.vedioends);
             out.writeInt(this.playtime);
             return buf.toByteArray();
       } catch (IOException e) {
             e.printStackTrace();
       }
       return null;
}

}

/**
 * Partitioner that derives the partition from the bytes of the key's attr.
 */
public class MyPartitioner extends Partitioner<MyBinaryKey, NullWritable> {

 /**
  * Partitions by the uv/ip value (the key's attr) modulo the partition count, so that
  * identical uv/ip values always land in the same partition.
  *
  * @param key key whose attr bytes are summed
  * @param value unused
  * @param numPartitions total number of partitions
  * @return the sum of the attr bytes, each taken as an unsigned value, modulo {@code numPartitions}
  */
 @Override
 public int getPartition(MyBinaryKey key, NullWritable value, int numPartitions) {
  int byteSum = 0;
  for (byte raw : key.getAttr()) {
   // Mask to 0..255 so negative byte values do not reduce the sum.
   int unsigned = raw & 0xff;
   byteSum = byteSum + unsigned;
  }
  int partition = byteSum % numPartitions;
  return partition;
 }

}



  // Job wiring for the group-by-distinct job: key type, grouping comparator and partitioner.
  // NOTE(review): UVBinaryKey is declared abstract in this file while the comparator and
  // partitioner are typed on MyBinaryKey - confirm whether a concrete key class should be
  // registered here instead.
  job.setMapOutputKeyClass(UVBinaryKey.class);
  // Register GroupComparator as the grouping comparator for the reduce side.
  job.setGroupingComparatorClass(GroupComparator.class);
   // Register MyPartitioner to choose the partition for each map output key.
   job.setPartitionerClass(MyPartitioner.class);

map略
combiner(根据需要添加)
reduce中的实现:
       /**
        * Counts how many times the key's attr changes while iterating one reduce group,
        * then writes a single line "channelId + Constants.FIELDS_TERMINATED + count".
        *
        * <p>Only consecutive attrs are compared, so the count equals the number of distinct
        * attrs only if equal attrs arrive next to each other.
        * NOTE(review): this presumably relies on the sort order of the serialized keys - confirm.
        *
        * @param key grouping key; its attr is re-read on every iteration of {@code values}
        * @param values one placeholder per record in the group
        * @param context sink for the output record
        * @throws IOException if writing the output fails
        * @throws InterruptedException if the write is interrupted
        */
       @Override
       protected void reduce(UVBinaryKey key, Iterable<NullWritable> values, Context context)
               throws IOException, InterruptedException {
           long count = 0;
           byte[] lastAttr = null;
           for (NullWritable ignored : values) {
               // Re-read attr each time: the loop body depends on the key's attr differing
               // between iterations. NOTE(review): retaining the previous array assumes
               // getAttr() returns an array that is not overwritten by the next record.
               byte[] attr = key.getAttr();
               if (lastAttr == null || !java.util.Arrays.equals(lastAttr, attr)) {
                   // First record of the group, or attr differs from the previous record.
                   count++;
                   lastAttr = attr;
               }
           }
           // NOTE(review): the return type of getChannelId() is not visible here; if it is
           // a long, new String(...) will not compile, and if it is a byte[] the platform
           // default charset is used - confirm and adjust.
           StringBuilder out = new StringBuilder();
           out.append(new String(key.getChannelId()))
                   .append(Constants.FIELDS_TERMINATED).append(count);
           context.write(new Text(out.toString()), NullWritable.get());
       }


 

 

 

 

  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值