MapReduce实例-分组排重（group by distinct）

最新推荐文章于 2024-01-22 09:40:56 发布

weixin_34329187

最新推荐文章于 2024-01-22 09:40:56 发布

阅读量150

点赞数

文章标签：大数据

 1 public class GroupComparator implements RawComparator<MyBinaryKey> {
 2  
 3  @Override
 4  public int compare(MyBinaryKey o1, MyBinaryKey o2) {
 5   return o1.toString().compareTo(o2.toString());
 6  }
 7 
 8  @Override
 9  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
10   return WritableComparator.compareBytes(b1, s1, Long.SIZE / 8 + Integer.SIZE / 8 * 3, b2, s2,  Long.SIZE / 8 + Integer.SIZE / 8 * 3);
11  }
12 
13 }
14 
15 public abstract class UVBinaryKey  extends BinaryComparable implements WritableComparable<BinaryComparable>{
16  //根据需要添加属性；
17   @Override
18  public void readFields(DataInput in) throws IOException {
19 
20 } 
21 
22 @Override
23  public byte[] getBytes() {
24 
25 }
26 
27 }
28 
29 public class MyPartitioner extends Partitioner<MyBinaryKey, NullWritable> {
30 
31  /**
32   * 根据uv/ip取模分区，保证相同uv/ip落在同一分区
33   */
34  @Override
35  public int getPartition(MyBinaryKey key, NullWritable value, int numPartitions) {
36   
37   int k=0;
38   for(byte b : key.getAttr()){
39    k+=b&0xff;
40   }
41   return k%numPartitions;
42  }
43 
44 }
45 
46 
47 
// The map output key class must be the concrete class the mapper emits: Hadoop checks the
// emitted key's exact class and instantiates it reflectively, which cannot work for the
// abstract UVBinaryKey. The grouping comparator and partitioner are both typed on MyBinaryKey.
// NOTE(review): assumes MyBinaryKey is the concrete UVBinaryKey subclass written by the
// mapper — confirm.
job.setMapOutputKeyClass(MyBinaryKey.class);
// Group reduce input by the key prefix compared in GroupComparator.
job.setGroupingComparatorClass(GroupComparator.class);
// Route keys to reducers with MyPartitioner.
job.setPartitionerClass(MyPartitioner.class);
51 
52   map 略

 1 combiner（根据需要添加）
 2 reduce中的实现：
 3        @Override
 4         protected void reduce(UVBinaryKey key, Iterable<NullWritable> values, Context context)
 5                 throws IOException,
 6                 InterruptedException {
 7             long count = 0;
 8             byte[] tbsign = null;
 9             for (NullWritable nullWritable : values) {
10                 byte[] attr = key.getAttr();
11                 if (tbsign == null) {
12                     tbsign = attr;
13                     count++;
14                 }
15                 if (tbsign != null) {
16                     if (tbsign.length != attr.length) {
17                         count++;
18                         tbsign = attr;
19                     } else {
20                         for (int i = 0; i < tbsign.length; i++) {
21                             if (tbsign[i] != attr[i]) {
22                                 count++;
23                                 tbsign = attr;
24                                 break;
25                             }
26                         }
27                     }
28                 }
29 
30             }
31             StringBuffer out = new StringBuffer();
32             out.append(new String(key.getCity()))
33                     .append(Constants.FIELDS_TERMINATED).append(count);
34             context.write(new Text(out.toString()), NullWritable.get());
35 
36         }

weixin_34329187

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
MapReduce实例-分组排重（group by distinct）

1 public class GroupComparator implements RawComparator&lt;MyBinaryKey&gt; { 2 3 @Override 4 public int compare(MyBinaryKey o1, MyBinaryKey o2) { 5 return o1.toString().compareTo(o2.t...
复制链接

扫一扫