Hadoop笔记之自定义分组实现

最新推荐文章于 2019-11-09 12:31:23 发布

IT菜籽U

最新推荐文章于 2019-11-09 12:31:23 发布

阅读量470

点赞数

分类专栏： MR

本文链接：https://blog.csdn.net/xiaoshunzi111/article/details/48526503

版权

MR 专栏收录该内容

79 篇文章 2 订阅

订阅专栏

自定义分组实现：

一、实现 RawComparator 接口（自定义分组比较器）：

static class MyGroupComparator implements RawComparator<NewK2>{

       @Override
       public int compare(NewK2 o1, NewK2 o2) {

           return (int)(o1.first-o2.first);
       }
       /**
       * @param arg0
       *            表示第一个参与比较的字节数组
       * @param arg1
       *            表示第一个参与比较的字节数组的起始位置
       * @param arg2
       *            表示第一个参与比较的字节数组的偏移量
       *
       * @param arg3
       *            表示第二个参与比较的字节数组
       * @param arg4
       *            表示第二个参与比较的字节数组的起始位置
       * @param arg5
       *            表示第二个参与比较的字节数组的偏移量

//在reduce中被隐式调用

       @Override
       public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {

return WritableComparator.compareBytes(b1, s1, 8, b2, s2, 8);
       }

}

二、实现 WritableComparable 接口（自定义组合键）：

// Composite key with two Long fields. This excerpt shows only the field
// declarations; the methods required by WritableComparable are not shown here.
static class NewKey2 implements WritableComparable<NewKey2>{
       Long first;
       Long second;

}

三、在 Reduce 中（取每个分组内的最大值）：
            // Start from the smallest long so that any value in the group replaces it.
            long max=Long.MIN_VALUE;
           for (LongWritable v2 : v2s) {
               if(v2.get()>max){
                   max=v2.get();
               }
           }

// Emit one record per group: the group's first field and the largest value seen.
context.write(new LongWritable(k2.first), new LongWritable(max));

四、在 Main 方法中：

// Register the custom grouping comparator on the job.
job.setGroupingComparatorClass(MyGroupComparator.class);

/* job.setPartitionerClass(HashPartitioner.class);

job.setNumReduceTasks(1);// the system default is 1 */

//******************************************************************************************************************//

|=具体实现代码=|

//*****************************************************************************************************************//

package day0917;

import java.io.*;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**********************自定义分组实现(   老师完成de)*********************/
public class DefaultPartition {
//   static final String INPUT_PATH="hdfs://master:9000/input2";
//   static final String OUT_PATH="hdfs://master:9000/out";

   public static void main(String[] args) throws Exception{
       Configuration conf=new Configuration(); //创建配置对象
       Job job=new Job(conf,DefaultPartition.class.getSimpleName()); //创建作业对象

       FileSystem fileSystem = FileSystem.get(new URI(args[0]), conf);
       if(fileSystem.exists(new Path(args[1]))){
           fileSystem.delete(new Path(args[1]), true);
       }
       // 1.1 指定输入文件路径
       FileInputFormat.setInputPaths(job, new Path(args[0]));
       // 指定哪个类用来格式化输入文件
       //job.setInputFormatClass(TextInputFormat.class);
       job.setInputFormatClass(TextInputFormat.class);
       // 1.2指定自定义的Mapper类
       job.setMapperClass(MyMap.class);
       // 指定输出<k2,v2>的类型
       job.setMapOutputKeyClass(NewKey2.class);
       job.setMapOutputValueClass(LongWritable.class);

       job.setPartitionerClass(HashPartitioner.class);
       job.setNumReduceTasks(1);
       job.setGroupingComparatorClass(MyGroupComparator.class);///

       job.setReducerClass(MyReduce.class);
       job.setOutputKeyClass(LongWritable.class);
       job.setOutputValueClass(LongWritable.class);
       FileOutputFormat.setOutputPath(job, new Path(args[1]));
       job.setOutputFormatClass(TextOutputFormat.class);

       // 把代码提交给JobTracker执行
       job.waitForCompletion(true   );
   }
   //v2的类型是Text
   static int count=0;
   static class MyMap extends Mapper<LongWritable, Text, NewKey2, LongWritable>{
       protected void map(LongWritable key, Text value, org.apache.hadoop.mapreduce.Mapper<LongWritable,Text,NewKey2,LongWritable>.Context context) throws java.io.IOException ,InterruptedException {
           System.out.println("输入的k1："+key+" ，输入的v1: "+value);//map函数从hdfs中读取数据，一行一行读取，key为索引，value为每次行的内容

           String [] valueStrings=value.toString().split("\t");
           NewKey2 k2=new NewKey2(Long.parseLong(valueStrings[0]),Long.parseLong(valueStrings[1]));
           LongWritable v2=new LongWritable(Long.parseLong(valueStrings[1]));
           context.write(k2, v2);
           System.out.println("输出的k2："+k2.first+" ，输出的v2:"+v2+"\n");
       };
   }
   static class MyReduce extends Reducer<NewKey2, LongWritable, LongWritable, LongWritable>{
       protected void reduce(NewKey2 k2, java.lang.Iterable<LongWritable> v2s, org.apache.hadoop.mapreduce.Reducer<NewKey2,LongWritable,LongWritable,LongWritable>.Context context) throws java.io.IOException ,InterruptedException {
           long max=Long.MIN_VALUE; /Long中的最小值
           for (LongWritable v2 : v2s) {
               if(v2.get()>max){
                   max=v2.get();
               }
           }
           context.write(new LongWritable(k2.first), new LongWritable(max));
       };
   }
   static class NewKey2 implements WritableComparable<NewKey2>{
       Long first;
       Long second;

       public NewKey2(){}
       public NewKey2(Long first,Long second){
           this.first=first;
           this.second=second;
       }
       @Override
       public void write(DataOutput out) throws IOException {
           out.writeLong(first);
           out.writeLong(second);
       }

       @Override
       public void readFields(DataInput in) throws IOException {
           this.first=in.readLong();
           this.second=in.readLong();
           System.out.println("这是readFields()方法！first:"+this.first+"，second"+this.second);
       }

       @Override
       public int compareTo(NewKey2 o) {
           long minus = this.first-o.first;
           if(minus!=0){
               System.out.println("这是minus的值："+minus);
               return (int) minus;
           }
           System.out.println("这是this.second-o.second的值："+(this.second-o.second));
           return (int)(this.second-o.second);
       }
   }
   static class MyGroupComparator implements RawComparator<NewKey2>{

       @Override
       public int compare(NewKey2 o1, NewKey2 o2) {
           System.out.println("MyGroupComparator类中的compare(NewKey2 o1, NewKey2 o2)方法");
           return (int)(o1.first-o2.first);
       }
       /**
       * @param arg0
       *            表示第一个参与比较的字节数组
       * @param arg1
       *            表示第一个参与比较的字节数组的起始位置
       * @param arg2
       *            表示第一个参与比较的字节数组的偏移量
       *
       * @param arg3
       *            表示第二个参与比较的字节数组
       * @param arg4
       *            表示第二个参与比较的字节数组的起始位置
       * @param arg5
       *            表示第二个参与比较的字节数组的偏移量
       */
       @Override
       public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
           System.out.println("MyGroupComparator类中的compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)方法");
           System.out.println(b1.length+"\t"+s1+"\t"+b2.length+"\t"+s2);
           return WritableComparator.compareBytes(b1, s1, 8, b2, s2, 8);
       }

   }
}
源数据（每行两列，列之间以制表符 \t 分隔）:

----------

11   11
33   23
22   12
33   33
11   12
22   22
33   11
22   22
33   12

分析:

------

11 11
11 12

22 12
22 22
22 22

33 11
33 12
33 23
33 33

输出结果:
-------------
11 12
22 22
33 33

IT菜籽U

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Hadoop笔记之自定义分组实现

自定义分组实现：static class MyGroupComparator implements RawComparator{ @Override public int compare(NewK2 o1, NewK2 o2) { return (int)(o1.first-o2.first);
复制链接

扫一扫