MapReduce Knowledge Points (2)

Counters


  1. Built-in counters: every MapReduce job automatically maintains a set of built-in counters, such as task counters, job counters, FileSystem counters, and FileInputFormat/FileOutputFormat counters, which are reported with the job's progress output.


  2. User-defined counters (two ways to create them)

    1. Call the context object's getCounter method with two arguments: the counter group and the counter name
    2. Create the counter with an enum (both approaches are shown in the sketch below)
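
A minimal sketch of both approaches inside a Mapper (the package, class, group, and counter names here are illustrative, not from the original notes):

    package com.bigdata.mapreduce.counter;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Counter;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    public class CounterMapper extends
            Mapper<LongWritable, Text, Text, NullWritable> {

        //Way 2: counters defined with an enum
        public enum MyCounters {
            INPUT_RECORDS
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            //Way 1: getCounter(group, name) -- the two arguments are the counter group and the counter name
            Counter counter = context.getCounter("MR_COUNTER", "input_lines");
            counter.increment(1L);

            //Way 2: getCounter(enum constant)
            context.getCounter(MyCounters.INPUT_RECORDS).increment(1L);

            context.write(value, NullWritable.get());
        }
    }

Both kinds of counters are aggregated across all tasks and printed in the job's counter report when the job finishes.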

Sorting and Serialization

During the shuffle phase, records are sorted by key, so a custom key type must implement WritableComparable: Writable for serialization plus Comparable to define the sort order.

Original data:

(sample data omitted: each line is a word and a number separated by a tab)

Requirements:

  • Sort the first column in dictionary order
  • When the first column is equal, sort the second column in ascending order
  1. Define a sortbean class that implements the WritableComparable interface, declare the member variables the data requires, and override the compareTo, write, and readFields methods

    package com.bigdata.mapreduce.sort;
    
    import org.apache.hadoop.io.WritableComparable;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    public class sortbean implements WritableComparable<sortbean> {
    
        private String word;
        private int num;
    
        //Implement the comparator and define the sort rule
        /*
            Sort rule: sort by the first column in dictionary order;
            when the first column is equal, sort the second column in ascending order
         */
        @Override
        public int compareTo(sortbean o) {
            //Compare this object's word with the incoming one to sort the first column
            //compareTo is the built-in String method
            int result = this.word.compareTo(o.word);
            //If the first column is equal, sort by the second column in ascending order
            //A positive result means this.num is larger than o.num, so this record sorts after o
            //A negative result means this.num is smaller than o.num, so this record sorts before o
            if(result == 0){
                return this.num - o.num;
            }
            return result;
        }
    
        //Serialization
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            //Choose the write method that matches each member's type
            //Fixed, boilerplate steps
            dataOutput.writeUTF(word);
            dataOutput.writeInt(num);
        }

        //Deserialization: read the fields back in the same order they were written
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            //Fixed, boilerplate steps
            this.word = dataInput.readUTF();
            this.num = dataInput.readInt();
        }
    
        public String getWord() {
            return word;
        }
    
        public void setWord(String word) {
            this.word = word;
        }
    
        public int getNum() {
            return num;
        }
    
        public void setNum(int num) {
            this.num = num;
        }
    
        @Override
        public String toString() {
            return word + '\t' + num ;
        }
    }
    
    
  2. Mapper class

    package com.bigdata.mapreduce.sort;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class s_mapper extends
            Mapper<LongWritable, Text,sortbean, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            //Split the text data (V1) and pack it into a sortbean object
            String[] split = value.toString().split("\t");
            sortbean sb = new sortbean();
            sb.setWord(split[0]);
            sb.setNum(Integer.parseInt(split[1]));
            //Write K2 and V2 to the context object
            context.write(sb,NullWritable.get());
        }
    }
    
    
  3. Reducer class

    package com.bigdata.mapreduce.sort;
    
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class s_reducer extends
            Reducer<sortbean, NullWritable,sortbean,NullWritable> {
        @Override
        protected void reduce(sortbean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
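            //The framework has already sorted the keys; write each group's key once.
            //Records whose (word, num) compare as equal collapse into a single output line.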
            context.write(key,NullWritable.get());
        }
    }
    
    
  4. Driver (main) class

    package com.bigdata.mapreduce.sort;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    public class s_job extends Configured implements Tool {
        @Override
        public int run(String[] strings) throws Exception {
            //Create the Job object
            Job job = Job.getInstance(super.getConf(),"sort");
    
            //Configure the job
            //1. Set the input format class and the input path
            job.setInputFormatClass(TextInputFormat.class);
            TextInputFormat.addInputPath(job,new Path("file:///D:\\mapreduce_demo\\input"));
    
            //2. Set the Mapper class and its output types (K2, V2)
            job.setMapperClass(s_mapper.class);
            job.setMapOutputKeyClass(sortbean.class);
            job.setMapOutputValueClass(NullWritable.class);
            //3. Partitioning
            //4. Sorting
                //The sort rule is already specified in sortbean's compareTo, so nothing more is needed here
            //5. Combiner  6. Grouping
    
            //7. Set the Reducer class and its output types (K3, V3)
            job.setReducerClass(s_reducer.class);
            job.setOutputKeyClass(sortbean.class);
            job.setOutputValueClass(NullWritable.class);
            //8. Set the output format class and the output path
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job,new Path("file:///D:\\mapreduce_demo\\sort_out"));
    
            //Wait for the job to finish
            boolean flag = job.waitForCompletion(true);
    
            return flag ? 0 : 1;
        }
    
        public static void main(String[] args) throws Exception {
            Configuration entries = new Configuration();
            int run = ToolRunner.run(entries, new s_job(), args);
            System.exit(run);
        }
    }
    
    

Combiner

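A combiner is an optional, reducer-like step that runs on each map task's local output and pre-aggregates it before the shuffle, so less data is sent to the reducers. Its input and output types must match the map output types (K2, V2), and it must not change the final result (sums and counts are safe; averages are not). A minimal word-count style sketch (the package and class names are illustrative and assume a mapper that emits <Text, IntWritable>):

    package com.bigdata.mapreduce.combiner;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    public class WordCountCombiner extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            //Pre-aggregate the counts produced by this map task
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

The combiner is enabled in the driver with job.setCombinerClass(WordCountCombiner.class); when the reducer itself only sums values, the reducer class can usually be reused directly as the combiner.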

Comprehensive Examples

Requirement 1: for each phone number, sum the four flow fields (upFlow, downFlow, upCountFlow, downCountFlow) across all of its records.

  1. flowbean

    package com.bigdata.mapreduce.example1;
    
    import org.apache.hadoop.io.Writable;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    public class flowbean implements Writable {
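        //flowbean is only used as a value (V2/V3) in this job, so implementing
        //Writable (serialization without comparison) is sufficient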
        private Integer upFlow;
        private Integer downFlow;
        private Integer upCountFlow;
        private Integer downCountFlow;
    
    
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeInt(upFlow);
            dataOutput.writeInt(downFlow);
            dataOutput.writeInt(upCountFlow);
            dataOutput.writeInt(downCountFlow);
        }
    
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            this.upFlow = dataInput.readInt();
            this.downFlow = dataInput.readInt();
            this.upCountFlow = dataInput.readInt();
            this.downCountFlow = dataInput.readInt();
        }
    
        public Integer getUpFlow() {
            return upFlow;
        }
    
        public void setUpFlow(Integer upFlow) {
            this.upFlow = upFlow;
        }
    
        public Integer getDownFlow() {
            return downFlow;
        }
    
        public void setDownFlow(Integer downFlow) {
            this.downFlow = downFlow;
        }
    
        public Integer getUpCountFlow() {
            return upCountFlow;
        }
    
        public void setUpCountFlow(Integer upCountFlow) {
            this.upCountFlow = upCountFlow;
        }
    
        public Integer getDownCountFlow() {
            return downCountFlow;
        }
    
        public void setDownCountFlow(Integer downCountFlow) {
            this.downCountFlow = downCountFlow;
        }
    
        @Override
        public String toString() {
            return  upFlow +
                    "\t" + downFlow +
                    "\t" + upCountFlow +
                    "\t" + downCountFlow
                    ;
        }
    }
    
    
  2. mapper

    package com.bigdata.mapreduce.example1;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class FlowCountMapper extends
            Mapper<LongWritable, Text,Text,flowbean> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
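            //split[1] is the phone number; split[6]..split[9] are the four flow fields
            //(upFlow, downFlow, upCountFlow, downCountFlow)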
            String[] split = value.toString().split("\t");
            flowbean fb = new flowbean();
            Text text = new Text(split[1]);
            fb.setUpFlow(Integer.parseInt(split[6]));
            fb.setDownFlow(Integer.parseInt(split[7]));
            fb.setUpCountFlow(Integer.parseInt(split[8]));
            fb.setDownCountFlow(Integer.parseInt(split[9]));
            context.write(text,fb);
    
        }
    }
    
    
  3. reducer

    package com.bigdata.mapreduce.example1;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class FlowCountReducer extends
            Reducer<Text, flowbean, Text, flowbean> {
        @Override
        protected void reduce(Text key, Iterable<flowbean> values, Context context) throws IOException, InterruptedException {
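            //Sum each of the four flow fields over all records belonging to this phone number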
            flowbean fb = new flowbean();
            Integer upFlow = 0;
            Integer downFlow = 0;
            Integer upCountFlow = 0;
            Integer downCountFlow = 0;
            for (flowbean f : values) {
                upFlow += f.getUpFlow();
                downFlow += f.getDownFlow();
                upCountFlow += f.getUpCountFlow();
                downCountFlow += f.getDownCountFlow();
            }
            fb.setUpFlow(upFlow);
            fb.setDownFlow(downFlow);
            fb.setUpCountFlow(upCountFlow);
            fb.setDownCountFlow(downCountFlow);
            context.write(key,fb);
        }
    }
    
    
  4. jobmain

    package com.bigdata.mapreduce.example1;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    public class FlowJobMain extends Configured implements Tool {
        @Override
        public int run(String[] strings) throws Exception {
            //Create the Job object
            Job job = Job.getInstance(super.getConf(),"example1");
            //Configure the job
            //Set the input format class and the input path
            job.setInputFormatClass(TextInputFormat.class);
            TextInputFormat.addInputPath(job,new Path(""));
            //Set the Mapper class and its output types (K2, V2)
            job.setMapperClass(FlowCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(flowbean.class);
            //Partitioning, sorting, combining, grouping: defaults are used here
            //Set the Reducer class and its output types (K3, V3)
            job.setReducerClass(FlowCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(flowbean.class);
            //Set the output format class and the output path
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job,new Path(""));
            //Wait for the job to finish
            boolean flag = job.waitForCompletion(true);
            return flag?0:1;
        }
    
        public static void main(String[] args) throws Exception {
            Configuration entries = new Configuration();
            int run = ToolRunner.run(entries, new FlowJobMain(), args);
            System.exit(run);
        }
    }
    
    

Requirement 2: take the output of requirement 1 and sort it in descending order of upFlow.

  1. FlowBean

    package com.bigdata.mapreduce.example2;
    
    import org.apache.hadoop.io.WritableComparable;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    public class FlowBean implements WritableComparable<FlowBean> {
        private Integer upFlow;
        private Integer downFlow;
        private Integer upCountFlow;
        private Integer downCountFlow;
    
        //Compare o to this (reversed) so that records sort in descending order of upFlow
        @Override
        public int compareTo(FlowBean o) {
            return o.upFlow - this.upFlow;
        }
    
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeInt(upFlow);
            dataOutput.writeInt(downFlow);
            dataOutput.writeInt(upCountFlow);
            dataOutput.writeInt(downCountFlow);
        }
    
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            this.upFlow = dataInput.readInt();
            this.downFlow = dataInput.readInt();
            this.upCountFlow = dataInput.readInt();
            this.downCountFlow = dataInput.readInt();
        }
    
        public Integer getUpFlow() {
            return upFlow;
        }
    
        public void setUpFlow(Integer upFlow) {
            this.upFlow = upFlow;
        }
    
        public Integer getDownFlow() {
            return downFlow;
        }
    
        public void setDownFlow(Integer downFlow) {
            this.downFlow = downFlow;
        }
    
        public Integer getUpCountFlow() {
            return upCountFlow;
        }
    
        public void setUpCountFlow(Integer upCountFlow) {
            this.upCountFlow = upCountFlow;
        }
    
        public Integer getDownCountFlow() {
            return downCountFlow;
        }
    
        public void setDownCountFlow(Integer downCountFlow) {
            this.downCountFlow = downCountFlow;
        }
    
        @Override
        public String toString() {
            return  upFlow +
                    "\t" + downFlow +
                    "\t" + upCountFlow +
                    "\t" + downCountFlow
                    ;
        }
    }
    
    
  2. mapper

    package com.bigdata.mapreduce.example2;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class smapper extends
            Mapper<LongWritable, Text,FlowBean,Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
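            //Each input line is an output line of requirement 1:
            //phone \t upFlow \t downFlow \t upCountFlow \t downCountFlow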
            String[] split = value.toString().split("\t");
            FlowBean fb = new FlowBean();
            Text text = new Text(split[0]);
            fb.setUpFlow(Integer.parseInt(split[1]));
            fb.setDownFlow(Integer.parseInt(split[2]));
            fb.setUpCountFlow(Integer.parseInt(split[3]));
            fb.setDownCountFlow(Integer.parseInt(split[4]));
            context.write(fb,text);
        }
    }
    
    
  3. reducer

    package com.bigdata.mapreduce.example2;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class sreducer extends
            Reducer<FlowBean, Text,Text,FlowBean> {
        @Override
        protected void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
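            //Swap key and value so the phone number becomes the output key again; iterate
            //because several records may have been grouped under keys that compare as equal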
            for (Text value : values) {
                context.write(value,key);
            }
        }
    }
    
    
  4. jobmain

    package com.bigdata.mapreduce.example2;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    public class sjobmain extends Configured implements Tool {
        @Override
        public int run(String[] strings) throws Exception {
            Job job = Job.getInstance(super.getConf(),"sortflow");
            job.setInputFormatClass(TextInputFormat.class);
            TextInputFormat.addInputPath(job,new Path(""));
            job.setMapperClass(smapper.class);
            job.setMapOutputKeyClass(FlowBean.class);
            job.setMapOutputValueClass(Text.class);
            //Steps 3-6 (partition, sort, combine, group): sorting follows FlowBean.compareTo, the rest use the defaults
            job.setReducerClass(sreducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(FlowBean.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job,new Path(""));
            boolean flag = job.waitForCompletion(true);
            return flag?0:1;
        }
    
        public static void main(String[] args) throws Exception {
            Configuration entries = new Configuration();
            int run = ToolRunner.run(entries, new sjobmain(), args);
            System.exit(run);
        }
    }
    
    

Requirement 3: write the per-phone statistics from requirement 1 into separate result files according to the phone-number prefix (135, 136, 137, and all others).

Building on requirement 1, it is enough to add a partitioner class and adjust the driver class accordingly.

  1. partitioner

    package com.bigdata.mapreduce.example3;

    import com.bigdata.mapreduce.example1.flowbean;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Partitioner;

    public class flowPartitioner extends
            Partitioner<Text,flowbean> {
        //Route each record to a partition (reduce task) based on the phone-number prefix
        @Override
        public int getPartition(Text text, flowbean flowbean, int i) {
            if(text.toString().startsWith("135")){
                return 0;
            }else if (text.toString().startsWith("136")){
                return 1;
            }else if (text.toString().startsWith("137")){
                return 2;
            }else{
                return 3;
            }
        }
    }
    
    
  2. jobmain

    package com.bigdata.mapreduce.example3;
    
    import com.bigdata.mapreduce.example1.FlowCountMapper;
    import com.bigdata.mapreduce.example1.FlowCountReducer;
    import com.bigdata.mapreduce.example1.flowbean;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    public class FlowJobMain extends Configured implements Tool {
        @Override
        public int run(String[] strings) throws Exception {
            //Create the Job object
            Job job = Job.getInstance(super.getConf(),"flow");
            //Configure the job
            //Set the input format class and the input path
            job.setInputFormatClass(TextInputFormat.class);
            TextInputFormat.addInputPath(job,new Path(""));
            //Set the Mapper class and its output types (K2, V2)
            job.setMapperClass(FlowCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(flowbean.class);
            //Partitioning, sorting, combining, grouping
            //Set the partitioner class
            job.setPartitionerClass(flowPartitioner.class);
            //Set the number of reduce tasks to match the number of partitions
            job.setNumReduceTasks(4);
    
            //Set the Reducer class and its output types (K3, V3)
            job.setReducerClass(FlowCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(flowbean.class);
            //Set the output format class and the output path
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job,new Path(""));
            //Wait for the job to finish
            boolean flag = job.waitForCompletion(true);
            return flag?0:1;
        }
    
        public static void main(String[] args) throws Exception {
            Configuration entries = new Configuration();
            int run = ToolRunner.run(entries, new FlowJobMain(), args);
            System.exit(run);
        }
    }
    
    

