Problem: find the maximum temperature for each month.
Contents of the input file:
2018010123.4
2018010133.4
2018010113.4
2018020426.8
2018050829.1
2018050713.4
2018070433.2
2018090123.4
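Each record is an 8-digit date (yyyyMMdd) immediately followed by the temperature, one record per line, so the first 6 characters give the month key. A minimal sketch of how the mapper below slices a record (RecordParseDemo is a hypothetical helper for illustration only; values are from the first sample line):

public class RecordParseDemo {
    public static void main(String[] args) {
        String record = "2018010123.4";            // yyyyMMdd + temperature
        String month  = record.substring(0, 6);    // "201801" -> the map output key
        double temp   = Double.parseDouble(record.substring(8)); // 23.4 -> the map output value
        System.out.println(month + "\t" + temp);   // prints: 201801	23.4
    }
}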
Note: for the detailed workflow, refer to the WordCount program; only the code is shown here.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Iterator;

public class GetMaxTemperature {

    public static class GetMaxTemperatureMapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Convert the line to a plain String
            String line = value.toString();
            // Slice out the month (yyyyMM) and the temperature (everything after the 8-digit date)
            String month = line.substring(0, 6);
            String temperature = line.substring(8);
            context.write(new Text(month), new DoubleWritable(Double.parseDouble(temperature)));
        }
    }

    public static class GetMaxTemperatureReduce extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
            // Iterate over the values and find the maximum
            double temp = Double.NEGATIVE_INFINITY; // start below any real temperature so negative values are handled too
            Iterator<DoubleWritable> iterator = values.iterator();
            while (iterator.hasNext()) {
                DoubleWritable doubleWritable = iterator.next();
                if (temp < doubleWritable.get()) {
                    temp = doubleWritable.get();
                }
            }
            // Emit the month and its maximum temperature
            context.write(key, new DoubleWritable(temp));
        }
    }

    // Set up the driver, build the job object, and submit it (standalone test)
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Configuration settings
        Configuration conf = new Configuration();
        // Create the job; naming it is optional
        Job job = Job.getInstance(conf, "getMaxTemperature");
        // Set the driver class -- this must be the main class
        job.setJarByClass(GetMaxTemperature.class);
        // Set the mapper -- this must be the corresponding mapper class
        job.setMapperClass(GetMaxTemperatureMapper.class);
        // These must match the map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        // Set the reducer -- this must be the corresponding reducer class
        job.setReducerClass(GetMaxTemperatureReduce.class);
        // These must match the reducer's final output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
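Run against the sample file above, the job should write one line per month to part-r-00000 (TextOutputFormat separates key and value with a tab):

201801	33.4
201802	26.8
201805	29.1
201807	33.2
201809	23.4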
Notes: 1. If you add the two config files (e.g. core-site.xml), the Linux cluster must be started before the job can run, because core-default.xml has lower priority than core-site.xml. The job will then run on the cluster: its input comes from HDFS, and its output is written back to HDFS.
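For reference, a minimal sketch of the kind of core-site.xml this note refers to; hdfs://master:8020 is taken from the paths in note 2 below and must match your own NameNode address:

<?xml version="1.0"?>
<configuration>
    <!-- overrides the file:/// default inherited from core-default.xml -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:8020</value>
    </property>
</configuration>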
2. Input and output path schemes:

// Local file system paths
file:///input  file:///output
// Distributed file system (HDFS) paths
hdfs://master:8020/input  hdfs://master:8020/output
// Error produced when the job points at the HDFS server but local input/output paths are passed in:
// hdfs://master:8020/F:\ideaUProgram\mapReduce1\src\data
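A sketch of the two launch styles, assuming the job is packaged as maxtemp.jar (the jar name is illustrative):

# run against the local file system
hadoop jar maxtemp.jar GetMaxTemperature file:///input file:///output
# run against HDFS on the cluster
hadoop jar maxtemp.jar GetMaxTemperature hdfs://master:8020/input hdfs://master:8020/output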
Optimization: use a bean to hold month and temperature together, so the reducer can collect every (month, max) pair in a list and then sort it to output the top 3. Storing and retrieving the values as one unit is more convenient than handling them separately.
package li;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.*;

/*
Problem: find the maximum temperature for each month.
2018010123.4 2018010133.4 2018010113.4 2018020426.8
2018050829.1 2018050713.4 2018070433.2 2018090123.4
*/
public class GetMaxTemperature {

    public static class MyMapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Convert the line to a plain String
            String line = value.toString();
            // Slice out the month and the temperature
            String month = line.substring(0, 6);
            String temp = line.substring(8);
            context.write(new Text(month), new DoubleWritable(Double.parseDouble(temp)));
        }
    }

    public static class MyReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
        // Initialization hook; runs once before the reducer starts (shown here for illustration)
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
        }

        // The list accumulates every month together with its maximum temperature
        ArrayList<Bean> list = new ArrayList<>();

        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
            // Iterate over the values and find the maximum
            double temp = Double.NEGATIVE_INFINITY; // start below any real temperature so negative values are handled too
            for (DoubleWritable value : values) {
                if (temp < value.get()) {
                    temp = value.get();
                }
            }
            list.add(new Bean(key.toString(), temp));
            System.out.println("after reduce: " + list);
        }

        // Teardown hook; runs once after all reduce calls
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Sort, take the top 3, and emit them
            list.sort(new Comparator<Bean>() {
                @Override
                public int compare(Bean o1, Bean o2) {
                    // Bean implements WritableComparable and therefore provides compareTo
                    // (descending by temperature), so it can be reused directly here
                    return o1.compareTo(o2);
                }
            });
            // Emit the first three entries (or fewer, if there are not enough months)
            for (int i = 0; i < Math.min(3, list.size()); i++) {
                Text text = new Text(list.get(i).getMonth());
                DoubleWritable d = new DoubleWritable(list.get(i).getTemp());
                context.write(text, d);
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Configuration settings
        Configuration configuration = new Configuration();
        // User name used when accessing the remote cluster
        //System.setProperty("HADOOP_USER_NAME", "root");
        // Force the local file system for standalone testing
        configuration.set("fs.defaultFS", "file:///");
        // Create the job
        Job job = Job.getInstance(configuration, "maxTemperature");
        // Set the main class
        job.setJarByClass(GetMaxTemperature.class);
        // Set the mapper
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        // Set the reducer
        job.setReducerClass(MyReducer.class);
        // Set the output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
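With the sample data and the default single reduce task (the in-memory list only sees keys from its own reducer, so the top-3 logic assumes one reducer), the expected output is:

201801	33.4
201807	33.2
201805	29.1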
Bean.java
package li;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Bean implements Hadoop serialization;
// every field inside Bean must itself support serialization and deserialization
public class Bean implements WritableComparable<Bean> {

    private String month;
    private double temp;

    public Bean() {}

    public Bean(String month, double temp) {
        this.month = month;
        this.temp = temp;
    }

    @Override
    public String toString() {
        return "Bean{" + "month='" + month + '\'' + ", temp=" + temp + '}';
    }

    public String getMonth() {
        return month;
    }

    public void setMonth(String month) {
        this.month = month;
    }

    public double getTemp() {
        return temp;
    }

    public void setTemp(double temp) {
        this.temp = temp;
    }

    // Serialization
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(month);
        dataOutput.writeDouble(temp);
    }

    // Deserialization -- fields must be read in the same order they were written
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.month = dataInput.readUTF();
        this.temp = dataInput.readDouble();
    }

    // Descending order by temperature. Double.compare avoids the bug in
    // (int) Math.ceil(o.getTemp() - this.getTemp()), where a small negative
    // difference rounds to 0 and distinct temperatures compare as equal.
    @Override
    public int compareTo(Bean o) {
        return Double.compare(o.getTemp(), this.getTemp());
    }
}
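A quick standalone sanity check of the descending order (BeanSortCheck is a hypothetical helper, not part of the job; values are from the sample data):

package li;

import java.util.*;

public class BeanSortCheck {
    public static void main(String[] args) {
        List<Bean> beans = new ArrayList<>(Arrays.asList(
                new Bean("201805", 29.1),
                new Bean("201801", 33.4),
                new Bean("201807", 33.2)));
        Collections.sort(beans); // descending by temp via Bean.compareTo
        System.out.println(beans); // 201801 (33.4) first, then 201807 (33.2), then 201805 (29.1)
    }
}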