MapReduce example: computing the highest temperature per year

Reposted from: http://my.oschina.net/itblog/blog/275294

Overview: compute the highest temperature recorded in each year. In each record of the data source, the first eight digits are the date (yyyyMMdd) and the last two digits are the temperature.

 

Data source:

[zhoulx]$ cat input.txt

2014010114
2014010216
2014010317
2014010410
2014010506
2012010609
2012010732
2012010812
2012010919
2012011023
2001010116
2001010212
2001010310
2001010411
2001010529
2013010619
2013010722
2013010812
2013010929
2013011023
2008010105
2008010216
2008010337
2008010414
2008010516
2007010619
2007010712
2007010812
2007010999
2007011023
2010010114
2010010216
2010010317
2010010410
2010010506
2015010649
2015010722
2015010812
2015010999
2015011023

Output directory after the job has run (the code and run commands appear below):

[zhoulx]$ hadoop dfs -ls hdfs://hadoop-namenode/tmp/zhoulx/output

DEPRECATED: Use of this script to execute hdfs command is deprecated.

Instead use the hdfs command for it.

 

-rw-r--r--   3 sdp_dp hadoop          0 2016-01-19 14:59 hdfs://hadoop-namenode/tmp/zhoulx/output/_SUCCESS

-rw-r--r--   3 sdp_dp hadoop         64 2016-01-19 14:59 hdfs://hadoop-namenode/tmp/zhoulx/output/part-r-00000

 

Results:

[zhoulx]$ hadoop dfs -cat  hdfs://hadoop-namenode/tmp/zhoulx/output/part-r-00000

DEPRECATED: Use of this script to execute hdfs command is deprecated.

Instead use the hdfs command for it.

 

2001    29
2007    99
2008    37
2010    17
2012    32
2013    29
2014    17
2015    99
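As a sanity check, the same per-year maximum can be computed locally with plain Java. The sketch below is not from the original post; it assumes input.txt sits in the working directory, and it mirrors the Mapper's substring parsing and the Reducer's max logic:

    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.Map;
    import java.util.TreeMap;

    public class LocalMaxCheck {
        public static void main(String[] args) throws Exception {
            // year -> highest temperature seen so far; TreeMap keeps years sorted
            TreeMap<String, Integer> maxByYear = new TreeMap<String, Integer>();
            for (String line : Files.readAllLines(Paths.get("input.txt"))) {
                if (line.trim().isEmpty()) continue;            // skip blank lines
                String year = line.substring(0, 4);             // yyyy from yyyyMMdd
                int temp = Integer.parseInt(line.substring(8)); // last two digits
                Integer best = maxByYear.get(year);
                if (best == null || temp > best) {
                    maxByYear.put(year, temp);
                }
            }
            for (Map.Entry<String, Integer> e : maxByYear.entrySet()) {
                System.out.println(e.getKey() + "\t" + e.getValue());
            }
        }
    }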

The complete source code:

package test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Temperature {
    /**
     * The four type parameters are:
     * KeyIn    the Mapper's input key: the byte offset of each line (0, 11, ...)
     * ValueIn  the Mapper's input value: the text of the line
     * KeyOut   the Mapper's output key: the "year" parsed from the line
     * ValueOut the Mapper's output value: the "temperature" parsed from the line
     */
    static class TempMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Sample output: Before Mapper: 0, 2000010115
            System.out.print("Before Mapper: " + key + ", " + value);
            String line = value.toString();
            String year = line.substring(0, 4);
            int temperature = Integer.parseInt(line.substring(8));
            context.write(new Text(year), new IntWritable(temperature));
            // Sample output: After Mapper: 2000, 15
            System.out.println(
                    "======" +
                    "After Mapper:" + new Text(year) + ", " + new IntWritable(temperature));
        }
    }

    /**
     * The four type parameters are:
     * KeyIn    the Reducer's input key: the "year" emitted by the Mapper
     * ValueIn  the Reducer's input values: the "temperatures" for that year
     * KeyOut   the Reducer's output key: the distinct "year"
     * ValueOut the Reducer's output value: the highest temperature of that year
     */
    static class TempReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int maxValue = Integer.MIN_VALUE;
            StringBuffer sb = new StringBuffer();
            // Find the maximum of the values
            for (IntWritable value : values) {
                maxValue = Math.max(maxValue, value.get());
                sb.append(value).append(", ");
            }
            // Sample output: Before Reduce: 2000, 15, 23, 99, 12, 22,
            System.out.print("Before Reduce: " + key + ", " + sb.toString());
            context.write(key, new IntWritable(maxValue));
            // Sample output: After Reduce: 2000, 99
            System.out.println(
                    "======" +
                    "After Reduce: " + key + ", " + maxValue);
        }
    }

    public static void main(String[] args) throws Exception {
        // Input path
        String dst = "/tmp/zhoulx/input.txt";
        // Output path; it must not already exist, not even as an empty folder.
        String dstOut = "/tmp/zhoulx/output";
        Configuration hadoopConfig = new Configuration();

        hadoopConfig.set("fs.hdfs.impl",
            org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()
        );
        hadoopConfig.set("fs.file.impl",
            org.apache.hadoop.fs.LocalFileSystem.class.getName()
        );
        Job job = new Job(hadoopConfig);

        // Required when the job is packaged as a jar and run with "hadoop jar"
        job.setJarByClass(Temperature.class);

        // Input and output paths for the job
        FileInputFormat.addInputPath(job, new Path(dst));
        FileOutputFormat.setOutputPath(job, new Path(dstOut));

        // Use the custom Mapper and Reducer as the two processing stages
        job.setMapperClass(TempMapper.class);
        job.setReducerClass(TempReducer.class);

        // Types of the final output key and value
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Run the job and wait for it to finish
        job.waitForCompletion(true);
        System.out.println("Finished");
    }
}
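An optional improvement not in the original post: because taking a maximum is associative and commutative, TempReducer can double as a combiner, pre-aggregating each mapper's output before the shuffle. This is valid here because the reducer's input and output types match (Text/IntWritable on both sides); note its debug prints would then also run map-side. A one-line addition in main() would be:

    // Optional: run TempReducer map-side as a combiner; safe because max is
    // associative/commutative and the reducer's input and output types match.
    job.setCombinerClass(TempReducer.class);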

To run the job as a jar, the line below (already present in main() above) is required:

        job.setJarByClass(Temperature.class);

The project must be packaged as a Runnable JAR file.

 

It depends on these four jars:

commons-configuration-1.6.jar
commons-lang-2.5.jar
commons-logging-1.1.1.jar
hadoop-core-1.0.3.jar

 

The MANIFEST.MF content is as follows:

Manifest-Version: 1.0
Class-Path: lib/commons-configuration-1.6.jar lib/commons-lang-2.5.jar lib/commons-logging-1.1.1.jar lib/hadoop-core-1.0.3.jar
Main-Class: test.Temperature
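Note that Class-Path entries in a manifest are resolved relative to the directory containing the jar, not to files packed inside it. The layout below is an assumption (it matches what Eclipse's Runnable JAR export produces with the "copy required libraries into a sub-folder" option) showing what the manifest above expects next to testr.jar:

    testr.jar    (contains test/Temperature.class and the MANIFEST.MF above)
    lib/
        commons-configuration-1.6.jar
        commons-lang-2.5.jar
        commons-logging-1.1.1.jar
        hadoop-core-1.0.3.jar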

 

Both paths are on HDFS, so the hdfs://hadoop-namenode:9000/ prefix can be omitted:

        // Input path
        String dst = "/tmp/zhoulx/input.txt";
        // Output path; it must not already exist, not even as an empty folder.
        String dstOut = "/tmp/zhoulx/output";
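Since the job fails when the output directory already exists, a common workaround (an addition, not part of the original code) is to delete it in main() before submitting the job. This sketch assumes it is placed after hadoopConfig is created:

    // Assumed addition: clear a stale output directory before the job runs.
    org.apache.hadoop.fs.FileSystem fs =
            org.apache.hadoop.fs.FileSystem.get(hadoopConfig);
    org.apache.hadoop.fs.Path out = new org.apache.hadoop.fs.Path(dstOut);
    if (fs.exists(out)) {
        fs.delete(out, true);  // true = recursive delete
    }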

Run it with:

hadoop jar testr.jar

 

 

Command recap, in execution order (upload the input, run the job, read the result):

hadoop dfs -put input.txt hdfs://hadoop-namenode/tmp/zhoulx/
hadoop jar testr.jar
hadoop dfs -cat hdfs://hadoop-namenode/tmp/zhoulx/output/part-r-00000

 

 
