MapReduce编程案例——气象分析
描述:
·通过采集的气象数据分析每年的最高温度
·初始数据:
0067011990999991950051507004888888889999999N9+00001+9999999999999999999999
0067011990999991950051512004888888889999999N9+00221+9999999999999999999999
0067011990999991950051518004888888889999999N9-00111+9999999999999999999999
0067011990999991949032412004888888889999999N9+01111+9999999999999999999999
0067011990999991950032418004888888880500001N9+00001+9999999999999999999999
0067011990999991950051507004888888880500001N9+00781+9999999999999999999999
数据说明:
第16-19个字符是year(对应代码中0起始下标的substring(15,19))
第46-50位是温度表示(代码中substring(45,50)),+表示零上,-表示零下,且温度的数值部分不能是9999,9999表示异常数据
第51位(代码中substring(50,51))是质量码,其值只能是0、1、4、5、9几个数字
·结果:
1949 111
1950 78
·分析:
Map函数只是一个数据准备阶段,通过这种方式来准备数据,使reduce函数能够继续对它进行处理,即找出最大温度。
MAP部分:数据准备阶段取year 和 温度 作为k2和v2
REDUCE部分:取最大温度
代码说明:
package org.apache.hadoop.mapreduce; import java.io.IOException; import java.net.URI; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class WeatherMain { final static String INPUT_PATH="hdfs://localhost:9000/input"; final static String OUTPUT_PATH="hdfs://localhost:9000/output"; public static void main(String[] args) throws Exception { // TODO Auto-generated method stub Configuration configuration = new Configuration(); FileSystem fileSystem=FileSystem.get(new URI(INPUT_PATH),configuration); if (fileSystem.exists(new Path(OUTPUT_PATH))) { fileSystem.delete(new Path(OUTPUT_PATH),true); } Job job = new Job(configuration,"WeatherMax"); FileInputFormat.setInputPaths(job, INPUT_PATH); FileOutputFormat.setOutputPath(job,new Path(OUTPUT_PATH)); job.setJarByClass(WeatherMain.class); job.setMapperClass(weathermapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(weatherreducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.waitForCompletion(true); } public static class weatherreducer extends Reducer<Text, IntWritable, Text, IntWritable> { private IntWritable result = new IntWritable(); public weatherreducer() { } protected void reduce(Text key2, Iterable<IntWritable> value2, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { Text key3=key2; int maxValue=0; 
for(IntWritable intWritable :value2) { maxValue = Math.max(maxValue, intWritable.get()); } IntWritable value4 =new IntWritable(maxValue); context.write(key3, value4); } } public static class weathermapper extends Mapper<LongWritable, Text, Text, IntWritable> { private static final int FAIL_DATA=9999; public void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { String line =value.toString(); String year =line.substring(15,19); String tmp =line.substring(45,46); int val=0; if(tmp=="+") { val=Integer.parseInt(line.substring(46,50)); } else { val=Integer.parseInt(line.substring(45,50)); } if(Integer.parseInt(line.substring(46,50))!=FAIL_DATA &&line.substring(50,51).matches("[01459]")) { context.write(new Text(year), new IntWritable(val)); } } } }