MapReduce in Practice: The Temperature Problem

Input file temp:

1949-10-01 14:21:02 34c
1949-10-01 19:21:02 38c
1949-10-02 14:01:02 36c
1950-01-01 11:21:02 32c
1950-10-01 12:21:02 37c
1951-12-01 12:21:02 23c
1950-10-02 12:21:02 41c
1950-10-03 12:21:02 27c
1951-07-01 12:21:02 45c
1951-07-02 12:21:02 46c
1951-07-03 12:21:03 47c

Custom key for the map output

Requirement: sort by year ascending, then month ascending, then temperature descending.

package TQ;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class TQKey implements WritableComparable<TQKey>{

	private int year;
	private int month;
	private int day;
	private int wd;
	
	public TQKey() {
		super();
	}
	
	public int getYear() {
		return year;
	}

	public void setYear(int year) {
		this.year = year;
	}

	public int getMonth() {
		return month;
	}

	public void setMonth(int month) {
		this.month = month;
	}

	public int getDay() {
		return day;
	}

	public void setDay(int day) {
		this.day = day;
	}

	public int getWd() {
		return wd;
	}

	public void setWd(int wd) {
		this.wd = wd;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(year);
		out.writeInt(month);
		out.writeInt(day);
		out.writeInt(wd);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		this.year = in.readInt();
		this.month = in.readInt();
		this.day = in.readInt();
		this.wd = in.readInt();
	}

	@Override
	public int compareTo(TQKey that) {
		// Compare year first (ascending)
		int r1 = Integer.compare(this.getYear(), that.getYear());
		if(r1 == 0){
			// Same year: compare month (ascending)
			int r2 = Integer.compare(this.getMonth(), that.getMonth());
			if(r2 == 0){
				// Same month: order by temperature, descending (day is intentionally not compared)
				return -Integer.compare(this.getWd(), that.getWd());
			}
			return r2;
		}
		return r1;
	}

	@Override
	public String toString() {
		return "TQKey [year=" + year + ", month=" + month + ", day=" + day + ", wd=" + wd + "]";
	}
	
}
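
Since the runner below only registers a grouping comparator and no separate sort comparator, the compareTo above is what drives the shuffle sort. As a quick local sanity check (a throwaway sketch, not part of the job), the ordering can be exercised with Collections.sort on a few keys built from the sample input:

package TQ;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class TQKeySortCheck {

	public static void main(String[] args) {
		// Build a few keys from the sample input and sort them with TQKey.compareTo
		int[][] rows = { {1951, 7, 2, 46}, {1949, 10, 1, 38}, {1951, 7, 3, 47}, {1949, 10, 1, 34} };
		List<TQKey> keys = new ArrayList<TQKey>();
		for (int[] r : rows) {
			TQKey k = new TQKey();
			k.setYear(r[0]);
			k.setMonth(r[1]);
			k.setDay(r[2]);
			k.setWd(r[3]);
			keys.add(k);
		}
		Collections.sort(keys);
		// Expected order: 1949-10 wd=38, 1949-10 wd=34, 1951-7 wd=47, 1951-7 wd=46
		for (TQKey k : keys) {
			System.out.println(k);
		}
	}
}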

Custom grouping comparator

Purpose: the map output key is a custom type, and by default the shuffle only groups records whose keys are completely identical into <key, [value1], [value2]…> for iteration on the reducer side. A custom grouping comparator therefore defines which keys count as "the same" key; here, records with the same year and month are grouped together.

package TQ;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class TQGroupingComparator extends WritableComparator{

	public TQGroupingComparator() {
		// true => create TQKey instances so compare() receives deserialized keys
		super(TQKey.class, true);
	}
	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		TQKey t1 = (TQKey) a;
		TQKey t2 = (TQKey) b;
		
		// Two keys belong to the same group when year and month both match
		int r1 = Integer.compare(t1.getYear(), t2.getYear());
		if(r1 == 0){
			return Integer.compare(t1.getMonth(), t2.getMonth());
		}
		}
		return r1;
	}
}
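
The runner below leaves job.setNumReduceTasks commented out, so a single reducer sees every group. If several reduce tasks were enabled, all records of one year-month would also need to land on the same reducer. A minimal sketch of such a partitioner (the class name TQPartitioner and the hashing scheme are my own assumptions, not part of the original code; it would be registered with job.setPartitionerClass(TQPartitioner.class)):

package TQ;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical partitioner: routes all records of one year-month to the same reducer
public class TQPartitioner extends Partitioner<TQKey, IntWritable> {

	@Override
	public int getPartition(TQKey key, IntWritable value, int numPartitions) {
		// Hash only year and month so the grouping comparator sees a whole month in one reduce call
		return ((key.getYear() * 100 + key.getMonth()) & Integer.MAX_VALUE) % numPartitions;
	}
}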

Mapper class

package TQ;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TQ_mapper extends Mapper<Object, Text, TQKey, IntWritable>{

	IntWritable wd = new IntWritable();
	TQKey tqKey = new TQKey();
	@Override
	protected void map(Object key, Text value, Mapper<Object, Text, TQKey, IntWritable>.Context context)
			throws IOException, InterruptedException {
		// One input line, e.g. 1949-10-01 14:21:02 34c
		String line = value.toString();
		// Split on spaces: 1949-10-01 | 14:21:02 | 34c
		String[] split = line.split(" ");
		// Date part: 1949-10-01
		String time = split[0];
		// Year, month, day: 1949 | 10 | 01
		String[] ymd = time.split("-");
		tqKey.setYear(Integer.parseInt(ymd[0]));
		tqKey.setMonth(Integer.parseInt(ymd[1]));
		tqKey.setDay(Integer.parseInt(ymd[2]));
		
		// Temperature: strip the trailing 'c' (34c -> 34)
		int temp = Integer.parseInt(split[2].substring(0, split[2].length() - 1));
		tqKey.setWd(temp);
		
		wd.set(temp);
		context.write(tqKey, wd);
	}
	
	
}
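
To see what the parsing above produces for one sample line, here is a standalone check (a throwaway sketch, not part of the job). Note that Integer.parseInt drops the leading zero of the day, which is why the results at the end read 1949-10-1 rather than 1949-10-01:

package TQ;

// Throwaway check of the line-parsing logic used in TQ_mapper
public class ParseCheck {

	public static void main(String[] args) {
		String line = "1949-10-01 14:21:02 34c";
		String[] split = line.split(" ");
		String[] ymd = split[0].split("-");
		int temp = Integer.parseInt(split[2].substring(0, split[2].length() - 1));
		// Prints: year=1949 month=10 day=1 wd=34
		System.out.println("year=" + Integer.parseInt(ymd[0]) + " month=" + Integer.parseInt(ymd[1])
				+ " day=" + Integer.parseInt(ymd[2]) + " wd=" + temp);
	}
}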

Reducer class

package TQ;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class TQ_reduce extends Reducer<TQKey, IntWritable, Text, Text>{
	@Override
	protected void reduce(TQKey k, Iterable<IntWritable> v, Reducer<TQKey, IntWritable, Text, Text>.Context context)
			throws IOException, InterruptedException {

		// Each group is one year-month; within the group the values (and the reused
		// key object k) arrive sorted by temperature in descending order. As the
		// iterator advances, Hadoop deserializes the next record into k, so
		// k.getDay() always matches the record behind the current value.
		int flag = 1;
		int firstWD = 0;
		int secondWD = 0;
		String firstDay = "";
		String secondDay = "";
		for(IntWritable wd : v){
			if(flag == 1){
				// Hottest record of the month
				firstWD = wd.get();
				firstDay = k.getYear()+"-"+k.getMonth()+"-"+k.getDay();
				flag++;
			}
			else if(flag == 2){
				// Second-hottest record of the month
				secondWD = wd.get();
				secondDay = k.getYear()+"-"+k.getMonth()+"-"+k.getDay();
				flag++;
			}else{
				break;
			}
		}
		context.write(new Text(firstDay), new Text(firstWD+""));
		if(!secondDay.equals("")){
			context.write(new Text(secondDay), new Text(secondWD+""));
		}
	}

	
}

Runner class

package TQ;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TQ_runner {
	private static Configuration conf;
	private static FileSystem fs;
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		/*
		 * 1. Preparation
		 * Build a Configuration pointing at the cluster, obtain the matching FileSystem,
		 * and later create a Job tied to that configuration via getInstance(conf).
		 */
		conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://node2:8020");
		// yarn.resourcemanager.hostname expects a hostname only (no port)
		conf.set("yarn.resourcemanager.hostname", "node2");
		fs = FileSystem.get(conf);
		/*
		 * 2. Create a new Job
		 * Many more parameters can be set on the job; the essential ones are below.
		 */
		Job job = Job.getInstance(conf);
		job.setJobName("TQ");
		job.setJarByClass(TQ_runner.class);
		//job.setNumReduceTasks(3); // if enabled, also set a partitioner keyed on year/month (see the sketch after the grouping comparator)
		//2-1 Set the Mapper and Reducer classes that hold the job's logic
		job.setMapperClass(TQ_mapper.class);
		
		job.setReducerClass(TQ_reduce.class);
		
		//2-2 Declare the map-side and reduce-side output key/value types
		job.setMapOutputKeyClass(TQKey.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		job.setGroupingComparatorClass(TQGroupingComparator.class); // group reduce input by year + month
		/*
		 * 3. Input and output paths
		 * Note: FileInputFormat/FileOutputFormat below come from the
		 * org.apache.hadoop.mapreduce.lib packages (the new API), not the old demo classes.
		 */
		FileInputFormat.setInputPaths(job, new Path("/input/temp"));
		Path output = new Path("/TQ/1");
		if(fs.exists(output)){
			fs.delete(output, true);
			System.out.println("Output path already existed and has been deleted");
		}
		FileOutputFormat.setOutputPath(job, output);

		/*
		 * 4. Submit the job, then poll for progress until the job is complete
		 */
		boolean flag = job.waitForCompletion(true);
		System.exit(flag ? 0 : 1);
	}

}

Results

1949-10-1 38
1949-10-2 36
1950-1-1 32
1950-10-2 41
1950-10-1 37
1951-7-3 47
1951-7-2 46
1951-12-1 23
