[hadoop] Hadoop 2.5 Temperature Sorting (Part 9)

1. Creating a Hadoop 2.x Project in Eclipse

  • Add the JARs
    hadoop-2.5.1\share\hadoop\common
    -hadoop-common-2.5.1.jar
    -hadoop-common-2.5.1-tests.jar
    -hadoop-nfs-2.5.1.jar
    hadoop-2.5.1\share\hadoop\common\lib
    all JARs
    hadoop-2.5.1\share\hadoop\hdfs
    -hadoop-hdfs-2.5.1.jar
    -hadoop-hdfs-2.5.1-tests.jar
    -hadoop-hdfs-nfs-2.5.1.jar
    hadoop-2.5.1\share\hadoop\mapreduce
    all JARs (9 in total)
    hadoop-2.5.1\share\hadoop\yarn
    all JARs (11 in total)
2. Case Requirements
  • Data file data to sort
    1949-10-01 14:21:02 34℃
    1949-10-02 14:01:02 36℃
    1950-01-01 11:21:02 32℃
    1950-10-01 12:21:02 37℃
    1951-12-01 12:21:02 23℃
    1950-10-02 12:21:02 41℃
    1950-10-03 12:21:02 27℃
    1951-07-01 12:21:02 45℃
    1951-07-02 12:21:02 46℃

  • For each year from 1949 to 1955, find the date and time of that year's highest temperature
  • Approach
    ① Map: emit keys sorted by year ascending and, within each year, by temperature descending
    ② Reduce: group records by year, so each year is handled in a single reduce call (a plain-Java sketch of this ordering follows below)
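
    Before wiring this into MapReduce, here is a plain-Java sketch of the target ordering (a hypothetical SortOrderDemo class, not part of the job), using a few (year, temperature) pairs from the sample data:
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;
    
    public class SortOrderDemo {
    	public static void main(String[] args) {
    		List<int[]> records = new ArrayList<>(Arrays.asList(
    				new int[]{1950, 32}, new int[]{1949, 34},
    				new int[]{1950, 41}, new int[]{1949, 36}));
    		// year ascending, then temperature descending
    		records.sort(Comparator.<int[]>comparingInt(r -> r[0])
    				.thenComparing(Comparator.<int[]>comparingInt(r -> r[1]).reversed()));
    		for (int[] r : records) {
    			System.out.println(r[0] + " " + r[1]); // 1949 36, 1949 34, 1950 41, 1950 32
    		}
    	}
    }
    This is exactly the ordering that the sort comparator (SortHot, below) enforces on the map output keys during the shuffle.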
3. Implementation
  • Custom composite key class
    In the map output, the key is an instance of this custom class.
    The class implements the WritableComparable interface and overrides the readFields, write, and compareTo methods; it also overrides hashCode and toString.
    package com.all58.mr;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    import org.apache.hadoop.io.WritableComparable;
    
    /**
     * Custom composite key: (year, temperature)
     */
    public class KeyPair implements WritableComparable<KeyPair> {
    	/**
    	 * The year
    	 */
    	private int year;
    	/**
    	 * The temperature
    	 */
    	private int hot;
    	
    	/**
    	 * Deserialize the fields, in the same order they were written
    	 */
    	@Override
    	public void readFields(DataInput in) throws IOException {
    		this.year = in.readInt();
    		this.hot = in.readInt();
    	}
    	
    	/**
    	 * Serialize the fields
    	 */
    	@Override
    	public void write(DataOutput out) throws IOException {
    		out.writeInt(this.year);
    		out.writeInt(this.hot);
    	}
    	
    	/**
    	 * Year ascending first,
    	 * then temperature descending
    	 */
    	@Override
    	public int compareTo(KeyPair o) {
    		int res = Integer.compare(this.year, o.getYear());
    		if (res != 0) {
    			return res;
    		}
    		return -Integer.compare(this.hot, o.getHot()); // descending
    	}
    	
    	@Override
    	public int hashCode() {
    		return 31 * year + hot;
    	}
    
    	@Override
    	public String toString() {
    		return year + " " + hot;
    	}
    
    	public int getYear() {
    		return year;
    	}
    
    	public void setYear(int year) {
    		this.year = year;
    	}
    
    	public int getHot() {
    		return hot;
    	}
    
    	public void setHot(int hot) {
    		this.hot = hot;
    	}
    	
    }
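
    As a quick check that write and readFields stay symmetric, here is a minimal round-trip sketch (a hypothetical KeyPairRoundTrip class, assumed to live in the same package as KeyPair):
    package com.all58.mr;
    
    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    
    public class KeyPairRoundTrip {
    	public static void main(String[] args) throws IOException {
    		KeyPair original = new KeyPair();
    		original.setYear(1949);
    		original.setHot(34);
    		// serialize
    		ByteArrayOutputStream bos = new ByteArrayOutputStream();
    		original.write(new DataOutputStream(bos));
    		// deserialize into a fresh instance
    		KeyPair copy = new KeyPair();
    		copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
    		System.out.println(copy); // prints "1949 34"
    	}
    }
    An asymmetry between the two methods, such as reading year twice, would show up here immediately as a wrong temperature.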

  • Sort comparator
    Custom sort: extend WritableComparator and override the compare method.
    package com.all58.mr;
    
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    /**
     * Sort comparator:
     * year ascending, then temperature descending
     */
    public class SortHot extends WritableComparator {
    	
    	public SortHot() {
    		super(KeyPair.class, true); // true: instantiate KeyPair objects for comparison
    	}
    	
    	/**
    	 * Year ascending first,
    	 * then temperature descending
    	 */
    	@Override
    	public int compare(WritableComparable a, WritableComparable b) {
    		KeyPair o1 = (KeyPair) a;
    		KeyPair o2 = (KeyPair) b;
    		int res = Integer.compare(o1.getYear(), o2.getYear());
    		if (res != 0) {
    			return res;
    		}
    		return -Integer.compare(o1.getHot(), o2.getHot()); // descending
    	}
    	
    }

  • Partitioning
    Partition by year, so that all of a year's records land in the same reduce task.
    Custom partitioner: extend Partitioner and override the getPartition method.
    package com.all58.mr;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Partitioner;
    
    /**
     * Partition by year
     */
    public class FirstPartition extends Partitioner<KeyPair, Text> {
    	
    	/**
    	 * Partition by year
    	 * @param num the number of reduce tasks
    	 */
    	@Override
    	public int getPartition(KeyPair key, Text value, int num) {
    		return key.getYear() * 127 % num;
    	}
    
    }
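
    A quick sanity check of the partition formula (a hypothetical PartitionDemo class; the 3 reduce tasks match job.setNumReduceTasks(3) in the driver below):
    public class PartitionDemo {
    	public static void main(String[] args) {
    		int numReduceTasks = 3; // matches job.setNumReduceTasks(3) in RunJob below
    		for (int year = 1949; year <= 1955; year++) {
    			System.out.println(year + " -> partition " + (year * 127 % numReduceTasks));
    		}
    		// 1949 -> 2, 1950 -> 0, 1951 -> 1, 1952 -> 2, 1953 -> 0, 1954 -> 1, 1955 -> 2
    	}
    }
    With seven years and three partitions, several years share a reducer (1949, 1952, and 1955 all land in partition 2), which is why the grouping comparator below is still needed to keep the years apart inside a reduce task.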

  • Grouping
    Group by year.
    package com.all58.mr;
    
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    /**
     * Group by year
     */
    public class GroupHot extends WritableComparator {
    	
    	public GroupHot() {
    		super(KeyPair.class, true);
    	}
    	
    	@Override
    	public int compare(WritableComparable a, WritableComparable b) {
    		KeyPair o1 = (KeyPair) a;
    		KeyPair o2 = (KeyPair) b;
    		// compare only the year: records from the same year form one reduce group
    		return Integer.compare(o1.getYear(), o2.getYear());
    	}
    	
    }
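    Without this comparator, grouping would fall back to the sort comparator (SortHot), which treats two records from the same year but with different temperatures as distinct keys, so every record would trigger its own reduce call. GroupHot compares only the year, so each year's records arrive in a single reduce call, already sorted by temperature descending.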
    


  • Driver
    package com.all58.mr;
    
    import java.io.IOException;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Job driver: wires together the mapper, reducer, comparators, and partitioner
     */
    public class RunJob {
    	
    	static class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text> {
    		
    		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    		
    		@Override
    		protected void map(LongWritable key, Text value, Context context)
    				throws IOException, InterruptedException {
    			String line = value.toString();
    			// timestamp and temperature are separated by a tab in the data file
    			String[] str = line.split("\t");
    			if (str.length == 2) {
    				Date date;
    				try {
    					date = sdf.parse(str[0]);
    					Calendar c = Calendar.getInstance();
    					c.setTime(date);
    					int year = c.get(Calendar.YEAR);
    					String hot = str[1].substring(0, str[1].indexOf("℃"));
    					KeyPair kp = new KeyPair();
    					kp.setYear(year);
    					kp.setHot(Integer.parseInt(hot));
    					context.write(kp, value);
    				} catch (ParseException e) {
    					e.printStackTrace();
    				}
    			}
    		}
    	}
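    	
    	// For the first sample line ("1949-10-01 14:21:02\t34℃"), the mapper
    	// above emits the key (1949, 34) -- rendered as "1949 34" by
    	// KeyPair.toString -- with the whole original line as the value.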
    	
    	static class HotReducer extends Reducer<KeyPair, Text, KeyPair, Text> {
    		
    		/**
    		 * Called once per year (GroupHot); the values arrive sorted by
    		 * temperature descending (SortHot), so the first record in iter
    		 * is that year's hottest reading.
    		 */
    		@Override
    		protected void reduce(KeyPair key, Iterable<Text> iter, Context context)
    				throws IOException, InterruptedException {
    			for (Text text : iter) {
    				context.write(key, text);
    			}
    		}
    	}
    	
    	public static void main(String[] args) {
    		Configuration conf = new Configuration();
    		
    		try {
    			Job job = Job.getInstance(conf);
    			job.setJobName("hot");
    			job.setJarByClass(RunJob.class);
    			job.setMapperClass(HotMapper.class);
    			job.setReducerClass(HotReducer.class);
    			job.setMapOutputKeyClass(KeyPair.class);
    			job.setMapOutputValueClass(Text.class);
    			
    			job.setNumReduceTasks(3); // set the number of reduce tasks
    			job.setPartitionerClass(FirstPartition.class);
    			job.setSortComparatorClass(SortHot.class);
    			job.setGroupingComparatorClass(GroupHot.class);
    			
    			// input directory or file for the MapReduce job
    			FileInputFormat.addInputPath(job, new Path("/usr/file/hot"));
    			// output directory for the MapReduce job (must not already exist)
    			FileOutputFormat.setOutputPath(job, new Path("/usr/file/hot/output"));
    			System.exit(job.waitForCompletion(true) ? 0 : 1);
    		    
    		} catch (Exception e) {
    			e.printStackTrace();
    		}
    	}
    	
    }
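    To run the job (paths and jar name here are just examples): upload the data file with hdfs dfs -put data /usr/file/hot, package the classes into a jar such as hot.jar, and submit it with hadoop jar hot.jar com.all58.mr.RunJob. The output directory /usr/file/hot/output must not exist before the run.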
    

    Result:
    Each reduce task writes one output file (part-r-00000 through part-r-00002). Within each file, every year's records appear in descending temperature order, so the first line of each year group is that year's hottest reading.
