1. Creating a Hadoop 2.x Project in Eclipse
- Add the required jar packages (a quick classpath check follows the list below):
hadoop-2.5.1\share\hadoop\common
-hadoop-common-2.5.1.jar
-hadoop-common-2.5.1-tests.jar
-hadoop-nfs-2.5.1.jar
hadoop-2.5.1\share\hadoop\common\lib
all jars in this directory
hadoop-2.5.1\share\hadoop\hdfs
-hadoop-hdfs-2.5.1.jar
-hadoop-hdfs-2.5.1-tests.jar
-hadoop-hdfs-nfs-2.5.1.jar
hadoop-2.5.1\share\hadoop\mapreduce
all jars (9 in total)
hadoop-2.5.1\share\hadoop\yarn
all jars (11 in total)
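If the jars were added correctly, the core Hadoop classes should resolve on the build path. Below is a small, hypothetical sanity check (the class name is mine, not part of the original setup) that prints which jar each core API class is loaded from:

package com.all58.mr;

/**
 * Hypothetical sanity check: prints the jar each core Hadoop class was loaded from.
 * If a class cannot be resolved, the corresponding jar is missing from the build path.
 */
public class ClasspathCheck {
    public static void main(String[] args) {
        print(org.apache.hadoop.conf.Configuration.class);         // hadoop-common
        print(org.apache.hadoop.hdfs.DistributedFileSystem.class); // hadoop-hdfs
        print(org.apache.hadoop.mapreduce.Job.class);               // hadoop-mapreduce-client-core
        print(org.apache.hadoop.yarn.conf.YarnConfiguration.class); // yarn jars
    }

    private static void print(Class<?> clazz) {
        System.out.println(clazz.getName() + " <- "
                + clazz.getProtectionDomain().getCodeSource().getLocation());
    }
}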
2. Requirements
- Data file data to be sorted (the timestamp and the temperature are separated by a tab):
1949-10-01 14:21:02	34℃
1949-10-02 14:01:02	36℃
1950-01-01 11:21:02	32℃
1950-10-01 12:21:02	37℃
1951-12-01 12:21:02	23℃
1950-10-02 12:21:02	41℃
1950-10-03 12:21:02	27℃
1951-07-01 12:21:02	45℃
1951-07-02 12:21:02	46℃
Goal: for each year from 1949 to 1955, find the time at which the highest temperature was recorded.
- Analysis
① Map: sort the records by year in ascending order and, within each year, by temperature in descending order.
② Reduce: group the records by year, with one reduce task per year; all records of the same year arrive at a single reduce() call.
3. Implementation
- Custom writable key class
In the map phase, the key is an instance of the custom writable class.
The class must implement the WritableComparable interface and override the readFields, write, and compareTo methods; it should also override hashCode and toString. A small round-trip check follows the class.
package com.all58.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Custom composite key: year + temperature.
 */
public class KeyPair implements WritableComparable<KeyPair> {

    /** Year */
    private int year;
    /** Temperature */
    private int hot;

    /**
     * Deserialization: read the fields in the same order they were written.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.year = in.readInt();
        this.hot = in.readInt();
    }

    /**
     * Serialization.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(this.year);
        out.writeInt(this.hot);
    }

    /**
     * Sort by year ascending first,
     * then by temperature descending.
     */
    @Override
    public int compareTo(KeyPair o) {
        int res = Integer.compare(this.year, o.getYear());
        if (res != 0) {
            return res;
        }
        return -Integer.compare(this.hot, o.getHot()); // temperature descending
    }

    @Override
    public int hashCode() {
        return year + hot;
    }

    @Override
    public String toString() {
        return year + " " + hot;
    }

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getHot() {
        return hot;
    }

    public void setHot(int hot) {
        this.hot = hot;
    }
}
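The write/readFields pair must be symmetrical, otherwise keys are corrupted during the shuffle. A minimal, hypothetical round-trip check (the class name is mine, not part of the original post):

package com.all58.mr;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

/**
 * Hypothetical helper: serialize a KeyPair and read it back
 * to confirm write() and readFields() are symmetrical.
 */
public class KeyPairRoundTrip {
    public static void main(String[] args) throws Exception {
        KeyPair original = new KeyPair();
        original.setYear(1950);
        original.setHot(41);

        // Serialize
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance
        KeyPair copy = new KeyPair();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(original + " -> " + copy); // expect "1950 41 -> 1950 41"
    }
}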
- Sort comparator
Sort by year ascending and, within each year, by temperature descending: extend WritableComparator and override compare.
package com.all58.mr;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator:
 * year ascending, then temperature descending within each year.
 */
public class SortHot extends WritableComparator {

    public SortHot() {
        super(KeyPair.class, true);
    }

    /**
     * Year ascending,
     * then temperature descending.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        KeyPair o1 = (KeyPair) a;
        KeyPair o2 = (KeyPair) b;
        int res = Integer.compare(o1.getYear(), o2.getYear());
        if (res != 0) {
            return res;
        }
        return -Integer.compare(o1.getHot(), o2.getHot()); // temperature descending
    }
}
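Because SortHot passes true to the WritableComparator constructor, it instantiates KeyPair objects and delegates to the compare method above instead of comparing raw bytes. A small, hypothetical check of the ordering (class name not in the original post):

package com.all58.mr;

/**
 * Hypothetical check: SortHot should order by year ascending
 * and, within a year, by temperature descending.
 */
public class SortHotDemo {
    public static void main(String[] args) {
        KeyPair a = new KeyPair();
        a.setYear(1950);
        a.setHot(41);

        KeyPair b = new KeyPair();
        b.setYear(1950);
        b.setHot(27);

        KeyPair c = new KeyPair();
        c.setYear(1949);
        c.setHot(36);

        SortHot sorter = new SortHot();
        System.out.println(sorter.compare(a, b)); // negative: 41℃ sorts before 27℃ within 1950
        System.out.println(sorter.compare(a, c)); // positive: 1949 sorts before 1950
    }
}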
- Partitioning
Partition by year, with one reduce task per year.
Custom partitioner: extend Partitioner and override the getPartition method.
package com.all58.mr;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partition by year.
 */
public class FirstPartition extends Partitioner<KeyPair, Text> {

    /**
     * Partition by year.
     * @param num the number of reduce tasks
     */
    @Override
    public int getPartition(KeyPair key, Text value, int num) {
        return key.getYear() * 127 % num;
    }
}
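Multiplying the year by 127 before taking the modulus simply spreads consecutive years across the reduce tasks. With the three reduce tasks configured in the driver below, the three sample years land in separate partitions; a hypothetical check (class name not in the original post):

package com.all58.mr;

import org.apache.hadoop.io.Text;

/**
 * Hypothetical check: print which partition each sample year maps to
 * when the job runs with 3 reduce tasks.
 */
public class PartitionDemo {
    public static void main(String[] args) {
        FirstPartition partition = new FirstPartition();
        Text dummy = new Text();
        for (int year : new int[] {1949, 1950, 1951}) {
            KeyPair key = new KeyPair();
            key.setYear(year);
            // 1949 -> 2, 1950 -> 0, 1951 -> 1
            System.out.println(year + " -> partition " + partition.getPartition(key, dummy, 3));
        }
    }
}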
- Grouping
Group by year: all records with the same year are handed to a single reduce() call.
package com.all58.mr;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Group by year: keys that compare as equal here
 * end up in the same reduce() call.
 */
public class GroupHot extends WritableComparator {

    public GroupHot() {
        super(KeyPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        KeyPair o1 = (KeyPair) a;
        KeyPair o2 = (KeyPair) b;
        return Integer.compare(o1.getYear(), o2.getYear());
    }
}
- Launching the job
package com.all58.mr;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Job driver.
 */
public class RunJob {

    static class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text> {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] str = line.split("\t"); // timestamp and temperature are tab-separated
            if (str.length == 2) {
                try {
                    Date date = sdf.parse(str[0]);
                    Calendar c = Calendar.getInstance();
                    c.setTime(date);
                    int year = c.get(Calendar.YEAR);
                    String hot = str[1].substring(0, str[1].indexOf("℃"));
                    KeyPair kp = new KeyPair();
                    kp.setYear(year);
                    kp.setHot(Integer.parseInt(hot));
                    context.write(kp, value);
                } catch (ParseException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    static class HotReducer extends Reducer<KeyPair, Text, KeyPair, Text> {
        @Override
        protected void reduce(KeyPair key, Iterable<Text> iter, Context context)
                throws IOException, InterruptedException {
            // All records of one year arrive in a single call, hottest first
            for (Text text : iter) {
                context.write(key, text);
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            job.setJobName("hot");
            job.setJarByClass(RunJob.class);
            job.setMapperClass(HotMapper.class);
            job.setReducerClass(HotReducer.class);
            job.setMapOutputKeyClass(KeyPair.class);
            job.setMapOutputValueClass(Text.class);

            job.setNumReduceTasks(3); // number of reduce tasks, one per year in the sample data
            job.setPartitionerClass(FirstPartition.class);
            job.setSortComparatorClass(SortHot.class);
            job.setGroupingComparatorClass(GroupHot.class);

            // Input directory (or file) for the job
            FileInputFormat.addInputPath(job, new Path("/usr/file/hot"));
            // Output directory; must not exist before the job runs
            FileOutputFormat.setOutputPath(job, new Path("/usr/file/hot/output"));

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
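The reducer above writes every record of each year, sorted from hottest to coldest. Since the stated goal is only the time of the highest temperature per year, a hedged variant could stop after the first value of each group (MaxHotReducer is a hypothetical name, not part of the original post; set it with job.setReducerClass(MaxHotReducer.class)):

package com.all58.mr;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Hypothetical variant of HotReducer: emit only the first (hottest)
 * record of each year's group instead of the whole group.
 */
public class MaxHotReducer extends Reducer<KeyPair, Text, KeyPair, Text> {
    @Override
    protected void reduce(KeyPair key, Iterable<Text> iter, Context context)
            throws IOException, InterruptedException {
        for (Text text : iter) {
            context.write(key, text); // values arrive hottest-first, so this is the year's maximum
            break;
        }
    }
}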
Result: