Hadoop中如何自定义排序,自定义分区,自定义分组。
--需求:
1.计算在1949-1955年,每年温度最高的时间
2.计算在1949-1955年,每年温度最高的前十天
--思路:
1.按照年份升序,同时每一年中温度降序排序
2.按照年份分组,每一年对应一个reduce任务
mapper输出,key为封装对象。
--目的:
自定义排序
自定义分区
自定义分组
--输入数据
1949-10-01 14:21:02 34°C
1949-10-02 14:01:02 36°C
1950-01-01 11:21:02 32°C
1950-19-01 12:21:02 37°C
1951-12-01 12:21:02 23°C
1950-10-02 12:21:02 41°C
1950-10-03 12:21:02 27°C
1951-07-01 12:21:02 45°C
1951-07-02 12:21:02 46°C
--时间和温度之间是制表符\t
--需求:1
--中间数据
1949-10-02 14:01:02 36°C
1950-19-01 12:21:02 37°C
1951-07-02 12:21:02 46°C
--目标数据
1949 36
1950 37
1951 46
--需求:2
--先分组,后降序
1949 36 1949-10-02 14:01:02 36°C
1949 34 1949-10-01 14:21:02 34°C
1950 41 1950-10-02 12:21:02 41°C
1950 37 1950-19-01 12:21:02 37°C
1950 32 1950-01-01 11:21:02 32°C
1950 27 1950-10-03 12:21:02 27°C
1951 46 1951-07-02 12:21:02 46°C
1951 45 1951-07-01 12:21:02 45°C
1951 23 1951-12-01 12:21:02 23°C
1949-10-01 14:21:02 34°C-->(1949,34)-->49,50,51(分组)-->温度排序-->
--下面是程序的实现:
--Reducer(HotReduce)
package com.lhj.www;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that writes the hottest records of each reduce group.
 *
 * <p>Each value is written together with the current key. The stated requirement is the
 * ten hottest records per year, so at most {@link #TOP_N} values are emitted per group.
 *
 * <p>NOTE(review): this assumes the job groups keys by year (HotGroup) and sorts them by
 * temperature descending (HotSort), so the first values seen are the hottest. The job
 * driver is not shown here; confirm it registers both comparators.
 */
public class HotReduce extends Reducer<KeyPair, Text, KeyPair, Text>{
    /** Maximum number of records emitted for one reduce group. */
    private static final int TOP_N = 10;

    /**
     * Emits up to {@link #TOP_N} records of the group, in the order they are received.
     *
     * @param kp      key of the current group
     * @param i       the input lines belonging to this group
     * @param context used to write the output records
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(KeyPair kp, Iterable<Text> i,Context context)
            throws IOException, InterruptedException {
        int written = 0;
        for (Text text : i) {
            // Stop once the quota is reached; the rest of the group is not needed.
            if (written >= TOP_N) {
                break;
            }
            context.write(kp, text);
            written++;
        }
    }
}
--Mapper(HotMapper)
package com.lhj.www;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that converts each input line into a (year, temperature) composite key and
 * passes the original line through as the value.
 *
 * <p>Shuffle phase order: partition -> combiner (optional) -> sort -> group.
 *
 * <p>Expected line layout: {@code <date time>\t<temperature>°C}, for example
 * {@code 1949-10-01 14:21:02\t34°C}.
 *
 * @author Administrator
 */
public class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text>{
    /** Unit suffix that terminates the numeric part of the temperature field. */
    private static final String UNIT = "°C";
    /** Number of leading characters of the date field that hold the year. */
    private static final int YEAR_LENGTH = 4;

    /**
     * Parses one line and emits {@code (KeyPair(year, hot), line)}.
     *
     * <p>Lines that do not consist of exactly two tab-separated fields are ignored.
     * Lines whose year or temperature cannot be parsed are skipped and counted in the
     * {@code HotMapper/MALFORMED_LINES} counter instead of failing the whole task.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   one line of input text
     * @param context used to emit the output record and to update counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value,Context context)
            throws IOException, InterruptedException {
        String line=value.toString();
        String[] ss=line.split("\t");
        if (ss.length!=2){
            return;
        }
        int unitPos=ss[1].indexOf(UNIT);
        // A missing unit suffix or a too-short date would make substring() throw.
        if (ss[0].length()<YEAR_LENGTH || unitPos<0){
            context.getCounter("HotMapper", "MALFORMED_LINES").increment(1);
            return;
        }
        int year;
        int hot;
        try {
            year=Integer.parseInt(ss[0].substring(0, YEAR_LENGTH));
            hot=Integer.parseInt(ss[1].substring(0, unitPos));
        } catch (NumberFormatException e) {
            // Non-numeric year or temperature: skip the record but keep it visible.
            context.getCounter("HotMapper", "MALFORMED_LINES").increment(1);
            return;
        }
        KeyPair kp=new KeyPair();
        kp.setYear(year);
        kp.setHot(hot);
        context.write(kp, value);
    }
}
--KeyPair
package com.lhj.www;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite key made of a year and a temperature ({@code hot}).
 *
 * <p>Serialized as two ints, year first and then hot. The natural ordering is year
 * ascending, then hot ascending. {@code equals} and {@code hashCode} are both defined
 * on the two fields so that they agree with {@code compareTo}.
 */
public class KeyPair implements WritableComparable<KeyPair> {
    private int year;
    private int hot;

    /** @return the year part of the key */
    public int getYear() {
        return year;
    }

    /** @param year the year part of the key */
    public void setYear(int year) {
        this.year = year;
    }

    /** @return the temperature part of the key */
    public int getHot() {
        return hot;
    }

    /** @param hot the temperature part of the key */
    public void setHot(int hot) {
        this.hot = hot;
    }

    /**
     * Restores the fields in the same order that {@link #write(DataOutput)} stores them.
     *
     * @param in source to read the two ints from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.year=in.readInt();
        this.hot=in.readInt();
    }

    /**
     * Writes year and then hot as ints.
     *
     * @param out destination to write the two ints to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(hot);
    }

    /**
     * Orders by year ascending; keys of the same year are ordered by hot ascending.
     *
     * @param o the key to compare with
     * @return negative, zero or positive as this key is less than, equal to or greater than {@code o}
     */
    @Override
    public int compareTo(KeyPair o) {
        // Compare the year first; only when the years are equal does the temperature decide.
        int result=Integer.compare(year, o.year);
        if (result!=0){
            return result;
        }
        return Integer.compare(hot, o.hot);
    }

    /**
     * Two keys are equal when both year and hot match, consistent with {@link #compareTo(KeyPair)}.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a KeyPair with the same year and hot
     */
    @Override
    public boolean equals(Object obj) {
        if (this==obj){
            return true;
        }
        if (!(obj instanceof KeyPair)){
            return false;
        }
        KeyPair other=(KeyPair) obj;
        return year==other.year && hot==other.hot;
    }

    /** @return the key rendered as {@code year<TAB>hot} */
    @Override
    public String toString() {
        return year+"\t"+hot;
    }

    /**
     * Hash of the two fields.
     *
     * @return {@code year + hot}, the same value the previous boxed-Integer version returned
     */
    @Override
    public int hashCode() {
        return year+hot;
    }
}
--HotPartition
package com.lhj.www;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Partitioner that routes records by the year of the key only, so every record of one
 * year is sent to the same partition.
 */
public class HotPartition extends Partitioner<KeyPair, Text>{
    /**
     * Chooses the partition from the key's year.
     *
     * <p>The sign bit of {@code year * 127} is cleared before taking the remainder, so
     * the result stays in {@code [0, num)} even for a negative year or an overflowing
     * product. For non-negative products the result equals {@code year * 127 % num}.
     *
     * @param key   the composite key; only its year is used
     * @param value the record value (not used)
     * @param num   the number of partitions
     * @return a partition index in {@code [0, num)}
     */
    @Override
    public int getPartition(KeyPair key, Text value, int num) {
        return ((key.getYear()*127) & Integer.MAX_VALUE) % num;
    }
}
package com.lhj.www;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
--HotSort
/**
 * Sort comparator for {@code KeyPair}: year ascending, and within the same year
 * temperature descending.
 */
public class HotSort extends WritableComparator{
    /** Registers {@code KeyPair} as the key class, with instance creation enabled. */
    public HotSort() {
        super(KeyPair.class, true);
    }

    /**
     * Compares two keys by year, then by temperature in reverse order.
     *
     * @param a first key, expected to be a {@code KeyPair}
     * @param b second key, expected to be a {@code KeyPair}
     * @return negative, zero or positive according to the ordering described on the class
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        KeyPair left=(KeyPair) a;
        KeyPair right=(KeyPair) b;
        int byYear=Integer.compare(left.getYear(), right.getYear());
        if (byYear==0){
            // Same year: operands are swapped so that the higher temperature sorts first.
            return Integer.compare(right.getHot(), left.getHot());
        }
        return byYear;
    }
}
--HotGroup
package com.lhj.www;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator for {@code KeyPair}: keys with the same year compare as equal,
 * regardless of temperature.
 */
public class HotGroup extends WritableComparator{
    /** Registers {@code KeyPair} as the key class, with instance creation enabled. */
    public HotGroup() {
        super(KeyPair.class, true);
    }

    /**
     * Compares two keys by year only.
     *
     * @param a first key, expected to be a {@code KeyPair}
     * @param b second key, expected to be a {@code KeyPair}
     * @return negative, zero or positive as the year of {@code a} is less than, equal to
     *         or greater than the year of {@code b}
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        int yearA=((KeyPair) a).getYear();
        int yearB=((KeyPair) b).getYear();
        return Integer.compare(yearA, yearB);
    }
}
--打包后查看
[hadoop@node1 test]$ jar
Usage: jar {ctxui}[vfm0Me] [jar-file] [manifest-file] [entry-point] [-C dir] files ...
Options:
-c create new archive
-t list table of contents for archive
[hadoop@node1 test]$ jar -tvf hot.jar
25 Sat Jan 16 17:07:22 CST 2016 META-INF/MANIFEST.MF
384 Fri Nov 06 15:23:16 CST 2015 .project
3918 Fri Nov 06 15:32:22 CST 2015 .classpath
731 Sat Jan 16 15:51:46 CST 2016 com/lhj/www/HotGroup.class
2006 Sat Jan 16 14:16:32 CST 2016 com/lhj/www/HotReduce.class
836 Sat Jan 16 15:51:30 CST 2016 com/lhj/www/HotSort.class
1951 Sat Jan 16 17:00:20 CST 2016 com/lhj/www/JobRun.class
861 Sat Jan 16 15:51:00 CST 2016 com/lhj/www/HotPartition.class
2383 Sat Jan 16 16:49:24 CST 2016 com/lhj/www/HotMapper.class
2093 Wed Jan 13 08:27:32 CST 2016 com/lhj/www/KeyPair.class
[hadoop@node1 test]$
--运行
[hadoop@node1 test]$ hadoop jar hot.jar com.lhj.www.JobRun
16/01/16 17:13:12 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 4 items
-rw-r--r-- 1 hadoop supergroup 0 2016-01-16 17:11 /user/hadoop/output/hot/_SUCCESS
-rw-r--r-- 1 hadoop supergroup 136 2016-01-16 17:11 /user/hadoop/output/hot/part-r-00000
-rw-r--r-- 1 hadoop supergroup 102 2016-01-16 17:11 /user/hadoop/output/hot/part-r-00001
-rw-r--r-- 1 hadoop supergroup 68 2016-01-16 17:11 /user/hadoop/output/hot/part-r-00002
[hadoop@node1 test]$ hadoop fs -cat /user/hadoop/output/hot/part-r-00000
16/01/16 17:13:36 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
1950 41 1950-10-02 12:21:02 41°C
1950 37 1950-19-01 12:21:02 37°C
1950 32 1950-01-01 11:21:02 32°C
1950 27 1950-10-03 12:21:02 27°C
[hadoop@node1 test]$ hadoop fs -cat /user/hadoop/output/hot/part-r-00001
16/01/16 17:13:45 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
1951 46 1951-07-02 12:21:02 46°C
1951 45 1951-07-01 12:21:02 45°C
1951 23 1951-12-01 12:21:02 23°C
[hadoop@node1 test]$ hadoop fs -cat /user/hadoop/output/hot/part-r-00002
16/01/16 17:13:56 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
1949 36 1949-10-02 14:01:02 36°C
1949 34 1949-10-01 14:21:02 34°C
Hadoop 自定义排序,自定义分区,自定义分组
最新推荐文章于 2021-07-15 15:28:08 发布