对温度进行排序实例(实例来源于《从零开始学Hadoop大数据分析》)
有如下温度数据,请根据这些温度信息找出每年每月温度最高的3条记录(年月及温度),并按温度降序排列:
2010-01-01 12:00:21 8
2010-01-02 12:00:21 12
2010-01-03 12:00:21 10
2010-01-04 12:00:21 8
2010-01-05 12:00:21 8
2010-01-06 12:00:21 8
2010-01-07 12:00:21 8
2010-02-01 12:00:21 8
2010-02-02 12:00:21 12
2010-02-03 12:00:21 10
2010-02-04 12:00:21 8
2010-02-05 12:00:21 8
2010-02-06 12:00:21 8
2010-02-07 12:00:21 8
2010-03-01 12:00:21 8
2010-03-02 12:00:21 12
2010-03-03 12:00:21 10
2010-03-04 12:00:21 8
2010-03-05 12:00:21 8
2010-03-06 12:00:21 8
2010-03-07 12:00:21 8
2011-01-01 12:00:21 8
2011-01-02 12:00:21 12
2011-01-03 12:00:21 10
2011-01-04 12:00:21 8
2011-01-05 12:00:21 8
2011-01-06 12:00:21 8
2011-01-07 12:00:21 8
2011-02-01 12:00:21 8
2011-02-02 12:00:21 12
2011-02-03 12:00:21 10
2011-02-04 12:00:21 8
2011-02-05 12:00:21 8
2011-02-06 12:00:21 8
2011-02-07 12:00:21 8
2011-03-01 12:00:21 8
2011-03-02 12:00:21 12
2011-03-03 12:00:21 10
2011-03-04 12:00:21 8
2011-03-05 12:00:21 8
2011-03-06 12:00:21 8
2011-03-07 12:00:21 8
2012-01-01 12:00:21 8
2012-01-02 12:00:21 12
2012-01-03 12:00:21 10
2012-01-04 12:00:21 8
2012-01-05 12:00:21 8
2012-01-06 12:00:21 8
2012-01-07 12:00:21 8
2012-02-01 12:00:21 8
2012-02-02 12:00:21 12
2012-02-03 12:00:21 10
2012-02-04 12:00:21 8
2012-02-05 12:00:21 8
2012-02-06 12:00:21 8
2012-02-07 12:00:21 8
2012-03-01 12:00:21 8
2012-03-02 12:00:21 12
2012-03-03 12:00:21 10
2012-03-04 12:00:21 8
2012-03-05 12:00:21 8
2012-03-06 12:00:21 8
2012-03-07 12:00:21 8
2013-01-01 12:00:21 8
2013-01-02 12:00:21 12
2013-01-03 12:00:21 10
2013-01-04 12:00:21 8
2013-01-05 12:00:21 8
2013-01-06 12:00:21 8
2013-01-07 12:00:21 8
2013-02-01 12:00:21 8
2013-02-02 12:00:21 12
2013-02-03 12:00:21 10
2013-02-04 12:00:21 8
2013-02-05 12:00:21 8
2013-02-06 12:00:21 8
2013-02-07 12:00:21 8
2013-03-01 12:00:21 8
2013-03-02 12:00:21 12
2013-03-03 12:00:21 10
2013-03-04 12:00:21 8
2013-03-05 12:00:21 8
2013-03-06 12:00:21 8
2013-03-07 12:00:21 8
注:每行的"年月日 时分秒"之后是一个Tab(制表符),其后紧跟一个温度值
对时间和温度的封装类
MyKey
/**
 * Composite key carrying a year, a month and a temperature reading.
 *
 * <p>The key is serialized as two ints (year, month) followed by one double
 * (temperature); {@link #readFields(DataInput)} reads them back in the same order.
 *
 * <p>The natural ordering is year ascending, then month ascending, then
 * temperature descending, and {@code equals}/{@code hashCode} are consistent
 * with it.
 */
public class MyKey implements WritableComparable {
    private int year;   // year
    private int month;  // month
    private double t;   // temperature

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public double getT() {
        return t;
    }

    public void setT(double t) {
        this.t = t;
    }

    /**
     * Orders keys by year, then month, then temperature from high to low.
     *
     * @param o the key to compare against; must be a {@code MyKey}
     * @return a negative, zero or positive value when this key sorts before,
     *         together with, or after {@code o}
     * @throws NullPointerException if {@code o} is null
     * @throws ClassCastException if {@code o} is not a {@code MyKey}
     */
    @Override
    public int compareTo(Object o) {
        MyKey other = (MyKey) o;
        int byYear = Integer.compare(year, other.year);
        if (byYear != 0) {
            return byYear;
        }
        int byMonth = Integer.compare(month, other.month);
        if (byMonth != 0) {
            return byMonth;
        }
        // Arguments are swapped on purpose: higher temperatures sort first.
        return Double.compare(other.t, t);
    }

    /** Two keys are equal when year, month and temperature all match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof MyKey)) {
            return false;
        }
        MyKey other = (MyKey) obj;
        return year == other.year
                && month == other.month
                && Double.compare(t, other.t) == 0;
    }

    /** Hash built from the same three fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = 31 * year + month;
        return 31 * result + Double.hashCode(t);
    }

    /**
     * Serializes the key: year, month, then temperature.
     *
     * @param dataOutput sink the fields are written to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(year);
        dataOutput.writeInt(month);
        dataOutput.writeDouble(t);
    }

    /**
     * Deserializes the key, reading the fields in the order {@link #write(DataOutput)} wrote them.
     *
     * @param dataInput source the fields are read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        year = dataInput.readInt();
        month = dataInput.readInt();
        t = dataInput.readDouble();
    }
}
Mapper任务MyMapper
/**
 * Parses each input record into a {@code MyKey} / {@code Text} pair.
 *
 * <p>Both the input key and the input value are {@code Text}. The input key is
 * split on {@code "-"}: the first piece is taken as the year and the second as
 * the month. The input value is parsed as the temperature. The emitted value is
 * the original key and value joined by a tab.
 */
public class MyMapper extends Mapper<Text,Text,MyKey,Text> {
    /**
     * Builds the composite key for one record and emits it with the original line.
     *
     * @param key     input key; its first two {@code "-"}-separated pieces must be integers
     * @param value   input value; must parse as a double
     * @param context sink for the emitted pair
     * @throws IOException if the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        // The date part is delimited by "-": [0] = year, [1] = month.
        String[] dateParts = key.toString().split("-");

        MyKey myKey = new MyKey();
        myKey.setYear(Integer.parseInt(dateParts[0]));
        // Index 1 is the month; index 0 would store the year a second time.
        myKey.setMonth(Integer.parseInt(dateParts[1]));
        myKey.setT(Double.parseDouble(value.toString()));

        context.write(myKey, new Text(key.toString() + "\t" + value));
    }
}
数据分组MyGroup
/**
 * Comparator over {@code MyKey} that looks only at year and month, so keys that
 * share both compare as equal regardless of temperature.
 */
public class MyGroup extends WritableComparator {
    /** Registers {@code MyKey} as the compared type and asks the base class to create key instances. */
    public MyGroup() {
        super(MyKey.class, true);
    }

    /**
     * Compares two keys by year, falling back to month when the years match.
     *
     * @param a first key, expected to be a {@code MyKey}
     * @param b second key, expected to be a {@code MyKey}
     * @return the year comparison when the years differ, otherwise the month comparison
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        MyKey left = (MyKey) a;
        MyKey right = (MyKey) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        // Different years decide the result; same year defers to the month.
        return byYear != 0
                ? byYear
                : Integer.compare(left.getMonth(), right.getMonth());
    }
}
排序类MySort
/**
 * Comparator over {@code MyKey}: year ascending, then month ascending, then
 * temperature descending.
 */
public class MySort extends WritableComparator {
    /** Registers {@code MyKey} as the compared type and asks the base class to create key instances. */
    public MySort() {
        super(MyKey.class, true);
    }

    /**
     * Compares two keys field by field, stopping at the first field that differs.
     *
     * @param a first key, expected to be a {@code MyKey}
     * @param b second key, expected to be a {@code MyKey}
     * @return the year comparison, else the month comparison, else the reversed
     *         temperature comparison
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        MyKey left = (MyKey) a;
        MyKey right = (MyKey) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }

        int byMonth = Integer.compare(left.getMonth(), right.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }

        // Same year and month: swap the operands so higher temperatures come first.
        return Double.compare(right.getT(), left.getT());
    }
}
数据分区MyPartitioner
/**
 * Partitioner that assigns each record to a partition based on the year of its key,
 * so all records of one year land in the same partition.
 */
public class MyPartitioner extends Partitioner<MyKey,Text> {
    /**
     * Maps a key to a partition index derived from its year.
     *
     * @param myKey key whose year selects the partition
     * @param text  value of the record (not used)
     * @param i     number of partitions
     * @return an index in {@code [0, i)} for positive {@code i}
     */
    @Override
    public int getPartition(MyKey myKey, Text text, int i) {
        // floorMod keeps the index non-negative even for a negative year,
        // where the % operator would produce a negative, invalid index.
        return Math.floorMod(myKey.getYear(), i);
    }
}
Reducer任务MyReducer
/**
 * Reducer that writes at most the first three values of each group, each with a
 * {@code NullWritable} key.
 */
public class MyReducer extends Reducer<MyKey,Text,NullWritable,Text> {
    /**
     * Emits the leading three values of the group in iteration order and ignores the rest.
     *
     * @param key     key of the group (not used)
     * @param values  values of the group
     * @param context sink for the emitted records
     * @throws IOException if a write fails
     * @throws InterruptedException if a write is interrupted
     */
    @Override
    protected void reduce(MyKey key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int remaining = 3; // how many records may still be written for this group
        for (Text record : values) {
            if (remaining <= 0) {
                // Quota used up: stop at the fourth value.
                break;
            }
            remaining--;
            context.write(NullWritable.get(), record);
        }
    }
}
主函数RunJob
public class RunJob {
public static void main(String[] args) {
Configuration conf = new Configuration();
//NameNode的入口
conf.set("fs.defaultFS","hdfs://192.168.2.4:8020");
FileSystem fs = null;
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
e.printStackTrace();
}
Job job = null;
try {
//定义任务
job = Job.getInstance(conf,"weather");
} catch (IOException e) {
e.printStackTrace();
}
//主方法
job.setJarByClass(RunJob.class);
//mapper方法
job.setMapperClass(MyMapper.class);
//InputFormat方法
job.setInputFormatClass(KeyValueTextInputFormat.class);
//Reducer方法
job.setReducerClass(MyReducer.class);
//Partitioner方法
job.setPartitionerClass(MyPartitioner.class);
//SortComparator方法
job.setSortComparatorClass(MySort.class);
//GroupingComparator方法
job.setGroupingComparatorClass(MyGroup.class);
//Reducer Text的数量
job.setNumReduceTasks(3);
//Map输出key类型
job.setOutputKeyClass(MyKey.class);
//Map输出value类型
job.setOutputValueClass(Text.class);
//读取文件的位置
File f = new File("ETLDemo2\\temp");
//System.out.println(f.getAbsolutePath());
Path inpuPath = new Path("/usr/input/data/weather");
Path path = new Path(f.getAbsolutePath());
try {
//创建目录(目录不存在时创建)
if(!fs.exists(inpuPath)){
fs.mkdirs(inpuPath);
}
//上传文件(文件不存在时上传)
Path filePath = new Path(inpuPath.toString() + "/temp");
if(!fs.exists(filePath)) {
fs.copyFromLocalFile(path, filePath);
}
FileInputFormat.addInputPath(job,inpuPath);
} catch (IOException e) {
e.printStackTrace();
}
try {
//输出文件位置
Path outPath = new Path("/usr/output/data/weather");
if(fs.exists(outPath)){
fs.delete(outPath,true);
}
FileOutputFormat.setOutputPath(job,outPath);
} catch (IOException e) {
e.printStackTrace();
}
try {
job.waitForCompletion(true);
} catch (Exception e) {
e.printStackTrace();
}
}
}