介绍
根据天气年历数据,通过自定义排序、自定义分区和自定义分组,获取天气相关的统计信息。
目标
1. 获取2010年至2012年,每年温度最高的时刻
2. 获取2010年至2012年,每年温度最高的前十天
思路
1. 按照年份升序排序,同时每一年温度降序排序
2. 按照年份分组,每年对应一个reduce任务
DataPartition.java
package com.hadoop.demo2.weather;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Custom partitioner: records are routed by the year stored in their key, so every
 * record of a given year ends up in the same partition.
 */
public class DataPartition extends Partitioner<KeyPair, Text> {
    /**
     * Picks the partition for a record from the year carried in its key.
     *
     * @param key   composite map-output key; only its year is consulted
     * @param value map-output value (not used for partitioning)
     * @param num   number of partitions
     * @return partition index in the range {@code [0, num)}
     */
    @Override
    public int getPartition(KeyPair key, Text value, int num) {
        // The previous form, (year * 100) % num, collapsed every year into partition 0
        // whenever num divides 100 (2, 4, 5, 10, ...) and could go negative for a
        // negative year or on int overflow. floorMod on the year itself always yields
        // a non-negative index and gives the same result as before when num is 3.
        return Math.floorMod(key.getYear(), num);
    }
}
GroupTemperature.java
package com.hadoop.demo2.weather;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Custom grouping comparator: keys are treated as belonging to the same group
 * when their years are equal; the temperature is ignored.
 */
public class GroupTemperature extends WritableComparator {
    /**
     * Registers {@link KeyPair} as the key class with the base comparator.
     * NOTE(review): the {@code true} flag presumably asks the base class to create
     * key instances - confirm against the WritableComparator documentation.
     */
    public GroupTemperature() {
        super(KeyPair.class, true);
    }

    /**
     * Orders two {@link KeyPair} keys by year only.
     *
     * @param a first key, expected to be a {@link KeyPair}
     * @param b second key, expected to be a {@link KeyPair}
     * @return negative, zero or positive as the year of {@code a} is less than,
     *         equal to or greater than the year of {@code b}
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final int leftYear = ((KeyPair) a).getYear();
        final int rightYear = ((KeyPair) b).getYear();
        return Integer.compare(leftYear, rightYear);
    }
}
KeyPair.java
package com.hadoop.demo2.weather;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* @Description: mapper输出封装对象
*/
@Data
@NoArgsConstructor
@AllArgsConstructor
public class KeyPair implements WritableComparable<KeyPair> {
private int year;
private int temperature;
// public KeyPair(int yearpara, int temperaturepara) {
// year=yearpara;
// temperature=temperaturepara;
// }
public int compareTo(KeyPair o) {
int result = Integer.compare(year, o.getYear());
if (result != 0) {
return result;
}
return Integer.compare(temperature, o.getTemperature());
}
/**
* 序列化
*/
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(this.year);
dataOutput.writeInt(this.temperature);
}
/**
* 反序列化
*/
public void readFields(DataInput dataInput) throws IOException {
this.year = dataInput.readInt();
this.temperature = dataInput.readInt();
}
@Override
public String toString() {
return year + "\t" + temperature;
}
@Override
public int hashCode() {
return new Integer(year + temperature).hashCode();
}
public int getYear() {
return year;
}
public int getTemperature() {
return temperature;
}
}
SortTemperature.java
package com.hadoop.demo2.weather;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Custom sort comparator: ascending by year, and within one year descending by temperature.
 */
public class SortTemperature extends WritableComparator {
    /**
     * Registers {@link KeyPair} as the key class with the base comparator.
     * NOTE(review): the {@code true} flag presumably asks the base class to create
     * key instances - confirm against the WritableComparator documentation.
     */
    public SortTemperature() {
        super(KeyPair.class, true);
    }

    /**
     * Compares two {@link KeyPair} keys, year first and temperature second.
     *
     * @param a first key, expected to be a {@link KeyPair}
     * @param b second key, expected to be a {@link KeyPair}
     * @return the year comparison when the years differ; otherwise the temperature
     *         comparison with the operands reversed
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        KeyPair left = (KeyPair) a;
        KeyPair right = (KeyPair) b;
        if (left.getYear() != right.getYear()) {
            return Integer.compare(left.getYear(), right.getYear());
        }
        // Operands are swapped on purpose so the higher temperature sorts first.
        return Integer.compare(right.getTemperature(), left.getTemperature());
    }
}
TemperatureMapper.java
package com.hadoop.demo2.weather;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.time.LocalDate;
/**
 * Mapper that turns each input line into a (year, temperature) key while passing the
 * original line through as the value.
 */
public class TemperatureMapper extends Mapper<LongWritable, Text, KeyPair, Text> {
    /**
     * Parses one input line and emits it keyed by year and temperature.
     *
     * <p>The line is split on single spaces; field 0 is parsed as an ISO date and
     * field 2 as an integer followed by the "℃" sign. Lines that have fewer than three
     * fields (including blank lines) or whose third field has nothing before "℃" are
     * skipped and counted under the "Weather" / "MALFORMED_RECORDS" counter instead of
     * failing the task. A date or number that is present but unparsable still raises
     * the parser's exception.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   the raw input line; emitted unchanged as the output value
     * @param context task context used for output and counters
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(" ");

        // split() never yields null elements, so the old "str[0] != null" guard let
        // blank and short lines through to the parsers. Check the structure instead.
        String reading = fields.length >= 3 ? fields[2] : "";
        int unitIndex = reading.lastIndexOf("℃");
        if (unitIndex < 1) {
            context.getCounter("Weather", "MALFORMED_RECORDS").increment(1);
            return;
        }

        int year = LocalDate.parse(fields[0]).getYear();
        int temperature = Integer.parseInt(reading.substring(0, unitIndex));
        context.write(new KeyPair(year, temperature), value);
    }
}
TemperatureReduce.java
package com.hadoop.demo2.weather;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer that forwards every value of a group to the output, paired with the group key.
 */
public class TemperatureReduce extends Reducer<KeyPair, Text, KeyPair, Text> {
    /**
     * Writes each value of the group to the output in iteration order.
     *
     * @param key     key of the current group
     * @param values  values belonging to the group
     * @param context task context used for output
     * @throws IOException          if writing an output record fails
     * @throws InterruptedException if a write is interrupted
     */
    @Override
    protected void reduce(KeyPair key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        java.util.Iterator<Text> records = values.iterator();
        while (records.hasNext()) {
            Text record = records.next();
            context.write(key, record);
        }
    }
}
Main.java
package com.hadoop.demo2.weather;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.File;
import java.io.IOException;
/**
 * Job driver: lists temperature records per year in descending temperature order.
 */
public class Main {
    /**
     * Configures and runs the "Weather" job, then terminates the JVM.
     *
     * <p>The exit status is 0 only when the job completes successfully; it is 1 when the
     * job reports failure or when setting up / waiting for the job throws.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Pessimistic default so that any exception below yields a non-zero exit status
        // (previously the stack trace was printed and the JVM still exited with 0).
        int exitCode = 1;
        try {
            Job job = Job.getInstance(conf, "Weather");
            job.setJarByClass(Main.class);

            job.setMapperClass(TemperatureMapper.class);
            job.setReducerClass(TemperatureReduce.class);

            job.setMapOutputKeyClass(KeyPair.class);
            job.setMapOutputValueClass(Text.class);
            // The reducer emits the same <KeyPair, Text> pairs, so declare them as the
            // job's final output types as well.
            job.setOutputKeyClass(KeyPair.class);
            job.setOutputValueClass(Text.class);

            job.setSortComparatorClass(SortTemperature.class);
            job.setPartitionerClass(DataPartition.class);
            job.setGroupingComparatorClass(GroupTemperature.class);

            // Three reduce tasks.
            job.setNumReduceTasks(3);

            FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/demo/data/hadoop/input"));
            FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/demo/data/hadoop/output_weather"));

            exitCode = job.waitForCompletion(true) ? 0 : 1;
        } catch (IOException | ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag before reporting the failure.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        System.exit(exitCode);
    }
}