需求:
每个月气温最高的2天
数据集(每行格式为"日期 时间<TAB>温度",日期时间与温度之间以制表符 \t 分隔):
1949-10-01 14:21:02 34c
1949-10-01 19:21:02 38c
1949-10-02 14:01:02 36c
1950-01-01 11:21:02 32c
1950-10-01 12:21:02 37c
1951-12-01 12:21:02 23c
1950-10-02 12:21:02 41c
1950-10-03 12:21:02 27c
1951-07-01 12:21:02 45c
1951-07-02 12:21:02 46c
1951-07-03 12:21:03 47c
客户端:
package com.ny.mapreduce.weather;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MyTQ {
public static void main(String[] args) throws Exception {
//创建conf对象 读取配置文件
Configuration conf = new Configuration(true);
//创建job对象
Job job = Job.getInstance(conf);
//Jar包
job.setJarByClass(MyTQ.class);
//----conf---------配置环节---
//输入格式化类的创建
// job.setInputFormatClass(null);
/**-- MAP阶段-->*/
//map类处理输入来的数据
job.setMapperClass(TMapper.class);
//Map类处理后产生得K,V TQ天气的类型定义在TQ类中
/*Map输出的类型*/
job.setMapOutputKeyClass(TQ.class);
job.setMapOutputValueClass(IntWritable.class);
/* (k,v,p) 分 区*/
job.setPartitionerClass(TPartitioner.class);
/*缓冲区去 排序*/
job.setSortComparatorClass(TSortComparator.class);
/* Combine*/
// job.setCombinerClass(TCombiner.class);
/**Map阶段结束*/
/**Reduce环节*/
//分组比较器
job.setGroupingComparatorClass(TGroupComparator.class);
//Reduce类处理
job.setReducerClass(Treduce.class);
/**Reduce阶段结束*/
/** 输入输出路径*/
Path input = new Path("/data/weather/input");
FileInputFormat.addInputPath(job, input);
Path output = new Path("/data/weather/output");
FileOutputFormat.setOutputPath(job, output );
/**设置reducetask的数量*/
job.setNumReduceTasks(2);
job.waitForCompletion(true);
}
}
Map类:
package com.ny.mapreduce.weather;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
//默认输入的格式化类为TextInputFormat.class <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
/**
 * Parses one input line into a {@link TQ} key (year, month, day, temperature)
 * and emits it together with the temperature as the value.
 *
 * <p>A line is expected to hold two tab-separated fields: a timestamp starting
 * with {@code yyyy-MM-dd} and a temperature with a one-character unit suffix,
 * e.g. {@code 1951-07-01 12:21:02<TAB>45c}.
 */
public class TMapper extends Mapper<LongWritable, Text, TQ, IntWritable>{
    // Output objects are reused across map() calls to avoid per-record allocation.
    TQ mkey = new TQ();
    IntWritable mval = new IntWritable();
    // Parser and calendar are also reused. SimpleDateFormat is not thread-safe,
    // so these must stay per-instance and must not be shared between threads.
    private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar cal = Calendar.getInstance();

    /**
     * Converts one text line into a (TQ, temperature) pair.
     *
     * <p>Lines that cannot be parsed are skipped and counted under the
     * {@code TMapper/MALFORMED_RECORDS} counter. Failures while writing the
     * output are NOT swallowed; they propagate to the framework.
     *
     * @param key     the input key supplied by the input format (not used)
     * @param value   the raw input line
     * @param context the task context used to emit output and update counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, TQ, IntWritable>.Context context)
            throws IOException, InterruptedException {
        try {
            // Field 0 is the timestamp, field 1 is the temperature (e.g. "45c").
            String[] strs = StringUtils.split(value.toString(), '\t');
            // Only the leading date part is consumed by the "yyyy-MM-dd" pattern.
            Date date = sdf.parse(strs[0]);
            cal.setTime(date);
            mkey.setYear(cal.get(Calendar.YEAR));
            // Calendar months are zero-based, hence the +1.
            mkey.setMonth(cal.get(Calendar.MONTH) + 1);
            mkey.setDay(cal.get(Calendar.DAY_OF_MONTH));
            // Strip the trailing unit character to get the numeric temperature.
            int wd = Integer.parseInt(strs[1].substring(0, strs[1].length() - 1));
            mkey.setWd(wd);
            mval.set(wd);
        } catch (java.text.ParseException | RuntimeException e) {
            // Best effort: a malformed line is skipped, but made visible via a counter.
            context.getCounter("TMapper", "MALFORMED_RECORDS").increment(1);
            System.err.println("Skipping malformed record [" + value + "]: " + e);
            return;
        }
        // Kept outside the try block so that I/O failures are never swallowed.
        context.write(mkey, mval);
    }
}
自定义Map输出key的类型(TQ):
package com.ny.mapreduce.weather;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
//key实现接口
/**
 * Composite map-output key holding a date (year, month, day) and the
 * temperature measured on that date.
 *
 * <p>The fields are serialized as four consecutive ints in the order
 * year, month, day, temperature. The natural ordering defined by
 * {@link #compareTo(TQ)} is ascending by date and ignores the temperature.
 */
public class TQ implements WritableComparable<TQ>{
    private int year;
    private int month;
    private int day;
    // Temperature ("wd") of the record.
    private int wd;

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public int getDay() {
        return day;
    }

    public void setDay(int day) {
        this.day = day;
    }

    public int getWd() {
        return wd;
    }

    public void setWd(int wd) {
        this.wd = wd;
    }

    /**
     * Serializes the key as four ints: year, month, day, temperature.
     *
     * @param out the sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(month);
        out.writeInt(day);
        out.writeInt(wd);
    }

    /**
     * Restores the key from the layout produced by {@link #write(DataOutput)}.
     *
     * @param in the source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.year = in.readInt();
        this.month = in.readInt();
        this.day = in.readInt();
        this.wd = in.readInt();
    }

    /**
     * Orders keys chronologically: by year, then month, then day.
     *
     * @param that the key to compare with
     * @return a negative value, zero or a positive value if this date is
     *         before, equal to or after {@code that} date
     */
    @Override
    public int compareTo(TQ that) {
        int byYear = Integer.compare(this.year, that.getYear());
        if (byYear != 0) {
            // Different years decide the order; previously this case returned 0.
            return byYear;
        }
        int byMonth = Integer.compare(this.month, that.getMonth());
        if (byMonth != 0) {
            // Same year, different months; previously this case returned 0 as well.
            return byMonth;
        }
        return Integer.compare(this.day, that.getDay());
    }
}
分区类:
package com.ny.mapreduce.weather;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Assigns each record to a reduce partition based on its year and month.
 *
 * <p>All records that share the same year and month always land in the same
 * partition. Using the month in addition to the year spreads the load more
 * evenly than {@code year % numPartitions}, which sends every month of a
 * year (and every year with the same remainder) to a single partition.
 */
public class TPartitioner extends Partitioner<TQ, IntWritable>{
    /**
     * Computes the partition index for a record.
     *
     * @param key           the map output key
     * @param value         the map output value (not used)
     * @param numPartitions the total number of partitions
     * @return an index in the range {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(TQ key, IntWritable value, int numPartitions) {
        // Consecutive months get consecutive indices, so they rotate over the partitions.
        int monthIndex = key.getYear() * 12 + (key.getMonth() - 1);
        // floorMod keeps the result non-negative even for a negative index.
        return Math.floorMod(monthIndex, numPartitions);
    }
}
排序类:
package com.ny.mapreduce.weather;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
//排序类
/**
 * Sort comparator for {@link TQ} keys: ascending by year, ascending by month,
 * and descending by temperature within the same year and month.
 */
public class TSortComparator extends WritableComparator {

    /** Registers {@link TQ} as the key class and asks the base class to create key instances. */
    public TSortComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two {@link TQ} keys.
     *
     * @param a the first key
     * @param b the second key
     * @return the year comparison if the years differ, otherwise the month
     *         comparison if the months differ, otherwise the reversed
     *         temperature comparison
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final TQ left = (TQ) a;
        final TQ right = (TQ) b;

        final int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }

        final int byMonth = Integer.compare(left.getMonth(), right.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }

        // Operands are swapped so that the higher temperature sorts first.
        return Integer.compare(right.getWd(), left.getWd());
    }
}
分组类:
package com.ny.mapreduce.weather;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator for {@link TQ} keys: two keys compare as equal when
 * both their year and their month match; day and temperature are ignored.
 */
public class TGroupComparator extends WritableComparator {

    /** Registers {@link TQ} as the key class and asks the base class to create key instances. */
    public TGroupComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two {@link TQ} keys by year and then by month.
     *
     * @param a the first key
     * @param b the second key
     * @return the year comparison if the years differ, otherwise the month comparison
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final TQ left = (TQ) a;
        final TQ right = (TQ) b;

        final int byYear = Integer.compare(left.getYear(), right.getYear());
        return byYear != 0
                ? byYear
                : Integer.compare(left.getMonth(), right.getMonth());
    }
}
reduce类 :
package com.ny.mapreduce.weather;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
/**
 * Emits at most two records per reduce group: the first record of the group
 * and the first later record whose day differs from it.
 *
 * <p>The loop reads the fields of {@code key} on every iteration and compares
 * the day against the first one seen, i.e. it relies on {@code key} reflecting
 * the record that is currently being iterated.
 */
public class Treduce extends Reducer<TQ, IntWritable, Text, IntWritable>{
    // Output objects are reused for every record that is written.
    Text rkey = new Text();
    IntWritable rval = new IntWritable();

    /**
     * Writes the first record of the group, then the first following record
     * that belongs to a different day, and stops after that second record.
     *
     * @param key     the current key of the group
     * @param valuese the values of the group; only iterated, their content is not read
     * @param context the task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(TQ key, Iterable<IntWritable> valuese, Context context)
            throws IOException, InterruptedException {
        boolean firstWritten = false;
        int firstDay = 0;

        for (IntWritable ignored : valuese) {
            if (!firstWritten) {
                // The first record of the group is always emitted.
                emit(key, context);
                firstWritten = true;
                firstDay = key.getDay();
                continue;
            }
            if (key.getDay() != firstDay) {
                // Second record: must come from a different day than the first one.
                emit(key, context);
                break;
            }
        }
    }

    /** Formats the current key as "year-month-day:temperature" and writes it with the temperature as value. */
    private void emit(TQ key, Context context) throws IOException, InterruptedException {
        rkey.set(key.getYear() + "-" + key.getMonth() + "-" + key.getDay() + ":" + key.getWd());
        rval.set(key.getWd());
        context.write(rkey, rval);
    }
}
注意:仅按年份取模分区(year % numPartitions)容易出现数据倾斜,例如示例数据中 1949 年和 1951 年的记录在 2 个分区时都会落入同一个分区。