#################需求为求每个月温度最高的两天
主运行job文件:
public class MyTQ {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration(true);
Job job = Job.getInstance(conf);
//配置
job.setJarByClass(MyTQ.class);
job.setJobName("tq");
//basic
//input/output path
Path infile = new Path("/data/tq/input");
FileInputFormat.addInputPath(job, infile );
Path outfile = new Path("/data/tq/output");
if(outfile.getFileSystem(conf).exists(outfile))
outfile.getFileSystem(conf).delete(outfile,true);
FileOutputFormat.setOutputPath(job, outfile );
//1maptask
//inputformat
// job.setInputFormatClass(ooxx.class);
//map
job.setMapperClass(TMapper.class);
job.setMapOutputKeyClass(TQ.class);
job.setMapOutputValueClass(IntWritable.class);
//partitioner,自定义怎么分区
job.setPartitionerClass(TPartitioner.class);
//sortComp...自定义排序的核心代码比较,通过比较我们定义什么才算大值,什么才算小智
job.setSortComparatorClass(TSortComparator.class);
//2reducetask
//groupComp....
/* 我们已经接收到mapper端传来的同一个区的数据,按照年月升序温度降序排列。
我们设置同一分区内同一组的排序的核心代码比较*/
job.setGroupingComparatorClass(TGroupingComparator.class);
//reduce
job.setReducerClass(TReducer.class);
job.setNumReduceTasks(2);
//submit
job.waitForCompletion(true);
}
mapper文件:
/**
 * Mapper that converts one weather record into a {@link TQ} key plus its temperature.
 *
 * <p>Expected input line (tab-separated): a timestamp starting with {@code yyyy-MM-dd},
 * a tab, and a temperature followed by a one-character unit suffix, for example
 * {@code 1949-10-01 14:21:02<TAB>34c}.
 *
 * <p>Lines that do not match this shape are skipped and counted in the
 * {@code TMapper / MALFORMED_RECORDS} job counter instead of failing the task.
 */
public class TMapper extends Mapper<LongWritable, Text, TQ, IntWritable> {

    /** Counter group used to report skipped input lines. */
    private static final String COUNTER_GROUP = "TMapper";

    /** Counter name used to report skipped input lines. */
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    // Output objects are reused across map() calls to avoid per-record allocation.
    private final TQ mkey = new TQ();
    private final IntWritable mval = new IntWritable();

    // Date helpers are kept per mapper instance rather than rebuilt for every record.
    // SimpleDateFormat is not thread-safe, so they must not be shared between instances.
    private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar cal = Calendar.getInstance();

    /**
     * Parses one input line and emits (year, month, day, temperature) -> temperature.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] strs = StringUtils.split(value.toString(), '\t');

        // Need both a date column and a non-empty temperature column.
        if (strs == null || strs.length < 2 || strs[1].isEmpty()) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        final Date date;
        final int wd;
        try {
            // Only the leading yyyy-MM-dd part of the timestamp is parsed.
            date = sdf.parse(strs[0]);
            // Drop the trailing unit character ("34c" -> 34).
            wd = Integer.parseInt(strs[1].substring(0, strs[1].length() - 1));
        } catch (ParseException | NumberFormatException e) {
            // Bad date or non-numeric temperature: skip the record but make it visible.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        cal.setTime(date);
        mkey.setYear(cal.get(Calendar.YEAR));
        mkey.setMonth(cal.get(Calendar.MONTH) + 1); // Calendar months are zero-based
        mkey.setDay(cal.get(Calendar.DAY_OF_MONTH));
        mkey.setWd(wd);
        mval.set(wd);
        context.write(mkey, mval);
    }
}
reduce文件:
/**
 * Reducer that emits, for each reduce group, the first record and the first later
 * record whose day differs from it.
 *
 * <p>With the job's sort comparator (year/month ascending, temperature descending) and
 * grouping comparator (same year and month), this yields the two hottest distinct days
 * of each month. Output key is {@code year-month-day}, output value is the temperature.
 */
public class TReducer extends Reducer<TQ, IntWritable, Text, IntWritable> {

    private final Text rkey = new Text();
    private final IntWritable rval = new IntWritable();

    /**
     * Walks one group and writes at most two records.
     *
     * <p>The loop reads the day and temperature from {@code key}, not from the values:
     * it relies on the key's fields reflecting the current record as the values are
     * iterated. Example group, in arrival order:
     * <pre>
     * 1970,2,22,33   33
     * 1970,2,12,31   31
     * 1970,2,23,28   28
     * </pre>
     *
     * @param key     key of the current record within the group
     * @param values  temperatures of the group (iterated only to advance through it)
     * @param context task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(TQ key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        boolean firstEmitted = false;
        int firstDay = 0;

        for (IntWritable ignored : values) {
            if (!firstEmitted) {
                // First record of the group: always emitted.
                firstDay = key.getDay();
                emit(key, context);
                firstEmitted = true;
                continue;
            }
            if (key.getDay() != firstDay) {
                // First record on a different day: emit it and stop.
                emit(key, context);
                break;
            }
        }
    }

    /** Writes the record currently held by {@code key} as "year-month-day" -> temperature. */
    private void emit(TQ key, Context context) throws IOException, InterruptedException {
        rkey.set(key.getYear() + "-" + key.getMonth() + "-" + key.getDay());
        rval.set(key.getWd());
        context.write(rkey, rval);
    }
}
自定义实体类:
/**
 * Composite map-output key holding a date (year, month, day) and a temperature ({@code wd}).
 *
 * <p>Implements {@code WritableComparable<TQ>} so that {@link #compareTo(TQ)} is a valid
 * override; with the raw interface the typed {@code compareTo(TQ)} does not compile.
 */
public class TQ implements WritableComparable<TQ> {

    private int year = 0;
    private int month = 0;
    private int day = 0;
    /** Temperature value. */
    private int wd = 0;

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public int getDay() {
        return day;
    }

    public void setDay(int day) {
        this.day = day;
    }

    public int getWd() {
        return wd;
    }

    public void setWd(int wd) {
        this.wd = wd;
    }

    /**
     * Serializes the four fields as ints, in the order year, month, day, wd.
     *
     * @param out destination stream
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(month);
        out.writeInt(day);
        out.writeInt(wd);
    }

    /**
     * Deserializes the four fields; the read order must match {@link #write(DataOutput)}.
     *
     * @param in source stream
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.year = in.readInt();
        this.month = in.readInt();
        this.day = in.readInt();
        this.wd = in.readInt();
    }

    /**
     * Natural ordering: ascending by year, then month, then day. The temperature is
     * not part of this ordering.
     *
     * @param that the key to compare with
     * @return negative, zero or positive as this key is before, equal to or after {@code that}
     */
    @Override
    public int compareTo(TQ that) {
        int c1 = Integer.compare(this.year, that.getYear());
        if (c1 != 0) {
            return c1;
        }
        int c2 = Integer.compare(this.month, that.getMonth());
        if (c2 != 0) {
            return c2;
        }
        return Integer.compare(this.day, that.getDay());
    }
}
mapper端自定义分区:
public class TPartitioner extends Partitioner<TQ, IntWritable>{
/分区的制定策略为年份相同的就属于同一分区/
@Override
public int getPartition(TQ key, IntWritable value, int numPartitions) {
return key.getYear() % numPartitions;
}
}
mapper端自定义排序比较器(用于溢写时的快速排序和合并时的归并排序):
/**
 * Sort comparator for {@link TQ} keys: year ascending, then month ascending, then
 * temperature descending. The day is not part of this ordering.
 */
public class TSortComparator extends WritableComparator {

    /**
     * Registers {@code TQ} as the key class with the base comparator; {@code true} is
     * the base class's create-instances flag.
     */
    public TSortComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two keys for sorting.
     *
     * @param a first key, must be a {@code TQ}
     * @param b second key, must be a {@code TQ}
     * @return negative, zero or positive according to the ordering described on the class
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }

        int byMonth = Integer.compare(left.getMonth(), right.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }

        // Same year and month: higher temperature sorts first (operands swapped).
        return Integer.compare(right.getWd(), left.getWd());
    }
}
reduce端自定义怎么分组:
/**
 * Grouping comparator for {@link TQ} keys: two keys are considered equal (same group)
 * when their year and month are equal. Day and temperature are ignored.
 */
public class TGroupingComparator extends WritableComparator {

    /**
     * Registers {@code TQ} as the key class with the base comparator; {@code true} is
     * the base class's create-instances flag.
     */
    public TGroupingComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two keys by year, then by month.
     *
     * @param a first key, must be a {@code TQ}
     * @param b second key, must be a {@code TQ}
     * @return zero when both keys share year and month; otherwise the sign of the first
     *         differing field (year before month)
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }
        return Integer.compare(left.getMonth(), right.getMonth());
    }
}