mapreduce各自定义组件demo

图为mapreduce的整个大概流程
需求:求每个月温度最高的两天
主运行job文件:
public class MyTQ {

public static void main(String[] args) throws Exception {
	
	
	
	Configuration conf = new Configuration(true);
	Job job = Job.getInstance(conf);
	//配置
	
	job.setJarByClass(MyTQ.class);
	job.setJobName("tq");
	//basic
	
	//input/output  path
	
	Path infile = new Path("/data/tq/input");
	FileInputFormat.addInputPath(job, infile );
	
	
	Path outfile = new Path("/data/tq/output");
	if(outfile.getFileSystem(conf).exists(outfile))
		outfile.getFileSystem(conf).delete(outfile,true);
	FileOutputFormat.setOutputPath(job, outfile );
	
	//1maptask
		//inputformat

// job.setInputFormatClass(ooxx.class);

		//map
	job.setMapperClass(TMapper.class);
	job.setMapOutputKeyClass(TQ.class);
	job.setMapOutputValueClass(IntWritable.class);
	
		
		//partitioner,自定义怎么分区
	job.setPartitionerClass(TPartitioner.class);
	
		//sortComp...自定义排序的核心代码比较,通过比较我们定义什么才算大值,什么才算小智
	job.setSortComparatorClass(TSortComparator.class);
	
	
	//2reducetask
		//groupComp....
	/*  我们已经接收到mapper端传来的同一个区的数据,按照年月升序温度降序排列。
	  我们设置同一分区内同一组的排序的核心代码比较*/
	job.setGroupingComparatorClass(TGroupingComparator.class);
		//reduce
	job.setReducerClass(TReducer.class);
	
	
	job.setNumReduceTasks(2);
	//submit
	job.waitForCompletion(true);
	
}

mapper文件:
/**
 * Parses one temperature reading per input line and emits it keyed by a
 * {@link TQ} (year, month, day, temperature) with the temperature as value.
 *
 * <p>Expected line layout is a timestamp, a tab, and a temperature with a
 * one-character unit suffix, for example:
 * <pre>
 * 1949-10-01 14:21:02	34c
 * 1949-10-01 19:21:02	38c
 * </pre>
 * Lines that do not match this layout are skipped and counted rather than
 * failing the task.
 */
public class TMapper extends Mapper<LongWritable, Text, TQ, IntWritable> {

    // Output objects are reused across map() calls to avoid per-record allocation.
    private final TQ mkey = new TQ();
    private final IntWritable mval = new IntWritable();

    // Kept per mapper instance (not static) because SimpleDateFormat is not thread-safe.
    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar cal = Calendar.getInstance();

    /**
     * Converts one input line into a (TQ, temperature) pair.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of input text
     * @param context job context used to emit the pair and to count skipped lines
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        String line = value.toString();
        String[] strs = StringUtils.split(line, '\t');

        // Need both the timestamp and a non-empty temperature field.
        if (strs == null || strs.length < 2 || strs[1].isEmpty()) {
            skipMalformed(line, context);
            return;
        }

        final Date date;
        final int wd;
        try {
            date = dateFormat.parse(strs[0]);
            // Drop the trailing unit character ("34c" -> 34).
            wd = Integer.parseInt(strs[1].substring(0, strs[1].length() - 1));
        } catch (ParseException | NumberFormatException e) {
            skipMalformed(line, context);
            return;
        }

        cal.setTime(date);
        mkey.setYear(cal.get(Calendar.YEAR));
        mkey.setMonth(cal.get(Calendar.MONTH) + 1); // Calendar.MONTH is zero-based
        mkey.setDay(cal.get(Calendar.DAY_OF_MONTH));
        mkey.setWd(wd);
        mval.set(wd);

        context.write(mkey, mval);
    }

    /** Records a skipped input line in the task log and in a job counter. */
    private void skipMalformed(String line, Context context) {
        System.err.println("TMapper: skipping malformed record: " + line);
        context.getCounter("TMapper", "MALFORMED_RECORDS").increment(1);
    }

}

reducer文件:
/**
 * Emits, for each group of keys, the first record and the first later record
 * whose day differs from it.
 *
 * <p>With keys grouped by year/month and sorted by temperature descending,
 * that yields the two hottest distinct days of the month, e.g. for
 * <pre>
 * 1970,2,22,33  33
 * 1970,2,12,31  31
 * 1970,2,23,28  28
 * </pre>
 * the first two rows are written.
 */
public class TReducer extends Reducer<TQ, IntWritable, Text, IntWritable> {

    // Reused output objects.
    private Text rkey = new Text();
    private IntWritable rval = new IntWritable();

    /**
     * Walks one group and writes at most two records.
     *
     * <p>The values themselves are not read; the loop only advances the
     * iterator and inspects {@code key} on each step.
     *
     * @param key     current key of the group
     * @param values  values of the group
     * @param context job context used to emit results
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(TQ key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {

        boolean firstWritten = false;
        int firstDay = 0;

        for (IntWritable ignored : values) {
            if (!firstWritten) {
                // Top record of the group: always emitted.
                emit(key, context);
                firstDay = key.getDay();
                firstWritten = true;
                continue;
            }

            if (key.getDay() != firstDay) {
                // First record on a different day: emit it and stop.
                emit(key, context);
                break;
            }
        }
    }

    /** Writes the key's date as "year-month-day" together with its temperature. */
    private void emit(TQ key, Context context) throws IOException, InterruptedException {
        rkey.set(key.getYear() + "-" + key.getMonth() + "-" + key.getDay());
        rval.set(key.getWd());
        context.write(rkey, rval);
    }

}

自定义实体类:
/**
 * Map output key holding one temperature reading: year, month, day and the
 * temperature value ({@code wd}).
 *
 * <p>Serialized as four consecutive ints in the order year, month, day, wd.
 * The natural ordering compares year, then month, then day; {@code wd} does
 * not take part in {@link #compareTo(TQ)}.
 */
public class TQ implements WritableComparable<TQ> {

    private int year;
    private int month;
    private int day;
    private int wd;

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public int getDay() {
        return day;
    }

    public void setDay(int day) {
        this.day = day;
    }

    public int getWd() {
        return wd;
    }

    public void setWd(int wd) {
        this.wd = wd;
    }

    /**
     * Serializes the four fields as ints: year, month, day, wd.
     *
     * @param out destination stream
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(month);
        out.writeInt(day);
        out.writeInt(wd);
    }

    /**
     * Restores the four fields in the same order {@link #write(DataOutput)} emits them.
     *
     * @param in source stream
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.year = in.readInt();
        this.month = in.readInt();
        this.day = in.readInt();
        this.wd = in.readInt();
    }

    /**
     * Orders by year, then month, then day, all ascending.
     *
     * @param that the key to compare against
     * @return negative, zero or positive as this key sorts before, with or after {@code that}
     */
    @Override
    public int compareTo(TQ that) {
        int c1 = Integer.compare(this.year, that.getYear());
        if (c1 != 0) {
            return c1;
        }
        int c2 = Integer.compare(this.month, that.getMonth());
        if (c2 != 0) {
            return c2;
        }
        return Integer.compare(this.day, that.getDay());
    }

}
mapper端自定义分区:
public class TPartitioner extends Partitioner<TQ, IntWritable>{
/分区的制定策略为年份相同的就属于同一分区/
@Override
public int getPartition(TQ key, IntWritable value, int numPartitions) {
return key.getYear() % numPartitions;
}

}

mapper端自定义排序比较器(用于快速排序和归并排序):
/**
 * Sort comparator for {@link TQ} keys: year ascending, then month ascending,
 * then temperature ({@code wd}) descending.
 *
 * <p>Per the original author's notes, this ordering is what the map side uses
 * when sorting and merging records inside a partition.
 */
public class TSortComparator extends WritableComparator {

    /** Registers {@code TQ} as the key class and passes {@code true} as the superclass's second argument. */
    public TSortComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two {@code TQ} keys.
     *
     * @param a left key, must be a {@code TQ}
     * @param b right key, must be a {@code TQ}
     * @return negative, zero or positive as {@code a} sorts before, with or after {@code b}
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }

        int byMonth = Integer.compare(left.getMonth(), right.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }

        // Same year and month: hotter readings come first.
        return Integer.compare(right.getWd(), left.getWd());
    }

}

reduce端自定义怎么分组:
/**
 * Grouping comparator for {@link TQ} keys: two keys belong to the same group
 * when their year and month are equal.
 */
public class TGroupingComparator extends WritableComparator {

    /** Registers {@code TQ} as the key class and passes {@code true} as the superclass's second argument. */
    public TGroupingComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two {@code TQ} keys by year, then by month; day and temperature are ignored.
     *
     * @param a left key, must be a {@code TQ}
     * @param b right key, must be a {@code TQ}
     * @return zero when both keys fall in the same year and month, otherwise the sign of the
     *         first differing field
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        return byYear != 0
                ? byYear
                : Integer.compare(left.getMonth(), right.getMonth());
    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值