SecondarySort二次排序代码

package com.zhiyou.bd17.mr1014;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


//二次排序
public class SecondarySort {
	//自定义封装类型,封装二次排序的第一个字段和第二个字段
	//自定义排序规则:第一个字段不同按照第一个字段排序,第一个字段相同按照第二个排序
	
	public static class TwoFields implements WritableComparable<TwoFields>{
		private String firstFied;
		private int secondField;
		
		public String getFirstFied() {
			return firstFied;
		}
		public void setFirstFied(String firstFied) {
			this.firstFied = firstFied;
		}
		public int getSecondField() {
			return secondField;
		}
		public void setSecondField(int secondField) {
			this.secondField = secondField;
		}
		
		//序列化
		public void write(DataOutput out) throws IOException {
			out.writeUTF(firstFied);
			out.writeInt(secondField);
		}
		//反序列化
		public void readFields(DataInput in) throws IOException {
			this.firstFied = in.readUTF();
			this.secondField = in.readInt();
		}
		//比较方法:先比较第一个字段,第一个字段相同的再用第二个字段的比较结果
		public int compareTo(TwoFields o) {
			if (this.firstFied.equals(o.firstFied)) {
				
				return this.secondField - o.secondField;
			}else {
				return this.firstFied.compareTo(o.firstFied);
			}		
		}	
	}
	
	/**
	 * Custom partitioner that routes every key with the same first field to the same
	 * reducer, regardless of the second field.
	 */
	public static class TwoFieldPartitoner extends Partitioner<TwoFields, NullWritable>{

		/**
		 * Picks the reducer for a key from the hash of its first field only.
		 *
		 * @param key composite key; only its first field is consulted
		 * @param value unused
		 * @param numPartitions number of partitions to distribute over
		 * @return the partition index, in the range {@code [0, numPartitions)}
		 */
		@Override
		public int getPartition(TwoFields key, NullWritable value, int numPartitions) {
			// Clear the sign bit so the modulo below can never produce a negative index.
			final int nonNegativeHash = key.getFirstFied().hashCode() & Integer.MAX_VALUE;
			return nonNegativeHash % numPartitions;
		}
	}
	
	/**
	 * Grouping comparator: keys that differ only in their second field compare as equal,
	 * so all records sharing a first field are handled by a single reduce() call.
	 */
	public static class GroupToReducerComparetor extends WritableComparator {

		/**
		 * Registers the key type with the parent comparator.
		 * First argument: the key class being compared.
		 * Second argument: whether the parent should instantiate key objects.
		 */
		public GroupToReducerComparetor() {
			super(TwoFields.class,true);
		}

		/**
		 * Compares two keys by their first field only.
		 *
		 * @param a left key, expected to be a {@link TwoFields}
		 * @param b right key, expected to be a {@link TwoFields}
		 * @return the result of comparing the two first fields as strings
		 */
		@Override
		public int compare(WritableComparable a, WritableComparable b) {
			final String leftFirst = ((TwoFields) a).getFirstFied();
			final String rightFirst = ((TwoFields) b).getFirstFied();
			return leftFirst.compareTo(rightFirst);
		}
	}
	
	/**
	 * Mapper that turns each (key, value) text pair into a {@link TwoFields} composite key
	 * with a {@link NullWritable} value, so that the shuffle sorts on both columns.
	 *
	 * <p>Records whose value is not a valid integer are skipped and counted in the
	 * {@code SecondarySort / MALFORMED_RECORDS} job counter instead of failing the task.
	 */
	public static class SecondarySortMap extends Mapper<Text, Text, TwoFields, NullWritable> {

		private final NullWritable oValue = NullWritable.get();
		// Reused for every record: the key is serialized by context.write(), so a single
		// instance avoids one allocation per input line.
		private final TwoFields oKey = new TwoFields();
		
		/**
		 * Emits one composite key per well-formed input record.
		 *
		 * @param key first column of the input line
		 * @param value second column of the input line; must parse as an int
		 * @param context task context used to emit output and update counters
		 * @throws IOException if writing the output fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void map(Text key, Text value, Mapper<Text, Text, TwoFields, NullWritable>.Context context)
				throws IOException, InterruptedException {
			
			int second;
			try {
				// parseInt avoids the boxing of Integer.valueOf; trim tolerates stray whitespace.
				second = Integer.parseInt(value.toString().trim());
			} catch (NumberFormatException e) {
				// A single bad line (e.g. missing or non-numeric second column) should not
				// kill the whole job; make it visible through a counter and move on.
				context.getCounter("SecondarySort", "MALFORMED_RECORDS").increment(1);
				return;
			}
			
			// Pack both columns into the composite key.
			oKey.setFirstFied(key.toString());
			oKey.setSecondField(second);
			context.write(oKey, oValue);
		}
	}
	
	/**
	 * Reducer that writes every record of a group as "first field, second field" and then
	 * appends a separator line after the group.
	 */
	public static class SecondarySortReducer extends Reducer<TwoFields, NullWritable, Text, Text> {

		private Text oKey = new Text();
		private Text oValue = new Text();
		
		/**
		 * Writes one output line per value in the group, followed by a separator line.
		 *
		 * @param key composite key of the group
		 * @param values one placeholder value per record in the group
		 * @param context task context used to emit output
		 * @throws IOException if writing the output fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void reduce(TwoFields key, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {
			// The placeholder value itself carries no data; the key fields are read on every
			// iteration (presumably because the framework refreshes the key object as the
			// iterator advances, so the second field differs per record).
			for (NullWritable ignored : values) {
				emit(context, key.getFirstFied(), Integer.toString(key.getSecondField()));
			}
			// Group finished: add a divider line.
			emit(context, "-------------", "");
		}

		/** Writes a single (left, right) text pair through the reusable output holders. */
		private void emit(Context context, String left, String right) throws IOException, InterruptedException {
			oKey.set(left);
			oValue.set(right);
			context.write(oKey, oValue);
		}
	}
	

	
	/**
	 * Configures and launches the secondary-sort job, then exits with 0 on success or 1 on failure.
	 *
	 * <p>Any existing output directory is deleted recursively before the job starts.
	 *
	 * @param args optional overrides: {@code args[0]} is the input path (default
	 *             {@code /bd17/secondaryorder}) and {@code args[1]} is the output directory
	 *             (default {@code /bd17/output/secondaryorder1})
	 * @throws IOException if the file system or job submission fails
	 * @throws ClassNotFoundException if a job class cannot be resolved
	 * @throws InterruptedException if waiting for the job is interrupted
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration configuration = new Configuration();
		
		Job job = Job.getInstance(configuration);
		job.setJarByClass(SecondarySort.class);
		job.setJobName("二次排序");
		
		job.setMapperClass(SecondarySortMap.class);
		job.setReducerClass(SecondarySortReducer.class);
		
		job.setMapOutputKeyClass(TwoFields.class);
		job.setMapOutputValueClass(NullWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		
		// Paths may be supplied on the command line; the original hard-coded locations
		// remain the defaults so existing invocations behave as before.
		Path inputPath = new Path(args.length > 0 ? args[0] : "/bd17/secondaryorder");
		Path outputDir = new Path(args.length > 1 ? args[1] : "/bd17/output/secondaryorder1");
		// The job refuses to start if the output directory exists, so clear it first.
		outputDir.getFileSystem(configuration).delete(outputDir,true);
		FileInputFormat.addInputPath(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputDir);
		
		// Read each input line as a key/value pair and hand it to the mapper.
		job.setInputFormatClass(KeyValueTextInputFormat.class);
		
		// Partition by the first field only.
		job.setPartitionerClass(TwoFieldPartitoner.class);
		
		// Group reducer input by the first field only.
		job.setGroupingComparatorClass(GroupToReducerComparetor.class);
		
		System.exit(job.waitForCompletion(true)?0:1);
	}
}





源数据:




处理结果:



























  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值