SecondarySort二次排序代码

最新推荐文章于 2021-04-30 00:25:43 发布

流云晨风

最新推荐文章于 2021-04-30 00:25:43 发布

阅读量636

点赞数

分类专栏： hadoop 文章标签： MapReduce

本文链接：https://blog.csdn.net/MA1147773433/article/details/78255602

版权

hadoop 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

package com.zhiyou.bd17.mr1014;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


//二次排序
public class SecondarySort {
	//自定义封装类型，封装二次排序的第一个字段和第二个字段
	//自定义排序规则：第一个字段不同按照第一个字段排序，第一个字段相同按照第二个排序
	
	public static class TwoFields implements WritableComparable<TwoFields>{
		private String firstFied;
		private int secondField;
		
		public String getFirstFied() {
			return firstFied;
		}
		public void setFirstFied(String firstFied) {
			this.firstFied = firstFied;
		}
		public int getSecondField() {
			return secondField;
		}
		public void setSecondField(int secondField) {
			this.secondField = secondField;
		}
		
		//序列化
		public void write(DataOutput out) throws IOException {
			out.writeUTF(firstFied);
			out.writeInt(secondField);
		}
		//反序列化
		public void readFields(DataInput in) throws IOException {
			this.firstFied = in.readUTF();
			this.secondField = in.readInt();
		}
		//比较方法:先比较第一个字段，第一个字段相同的再用第二个字段的比较结果
		public int compareTo(TwoFields o) {
			if (this.firstFied.equals(o.firstFied)) {
				
				return this.secondField - o.secondField;
			}else {
				return this.firstFied.compareTo(o.firstFied);
			}		
		}	
	}
	
	// 自定义分区，用来将第一个字段相同的key值分区到同一个reducer节点上
	/**
	 * Partitioner that looks only at the first field of the key, so all keys
	 * sharing a first field get the same partition number.
	 */
	public static class TwoFieldPartitoner extends Partitioner<TwoFields, NullWritable>{

		/**
		 * @param key           composite key; only its first field is hashed
		 * @param value         unused
		 * @param numPartitions number of partitions to spread keys over
		 * @return partition index in {@code [0, numPartitions)}
		 */
		@Override
		public int getPartition(TwoFields key, NullWritable value, int numPartitions) {
			final int rawHash = key.getFirstFied().hashCode();
			// Clear the sign bit so the modulo result is never negative.
			final int nonNegativeHash = rawHash & Integer.MAX_VALUE;
			return nonNegativeHash % numPartitions;
		}
	}
	
	//定义分组比较器，让不同key值的第一个字段相同的kv 调用同一个 reducer方法
	/**
	 * Grouping comparator that treats two {@link TwoFields} keys as equal when
	 * their first fields are equal, ignoring the second field.
	 */
	public static class GroupToReducerComparetor extends WritableComparator {

		/**
		 * Registers {@link TwoFields} as the compared key type with the parent;
		 * the second argument asks the parent to create key instances.
		 */
		public GroupToReducerComparetor() {
			super(TwoFields.class, true);
		}

		/**
		 * Compares two keys by their first field only.
		 *
		 * @param a left key, expected to be a {@link TwoFields}
		 * @param b right key, expected to be a {@link TwoFields}
		 * @return result of comparing the first fields as strings
		 */
		@Override
		public int compare(WritableComparable a, WritableComparable b) {
			final TwoFields left = (TwoFields) a;
			final TwoFields right = (TwoFields) b;
			final String leftFirst = left.getFirstFied();
			final String rightFirst = right.getFirstFied();
			return leftFirst.compareTo(rightFirst);
		}
	}
	
	//定义map
	/**
	 * Mapper that turns each (key, value) text pair into a {@link TwoFields}
	 * composite key and emits it with a {@link NullWritable} value.
	 */
	public static class SecondarySortMap extends Mapper<Text, Text, TwoFields, NullWritable> {

		private final NullWritable oValue = NullWritable.get();
		// One key instance reused for every record, the same way the reducer
		// in this file reuses its Text output objects.
		private final TwoFields outKey = new TwoFields();

		/**
		 * Emits one composite key per input record.
		 *
		 * @param key     text used as the first sort field
		 * @param value   text holding the second sort field as a decimal integer;
		 *                surrounding whitespace is ignored
		 * @param context sink for the emitted key
		 * @throws NumberFormatException if {@code value} is not a valid int after trimming
		 */
		@Override
		protected void map(Text key, Text value, Mapper<Text, Text, TwoFields, NullWritable>.Context context)
				throws IOException, InterruptedException {
			outKey.setFirstFied(key.toString());
			// parseInt avoids the Integer boxing of valueOf; trim tolerates
			// stray spaces or a trailing '\r' in the value column.
			outKey.setSecondField(Integer.parseInt(value.toString().trim()));
			context.write(outKey, oValue);
		}
	}
	
	//定义reducer
	/**
	 * Reducer that writes one (first field, second field) line per value in
	 * the group, followed by a separator line.
	 */
	public static class SecondarySortReducer extends Reducer<TwoFields, NullWritable, Text, Text> {

		private Text oKey = new Text();
		private Text oValue = new Text();

		/**
		 * @param key     composite key; its fields are read on every iteration
		 * @param values  one entry per record in the group; only the count matters
		 * @param context sink for the output lines
		 */
		@Override
		protected void reduce(TwoFields key, Iterable<NullWritable> values,
				Reducer<TwoFields, NullWritable, Text, Text>.Context context) throws IOException, InterruptedException {
			// The values carry no data; the key is read once per value.
			for (NullWritable ignored : values) {
				final String first = key.getFirstFied();
				final String second = Integer.toString(key.getSecondField());
				oKey.set(first);
				oValue.set(second);
				context.write(oKey, oValue);
			}

			// Separator line written after each group.
			oKey.set("-------------");
			oValue.set("");
			context.write(oKey, oValue);
		}
	}
	

	
	//设置并启动job
	/**
	 * Configures the secondary-sort job, runs it, and exits with status 0 on
	 * success or 1 on failure.
	 *
	 * <p>The output directory is deleted recursively before the job is
	 * submitted. Input and output paths are fixed; {@code args} is not read.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		final Configuration conf = new Configuration();
		final Job sortJob = Job.getInstance(conf);
		sortJob.setJarByClass(SecondarySort.class);
		sortJob.setJobName("二次排序");

		// Map and reduce stages with their key/value types.
		sortJob.setMapperClass(SecondarySortMap.class);
		sortJob.setMapOutputKeyClass(TwoFields.class);
		sortJob.setMapOutputValueClass(NullWritable.class);
		sortJob.setReducerClass(SecondarySortReducer.class);
		sortJob.setOutputKeyClass(Text.class);
		sortJob.setOutputValueClass(Text.class);

		// Input format, plus partitioning and grouping on the first field.
		sortJob.setInputFormatClass(KeyValueTextInputFormat.class);
		sortJob.setPartitionerClass(TwoFieldPartitoner.class);
		sortJob.setGroupingComparatorClass(GroupToReducerComparetor.class);

		// Remove any previous output directory before wiring the paths.
		final Path source = new Path("/bd17/secondaryorder");
		final Path target = new Path("/bd17/output/secondaryorder1");
		target.getFileSystem(conf).delete(target, true);
		FileInputFormat.addInputPath(sortJob, source);
		FileOutputFormat.setOutputPath(sortJob, target);

		final boolean succeeded = sortJob.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);
	}
}

源数据：

处理结果：

流云晨风

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
SecondarySort二次排序代码

package com.zhiyou.bd17.mr1014;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.P
复制链接

扫一扫