Hadoop系列-MapReduce自定义排序(十三)

GitHub代码下载地址:

1. Java工程代码


测试数据:



实现代码如下

package com.hadoop.minbo.mapreduce.sort;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**
 * mapreduce之自定义排序算法
 */
/**
 * MapReduce job that sorts two-column numeric records using a custom composite key.
 *
 * <p>Each input line is expected to hold two whitespace-separated {@code long} values. The mapper
 * wraps both columns in a {@link KeySort}, so the ordering of the output is decided by
 * {@code KeySort.compareTo}; the reducer writes the two columns of every key back out.
 */
public class SortTest {

	/** Converts each input line into a {@link KeySort} key carrying both columns. */
	static class MyMapper extends Mapper<LongWritable, Text, KeySort, LongWritable> {
		/**
		 * Parses one line into its two numeric columns and emits them as a composite key.
		 *
		 * @param key     input key supplied by the input format (not used)
		 * @param value   one line of input text
		 * @param context sink for the emitted (KeySort, second column) pair
		 * @throws NumberFormatException if a column is not a valid {@code long}
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String line = value.toString().trim();
			if (line.isEmpty()) {
				// A blank line carries no record; skipping it avoids failing the task on parseLong("").
				return;
			}
			// Split on any run of whitespace so repeated spaces or tabs do not yield empty columns.
			String[] split = line.split("\\s+");
			long first = Long.parseLong(split[0]);
			long second = Long.parseLong(split[1]);
			context.write(new KeySort(first, second), new LongWritable(second));
		}
	}

	/** Writes the two columns stored in each key as a (first, second) output pair. */
	static class MyReduce extends Reducer<KeySort, LongWritable, LongWritable, LongWritable> {
		/**
		 * Emits a single output row built from the key's two columns.
		 *
		 * @param kSort   composite key holding both columns
		 * @param values  values grouped under this key (not read; the key already holds both columns)
		 * @param context sink for the output row
		 */
		@Override
		protected void reduce(KeySort kSort, Iterable<LongWritable> values, Context context)
				throws IOException, InterruptedException {
			// Exactly one row is written per call, so rows grouped under one key yield one output row.
			context.write(new LongWritable(kSort.key), new LongWritable(kSort.value));
		}
	}

	/** Input directory read by the job. */
	public static String path1 = "input2";
	/** Output directory written by the job; it is deleted before each run. */
	public static String path2 = "output2";

	/**
	 * Configures and runs the sort job, then prints the reducer output to standard out.
	 *
	 * @param args command-line arguments (not used)
	 * @throws Exception if the file system or the job reports an error
	 */
	public static void main(String[] args) throws Exception {
		// Settings for running on Windows
		System.setProperty("hadoop.home.dir", "F:\\hadoop\\hadoop-2.7.3"); // Hadoop installation path
		System.setProperty("HADOOP_USER_NAME", "hadoop"); // user name

		Configuration conf = new Configuration();
		FileSystem fileSystem = FileSystem.get(conf);

		Path inputPath = new Path(path1);
		Path outputPath = new Path(path2);

		// Remove any output directory left over from a previous run.
		if (fileSystem.exists(outputPath)) {
			fileSystem.delete(outputPath, true);
		}

		// Create a new job
		Job job = Job.getInstance(conf);

		// Locate the jar through this class
		job.setJarByClass(SortTest.class);

		// Mapper and reducer classes
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReduce.class);

		// Output types of the map task
		job.setMapOutputKeyClass(KeySort.class);
		job.setMapOutputValueClass(LongWritable.class);

		// Output types of the reduce task
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(LongWritable.class);

		// Input and output paths of the job
		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);

		// Partitioner class; a single reduce task produces a single output file
		job.setPartitionerClass(HashPartitioner.class);
		job.setNumReduceTasks(1);

		// Submit the job and wait for it to finish. Stop here on failure instead of
		// trying to read an output file that the failed job may never have written.
		if (!job.waitForCompletion(true)) {
			System.err.println("Job failed; no output to display.");
			System.exit(1);
		}

		// Show the result. The input stream is closed by try-with-resources; copyBytes is told
		// not to close its streams so that System.out stays usable afterwards.
		try (FSDataInputStream fr = fileSystem.open(new Path(path2 + "/part-r-00000"))) {
			IOUtils.copyBytes(fr, System.out, 2048, false);
		}
	}
}

排序类:

package com.hadoop.minbo.mapreduce.sort;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

/**
 * Composite key made of the two numeric columns of a record.
 *
 * <p>Ordering: first column descending; when the first columns are equal, second column ascending.
 * {@link #equals(Object)} and {@link #hashCode()} are consistent with {@link #compareTo(KeySort)}.
 */
public class KeySort implements WritableComparable<KeySort>{

	/** First column. */
	public long key;
	/** Second column. */
	public long value;
	
	/** Creates an empty instance whose fields can be filled in by {@link #readFields(DataInput)}. */
	public KeySort() { }

	/**
	 * Creates a key from the two columns of a record.
	 *
	 * @param key   first column
	 * @param value second column
	 */
	public KeySort(long key, long value) {
		this.key = key;
		this.value = value;
	}

	/**
	 * Serializes both columns, first column first.
	 *
	 * @param out destination of the two {@code long} values
	 * @throws IOException if writing fails
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeLong(key);
		out.writeLong(value);
	}

	/**
	 * Restores both columns in the same order in which {@link #write(DataOutput)} stored them.
	 *
	 * @param in source of the two {@code long} values
	 * @throws IOException if reading fails
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.key=in.readLong();
		this.value=in.readLong();
	}
	
	/**
	 * Orders by first column descending, then by second column ascending.
	 *
	 * <p>Uses {@link Long#compare(long, long)} rather than subtraction: a {@code long} difference
	 * can overflow, and narrowing it to {@code int} can turn a non-zero difference into zero or
	 * flip its sign.
	 *
	 * @param my the key to compare against
	 * @return a negative value, zero, or a positive value as this key sorts before, equal to, or
	 *         after {@code my}
	 */
	@Override
	public int compareTo(KeySort my) {
		// Arguments swapped on purpose: a larger first column must sort earlier (descending).
		int byKey = Long.compare(my.key, this.key);
		if (byKey != 0) {
			return byKey;
		}
		return Long.compare(this.value, my.value);
	}

	/**
	 * Two keys are equal when both columns match, which agrees with {@code compareTo(...) == 0}.
	 *
	 * @param obj the object to compare against
	 * @return {@code true} if {@code obj} is a {@code KeySort} with the same two columns
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof KeySort)) {
			return false;
		}
		KeySort other = (KeySort) obj;
		return this.key == other.key && this.value == other.value;
	}

	/**
	 * Hash derived from both columns so that equal keys always produce the same hash.
	 *
	 * @return hash code combining the two columns
	 */
	@Override
	public int hashCode() {
		int result = (int) (key ^ (key >>> 32));
		result = 31 * result + (int) (value ^ (value >>> 32));
		return result;
	}
}

基本用法已实现


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

贺佬湿

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值