Sorting and Reverse Sorting with Map/Reduce

By default, the reduce phase of MapReduce receives keys sorted in ascending order by the key type's comparator (numeric order for IntWritable keys). The ascending-sort job is shown first.

Input data:

23112
321
2
213
21
4
21
421
5
12
2313
25
21
21
5
25
215
2
5


Output data:

2
2
4
5
5
5
12
21
21
21
21
25
25
213
215
321
421
2313
23112


To let duplicate numbers survive into the output, the mapper emits a random number as the value; a constant 1 would work just as well, since all the reducer needs is a way to tell how many values arrive in its iterator for each key (a count-based variant is sketched after the full listing below).

import java.io.*;
import java.util.Random;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.io.*;

public class sort extends Configured implements Tool{
	enum Counter
	{
		LineSkip,
	}
	public static class map extends Mapper<Object,Text,IntWritable,IntWritable>
	{
		@Override
		public void map(Object key,Text value,Context context)throws IOException,InterruptedException
		{
			// Parse the line into an int and use it as the output key.
			// The value is a throwaway random number; a constant 1 would work just as well.
			IntWritable i=new IntWritable(Integer.parseInt(value.toString().trim()));
			context.write(i, new IntWritable(new Random().nextInt()));
		}
	}
	public static class reduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>
	{
		@Override
		public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
		{
			// Write the key once for every value it carries, so duplicate numbers
			// appear in the output as many times as they appeared in the input.
			// A null value makes TextOutputFormat print the key on a line by itself.
			for(IntWritable ignored:values)
			{
				context.write(key, null);
			}
		}
	}
	public int run(String[] args)throws Exception
	{
		Configuration conf=getConf();
		Job job=new Job(conf,"sort");
		job.setJarByClass(sort.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(map.class);
		job.setReducerClass(reduce.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(IntWritable.class);
		job.waitForCompletion(true);
		return job.isSuccessful()?0:1;
	}
	public static void main(String[] args)throws Exception
	{
		int res=ToolRunner.run(new Configuration(),new sort(),args);
		System.exit(res);
	}
}
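
The constant-1 variant mentioned above is not part of the original listing, but a rough sketch could look like the class below (the name countingreduce is made up for illustration; it assumes the same imports as the listing above and a mapper that writes new IntWritable(1) as the value):

public static class countingreduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>
{
	@Override
	public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
	{
		// Sum the 1s emitted by the mapper to find how many times this number occurred
		int count=0;
		for(IntWritable one:values)
		{
			count+=one.get();
		}
		// Write the key once per occurrence so duplicates survive into the output
		for(int i=0;i<count;i++)
		{
			context.write(key, null);
		}
	}
}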

To instead produce the following descending output:

23112
2313
421
321
215
213
25
25
21
21
21
21
12
5
5
5
4
2
2

we use a custom comparator. Job provides the setSortComparatorClass method, which replaces the default key ordering applied when keys are sorted for the reducer.

import java.io.*;
import java.util.Random;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.io.*;

public class reversesort extends Configured implements Tool{
	enum Counter
	{
		LineSkip,
	}
	public static class map extends Mapper<Object,Text,IntWritable,IntWritable>
	{
		@Override
		public void map(Object key,Text value,Context context)throws IOException,InterruptedException
		{
			// Parse the line into an int and use it as the output key.
			// The value is a throwaway random number; a constant 1 would work just as well.
			IntWritable i=new IntWritable(Integer.parseInt(value.toString().trim()));
			context.write(i, new IntWritable(new Random().nextInt()));
		}
	}
	public static class reduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>
	{
		@Override
		public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
		{
			// Write the key once for every value it carries, so duplicate numbers
			// appear in the output as many times as they appeared in the input.
			// A null value makes TextOutputFormat print the key on a line by itself.
			for(IntWritable ignored:values)
			{
				context.write(key, null);
			}
		}
	}
	public static class intwritablecompare extends IntWritable.Comparator
	{
		// Swap the arguments of the raw-bytes comparison so keys sort from
		// largest to smallest instead of the default ascending order.
		// (The last four int parameters are start offsets and lengths, not end offsets.)
		@Override
		public int compare(byte[] b1,int s1,int l1,byte[] b2,int s2,int l2)
		{
			return super.compare(b2,s2,l2,b1,s1,l1);
		}
	}
	public int run(String[] args)throws Exception
	{
		Configuration conf=getConf();
		Job job=new Job(conf,"reversesort");
		job.setJarByClass(reversesort.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(map.class);
		job.setReducerClass(reduce.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(IntWritable.class);
		job.setSortComparatorClass(intwritablecompare.class);
		job.waitForCompletion(true);
		return job.isSuccessful()?0:1;
	}
	public static void main(String[] args)throws Exception
	{
		int res=ToolRunner.run(new Configuration(),new reversesort(),args);
		System.exit(res);
	}
}
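
For comparison, the same descending order could also be expressed with a comparator that deserializes the keys instead of comparing their raw bytes. The class below is not in the original post, only an illustrative sketch; it assumes the same org.apache.hadoop.io imports as the listing above and would be registered with job.setSortComparatorClass in the same way:

	public static class intwritabledescending extends WritableComparator
	{
		public intwritabledescending()
		{
			// true = create key instances, so the object-based compare() below is used
			super(IntWritable.class, true);
		}
		@Override
		public int compare(WritableComparable a, WritableComparable b)
		{
			// Reverse the arguments of the natural (ascending) comparison to sort descending
			return super.compare(b, a);
		}
	}

The raw-bytes version above (extending IntWritable.Comparator) gives the same ordering without deserializing each key, which is why it is the usual choice. Either way, both jobs take the input path and output path as their two command-line arguments, and the output directory must not already exist.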

