Sorting and Reverse Sorting with Map/Reduce

By default, the reduce phase of MapReduce receives keys sorted in ascending order by the key type's comparator (numeric order for IntWritable keys). The ascending-sort job is shown first.

Input data:

23112
321
2
213
21
4
21
421
5
12
2313
25
21
21
5
25
215
2
5


Output data:

2
2
4
5
5
5
12
21
21
21
21
25
25
213
215
321
421
2313
23112


To let duplicate numbers survive into the output, the mapper emits a random number as the value; a constant 1 would work just as well, since all the reducer needs is a way to tell how many values arrive in its iterator for each key (a count-based variant is sketched after the full listing below).

import java.io.*;
import java.util.Random;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.io.*;

public class sort extends Configured implements Tool{
	enum Counter
	{
		LineSkip,
	}
	public static class map extends Mapper<Object,Text,IntWritable,IntWritable>
	{
		@Override
		public void map(Object key,Text value,Context context)throws IOException,InterruptedException
		{
			// Parse the line into an int and use it as the output key.
			// The value is a throwaway random number; a constant 1 would work just as well.
			IntWritable i=new IntWritable(Integer.parseInt(value.toString().trim()));
			context.write(i, new IntWritable(new Random().nextInt()));
		}
	}
	public static class reduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>
	{
		@Override
		public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
		{
			// Write the key once for every value it carries, so duplicate numbers
			// appear in the output as many times as they appeared in the input.
			// A null value makes TextOutputFormat print the key on a line by itself.
			for(IntWritable ignored:values)
			{
				context.write(key, null);
			}
		}
	}
	public int run(String[] args)throws Exception
	{
		Configuration conf=getConf();
		Job job=new Job(conf,"sort");
		job.setJarByClass(sort.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(map.class);
		job.setReducerClass(reduce.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(IntWritable.class);
		job.waitForCompletion(true);
		return job.isSuccessful()?0:1;
	}
	public static void main(String[] args)throws Exception
	{
		int res=ToolRunner.run(new Configuration(),new sort(),args);
		System.exit(res);
	}
}
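
The constant-1 variant mentioned above is not part of the original listing, but a rough sketch could look like the class below (the name countingreduce is made up for illustration; it assumes the same imports as the listing above and a mapper that writes new IntWritable(1) as the value):

public static class countingreduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>
{
	@Override
	public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
	{
		// Sum the 1s emitted by the mapper to find how many times this number occurred
		int count=0;
		for(IntWritable one:values)
		{
			count+=one.get();
		}
		// Write the key once per occurrence so duplicates survive into the output
		for(int i=0;i<count;i++)
		{
			context.write(key, null);
		}
	}
}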

To instead produce the following descending output:

23112
2313
421
321
215
213
25
25
21
21
21
21
12
5
5
5
4
2
2

we use a custom comparator. Job provides the setSortComparatorClass method, which replaces the default key ordering applied when keys are sorted for the reducer.

import java.io.*;
import java.util.Random;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.io.*;

public class reversesort extends Configured implements Tool{
	enum Counter
	{
		LineSkip,
	}
	public static class map extends Mapper<Object,Text,IntWritable,IntWritable>
	{
		@Override
		public void map(Object key,Text value,Context context)throws IOException,InterruptedException
		{
			// Parse the line into an int and use it as the output key.
			// The value is a throwaway random number; a constant 1 would work just as well.
			IntWritable i=new IntWritable(Integer.parseInt(value.toString().trim()));
			context.write(i, new IntWritable(new Random().nextInt()));
		}
	}
	public static class reduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>
	{
		@Override
		public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
		{
			// Write the key once for every value it carries, so duplicate numbers
			// appear in the output as many times as they appeared in the input.
			// A null value makes TextOutputFormat print the key on a line by itself.
			for(IntWritable ignored:values)
			{
				context.write(key, null);
			}
		}
	}
	public static class intwritablecompare extends IntWritable.Comparator
	{
		// Swap the arguments of the raw-bytes comparison so keys sort from
		// largest to smallest instead of the default ascending order.
		// (The last four int parameters are start offsets and lengths, not end offsets.)
		@Override
		public int compare(byte[] b1,int s1,int l1,byte[] b2,int s2,int l2)
		{
			return super.compare(b2,s2,l2,b1,s1,l1);
		}
	}
	public int run(String[] args)throws Exception
	{
		Configuration conf=getConf();
		Job job=new Job(conf,"reversesort");
		job.setJarByClass(reversesort.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(map.class);
		job.setReducerClass(reduce.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(IntWritable.class);
		job.setSortComparatorClass(intwritablecompare.class);
		job.waitForCompletion(true);
		return job.isSuccessful()?0:1;
	}
	public static void main(String[] args)throws Exception
	{
		int res=ToolRunner.run(new Configuration(),new reversesort(),args);
		System.exit(res);
	}
}
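
For comparison, the same descending order could also be expressed with a comparator that deserializes the keys instead of comparing their raw bytes. The class below is not in the original post, only an illustrative sketch; it assumes the same org.apache.hadoop.io imports as the listing above and would be registered with job.setSortComparatorClass in the same way:

	public static class intwritabledescending extends WritableComparator
	{
		public intwritabledescending()
		{
			// true = create key instances, so the object-based compare() below is used
			super(IntWritable.class, true);
		}
		@Override
		public int compare(WritableComparable a, WritableComparable b)
		{
			// Reverse the arguments of the natural (ascending) comparison to sort descending
			return super.compare(b, a);
		}
	}

The raw-bytes version above (extending IntWritable.Comparator) gives the same ordering without deserializing each key, which is why it is the usual choice. Either way, both jobs take the input path and output path as their two command-line arguments, and the output directory must not already exist.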

