Map/Reduce二次排序与分组分区用法

这里所说的二次排序,是指先按照Key值排序,在Key相同的情况下再按照Value的值排序。

数据来源于某程序网的测试数据集

输入

1,mr1,3234
2,mr2,123
3,mr3,9877
4,mr4,348
5,mr5,12345
6,mr6,6646
7,mr7,98
8,mr8,12345
1,mr1,334
2,mr2,3123
3,mr3,97
4,mr4,231
5,mr5,122
6,mr6,3455
7,mr7,1222
8,mr8,12345
4,mr4,123
输出

mr1	3234,334
mr2	3123,123
mr3	9877,97
mr4	348,231,123
mr5	12345,122
mr6	6646,3455
mr7	1222,98
mr8	12345,12345

解释一下输入数据三个字段的含义:第一个是id号,第二个是名字,第三个是花费金额。要求按名字输出每个人的所有花费金额,且同一个人的金额按照从大到小排序。


代码:
import java.io.*;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.util.*;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.io.*;
import org.apache.hadoop.fs.*;
/**
 * MapReduce job demonstrating a secondary sort.
 *
 * <p>Each input line has the form {@code id,name,cost}. The job emits one line
 * per name containing all of that name's costs, comma-separated and sorted in
 * descending order.
 *
 * <p>How it works:
 * <ul>
 *   <li>The map output key is a composite {@link textpair} (name, cost), so the
 *       shuffle sorts records by name and then by cost descending.</li>
 *   <li>{@link MyPartitioner} routes records by name only, so every record of a
 *       name reaches the same reducer.</li>
 *   <li>{@link GroupingComparator} compares by name only, so a single
 *       {@code reduce} call receives all costs of a name, already ordered.</li>
 * </ul>
 */
public class ThreeSort extends Configured implements Tool{

	/** Job counters. */
	enum Counter
	{
		/** Number of input lines skipped because they could not be parsed. */
		LINKSKIP,
	}

	/**
	 * Composite key (name, cost). Natural order is name ascending, then cost
	 * descending.
	 */
	public static class textpair implements WritableComparable<textpair>
	{
		String name;
		int cost;

		/**
		 * @param name the person's name (primary sort field)
		 * @param cost the amount spent (secondary sort field)
		 */
		public textpair(String name,int cost)
		{
			this.name=name;
			this.cost=cost;
		}

		/** No-arg constructor; fields are populated later by {@link #readFields}. */
		public textpair(){}

		/** Reads the fields in the same order {@link #write} emits them. */
		public void readFields(DataInput arg0) throws IOException {
			name=arg0.readUTF();
			cost=arg0.readInt();
		}

		/** Serializes the name followed by the cost. */
		public void write(DataOutput arg0) throws IOException {
			arg0.writeUTF(name);
			arg0.writeInt(cost);
		}

		/**
		 * Orders by name ascending; ties are broken by cost descending.
		 *
		 * @return -1, 0 or 1
		 */
		public int compareTo(textpair o) {
			int byName=name.compareTo(o.name);
			if(byName!=0)
				return byName<0?-1:1;
			if(cost!=o.cost)
				return cost>o.cost?-1:1; // larger cost sorts first
			return 0;
		}

		/** @return the name component of the key */
		public String getname()
		{
			return this.name;
		}

		/** @return the cost component of the key */
		public int getcost()
		{
			return this.cost;
		}

		/** Hash over both fields, consistent with {@link #equals(Object)}. */
		@Override
		public int hashCode() {
			return 31*(name==null?0:name.hashCode())+cost;
		}

		/** Two keys are equal when both name and cost match. */
		@Override
		public boolean equals(Object o)
		{
			if(this==o)
				return true;
			if(!(o instanceof textpair))
				return false;
			textpair other=(textpair)o;
			boolean sameName=(name==null)?other.name==null:name.equals(other.name);
			return sameName&&other.cost==this.cost;
		}
	}

	/**
	 * Partitions by name only. Partitioning on the cost (as an earlier version
	 * did) scatters one name's records across reducers, which splits that name
	 * into several output lines whenever more than one reducer is configured.
	 */
	public static class MyPartitioner extends Partitioner<textpair,IntWritable>
	{

		/**
		 * @return a partition index in {@code [0, numPartitions)} derived from
		 *         the key's name
		 */
		@Override
		public int getPartition(textpair key, IntWritable value, int numPartitions) {
			// Mask the sign bit instead of Math.abs, which stays negative for Integer.MIN_VALUE.
			return (key.getname().hashCode()&Integer.MAX_VALUE)%numPartitions;
		}

	}

	/**
	 * Groups reducer input by name only, so keys that differ only in cost are
	 * delivered to the same {@code reduce} call.
	 */
	public static class GroupingComparator extends WritableComparator
	{
		public GroupingComparator()
		{
			// true: let the base class instantiate keys so compare() receives deserialized objects
			super(textpair.class,true);
		}

		/** Compares two {@link textpair} keys by name, ignoring cost. */
		@Override
		public int compare(WritableComparable a,WritableComparable b)
		{
			textpair a1=(textpair)a;
			textpair b1=(textpair)b;
			return a1.getname().compareTo(b1.getname());
		}
	}

	/**
	 * Parses {@code id,name,cost} lines and emits ((name, cost), cost).
	 * Lines with fewer than three fields or a non-numeric cost are counted under
	 * {@link Counter#LINKSKIP} and dropped. The id field is not used.
	 */
	public static class map extends Mapper<Object,Text,textpair,IntWritable>
	{
		private IntWritable cost=new IntWritable();

		@Override
		public void map(Object key,Text value,Context context)throws IOException,InterruptedException
		{
			String[] fields=value.toString().split(",");
			if(fields.length<3)
			{
				context.getCounter(Counter.LINKSKIP).increment(1);
				return;
			}
			int amount;
			try
			{
				amount=Integer.parseInt(fields[2].trim());
			}
			catch(NumberFormatException e)
			{
				// A malformed amount should skip the record, not fail the whole task.
				context.getCounter(Counter.LINKSKIP).increment(1);
				return;
			}
			cost.set(amount);
			context.write(new textpair(fields[1],amount), cost);
		}
	}

	/**
	 * Joins all costs of one name into a comma-separated list. The values
	 * arrive in key order, i.e. cost descending.
	 */
	public static class reduce extends Reducer<textpair,IntWritable,Text,Text>
	{
		public Text okey=new Text();
		public Text ovalue=new Text();

		@Override
		public void reduce(textpair key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
		{
			StringBuilder joined=new StringBuilder();
			for(IntWritable value:values)
			{
				if(joined.length()>0)
					joined.append(',');
				// Each value carries the same cost as its composite key.
				joined.append(value.get());
			}
			okey.set(key.getname());
			ovalue.set(joined.toString());
			context.write(okey, ovalue);
		}
	}

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @return 0 on success, 1 if the job failed, 2 if the arguments are missing
	 */
	public int run(String[] args)throws Exception
	{
		if(args==null||args.length<2)
		{
			System.err.println("Usage: ThreeSort <input path> <output path>");
			return 2;
		}
		Configuration conf=getConf();
		// NOTE(review): this constructor is deprecated in newer Hadoop releases in
		// favour of Job.getInstance(conf, name); kept for compatibility with older clusters.
		Job job=new Job(conf,"ThreeSort");
		job.setJarByClass(ThreeSort.class);
		job.setMapperClass(map.class);
		job.setMapOutputKeyClass(textpair.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setPartitionerClass(MyPartitioner.class);
		job.setGroupingComparatorClass(GroupingComparator.class);
		job.setReducerClass(reduce.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		// The output directory is not removed here, so it must not exist before the job starts.
		FileOutputFormat.setOutputPath(job,new Path(args[1]));
		return job.waitForCompletion(true)?0:1;
	}

	/** Entry point; delegates argument parsing to {@link ToolRunner}. */
	public static void main(String[] args)throws Exception
	{
		int res=ToolRunner.run(new Configuration(),new ThreeSort(), args);
		System.exit(res);
	}
}






  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值