mapreduce分区排序实例

实例内容:按照年份进行分区,按照分数进行降序处理

package mapreduce_3xia.fourth;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import mapreduce_3xia.TopK.MRDPUtils;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class Subarea {
	
	/**
	 * Composite map-output key made of a year and a score.
	 *
	 * <p>Serialized as two consecutive ints (year first, then score). The natural
	 * ordering is ascending by year, then ascending by score. Note that the job
	 * configured in {@code main} installs {@code SortTemp} as the sort comparator,
	 * which orders scores descending instead.
	 */
	public static class KeyPair implements WritableComparable<KeyPair>{
	    // Year component of the key.
	    private int year;
	    // Score component of the key.
	    private int score;

	    /** No-arg constructor; fields are filled in later via setters or {@link #readFields}. */
	    public KeyPair() {
	    }

	    /**
	     * @param year  year component
	     * @param score score component
	     */
	    public KeyPair(int year, int score) {
	        this.year = year;
	        this.score = score;
	    }

	    public void setYear(int year) {
	        this.year = year;
	    }

	    public void setScore(int score) {
	        this.score = score;
	    }

	    public int getYear() {
	        return year;
	    }

	    public int getScore() {
	        return score;
	    }

	    /**
	     * Orders by year first, then by score, both ascending.
	     *
	     * @param o the key to compare against
	     * @return negative, zero or positive as this key is less than, equal to or
	     *         greater than {@code o}
	     */
	    @Override
	    public int compareTo(KeyPair o) {
	        int result = Integer.compare(year, o.getYear());
	        if (result != 0) {
	            // Different years: the year decides. Returning 0 here would make
	            // keys from different years compare as equal.
	            return result;
	        }
	        // Same year: the score decides.
	        return Integer.compare(score, o.getScore());
	    }

	    /** Serializes the key as year followed by score. */
	    @Override
	    public void write(DataOutput dataOutput) throws IOException {
	        dataOutput.writeInt(year);
	        dataOutput.writeInt(score);
	    }

	    /** Deserializes the key in the same order {@link #write} emits it. */
	    @Override
	    public void readFields(DataInput dataInput) throws IOException {
	        this.year = dataInput.readInt();
	        this.score = dataInput.readInt();
	    }

	    /** @return {@code year}, a tab, then {@code score} */
	    @Override
	    public String toString() {
	        return year + "\t" + score;
	    }

	    /** Two keys are equal when both year and score match. */
	    @Override
	    public boolean equals(Object obj) {
	        if (this == obj) {
	            return true;
	        }
	        if (!(obj instanceof KeyPair)) {
	            return false;
	        }
	        KeyPair other = (KeyPair) obj;
	        return year == other.year && score == other.score;
	    }

	    /** Same value as before (year + score), without the deprecated boxing. */
	    @Override
	    public int hashCode() {
	        return year + score;
	    }
	}
	
	
	/**
	 * Helper for turning one single-line {@code <row attr="value" ... />} record
	 * into a map of attribute names to values.
	 */
	public static class MRDPUtils {
		public static final String[] REDIS_INSTANCES = { "p0", "p1", "p2", "p3", "p4", "p6" };

		/**
		 * Parses one {@code <row ... />} line into an attribute map.
		 *
		 * <p>The parser is positional, not a real XML parser: it strips the first 5
		 * and last 3 characters of the trimmed line, then splits what is left on
		 * double quotes and pairs each {@code name=} token with the token after it.
		 * Escaped quotes inside attribute values are not handled.
		 *
		 * @param xml one record line; may be {@code null}
		 * @return attribute name to value map; empty for {@code null} input, and empty
		 *         or partial when the line is too short or malformed (in which case
		 *         the raw line is echoed to stderr)
		 */
		public static Map<String, String> transformXmlToMap(String xml) {
			Map<String, String> map = new HashMap<String, String>();
			if (xml == null) {
				return map;
			}
			String trimmed = xml.trim();
			try {
				// Limit -1 keeps trailing empty tokens, so a final attribute whose
				// value is the empty string (e.g. Text="") is not dropped.
				String[] tokens = trimmed.substring(5, trimmed.length() - 3).split("\"", -1);

				for (int i = 0; i < tokens.length - 1; i += 2) {
					String key = tokens[i].trim();
					String val = tokens[i + 1];

					// Strip the trailing '=' from the attribute name.
					map.put(key.substring(0, key.length() - 1), val);
				}
			} catch (StringIndexOutOfBoundsException e) {
				// Best effort: report the offending line and return what was parsed so far.
				System.err.println(xml);
			}
			return map;
		}
	}
	
	/**
	 * Comparator that looks only at the year of a {@link KeyPair}: two keys with
	 * the same year compare as equal regardless of score. Registered in
	 * {@code main} as the job's grouping comparator.
	 */
	public static class GroupTemp extends WritableComparator{

	    /** Registers {@link KeyPair} as the key type handled by this comparator. */
	    public GroupTemp() {
	        super(KeyPair.class, true);
	    }

	    /**
	     * @return the result of comparing the two keys' years; 0 when they match
	     */
	    @Override
	    public int compare(WritableComparable a, WritableComparable b) {
	        final int leftYear = ((KeyPair) a).getYear();
	        final int rightYear = ((KeyPair) b).getYear();
	        return Integer.compare(leftYear, rightYear);
	    }
	}
	
	/**
	 * Sort comparator for {@link KeyPair}: ascending by year, and within the same
	 * year descending by score.
	 */
	public static class SortTemp extends WritableComparator{

	    /** Registers {@link KeyPair} as the key type handled by this comparator. */
	    public SortTemp() {
	        super(KeyPair.class, true);
	    }

	    /**
	     * @return the year comparison when the years differ; otherwise the score
	     *         comparison with the operands swapped, so higher scores sort first
	     */
	    @Override
	    public int compare(WritableComparable a, WritableComparable b) {
	        final KeyPair left = (KeyPair) a;
	        final KeyPair right = (KeyPair) b;
	        if (left.getYear() != right.getYear()) {
	            // Years differ: ascending by year.
	            return Integer.compare(left.getYear(), right.getYear());
	        }
	        // Same year: swapping the operands yields descending score order.
	        return Integer.compare(right.getScore(), left.getScore());
	    }
	}
	
	
	/**
	 * Custom partitioner that routes records by year only, so every record of a
	 * given year lands in the same partition.
	 *
	 * <p>The partition is {@code (year * 127) mod num}. Distinct years may still
	 * share a partition whenever they collide modulo {@code num}, so this does
	 * not guarantee one reduce task per year.
	 */
	public static class FirstPartition extends Partitioner<KeyPair,Text>{
	    /**
	     * @param key   map-output key; only its year is used
	     * @param value map-output value (ignored)
	     * @param num   number of partitions
	     * @return a partition index in {@code [0, num)}
	     */
	    @Override
	    public int getPartition(KeyPair key, Text value, int num) {
	        // Multiply in long so the product cannot overflow int.
	        long scaled = (long) key.getYear() * 127L;
	        int partition = (int) (scaled % num);
	        // Java's % keeps the sign of the dividend; shift negatives into range.
	        return partition < 0 ? partition + num : partition;
	    }
	}
	
	
	
    //字符串转日期format
    //public static SimpleDateFormat SDF=new SimpleDateFormat("yyyy-MM-ddTHH:mm:ss.730");
    /**
     * Mapper
     * 输出的Key是自定义的KeyPair
     */
    static class TempMapper extends Mapper<LongWritable,Text,KeyPair,Text>{
    	protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{
        	
        	Map<String, String> parsed = MRDPUtils.transformXmlToMap(value.toString());
        	String strDate = parsed.get("CreationDate");
        	String strScore = parsed.get("Score");
        	String strId = parsed.get("Id");
        	int year = Integer.parseInt(strDate.substring(0,4));
        	int score = Integer.parseInt(strScore);
        	KeyPair kp = new KeyPair(year, score);
        	String strResult = "Date:" + strDate + "  Id:" + strId + "  Score:" + strScore;
        	Text result = new Text(strResult);
        	context.write(kp, result);
        }
    }
    /**
     * Reducer.
     *
     * <p>Identity pass-through: the map output is this reducer's input, and every
     * value is written back out together with the key, unchanged.
     */
    static class TempReducer extends Reducer<KeyPair,Text,KeyPair,Text> {
        @Override
        protected void reduce(KeyPair kp, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Write the key once per value, inside the loop, so each output line
            // carries the key as it stands when that value is consumed.
            java.util.Iterator<Text> remaining = values.iterator();
            while (remaining.hasNext()) {
                context.write(kp, remaining.next());
            }
        }
    }

    /**
     * Job driver (client entry point).
     *
     * <p>Expects exactly two arguments after generic Hadoop options are removed:
     * the input path and the output path. Prints a usage message and exits with
     * status 2 otherwise. Exits with 0 when the job succeeds and 1 when it fails.
     *
     * @param args command-line arguments: generic Hadoop options, then {@code <in> <out>}
     * @throws IOException          if job setup or submission fails
     * @throws InterruptedException if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException{
        // Load the configuration and let GenericOptionsParser apply command-line overrides.
        Configuration conf=new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: temp <in> <out>");
            System.exit(2);
        }

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job=Job.getInstance(conf,"temp");
        // 1. Class used to locate the job jar.
        job.setJarByClass(Subarea.class);
        // 2. Mapper and reducer classes.
        job.setMapperClass(Subarea.TempMapper.class);
        job.setReducerClass(Subarea.TempReducer.class);
        // 3. Map output types, plus the final output types emitted by the reducer.
        job.setMapOutputKeyClass(KeyPair.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(KeyPair.class);
        job.setOutputValueClass(Text.class);
        // 4. Input and output directories.
        FileInputFormat.addInputPath(job,new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
        // 5. Number of reduce tasks; 15 matches the number of distinct years in the sample data.
        job.setNumReduceTasks(15);
        // 6. Partitioner, sort comparator and grouping comparator.
        job.setPartitionerClass(FirstPartition.class);
        job.setSortComparatorClass(SortTemp.class);
        job.setGroupingComparatorClass(GroupTemp.class);
        // 7. Submit the job, wait for completion and print progress on the client.
        boolean isSuccess= false;
        try {
            isSuccess = job.waitForCompletion(true);
        } catch (ClassNotFoundException e) {
            // Treated as a failed job: isSuccess stays false and the exit status is 1.
            e.printStackTrace();
        }
        // 8. Exit with a status that reflects the job result.
        System.exit(isSuccess ?0:1);
    }
}

导出 jar 包后,执行以下命令运行(如果 jar 包的清单文件中没有指定主类,需要在 jar 包名之后加上主类名 mapreduce_3xia.fourth.Subarea):

hadoop jar jarname.jar /datapath/data.xml /out_name

 

data.xml:

<row Id="1" PostId="35314" Score="31" Text="not sure why this is getting do is correct! Double !" CreationDate="2018-09-06T09:07:10.730" UserId="1" />
<row Id="1" PostId="35315" Score="32" Text="not sure why this is getting downvoted -- it is correct! Double check itm!" CreationDate="2007-09-06T02:05:33.730" UserId="1" />
<row Id="1" PostId="35316" Score="33" Text="not get -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.730" UserId="1" />
<row Id="1" PostId="35317" Score="34" Text="not sure why this is getting downvoted -- it is cobelieve him!" CreationDate="2008-08-06T04:07:26.730" UserId="1" />
<row Id="2" PostId="35318" Score="35" Text="not sure why this ist! Double check it in your compiler if you don't believe him!" CreationDate="2008-05-06T08:11:10.730" UserId="1" />
<row Id="2" PostId="35319" Score="36" Text="not sure why tf you don'jdslfjdkfj kjf jkdjkfjd ksjfk t believe him!" CreationDate="2008-09-06T01:12:10.730" UserId="1" />
<row Id="2" PostId="35320" Score="37" Text="not sure why this is get! Double check it in your compiler if you don't believe him!" CreationDate="2008-06-06T08:03:10.730" UserId="1" />
<row Id="2" PostId="35321" Score="38" Text="not sure why this is gcorrect! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.880" UserId="1" />
<row Id="2" PostId="35322" Score="39" Text="not sure why ng downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2016-09-06T08:07:39.730" UserId="1" />
<row Id="2" PostId="35323" Score="40" Text="not sure wheve him!" CreationDate="2008-03-06T03:07:10.730" UserId="1" />
<row Id="3" PostId="35324" Score="41" Text="not sure why this is getting downvoted -- it is correct! Double check ie him!" CreationDate="2007-09-06T09:00:22.730" UserId="1" />
<row Id="3" PostId="35325" Score="42" Text="not sure why this is gettinyour compiler if you don't believe him!" CreationDate="2008-09-06T02:07:10.730" UserId="2" />
<row Id="3" PostId="35326" Score="42" Text="not sure why this is getting downvoted -- it is correct! Double check im!" CreationDate="2012-09-06T04:07:10.730" UserId="2" />
<row Id="3" PostId="35327" Score="43" Text="not sure why thble check it in your compiler if you don't believe him!" CreationDate="2008-07-06T01:05:10.730" UserId="2" />
<row Id="3" PostId="35314" Score="44" Text="is correct! Double check it in your compiler if you don't believe him!" CreationDate="2003-09-06T03:12:10.730" UserId="2" />
<row Id="3" PostId="35328" Score="45" Text="not surwnvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T02:07:11.730" UserId="2" />
<row Id="3" PostId="35329" Score="46" Text="not sure why this is gettiouble check it in your compiler if you don't believe him!" CreationDate="2010-08-06T03:08:10.730" UserId="2" />
<row Id="3" PostId="35330" Score="46" Text="not sure why this orrect! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T01:07:10.730" UserId="2" />
<row Id="1" PostId="35331" Score="47" Text="is getting downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.730" UserId="2" />
<row Id="1" PostId="35332" Score="48" Text="not sure why this is getrect! Double check it in your compiler if you don't believe him!" CreationDate="2014-09-06T09:07:45.730" UserId="2" />
<row Id="1" PostId="35333" Score="49" Text="not sure why this is getct! Double check it in your compiler if you don't believe him!" CreationDate="2008-02-06T04:07:10.730" UserId="3" />
<row Id="1" PostId="35334" Score="50" Text="not sure why this is gettin Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T09:07:10.730" UserId="3" />
<row Id="4" PostId="35335" Score="51" Text="nong downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2015-01-06T08:07:10.730" UserId="3" />
<row Id="4" PostId="35336" Score="52" Text="notvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T01:07:19.730" UserId="3" />
<row Id="4" PostId="35337" Score="53" Text="not sure why tct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T02:07:10.730" UserId="3" />
<row Id="4" PostId="35338" Score="54" Text="ng downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T07:07:10.730" UserId="3" />
<row Id="4" PostId="35339" Score="55" Text="not sure why this is getting downf you don't believe him!" CreationDate="2017-08-06T08:07:19.730" UserId="4" />
<row Id="4" PostId="35340" Score="56" Text="not sure why this is getting  Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T07:04:10.730" UserId="4" />
<row Id="4" PostId="35341" Score="57" Text="not downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2011-09-06T06:07:10.730" UserId="4" />
<row Id="4" PostId="35342" Score="58" Text="not sure why this is getting downvoten your compiler if you don't believe him!" CreationDate="2019-09-06T02:07:10.730" UserId="4" />
<row Id="4" PostId="35343" Score="59" Text="not sure why this is getting doDouble check it in your compiler if you don't believe him!" CreationDate="2008-04-06T04:02:18.730" UserId="4" />
<row Id="6" PostId="35344" Score="60" Text="not sure why this is gettirect! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.730" UserId="5" />
<row Id="6" PostId="35345" Score="60" Text="not sure why this is gettinguble check it in your compiler if you don't believe him!" CreationDate="2006-04-06T08:07:10.730" UserId="5" />
<row Id="6" PostId="35346" Score="61" Text="downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:08:10.730" UserId="5" />
<row Id="6" PostId="35347" Score="62" Text="not sure why this is getting downvoted -- it is correct! Double check it in yourm!" CreationDate="2011-08-06T08:07:45.730" UserId="5" />
<row Id="6" PostId="35348" Score="63" Text="not sure why this is getting downvoted -compiler if you don't believe him!" CreationDate="2008-09-06T08:04:10.730" UserId="5" />
<row Id="6" PostId="35349" Score="64" Text="not sure - it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2005-05-06T08:07:10.730" UserId="6" />
<row Id="6" PostId="35350" Score="65" Text="whgetting downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:06:10.730" UserId="6" />
<row Id="6" PostId="35351" Score="66" Text="not sure! Double check it our compiler if you don't believe him!" CreationDate="2008-05-06T08:07:23.730" UserId="6" />
<row Id="6" PostId="35352" Score="66" Text="not sureted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2099-09-06T08:09:19.730" UserId="6" />

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值