hadoop中compare函数

在看 Hadoop 的二次排序示例时,我把代码改写了一下,给键加了第三个字段。本来以为排序是在下面这个 compareTo 方法里完成的:

 

   /**
    * Orders two keys by {@code first}, then {@code second}, then {@code third}.
    *
    * @param o the key to compare against
    * @return -1, 0 or 1 when this key sorts before, equal to or after {@code o}
    */
   public int compareTo(IntPair o) {
      // Trace line: shows whether this object-level comparison is invoked at all.
      System.out.println("-----------compareTo");

      int byFirst = Integer.compare(first, o.first);
      if (byFirst != 0) {
        return byFirst;
      }

      int bySecond = Integer.compare(second, o.second);
      if (bySecond != 0) {
        return bySecond;
      }

      // All earlier fields tie, so the third field decides (0 when it ties too).
      return Integer.compare(third, o.third);
   }

 本来以为排序是在这个方法里进行的,后来发现不是:把比较第 3 个字段的代码去掉之后,输出结果仍然是有序的。

后来通过打印日志得知,排序实际发生在比较器的 compare 函数中。我把它稍微改写了一下:

      /**
       * Compares two serialized keys byte by byte, printing trace output first.
       *
       * @param b1 buffer holding the first serialized key
       * @param s1 offset of the first key inside {@code b1}
       * @param l1 length in bytes of the first key
       * @param b2 buffer holding the second serialized key
       * @param s2 offset of the second key inside {@code b2}
       * @param l2 length in bytes of the second key
       * @return negative, zero or positive as produced by {@code compareBytes}
       */
      public int compare(byte[] b1, int s1, int l1,
                         byte[] b2, int s2, int l2) {
        // Trace output; note that it prints the buffer length, not the offset.
        System.out.println("s1 = " +  b1.length);

        // Verify what is stored in b1: the third int occupies bytes
        // s1+8 .. s1+11, so decode it from offset s1+8 and only when the key
        // is long enough to contain it.
        if (l1 >= 12) {
          int third = readInt(b1, s1 + 8);
          System.out.println("third = " + third);
        }

        // NOTE(review): unsigned byte-wise order equals numeric order only
        // while every field is non-negative.
        return compareBytes(b1, s1, l1, b2, s2, l2);
      }
    } // closes the enclosing class, whose header is outside this excerpt

 键里有 3 个整型值,s1 是键在缓冲区中的起始位置,l1 是键的长度(12 字节),第三个值位于 s1+8 到 s1+11,这样我们就可以读出各个字段的值。

其中 return compareBytes(b1, s1, l1, b2, s2, l2); 内部调用的是 return FastByteComparisons.compareTo(b1, s1, l1, b2, s2, l2); 其实现如下:

    /**
     * Lexicographically compares two byte ranges, treating bytes as unsigned.
     *
     * @param buffer1 first buffer
     * @param offset1 start of the range in {@code buffer1}
     * @param length1 number of bytes in the first range
     * @param buffer2 second buffer
     * @param offset2 start of the range in {@code buffer2}
     * @param length2 number of bytes in the second range
     * @return the difference of the first mismatching unsigned bytes, or
     *         {@code length1 - length2} when one range is a prefix of the other
     */
    public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2)
    {
      // The very same region of the very same array is trivially equal.
      boolean sameRegion = buffer1 == buffer2 && offset1 == offset2 && length1 == length2;
      if (sameRegion) {
        return 0;
      }

      int limit1 = offset1 + length1;
      int limit2 = offset2 + length2;
      int p = offset1;
      int q = offset2;
      while (p < limit1 && q < limit2) {
        // Mask to 0..255 so that bytes >= 0x80 sort after bytes < 0x80.
        int left = buffer1[p] & 0xFF;
        int right = buffer2[q] & 0xFF;
        if (left != right) {
          return left - right;
        }
        p++;
        q++;
      }

      // Common prefix is identical: the shorter range sorts first.
      return length1 - length2;
    }
    }
  }

 从这段代码就可以看出:比较器是直接对序列化后的字节数组逐字节比较的,三个值都包含在这段字节里,所以输出的结果自然是有序的。

结论:理论上不管键里有多少个(N 个)字段,按这种方式比较出来的结果都是有序的,只是参与比较的字节长度不同。需要注意的是,这里是按无符号字节逐个比较的,因此只有当各字段都是非负整数时,字节顺序才与数值顺序一致。

测试又加了一个字段, 输出结果都是有序的。

测试代码

  /**
   * Composite key made of four ints, used to experiment with secondary sort.
   *
   * <p>The four fields are serialized as four consecutive 4-byte ints, in
   * field order and without any offset encoding. The object-level ordering
   * ({@link #compareTo}) and the registered raw comparator both order keys by
   * first, second, third, then fourth, in signed numeric order, so they agree
   * with each other and with {@link #equals}.
   */
  public static class IntPair 
                      implements WritableComparable<IntPair> {
    /** Size in bytes of one serialized int field. */
    private static final int INT_BYTES = Integer.SIZE / 8;
    /** Number of int fields written by {@link #write}. */
    private static final int FIELD_COUNT = 4;

    private int first = 0;
    private int second = 0;
    private int third = 0;
    private int fourth = 0;
    
    /**
     * Sets all four fields of the key.
     *
     * @param left   value for the first field
     * @param right  value for the second field
     * @param third  value for the third field
     * @param fourth value for the fourth field
     */
    public void set(int left, int right, int third, int fourth) {
      first = left;
      second = right;
      this.third = third;
      this.fourth = fourth;
    }
    
    /** @return the first field */
    public int getFirst() {
      return first;
    }
    
    /** @return the second field */
    public int getSecond() {
      return second;
    }
    
    /** @return the third field */
    public int getThird() {
      return third;
    }
    
    /** @return the fourth field */
    public int getFourth() {
      return fourth;
    }
    
    /**
     * Formats the key as its four fields separated by tabs. It has no side
     * effects; the debug print that used to live here was removed.
     */
    @Override
    public String toString() {
      return first + "\t" + second + "\t" + third + "\t" + fourth;
    }

    /**
     * Reads the four ints in the order {@link #write} emits them. Values are
     * stored as-is; no MIN_VALUE offset encoding is applied.
     *
     * @param in source of the serialized key
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
      first = in.readInt();
      second = in.readInt();
      third = in.readInt();
      fourth = in.readInt();
    }

    /**
     * Writes the four ints back to back, 16 bytes in total.
     *
     * @param out sink for the serialized key
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
      out.writeInt(first);
      out.writeInt(second);
      out.writeInt(third);
      out.writeInt(fourth);
    }

    /**
     * Hash over the first three fields. The fourth field is deliberately left
     * out so existing hash-based partitioning is unchanged; this is still
     * consistent with {@link #equals}.
     */
    @Override
    public int hashCode() {
      return first * 157 + second*10 + third;
    }
    
    /** Two keys are equal when all four fields match. */
    @Override
    public boolean equals(Object right) {
      if (right instanceof IntPair) {
        IntPair r = (IntPair) right;
        return r.first == first && r.second == second && r.third == third && r.fourth == fourth;
      } else {
        return false;
      }
    }
    
    /** A Comparator that compares serialized IntPair. */ 
    public static class Comparator extends WritableComparator {
      public Comparator() {
        super(IntPair.class);
      }
      
      /**
       * Sort comparator working directly on the serialized bytes.
       *
       * <p>Well-formed 16-byte keys are compared field by field as signed
       * ints. A plain unsigned byte comparison gives the same order for
       * non-negative values but would place negative values after positive
       * ones. Keys of any other length fall back to a byte comparison.
       *
       * @param b1 buffer holding the first serialized key
       * @param s1 offset of the first key inside {@code b1}
       * @param l1 length in bytes of the first key
       * @param b2 buffer holding the second serialized key
       * @param s2 offset of the second key inside {@code b2}
       * @param l2 length in bytes of the second key
       * @return negative, zero or positive according to the key order
       */
      @Override
      public int compare(byte[] b1, int s1, int l1,
                         byte[] b2, int s2, int l2) { 
        // Trace output; note that it prints the buffer length, not the offset.
        System.out.println("s1 = " +  b1.length);

        // The third field occupies bytes s1+8 .. s1+11 of the key.
        if (l1 >= 3 * INT_BYTES) {
          int third = readInt(b1, s1 + 2 * INT_BYTES);
          System.out.println("third = " + third);
        }

        int expected = FIELD_COUNT * INT_BYTES;
        if (l1 != expected || l2 != expected) {
          // Not the layout write() produces; do not index past the key.
          return compareBytes(b1, s1, l1, b2, s2, l2);
        }

        for (int field = 0; field < FIELD_COUNT; field++) {
          int offset = field * INT_BYTES;
          int order = Integer.compare(readInt(b1, s1 + offset),
                                      readInt(b2, s2 + offset));
          if (order != 0) {
            return order;
          }
        }
        return 0;
      }
    }

    static {   // register this comparator
      WritableComparator.define(IntPair.class, new Comparator());
    }

    /**
     * Object-level ordering: first, second, third, then fourth. The article
     * observes that the sort phase uses the registered raw comparator instead,
     * but this ordering must still agree with it and with {@link #equals}.
     *
     * @param o the key to compare against
     * @return -1, 0 or 1 when this key sorts before, equal to or after {@code o}
     */
    @Override
    public int compareTo(IntPair o) {
      // Trace line: shows whether this method is invoked at all.
      System.out.println("-----------compareTo");
      int order = Integer.compare(first, o.first);
      if (order == 0) {
        order = Integer.compare(second, o.second);
      }
      if (order == 0) {
        order = Integer.compare(third, o.third);
      }
      if (order == 0) {
        order = Integer.compare(fourth, o.fourth);
      }
      return order;
    }
  }
  public static class StrPair 
                      implements WritableComparable<StrPair> {
    private Text first;
    private Text second ;
    private Text third ;
    private Text fourth;
    
    // 这句很重要, 要不读的时候会出错
    public StrPair(){
    	set(new Text(),new Text(),new Text(),new Text());
    }
    
    public void set(Text left, Text right, Text third, Text fourth) {
      this.first = left;
      this.second = right;
      this.third = third;
      this.fourth = fourth;
    }
    
    public Text getFirst() {
      return first;
    }
    
    public Text getSecond() {
      return second;
    }
    
    public Text getThird() {
        return third;
      }
    
    public Text getFourth() {
        return fourth;
      }
    
    @Override
	public String toString() {
    	return first + "\t" + second + "\t" + third + "\t" + fourth;
	}

    @Override
    public void readFields(DataInput in) throws IOException {
    	first.readFields(in);
    	second.readFields(in);
    	third.readFields(in);
    	fourth.readFields(in);
    }
    @Override
    public void write(DataOutput out) throws IOException {
    	System.out.println(out);
    	first.write(out);
    	second.write(out);
    	third.write(out);
    	fourth.write(out);
    	System.out.println("First = " + second.toString());
    }
    @Override
    public int hashCode() {
      return first.hashCode()* 157 + second.hashCode()*10 + third.hashCode();
    }
    
    @Override
    public boolean equals(Object right) {
      if (right instanceof StrPair) {
    	  StrPair r = (StrPair) right;
        return first.equals(r.first) && second.equals(r.second) && third.equals(r.third) && fourth.equals(r.fourth);
      } else {
        return false;
      }
    }
    
    /** A Comparator that compares serialized StrPair. */ 
    public static class Comparator extends WritableComparator {
      public Comparator() {
        super(StrPair.class);
      }
      
      // 排序比较器,数据全部存在byte数组
      public int compare(byte[] b1, int s1, int l1,
                         byte[] b2, int s2, int l2) { 
       // 二进制数组读取
       int intvalue = readInt(b1, s1);	
       System.out.println("s1 = " +  b1.length);
       /*
       int third = 0;
       for(int i =s1 + 9; i<= s1+ 12; i++){
    	   third += (b1[i]&0xff) << (24-8*i);
    	}
       	System.out.println("third = " + third);
    	
    	*/  
        return compareBytes(b1, s1, l1, b2, s2, l2);
      }
    }

    static {   // register this comparator
      WritableComparator.define(StrPair.class, new Comparator());
    }
   
    @Override
    public int compareTo(StrPair o) {/*
      if (first != o.first) {
        return first < o.first ? -1 : 1;
      } else if (second != o.second) {
        return second < o.second ? -1 : 1;
      }// else if (third != o.third) {
    	//        return third < o.third ? -1 : 1;}
      
        return 0;
        */
       return 0;
    }
  }

 
  /**
   * Partition based on the first part of the key, so that all keys sharing
   * the same first field go to the same partition.
   */
  public static class FirstPartitioner extends Partitioner<StrPair,Text>{
    /**
     * Chooses the partition for a key.
     *
     * @param key           the composite key; only its first field is used
     * @param value         the value (ignored)
     * @param numPartitions total number of partitions
     * @return an index in the range {@code 0 .. numPartitions - 1}
     */
    @Override
    public int getPartition(StrPair key, Text value, 
                            int numPartitions) {
      int hash = key.getFirst().hashCode() * 127;
      // Clear the sign bit instead of calling Math.abs(): Math.abs(Integer.MIN_VALUE)
      // is still negative and would yield a negative partition index.
      return (hash & Integer.MAX_VALUE) % numPartitions;
    }
  }

  /**
   * Compare only the first part of the pair, so that reduce is called once
   * for each value of the first part.
   */
  public static class FirstGroupingComparator 
                implements RawComparator<StrPair> {
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
      return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, 
                                             b2, s2, Integer.SIZE/8);
    }

    @Override
    public int compare(StrPair o1, StrPair o2) {
      Text l = o1.getFirst();
      Text r = o2.getFirst();
      return l.equals(r)?0:1;
     // return l == r ? 0 : (l < r ? -1 : 1);
    }
  }

 

转载于:https://www.cnblogs.com/chengxin1982/p/3853725.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Hadoop MapReduce编程,有一些常用的函数可以帮助我们进行数据处理,这些函数包括: 1. map函数:map函数是MapReduce任务的一个重要组成部分,它将输入数据的每个元素映射成一个键值对,并输出到Reduce任务进行处理。在map函数,可以进行各种数据处理操作,例如过滤、计算、转换等等。 2. reduce函数:reduce函数是MapReduce任务的另一个重要组成部分,它对Map任务输出的键值对进行合并和聚合,最终输出一个键值对列表。在reduce函数,可以进行各种聚合操作,例如求和、计数、取最大值等等。 3. partitioner函数:partitioner函数用于将Map任务输出的键值对分配到不同的Reduce任务进行处理。在partitioner函数,可以根据键的哈希值或其他特定的规则,将键值对分配到不同的Reduce任务。 4. combiner函数:combiner函数是在Map任务输出的键值对进行本地合并和聚合的函数,它可以减少数据传输量,提高MapReduce任务的效率。在combiner函数,可以进行各种本地聚合操作,例如求和、计数、取最大值等等。 5. inputformat函数:inputformat函数是用于读取输入数据的函数,它定义了输入数据的格式和读取方法。例如,可以使用TextInputFormat函数读取文本文件,使用SequenceFileInputFormat函数读取二进制文件等等。 6. outputformat函数:outputformat函数是用于输出结果数据的函数,它定义了输出数据的格式和输出方法。例如,可以使用TextOutputFormat函数将结果输出为文本文件,使用SequenceFileOutputFormat函数将结果输出为二进制文件等等。 这些函数Hadoop MapReduce编程常用的函数,掌握它们可以帮助我们更好地进行数据处理和分析。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值