LazyObject 及其子类分析

LazyObject

LazyObject 用一个字节数组和范围代表一个对象。可以代表任何对象。

/**
 * A view over a region of a byte array that stands for one (not yet
 * deserialized) object.
 *
 * <p>The region is described by the backing array, the offset of its first
 * byte and its length in bytes. The array is referenced, not copied.
 */
public class LazyObject {

  /** Backing array holding the serialized data; {@code null} when no data is assigned. */
  protected byte[] bytes;

  /** Offset into {@link #bytes} at which this object's region begins. */
  protected int start;

  /** Number of bytes in this object's region. */
  protected int length;

  /**
   * Creates an object with no data assigned.
   */
  protected LazyObject() {
    // Nothing to do: the fields keep their Java defaults
    // (bytes == null, start == 0, length == 0).
  }

  /**
   * Creates an object over the given region by delegating to
   * {@link #setAll(byte[], int, int)}.
   *
   * @param bytes  the backing array
   * @param start  offset of the first byte of the region
   * @param length number of bytes in the region
   */
  protected LazyObject(byte[] bytes, int start, int length) {
    setAll(bytes, start, length);
  }

  /**
   * Points this object at a new region. The array reference is stored as-is.
   *
   * @param bytes  the backing array
   * @param start  offset of the first byte of the region
   * @param length number of bytes in the region
   */
  protected void setAll(byte[] bytes, int start, int length) {
    this.length = length;
    this.start = start;
    this.bytes = bytes;
  }

}

LazyPrimitive

LazyPrimitive 是基本类型的通用 lazy 实现类,增加了 primitiveClass 字段。

/**
 * Common base for lazy objects whose value is a single object of type
 * {@code T}. It adds the {@link #primitiveClass} field on top of
 * {@link LazyObject}.
 *
 * @param <T> the Java type produced by {@link #getPrimitiveObject()}
 */
public abstract class LazyPrimitive<T> extends LazyObject {

  /** Class token for {@code T}, as handed to the constructor by the subclass. */
  Class<T> primitiveClass;

  /**
   * Records the class token of the value type.
   *
   * @param primitiveClass the class object for {@code T}
   */
  protected LazyPrimitive(Class<T> primitiveClass) {
    this.primitiveClass = primitiveClass;
  }

  /**
   * Returns the actual primitive object represented by this LazyObject.
   *
   * @return the value as a {@code T}
   */
  public abstract T getPrimitiveObject();

}
LazyByte, LazyLong, LazyInteger, LazyString, LazyDouble, LazyShort

LazyByte 提供了 Byte 的 lazy 实现,仅当调用 getPrimitiveObject 时才进行反序列化。LazyLong, LazyInteger, LazyString, LazyDouble, LazyShort 的实现也都类似。

/**
 * Lazy {@link Byte}: the textual bytes are only parsed when
 * {@link #getPrimitiveObject()} is called.
 */
public class LazyByte extends LazyPrimitive<Byte> {

  /** Creates a LazyByte with no data assigned yet. */
  public LazyByte() {
    super(Byte.class);
  }

  /**
   * Parses the current byte range as a decimal byte value.
   *
   * @return the parsed value, or {@code null} when no data is assigned or the
   *         range cannot be parsed as a byte
   */
  @Override
  public Byte getPrimitiveObject() {
    if (bytes == null) {
      return null;
    }
    Byte value;
    try {
      // Parse directly from the byte range; going through a String first
      // would be slower.
      value = Byte.valueOf(parseByte(bytes, start, length));
    } catch (NumberFormatException ignored) {
      // Unparseable content is reported to the caller as null.
      value = null;
    }
    return value;
  }

  /**
   * Parses a byte range as a base-10 byte value.
   *
   * @param bytes  array containing a UTF-8 encoded string representation of
   *               a single byte quantity
   * @param start  offset of the first byte of the representation
   * @param length number of bytes in the representation
   * @return the value represented by the range
   * @throws NumberFormatException
   *             if the range could not be parsed as a byte quantity
   */
  public static byte parseByte(byte[] bytes, int start, int length) throws NumberFormatException {
    final int decimalRadix = 10;
    return parseByte(bytes, start, length, decimalRadix);
  }

  /**
   * Parses a byte range as a byte value in the given radix. The digits are
   * parsed by {@code LazyInteger.parseInt}; the result is then required to
   * fit into a {@code byte}.
   *
   * @param bytes  array containing a UTF-8 encoded string representation of
   *               a single byte quantity
   * @param start  offset of the first byte of the representation
   * @param length number of bytes in the representation
   * @param radix  the radix to use when parsing
   * @return the value represented by the range
   * @throws NumberFormatException
   *             if the range could not be parsed as a byte quantity
   */
  public static byte parseByte(byte[] bytes, int start, int length, int radix)
      throws NumberFormatException {
    int parsedValue = LazyInteger.parseInt(bytes, start, length, radix);
    if (parsedValue < Byte.MIN_VALUE || parsedValue > Byte.MAX_VALUE) {
      // Parsed fine as an int but does not fit into a byte.
      throw new NumberFormatException();
    }
    return (byte) parsedValue;
  }

}
LazyStruct
/**
 * A struct stored as one row of bytes whose fields are delimited by a
 * single-byte separator. The row is split into fields only when a field is
 * first requested through {@link #getField(int)}.
 */
public class LazyStruct extends LazyObject {

  private static final Log LOG = LogFactory.getLog(LazyStruct.class.getName());

  /** One LazyObject per field; each is re-pointed at its byte range on every parse. */
  LazyObject[] fields;
  /** {@code fieldIsPrimitive[i]} is true when {@code fields[i]} is a LazyPrimitive. */
  boolean[] fieldIsPrimitive;

  /** Byte that delimits fields inside the row. */
  byte separator;
  /** Byte sequence that marks a field as null. */
  Text nullSequence;
  /** Whether the last field absorbs everything up to the end of the row. */
  boolean lastColumnTakesAll;

  /** True once the current row has been split into fields. */
  boolean parsed;

  /**
   * Create a new LazyStruct Object.
   * @param fields     The field LazyObjects
   * @param separator  The separator for delimiting the fields in the byte[]
   * @param nullSequence  The sequence for null value
   * @param lastColumnTakesAll  whether the additional fields should be all put into the last column
   *                            in case the data contains more columns than the schema.
   */
  public LazyStruct(LazyObject[] fields, byte separator,
      Text nullSequence, boolean lastColumnTakesAll) {
    this.fields = fields;
    this.separator = separator;
    this.nullSequence = nullSequence;
    this.lastColumnTakesAll = lastColumnTakesAll;

    parsed = false;
    // Classify each field once so getField() does not repeat the instanceof test.
    fieldIsPrimitive = new boolean[fields.length];
    for (int i = 0; i < fields.length; i++) {
      fieldIsPrimitive[i] = (fields[i] instanceof LazyPrimitive);
    }
  }

  /**
   * Set the row data for this LazyStruct. Replaces the setAll inherited from
   * LazyObject so that the new row is parsed again on the next field access.
   *
   * @param bytes  array containing the row
   * @param start  offset of the first byte of the row
   * @param length number of bytes in the row
   */
  protected void setAll(byte[] bytes, int start, int length) {
    super.setAll(bytes, start, length);
    parsed = false;
  }


  /** Set after the first "missing fields" warning so that it is logged only once. */
  boolean missingFieldWarned = false;
  /** Set after the first "extra bytes" warning so that it is logged only once. */
  boolean extraFieldWarned = false;

  /**
   * Parse the byte[] and fill each field.
   *
   * <p>A field whose bytes equal the null sequence is set to null. Fields for
   * which the row has no data are set to null as well. Bytes left over after
   * the last field are ignored.
   */
  private void parse() {

    int structByteEnd = start + length;
    int fieldId = 0;
    int fieldByteBegin = start;
    int fieldByteEnd = start;

    // Go through all bytes in the byte[]
    while (fieldByteEnd <= structByteEnd) {
      // The end-of-row test comes first so bytes[] is never read past the row.
      if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
        // end of field reached
        if (lastColumnTakesAll && fieldId == fields.length - 1) {
          // The last field swallows the rest of the row, separators included.
          fieldByteEnd = structByteEnd;
        }
        // Test the length first so in most cases we avoid doing a byte[] comparison.
        int fieldLength = fieldByteEnd - fieldByteBegin;
        if (fieldLength == nullSequence.getLength()
            && LazyUtils.compare(bytes, fieldByteBegin, fieldLength,
            nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
          fields[fieldId].setAll(null, 0, 0);
        } else {
          fields[fieldId].setAll(bytes, fieldByteBegin, fieldLength);
        }
        fieldId++;
        if (fieldId == fields.length || fieldByteEnd == structByteEnd) {
          // all fields have been parsed, or all bytes have been parsed
          break;
        }
        // The next field starts right after the separator.
        fieldByteBegin = fieldByteEnd + 1;
      }
      fieldByteEnd++;
    }

    // Extra bytes at the end?
    if (!extraFieldWarned && fieldByteEnd < structByteEnd) {
      extraFieldWarned = true;
      LOG.warn("Extra bytes detected at the end of the row! Ignoring similar problems.");
    }

    // Missing fields?
    if (!missingFieldWarned && fieldId < fields.length) {
      missingFieldWarned = true;
      LOG.warn("Missing fields! Expected " + fields.length + " fields but only got "
          + fieldId + "! Ignoring similar problems.");
    }

    // Fill all missing fields with nulls.
    for (; fieldId < fields.length; fieldId++) {
      fields[fieldId].setAll(null, 0, 0);
    }

    parsed = true;
  }

  /**
   * Get one field out of the struct, parsing the row first if that has not
   * happened yet.
   *
   * If the field is a primitive field, return the actual object.
   * Otherwise return the LazyObject.  This is because PrimitiveObjectInspector
   * does not have control over the object used by the user - the user simply
   * directly use the Object instead of going through
   * Object PrimitiveObjectInspector.get(Object).
   *
   * @param i  the field ID
   * @return   for a primitive field, the result of its getPrimitiveObject();
   *           otherwise the field's LazyObject
   */
  public Object getField(int i) {
    if (!parsed) {
      parse();
    }
    if (!fieldIsPrimitive[i]) {
      return fields[i];
    }
    // Wildcard instead of the raw type: only an Object is needed here.
    return ((LazyPrimitive<?>) fields[i]).getPrimitiveObject();
  }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值