ObjectInspector及其子类分析

ObjectInspector

ObjectInspector 帮助我们探查复杂对象的内部结构。一个 ObjectInspector 实例代表了一种具体的类型,以及这种类型的数据在内存中的一种特定存储方式。
应使用 ObjectInspectorFactory 创建 ObjectInspector 对象,因为这样可以保证相同的 ObjectInspector 仅有一个实例。

/**
 * Describes one concrete data type together with one particular way of
 * storing values of that type in memory.
 */
public interface ObjectInspector {

  /** The kinds of type an ObjectInspector can describe. */
  enum Category {
    PRIMITIVE,
    LIST,
    MAP,
    STRUCT
  }

  /**
   * Returns the name of the data type inspected by this ObjectInspector.
   * The name is meant to be shown to the user.
   *
   * Primitive types use standardized names. Any other type may be named
   * like "list<int>" or "map<int,string>", by a java class name, or by a
   * user-defined, typedef-like name.
   */
  String getTypeName();

  /**
   * Returns the category of the inspected type. The category determines
   * which sub-interface an implementation must also inherit from:
   * PRIMITIVE:  PrimitiveObjectInspector
   * LIST:       ListObjectInspector
   * MAP:        MapObjectInspector
   * STRUCT:     StructObjectInspector
   */
  Category getCategory();

}

PrimitiveObjectInspector

/**
 * ObjectInspector for types whose category is PRIMITIVE.
 */
public interface PrimitiveObjectInspector extends ObjectInspector {

  /** Returns the class associated with the inspected primitive type. */
  Class<?> getPrimitiveClass();
}
StandardPrimitiveObjectInspector

StandardPrimitiveObjectInspector 提供了一种通用的处理各种 PrimitiveObjectInspector 的方法。它的构造方法需要传入 primitiveClass 参数,并借助 ObjectInspectorUtils 为各种 PrimitiveObjectInspector 提供统一接口。

/**
 * General-purpose PrimitiveObjectInspector that is parameterized by the
 * class it describes.
 */
class StandardPrimitiveObjectInspector implements PrimitiveObjectInspector {

  /** The class handed to the constructor. */
  Class<?> primitiveClass;

  /**
   * Not meant to be called directly: use
   * ObjectInspectorFactory.getStandardPrimitiveObjectInspector instead.
   */
  protected StandardPrimitiveObjectInspector(Class<?> primitiveClass) {
    this.primitiveClass = primitiveClass;
  }

  /** Always reports the PRIMITIVE category. */
  public final Category getCategory() {
    return Category.PRIMITIVE;
  }

  /** Returns the short name ObjectInspectorUtils derives for the described class. */
  public String getTypeName() {
    final Class<?> described = this.primitiveClass;
    return ObjectInspectorUtils.getClassShortName(described);
  }

  /** Returns the class this inspector was created for. */
  public Class<?> getPrimitiveClass() {
    return this.primitiveClass;
  }
}

ListObjectInspector

/**
 * ObjectInspector for types whose category is LIST.
 */
public interface ListObjectInspector extends ObjectInspector {

  // ---- Type-level method: no data object required ----

  /** Returns the ObjectInspector describing the elements of the list. */
  ObjectInspector getListElementObjectInspector();

  // ---- Data-level methods: operate on a data object ----

  /**
   * Returns the element at the given index, or null when the list is null
   * or the index is out of range.
   */
  Object getListElement(Object data, int index);

  /**
   * Returns the number of elements, or -1 when data is null.
   */
  int getListLength(Object data);

  /**
   * Returns the data as a List, or null when data is null.
   */
  List<?> getList(Object data);

}
StandardListObjectInspector

StandardListObjectInspector 是 ListObjectInspector 的通用实现。数据可以存储为 List 或者 Array 对象。构建对象,需要提供元素的 ObjectInspector。

/**
 * General-purpose ListObjectInspector. The inspected data may be either a
 * java.util.List or an Object[] array.
 */
class StandardListObjectInspector implements ListObjectInspector {

  /** Inspector describing every element of the list. */
  ObjectInspector listElementObjectInspector;

  /**
   * Not meant to be called directly: use
   * ObjectInspectorFactory.getStandardListObjectInspector instead.
   */
  protected StandardListObjectInspector(ObjectInspector listElementObjectInspector) {
    this.listElementObjectInspector = listElementObjectInspector;
  }

  /** Always reports the LIST category. */
  public final Category getCategory() {
    return Category.LIST;
  }

  /** Builds the type name from the list type name and the element type name in angle brackets. */
  public String getTypeName() {
    StringBuilder typeName = new StringBuilder();
    typeName.append(org.apache.hadoop.hive.serde.Constants.LIST_TYPE_NAME);
    typeName.append("<");
    typeName.append(listElementObjectInspector.getTypeName());
    typeName.append(">");
    return typeName.toString();
  }

  // ---- Type-level method ----

  /** Returns the inspector describing the list elements. */
  public ObjectInspector getListElementObjectInspector() {
    return this.listElementObjectInspector;
  }

  // ---- Data-level methods ----

  /** Returns the data as a List (arrays are wrapped), or null for null data. */
  public List<?> getList(Object data) {
    if (data == null) {
      return null;
    }
    // Arrays are exposed through a List view so both layouts share one code path.
    Object listLike = data.getClass().isArray()
        ? java.util.Arrays.asList((Object[]) data)
        : data;
    return (List<?>) listLike;
  }

  /** Returns the element count, or -1 for null data. */
  public int getListLength(Object data) {
    List<?> elements = getList(data);
    return elements == null ? -1 : elements.size();
  }

  /** Returns the element at index, or null for null data or an out-of-range index. */
  public Object getListElement(Object data, int index) {
    List<?> elements = getList(data);
    if (elements != null && index >= 0 && index < elements.size()) {
      return elements.get(index);
    }
    return null;
  }

}

MapObjectInspector

/**
 * ObjectInspector for types whose category is MAP.
 */
public interface MapObjectInspector extends ObjectInspector {

  // ---- Type-level methods: no data object required ----

  /** Returns the ObjectInspector describing the map keys. */
  ObjectInspector getMapKeyObjectInspector();

  /** Returns the ObjectInspector describing the map values. */
  ObjectInspector getMapValueObjectInspector();

  // ---- Data-level methods: operate on a data object ----

  /**
   * Looks up the value stored under key.
   *
   * The key has to have the structure the Map expects. Keys are primitive
   * in most cases, which makes this unproblematic. In the rare case of a
   * non-primitive key, the user is responsible for defining hashCode() and
   * equals() on the key class.
   */
  Object getMapValueElement(Object data, Object key);

  /**
   * Returns the data as a Map, or null when data is null.
   */
  Map<?,?> getMap(Object data);

}
StandardMapObjectInspector

StandardMapObjectInspector 需要提供 key 和 value 的 ObjectInspector。

/**
 * General-purpose MapObjectInspector for data stored as a java.util.Map,
 * built from one inspector for the keys and one for the values.
 */
class StandardMapObjectInspector implements MapObjectInspector {

  /** Inspector describing the keys. */
  ObjectInspector mapKeyObjectInspector;
  /** Inspector describing the values. */
  ObjectInspector mapValueObjectInspector;

  /**
   * Not meant to be called directly: use
   * ObjectInspectorFactory.getStandardMapObjectInspector instead.
   */
  protected StandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
    this.mapKeyObjectInspector = mapKeyObjectInspector;
    this.mapValueObjectInspector = mapValueObjectInspector;
  }

  /** Always reports the MAP category. */
  public final Category getCategory() {
    return Category.MAP;
  }

  /** Builds the type name from the map type name and the key and value type names in angle brackets. */
  public String getTypeName() {
    StringBuilder typeName = new StringBuilder();
    typeName.append(org.apache.hadoop.hive.serde.Constants.MAP_TYPE_NAME);
    typeName.append("<");
    typeName.append(mapKeyObjectInspector.getTypeName());
    typeName.append(",");
    typeName.append(mapValueObjectInspector.getTypeName());
    typeName.append(">");
    return typeName.toString();
  }

  // ---- Type-level methods ----

  /** Returns the inspector describing the keys. */
  public ObjectInspector getMapKeyObjectInspector() {
    return this.mapKeyObjectInspector;
  }

  /** Returns the inspector describing the values. */
  public ObjectInspector getMapValueObjectInspector() {
    return this.mapValueObjectInspector;
  }

  // ---- Data-level methods ----

  /**
   * Returns the value stored under key, or null when either argument is null.
   * TODO: Now we assume the key Object supports hashCode and equals functions.
   */
  public Object getMapValueElement(Object data, Object key) {
    if (data != null && key != null) {
      return ((Map<?,?>) data).get(key);
    }
    return null;
  }

  /** Returns the number of entries, or -1 for null data. */
  int getMapSize(Object data) {
    return data == null ? -1 : ((Map<?,?>) data).size();
  }

  /** Returns the data cast to a Map, or null for null data. */
  public Map<?,?> getMap(Object data) {
    return data == null ? null : (Map<?,?>) data;
  }
}

StructObjectInspector

/**
 * ObjectInspector for types whose category is STRUCT.
 */
public interface StructObjectInspector extends ObjectInspector {

  // ---- Type-level methods: no data object required ----

  /**
   * Returns every field of the struct.
   */
  List<? extends StructField> getAllStructFieldRefs();

  /**
   * Looks up a single field by name.
   */
  StructField getStructFieldRef(String fieldName);

  // ---- Data-level methods: operate on a data object ----

  /**
   * Returns the data of one field, or null when data is null.
   */
  Object getStructFieldData(Object data, StructField fieldRef);

  /**
   * Returns the data of all fields as a list, or null when data is null.
   */
  List<Object> getStructFieldsDataAsList(Object data);
}
  • StructField
    StructField 提供字段名和该字段的 ObjectInspector。
/**
 * StructField describes a single field of a struct: it exposes the field's
 * name and the ObjectInspector for the field's values.
 * 
 * Classes implementing this interface are considered to represent 
 * a field of a struct for this serde package.
 */
public interface StructField {

  /**
   * Get the name of the field.  The name should be always in lower-case.
   *
   * @return the field name
   */
  String getFieldName();
  
  /**
   * Get the ObjectInspector for the field.
   *
   * @return the ObjectInspector describing the values stored in this field
   */
  ObjectInspector getFieldObjectInspector();
  
}

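StructObjectInspector 有四个子类,分别是 LazySimpleStructObjectInspector、StandardStructObjectInspector、MetadataListStructObjectInspector(继承自 StandardStructObjectInspector)和 UnionStructObjectInspector。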

LazySimpleStructObjectInspector
/**
 * StructObjectInspector for data held in a LazyStruct. Field values are
 * fetched from the LazyStruct by field position.
 */
public class LazySimpleStructObjectInspector implements StructObjectInspector {

  public static final Log LOG = LogFactory.getLog(LazySimpleStructObjectInspector.class.getName());

  /**
   * Field descriptor: position inside the struct, lower-cased name, and
   * the ObjectInspector for the field's values.
   */
  protected static class MyField implements StructField {
    protected int fieldID;
    protected String fieldName;
    protected ObjectInspector fieldObjectInspector;

    public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) {
      this.fieldID = fieldID;
      // Names are stored in lower case.
      this.fieldName = fieldName.toLowerCase();
      this.fieldObjectInspector = fieldObjectInspector;
    }

    /** Returns the position of the field inside the struct. */
    public int getFieldID() {
      return this.fieldID;
    }

    /** Returns the lower-cased field name. */
    public String getFieldName() {
      return this.fieldName;
    }

    /** Returns the inspector for the field's values. */
    public ObjectInspector getFieldObjectInspector() {
      return this.fieldObjectInspector;
    }

    /** Renders the field as "id:name". */
    public String toString() {
      return fieldID + ":" + fieldName;
    }
  }

  /** The struct's fields, ordered by field ID. */
  protected List<MyField> fields;

  /**
   * Not meant to be called directly: use
   * ObjectInspectorFactory.getLazySimpleStructObjectInspector instead.
   */
  protected LazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    init(structFieldNames, structFieldObjectInspectors);
  }

  /** Builds an inspector whose fields mirror the names and inspectors of the given fields. */
  protected LazySimpleStructObjectInspector(List<StructField> fields) {
    init(fields);
  }

  /** Creates one MyField per (name, inspector) pair; the two lists are expected to have equal length. */
  protected void init(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    final int fieldCount = structFieldNames.size();
    assert fieldCount == structFieldObjectInspectors.size();

    this.fields = new ArrayList<MyField>(fieldCount);
    for (int id = 0; id < fieldCount; id++) {
      String name = structFieldNames.get(id);
      ObjectInspector inspector = structFieldObjectInspectors.get(id);
      this.fields.add(new MyField(id, name, inspector));
    }
  }

  /** Creates one MyField per given field, numbering them in list order. */
  protected void init(List<StructField> fields) {
    this.fields = new ArrayList<MyField>(fields.size());
    int id = 0;
    for (StructField source : fields) {
      this.fields.add(new MyField(id++, source.getFieldName(), source.getFieldObjectInspector()));
    }
  }

  /** Returns the type name ObjectInspectorUtils derives for this struct. */
  public String getTypeName() {
    return ObjectInspectorUtils.getStandardStructTypeName(this);
  }

  /** Always reports the STRUCT category. */
  public final Category getCategory() {
    return Category.STRUCT;
  }

  // ---- Type-level methods ----

  /** Returns all fields of the struct. */
  public List<? extends StructField> getAllStructFieldRefs() {
    return this.fields;
  }

  /** Looks the field up by name via ObjectInspectorUtils. */
  public StructField getStructFieldRef(String fieldName) {
    return ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields);
  }

  // ---- Data-level methods ----

  /**
   * Returns the value of one field of the given LazyStruct, or null for
   * null data. fieldRef must be a MyField of this class.
   */
  @SuppressWarnings("unchecked")
  public Object getStructFieldData(Object data, StructField fieldRef) {
    if (data == null) {
      return null;
    }
    LazyStruct lazyStruct = (LazyStruct) data;
    MyField field = (MyField) fieldRef;

    final int position = field.getFieldID();
    assert position >= 0 && position < fields.size();

    return lazyStruct.getField(position);
  }

  /** Returns the values of all fields in field order, or null for null data. */
  @Override
  public List<Object> getStructFieldsDataAsList(Object data) {
    if (data == null) {
      return null;
    }
    List<Object> values = new ArrayList<Object>(fields.size());
    LazyStruct lazyStruct = (LazyStruct) data;
    int position = 0;
    while (position < fields.size()) {
      values.add(lazyStruct.getField(position));
      position++;
    }
    return values;
  }

}
StandardStructObjectInspector
/**
 * General-purpose StructObjectInspector for data stored as a java.util.List
 * or an Object[] array, holding one element per field.
 */
public class StandardStructObjectInspector implements StructObjectInspector {

  public static final Log LOG = LogFactory.getLog(StandardStructObjectInspector.class.getName());

  /**
   * Field descriptor: position inside the struct, lower-cased name, and
   * the ObjectInspector for the field's values.
   */
  protected static class MyField implements StructField {
    protected int fieldID;
    protected String fieldName;
    protected ObjectInspector fieldObjectInspector;

    public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) {
      this.fieldID = fieldID;
      // Names are stored in lower case.
      this.fieldName = fieldName.toLowerCase();
      this.fieldObjectInspector = fieldObjectInspector;
    }

    /** Returns the position of the field inside the struct. */
    public int getFieldID() {
      return this.fieldID;
    }

    /** Returns the lower-cased field name. */
    public String getFieldName() {
      return this.fieldName;
    }

    /** Returns the inspector for the field's values. */
    public ObjectInspector getFieldObjectInspector() {
      return this.fieldObjectInspector;
    }

    /** Renders the field as "id:name". */
    public String toString() {
      return fieldID + ":" + fieldName;
    }
  }

  /** The struct's fields, ordered by field ID. */
  protected List<MyField> fields;

  /** Set once the size-mismatch warning has been logged, so it is logged only once per instance. */
  boolean warned = false;

  /**
   * Not meant to be called directly: use
   * ObjectInspectorFactory.getStandardStructObjectInspector instead.
   */
  protected StandardStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    init(structFieldNames, structFieldObjectInspectors);
  }

  /** Builds an inspector whose fields mirror the names and inspectors of the given fields. */
  protected StandardStructObjectInspector(List<StructField> fields) {
    init(fields);
  }

  /** Creates one MyField per (name, inspector) pair; the two lists are expected to have equal length. */
  protected void init(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    final int fieldCount = structFieldNames.size();
    assert fieldCount == structFieldObjectInspectors.size();

    this.fields = new ArrayList<MyField>(fieldCount);
    for (int id = 0; id < fieldCount; id++) {
      String name = structFieldNames.get(id);
      ObjectInspector inspector = structFieldObjectInspectors.get(id);
      this.fields.add(new MyField(id, name, inspector));
    }
  }

  /** Creates one MyField per given field, numbering them in list order. */
  protected void init(List<StructField> fields) {
    this.fields = new ArrayList<MyField>(fields.size());
    int id = 0;
    for (StructField source : fields) {
      this.fields.add(new MyField(id++, source.getFieldName(), source.getFieldObjectInspector()));
    }
  }

  /** Returns the type name ObjectInspectorUtils derives for this struct. */
  public String getTypeName() {
    return ObjectInspectorUtils.getStandardStructTypeName(this);
  }

  /** Always reports the STRUCT category. */
  public final Category getCategory() {
    return Category.STRUCT;
  }

  // ---- Type-level methods ----

  /** Returns all fields of the struct. */
  public List<? extends StructField> getAllStructFieldRefs() {
    return this.fields;
  }

  /** Looks the field up by name via ObjectInspectorUtils. */
  public StructField getStructFieldRef(String fieldName) {
    return ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields);
  }

  // ---- Data-level methods ----

  /**
   * Returns the value of one field, or null when data is null or the data
   * holds fewer elements than the field's position requires. A mismatch
   * between the field count and the data size is logged once per instance.
   * fieldRef must be a MyField of this class.
   */
  @SuppressWarnings("unchecked")
  public Object getStructFieldData(Object data, StructField fieldRef) {
    List<Object> values = getStructFieldsDataAsList(data);
    if (values == null) {
      return null;
    }
    MyField field = (MyField) fieldRef;
    if (fields.size() != values.size() && !warned) {
      // TODO: remove this
      warned = true;
      LOG.warn("Trying to access " + fields.size() + " fields inside a list of " + values.size()
          + " elements: " + values);
      LOG.warn("ignoring similar errors.");
    }
    final int position = field.getFieldID();
    assert position >= 0 && position < fields.size();

    if (position >= values.size()) {
      return null;
    }
    return values.get(position);
  }

  /** Returns the data as a List (arrays are wrapped), or null for null data. */
  @SuppressWarnings("unchecked")
  public List<Object> getStructFieldsDataAsList(Object data) {
    if (data == null) {
      return null;
    }
    Object listLike = data.getClass().isArray()
        ? java.util.Arrays.asList((Object[]) data)
        : data;
    List<Object> values = (List<Object>) listLike;
    assert values.size() == fields.size();
    return values;
  }

}

MetadataListStructObjectInspector
/**
 * StandardStructObjectInspector whose fields all use the standard primitive
 * inspector for String. Data may be passed wrapped in a ColumnSet, in which
 * case its col member is inspected. Instances are cached per column-name list.
 */
public class MetadataListStructObjectInspector extends StandardStructObjectInspector {

  /** Instances created so far, keyed by their column-name list. */
  static HashMap<List<String>, MetadataListStructObjectInspector> cached
     = new HashMap<List<String>, MetadataListStructObjectInspector>();

  /** Returns the inspector for the given number of fields, named by ObjectInspectorUtils.getIntegerArray. */
  public static MetadataListStructObjectInspector getInstance(int fields) {
    List<String> generatedNames = ObjectInspectorUtils.getIntegerArray(fields);
    return getInstance(generatedNames);
  }

  /** Returns the cached inspector for the given column names, creating and caching it on first use. */
  public static MetadataListStructObjectInspector getInstance(List<String> columnNames) {
    MetadataListStructObjectInspector existing = cached.get(columnNames);
    if (existing != null) {
      return existing;
    }
    MetadataListStructObjectInspector created = new MetadataListStructObjectInspector(columnNames);
    cached.put(columnNames, created);
    return created;
  }

  /** Returns a list holding the standard String inspector once per field. */
  static ArrayList<ObjectInspector> getFieldObjectInspectors(int fields) {
    ArrayList<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(fields);
    int added = 0;
    while (added < fields) {
      inspectors.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
      added++;
    }
    return inspectors;
  }

  MetadataListStructObjectInspector(List<String> columnNames) {
    super(columnNames, getFieldObjectInspectors(columnNames.size()));
  }

  /** Replaces a ColumnSet by its col member; any other object is returned as is. */
  private static Object unwrapColumnSet(Object data) {
    return (data instanceof ColumnSet) ? ((ColumnSet) data).col : data;
  }

  /** Unwraps a ColumnSet, then delegates to the superclass. */
  public Object getStructFieldData(Object data, StructField fieldRef) {
    return super.getStructFieldData(unwrapColumnSet(data), fieldRef);
  }

  /** Unwraps a ColumnSet, then delegates to the superclass. */
  public List<Object> getStructFieldsDataAsList(Object data) {
    return super.getStructFieldsDataAsList(unwrapColumnSet(data));
  }

}
UnionStructObjectInspector

UnionStructObjectInspector 实现了 Struct 嵌套 Struct:数据是由多个子 Struct 组成的列表(或数组),对外暴露的字段则是各个子 Struct 的字段依次拼接的结果。

/**
 * Presents several StructObjectInspectors as one struct whose fields are
 * the fields of the member structs, concatenated in member order.
 *
 * The inspected data is a List or Object[] holding one element per member
 * struct; each element is handed to the member's own inspector.
 */
public class UnionStructObjectInspector implements StructObjectInspector {

  /** A field of the union: the index of the member struct it belongs to plus the member's own field. */
  public static class MyField implements StructField {
    public int structID;
    StructField structField;
    public MyField(int structID, StructField structField) {
      this.structID = structID;
      this.structField = structField;
    }
    public String getFieldName() {
      return structField.getFieldName();
    }
    public ObjectInspector getFieldObjectInspector() {
      return structField.getFieldObjectInspector();
    }
  }
  
  /** Inspectors of the member structs, in data order. */
  List<StructObjectInspector> unionObjectInspectors;
  /** All fields of all member structs, in member order. */
  List<MyField> fields;
  
  protected UnionStructObjectInspector(List<StructObjectInspector> unionObjectInspectors) {
    init(unionObjectInspectors);
  }

  /** Stores the member inspectors and flattens their fields into one list. */
  void init(List<StructObjectInspector> unionObjectInspectors) {
    this.unionObjectInspectors = unionObjectInspectors;
    
    // First pass only sizes the field list.
    int totalSize = 0;
    for (int i=0; i<unionObjectInspectors.size(); i++) {
      totalSize += unionObjectInspectors.get(i).getAllStructFieldRefs().size();
    }
    
    fields = new ArrayList<MyField>(totalSize); 
    for (int i=0; i<unionObjectInspectors.size(); i++) {
      StructObjectInspector oi = unionObjectInspectors.get(i);
      for(StructField sf: oi.getAllStructFieldRefs()) {
        fields.add(new MyField(i, sf));
      }
    }
  }
  
  
  public final Category getCategory() {
    return Category.STRUCT;
  }

  public String getTypeName() {
    return ObjectInspectorUtils.getStandardStructTypeName(this);
  }

  // Without Data
  public StructField getStructFieldRef(String fieldName) {
    return ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields);
  }
  public List<? extends StructField> getAllStructFieldRefs() {
    return fields;
  }

  // With Data
  /**
   * Returns the value of one field by delegating to the inspector of the
   * member struct the field belongs to. Returns null for null data.
   * fieldRef must be a MyField of this class.
   */
  @SuppressWarnings("unchecked")
  public Object getStructFieldData(Object data, StructField fieldRef) {
    if (data == null) {
      return null;
    }
    if (data.getClass().isArray()) {
      data = java.util.Arrays.asList((Object[])data);
    }
    MyField f = (MyField) fieldRef;
    List<Object> list = (List<Object>) data;
    assert(list.size() == unionObjectInspectors.size());
    return unionObjectInspectors.get(f.structID).getStructFieldData(list.get(f.structID), f.structField);
  }

  /**
   * Returns the values of all fields of all member structs as one flat
   * list, or null for null data.
   *
   * A member inspector may return null for its part (for example when that
   * member's data is null). In that case one null per field of that member
   * is added, so that positions stay aligned with getAllStructFieldRefs()
   * and the result matches what getStructFieldData returns for those fields.
   */
  @SuppressWarnings("unchecked")
  public List<Object> getStructFieldsDataAsList(Object data) {
    if (data == null) {
      return null;
    }
    if (data.getClass().isArray()) {
      data = java.util.Arrays.asList((Object[])data);
    }
    List<Object> list = (List<Object>) data;
    assert(list.size() == unionObjectInspectors.size());
    // Explode
    ArrayList<Object> result = new ArrayList<Object>(fields.size());
    for(int i=0; i<unionObjectInspectors.size(); i++) {
      StructObjectInspector oi = unionObjectInspectors.get(i);
      List<Object> part = oi.getStructFieldsDataAsList(list.get(i));
      if (part == null) {
        // addAll(null) would throw NullPointerException; pad with nulls instead.
        int width = oi.getAllStructFieldRefs().size();
        for (int k=0; k<width; k++) {
          result.add(null);
        }
      } else {
        result.addAll(part);
      }
    }
    return result;
  }

}

ObjectInspectorFactory

ObjectInspectorFactory 提供了创建 ObjectInspector 的各种方法。

/**
 * Factory for ObjectInspector instances.
 *
 * Every factory method consults a static HashMap before constructing a new
 * inspector, so repeated requests with an equal key return the instance
 * that was cached first.
 * NOTE(review): the caches are plain HashMaps and no synchronization is
 * visible here - confirm whether this class may be used from several threads.
 */
public class ObjectInspectorFactory {


  /**
   * ObjectInspectorOptions describes what ObjectInspector to use. 
   * JAVA is to use pure JAVA reflection. THRIFT is to use JAVA reflection and filter out __isset fields.
   * New ObjectInspectorOptions can be added here when available.
   * 
   * We choose to use a single HashMap objectInspectorCache to cache all situations for efficiency and code 
   * simplicity.  And we don't expect a case that a user need to create 2 or more different types of 
   * ObjectInspectors for the same Java type.
   */
  public enum ObjectInspectorOptions {
    JAVA,
    THRIFT
  };
  
  // Reflection-based inspectors, keyed by the Java type only (not by the options).
  private static HashMap<Type, ObjectInspector> objectInspectorCache = new HashMap<Type, ObjectInspector>();
  
  /**
   * Returns the reflection-based ObjectInspector for the given Java type,
   * creating and caching it on first use.
   *
   * @param t the Java type to inspect
   * @param options whether to build plain-reflection (JAVA) or THRIFT struct inspectors
   * @return the cached or newly created inspector
   * @throws RuntimeException if the inspector cached for t is a struct inspector of the
   *         other option's class, i.e. the same type was requested with both JAVA and THRIFT
   */
  public static ObjectInspector getReflectionObjectInspector(Type t, ObjectInspectorOptions options) {
    ObjectInspector oi = objectInspectorCache.get(t);
    if (oi == null) {
      oi = getReflectionObjectInspectorNoCache(t, options);
      objectInspectorCache.put(t, oi);
    }
    // The cache key ignores options, so detect a hit that was built with the other option.
    if ((options.equals(ObjectInspectorOptions.JAVA) && oi.getClass().equals(ThriftStructObjectInspector.class))
        || (options.equals(ObjectInspectorOptions.THRIFT) && oi.getClass().equals(ReflectionStructObjectInspector.class))) {
      throw new RuntimeException("Cannot call getObjectInspectorByReflection with both JAVA and THRIFT !");
    }
    return oi;
  }
  
  /**
   * Builds the ObjectInspector for a Java type without first looking in objectInspectorCache.
   *
   * Generic array types and parameterized List types become standard list inspectors,
   * parameterized Map types become standard map inspectors, primitive classes become
   * standard primitive inspectors, and every other class becomes a struct inspector
   * built from its declared fields. Nested types are resolved through
   * getReflectionObjectInspector, i.e. with caching.
   *
   * @throws RuntimeException if t is neither handled above nor a Class, or if options is
   *         not a known value
   */
  private static ObjectInspector getReflectionObjectInspectorNoCache(Type t, ObjectInspectorOptions options) {
    if (t instanceof GenericArrayType) {
      GenericArrayType at = (GenericArrayType)t;
      return getStandardListObjectInspector(
          getReflectionObjectInspector(at.getGenericComponentType(), options));
    }

    if (t instanceof ParameterizedType) {
      ParameterizedType pt = (ParameterizedType)t;
      // List?
      if (List.class.isAssignableFrom((Class<?>)pt.getRawType())) {
        return getStandardListObjectInspector(
            getReflectionObjectInspector(pt.getActualTypeArguments()[0], options));
      }
      // Map?
      if (Map.class.isAssignableFrom((Class<?>)pt.getRawType())) {
        return getStandardMapObjectInspector(
            getReflectionObjectInspector(pt.getActualTypeArguments()[0], options),
            getReflectionObjectInspector(pt.getActualTypeArguments()[1], options));
      }
      // Otherwise convert t to RawType so we will fall into the following if block.
      t = pt.getRawType();
    }
    
    // Must be a class.
    if (!(t instanceof Class)) {
      throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error."); 
    }
    Class<?> c = (Class<?>)t;
    
    // Primitive?
    if (ObjectInspectorUtils.isPrimitiveClass(c)) {
      return getStandardPrimitiveObjectInspector(c);
    }
    
    // Must be struct because List and Map need to be ParameterizedType
    assert(!List.class.isAssignableFrom(c));
    assert(!Map.class.isAssignableFrom(c));
    
    // Create StructObjectInspector
    ReflectionStructObjectInspector oi;
    switch(options) {
    case JAVA: 
      oi = new ReflectionStructObjectInspector();
      break;
    case THRIFT: 
      oi = new ThriftStructObjectInspector();
      break;
    default:
      throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error."); 
    }
    // put it into the cache BEFORE it is initialized to make sure we can catch recursive types. 
    objectInspectorCache.put(t, oi);
    Field[] fields = c.getDeclaredFields();
    ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(fields.length);
    for(int i=0; i<fields.length; i++) {
      // Fields the struct inspector chooses to ignore get no inspector.
      if (!oi.shouldIgnoreField(fields[i].getName())) {
        structFieldObjectInspectors.add(getReflectionObjectInspector(fields[i].getGenericType(), options));
      }
    }
    oi.init(c, structFieldObjectInspectors);
    return oi;
  }
  
  
  // Standard primitive inspectors, keyed by the generalized class.
  private static HashMap<Class<?>, StandardPrimitiveObjectInspector> cachedStandardPrimitiveInspectorCache = new HashMap<Class<?>, StandardPrimitiveObjectInspector>();
  /**
   * Returns the StandardPrimitiveObjectInspector for the given class, creating and caching
   * it on first use. The class is first passed through ObjectInspectorUtils.generalizePrimitive,
   * and the result is used both as cache key and as the inspector's primitive class.
   */
  public static StandardPrimitiveObjectInspector getStandardPrimitiveObjectInspector(Class<?> c) {
    c = ObjectInspectorUtils.generalizePrimitive(c);
    StandardPrimitiveObjectInspector result = cachedStandardPrimitiveInspectorCache.get(c);
    if (result == null) {
      result = new StandardPrimitiveObjectInspector(c);
      cachedStandardPrimitiveInspectorCache.put(c, result);
    }
    return result;
  }
  
  // Standard list inspectors, keyed by their element inspector.
  static HashMap<ObjectInspector, StandardListObjectInspector> cachedStandardListObjectInspector =
    new HashMap<ObjectInspector, StandardListObjectInspector>(); 
  /**
   * Returns the StandardListObjectInspector for the given element inspector, creating and
   * caching it on first use.
   */
  public static StandardListObjectInspector getStandardListObjectInspector(ObjectInspector listElementObjectInspector) {
    StandardListObjectInspector result = cachedStandardListObjectInspector.get(listElementObjectInspector);
    if (result == null) {
      result = new StandardListObjectInspector(listElementObjectInspector);
      cachedStandardListObjectInspector.put(listElementObjectInspector, result);
    }
    return result;
  }

  // Standard map inspectors, keyed by the two-element list [key inspector, value inspector].
  static HashMap<List<ObjectInspector>, StandardMapObjectInspector> cachedStandardMapObjectInspector =
    new HashMap<List<ObjectInspector>, StandardMapObjectInspector>(); 
  /**
   * Returns the StandardMapObjectInspector for the given key and value inspectors, creating
   * and caching it on first use.
   */
  public static StandardMapObjectInspector getStandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
    ArrayList<ObjectInspector> signature = new ArrayList<ObjectInspector>(2);
    signature.add(mapKeyObjectInspector);
    signature.add(mapValueObjectInspector);
    StandardMapObjectInspector result = cachedStandardMapObjectInspector.get(signature);
    if (result == null) {
      result = new StandardMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector);
      cachedStandardMapObjectInspector.put(signature, result);
    }
    return result;
  }
  
  // Standard struct inspectors, keyed by the list [field names, field inspectors].
  static HashMap<ArrayList<List<?>>, StandardStructObjectInspector> cachedStandardStructObjectInspector =
    new HashMap<ArrayList<List<?>>, StandardStructObjectInspector>(); 
  /**
   * Returns the StandardStructObjectInspector for the given field names and field inspectors,
   * creating and caching it on first use. The caller's two lists themselves become part of
   * the cache key.
   */
  public static StandardStructObjectInspector getStandardStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    ArrayList<List<?>> signature = new ArrayList<List<?>>();
    signature.add(structFieldNames);
    signature.add(structFieldObjectInspectors);
    StandardStructObjectInspector result = cachedStandardStructObjectInspector.get(signature);
    if (result == null) {
      result = new StandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
      cachedStandardStructObjectInspector.put(signature, result);
    }
    return result;
  }

  // Lazy simple struct inspectors, keyed by the list [field names, field inspectors].
  static HashMap<ArrayList<List<?>>, LazySimpleStructObjectInspector> cachedLazySimpleStructObjectInspector =
    new HashMap<ArrayList<List<?>>, LazySimpleStructObjectInspector>(); 
  /**
   * Returns the LazySimpleStructObjectInspector for the given field names and field inspectors,
   * creating and caching it on first use. The caller's two lists themselves become part of
   * the cache key.
   */
  public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    ArrayList<List<?>> signature = new ArrayList<List<?>>();
    signature.add(structFieldNames);
    signature.add(structFieldObjectInspectors);
    LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector.get(signature);
    if (result == null) {
      result = new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors);
      cachedLazySimpleStructObjectInspector.put(signature, result);
    }
    return result;
  }
  
  // Union struct inspectors, keyed by the caller's list of member struct inspectors.
  static HashMap<List<StructObjectInspector>, UnionStructObjectInspector> cachedUnionStructObjectInspector =
    new HashMap<List<StructObjectInspector>, UnionStructObjectInspector>(); 
  /**
   * Returns the UnionStructObjectInspector for the given list of struct inspectors, creating
   * and caching it on first use.
   */
  public static UnionStructObjectInspector getUnionStructObjectInspector(List<StructObjectInspector> structObjectInspectors) {
    UnionStructObjectInspector result = cachedUnionStructObjectInspector.get(structObjectInspectors);
    if (result == null) {
      result = new UnionStructObjectInspector(structObjectInspectors);
      cachedUnionStructObjectInspector.put(structObjectInspectors, result);
    }
    return result;
  }
  
  
}

ObjectInspectorUtils

ObjectInspectorUtils 提供了各种工具方法

isPrimitiveClass 判断 class 是否是基本类型。

/**
   * Tells whether a class is treated as a primitive type: String, Boolean,
   * Character, java.sql.Date, Void, any Number subclass, or a Java
   * primitive class.
   */
  public static boolean isPrimitiveClass(Class<?> c) {
    // Exact matches that are checked before the Number test.
    if (c == String.class || c == Boolean.class
        || c == Character.class || c == java.sql.Date.class) {
      return true;
    }
    if (java.lang.Number.class.isAssignableFrom(c)) {
      return true;
    }
    return c == Void.class || c.isPrimitive();
  }

generalizePrimitive 用于得到泛化的基本类型

/**
   * Maps a Java primitive class (including void) to its wrapper class.
   * Any other argument is returned unchanged.
   */
  public static Class<?> generalizePrimitive(Class<?> primitiveClass) {
    if (primitiveClass == Boolean.TYPE) {
      return Boolean.class;
    } else if (primitiveClass == Byte.TYPE) {
      return Byte.class;
    } else if (primitiveClass == Character.TYPE) {
      return Character.class;
    } else if (primitiveClass == Short.TYPE) {
      return Short.class;
    } else if (primitiveClass == Integer.TYPE) {
      return Integer.class;
    } else if (primitiveClass == Long.TYPE) {
      return Long.class;
    } else if (primitiveClass == Float.TYPE) {
      return Float.class;
    } else if (primitiveClass == Double.TYPE) {
      return Double.class;
    } else if (primitiveClass == Void.TYPE) {
      return Void.class;
    }
    // Not a primitive class: nothing to generalize.
    return primitiveClass;
  }

classToTypeName 提供了 className 和 hive type 的对应关系

 /**
  * The mapping from Java class to the type name constant defined in
  * org.apache.hadoop.hive.serde.Constants. Only the classes registered in
  * the static block below have an entry.
  */
 public static final Map<Class<?>, String> classToTypeName = new HashMap<Class<?>, String>();
  static {
    // Only wrapper classes are registered, not the Java primitive classes themselves.
    classToTypeName.put(Boolean.class, org.apache.hadoop.hive.serde.Constants.BOOLEAN_TYPE_NAME);
    classToTypeName.put(Byte.class, org.apache.hadoop.hive.serde.Constants.TINYINT_TYPE_NAME);
    classToTypeName.put(Short.class, org.apache.hadoop.hive.serde.Constants.SMALLINT_TYPE_NAME);
    classToTypeName.put(Integer.class, org.apache.hadoop.hive.serde.Constants.INT_TYPE_NAME);
    classToTypeName.put(Long.class, org.apache.hadoop.hive.serde.Constants.BIGINT_TYPE_NAME);
    classToTypeName.put(Float.class, org.apache.hadoop.hive.serde.Constants.FLOAT_TYPE_NAME);
    classToTypeName.put(Double.class, org.apache.hadoop.hive.serde.Constants.DOUBLE_TYPE_NAME);
    classToTypeName.put(String.class, org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME);
    classToTypeName.put(java.sql.Date.class, org.apache.hadoop.hive.serde.Constants.DATE_TYPE_NAME);
  }

typeNameToClass 提供 hive type 和 className 的对应关系,与 classToTypeName 相反

  /**
   * The mapping from type name in DDL to the Java class. 
   * Only the type names registered in the static block below have an entry.
   */
  public static final Map<String, Class<?>> typeNameToClass = new HashMap<String, Class<?>>();
  static {
    typeNameToClass.put(Constants.BOOLEAN_TYPE_NAME, Boolean.class);
    typeNameToClass.put(Constants.TINYINT_TYPE_NAME, Byte.class);
    typeNameToClass.put(Constants.SMALLINT_TYPE_NAME, Short.class);
    typeNameToClass.put(Constants.INT_TYPE_NAME, Integer.class);
    typeNameToClass.put(Constants.BIGINT_TYPE_NAME, Long.class);
    typeNameToClass.put(Constants.FLOAT_TYPE_NAME, Float.class);
    typeNameToClass.put(Constants.DOUBLE_TYPE_NAME, Double.class);
    typeNameToClass.put(Constants.STRING_TYPE_NAME, String.class);
    typeNameToClass.put(Constants.DATE_TYPE_NAME, java.sql.Date.class);
    // These types are not supported yet. 
    // TypeNameToClass.put(Constants.DATETIME_TYPE_NAME);
    // TypeNameToClass.put(Constants.TIMESTAMP_TYPE_NAME);
  }

getClassShortName 返回一个类的短名称

/**
   * Returns the type name registered for the class in classToTypeName.
   * For an unregistered class a warning is logged and the class name is
   * returned, with a leading "java.lang." removed.
   */
  public static String getClassShortName(Class<?> classObject) {
    String registeredName = classToTypeName.get(classObject);
    if (registeredName != null) {
      return registeredName;
    }

    // Unregistered class: fall back to the class name itself.
    String fullName = classObject.getName();
    LOG.warn("unsupported class: " + fullName);

    final String javaLangPrefix = "java.lang.";
    return fullName.startsWith(javaLangPrefix)
        ? fullName.substring(javaLangPrefix.length())
        : fullName;
  }

getIntegerArray 返回指定参数大小的 list,如参数为 2,则返回的 list 大小为2,有 “0”,“1” 两个元素

static ArrayList<ArrayList<String>> integerArrayCache = new ArrayList<ArrayList<String>>();
  /**
   * Returns the list of decimal strings "0", "1", ... up to size-1.
   * Results are cached by size, so repeated calls with the same size
   * return the same list instance.
   */
  public static ArrayList<String> getIntegerArray(int size) {
    // Grow the cache with empty slots until slot `size` exists.
    for (int slots = integerArrayCache.size(); slots <= size; slots++) {
      integerArrayCache.add(null);
    }

    ArrayList<String> cachedList = integerArrayCache.get(size);
    if (cachedList != null) {
      return cachedList;
    }

    ArrayList<String> numbers = new ArrayList<String>();
    int next = 0;
    while (next < size) {
      numbers.add(String.valueOf(next));
      next++;
    }
    integerArrayCache.set(size, numbers);
    return numbers;
  }

getIntegerCSV 返回由 0 到 size-1(size 为传入的参数)的整数以逗号拼接而成的 csv 字符串,例如参数为 3 时返回 “0,1,2”。

static ArrayList<String> integerCSVCache = new ArrayList<String>();
  /**
   * Returns the integers 0 .. size-1 joined by commas, e.g. "0,1,2" for
   * size 3 and the empty string for size 0. Results are cached by size.
   */
  public static String getIntegerCSV(int size) {
    // Grow the cache with empty slots until slot `size` exists.
    for (int slots = integerCSVCache.size(); slots <= size; slots++) {
      integerCSVCache.add(null);
    }

    String cachedCsv = integerCSVCache.get(size);
    if (cachedCsv != null) {
      return cachedCsv;
    }

    StringBuilder csv = new StringBuilder();
    for (int number = 0; number < size; number++) {
      if (number != 0) {
        csv.append(",");
      }
      csv.append(number);
    }
    String built = csv.toString();
    integerCSVCache.set(size, built);
    return built;
  }

getStandardObject

得到 o 的标准对象,如果 o 不是原子类型,则进行递归调用。

/**
   * Converts an object described by the given ObjectInspector into its
   * standard form, recursing into non-primitive types: a primitive is
   * returned as is, a list becomes an ArrayList, a map becomes a HashMap,
   * and a struct becomes an ArrayList with one element per field.
   * Returns null for a null object.
   *
   * @throws RuntimeException for a category not handled here
   */
  public static Object getStandardObject(Object o, ObjectInspector oi) {
    if (o == null) {
      return null;
    }

    switch (oi.getCategory()) {
      case PRIMITIVE:
        return o;

      case LIST: {
        ListObjectInspector listInspector = (ListObjectInspector) oi;
        int size = listInspector.getListLength(o);
        ArrayList<Object> standardList = new ArrayList<Object>(size);
        for (int pos = 0; pos < size; pos++) {
          Object element = listInspector.getListElement(o, pos);
          standardList.add(getStandardObject(element, listInspector.getListElementObjectInspector()));
        }
        return standardList;
      }

      case MAP: {
        MapObjectInspector mapInspector = (MapObjectInspector) oi;
        HashMap<Object, Object> standardMap = new HashMap<Object, Object>();
        Map<? extends Object, ? extends Object> source = mapInspector.getMap(o);
        for (Map.Entry<? extends Object, ? extends Object> entry : source.entrySet()) {
          Object standardKey =
              getStandardObject(entry.getKey(), mapInspector.getMapKeyObjectInspector());
          Object standardValue =
              getStandardObject(entry.getValue(), mapInspector.getMapValueObjectInspector());
          standardMap.put(standardKey, standardValue);
        }
        return standardMap;
      }

      case STRUCT: {
        StructObjectInspector structInspector = (StructObjectInspector) oi;
        List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
        ArrayList<Object> standardStruct = new ArrayList<Object>(fieldRefs.size());
        for (StructField fieldRef : fieldRefs) {
          Object fieldData = structInspector.getStructFieldData(o, fieldRef);
          standardStruct.add(getStandardObject(fieldData, fieldRef.getFieldObjectInspector()));
        }
        return standardStruct;
      }

      default:
        throw new RuntimeException("Unknown ObjectInspector category!");
    }
  }
  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值