ObjectInspector
ObjectInspector 帮助我们探查复杂对象的内部结构。一个 ObjectInspector 实例代表了一个具体的类型,以及这种类型的数据在内存中的一种特定存储方式。
应当使用 ObjectInspectorFactory 创建 ObjectInspector 对象,因为这样可以保证相同的 ObjectInspector 仅有一个实例。
/**
 * Root interface of the inspector hierarchy: exposes the name and the
 * category of the data type that an inspector describes.
 */
public interface ObjectInspector {

  /** The kinds of data type an ObjectInspector can report. */
  enum Category {
    PRIMITIVE, LIST, MAP, STRUCT
  }

  /**
   * Returns the name of the data type inspected by this ObjectInspector.
   * The name is meant to be shown to the user.
   *
   * Primitive types use a standardized name. Other types may use names such
   * as "list<int>" or "map<int,string>", Java class names, or user-defined
   * type names similar to a typedef.
   */
  String getTypeName();

  /**
   * Returns the category of the inspected type. Depending on the value
   * returned, the implementation must also implement:
   *   PRIMITIVE: PrimitiveObjectInspector
   *   LIST:      ListObjectInspector
   *   MAP:       MapObjectInspector
   *   STRUCT:    StructObjectInspector
   */
  Category getCategory();
}
PrimitiveObjectInspector
/**
 * ObjectInspector specialisation that additionally reports a Java class
 * for the inspected type.
 */
public interface PrimitiveObjectInspector extends ObjectInspector {

  /** Returns the class object associated with the inspected type. */
  Class<?> getPrimitiveClass();
}
StandardPrimitiveObjectInspector
StandardPrimitiveObjectInspector 提供了一种通用的处理各种 PrimitiveObjectInspector 的方法。它的构造方法需要传入参数 primitiveClass。通过 ObjectInspectorUtils,为各种 PrimitiveObjectInspector 提供统一接口。
/**
 * PrimitiveObjectInspector that is fully described by one Java class.
 */
class StandardPrimitiveObjectInspector implements PrimitiveObjectInspector {

  /** The class supplied at construction time. */
  Class<?> primitiveClass;

  /**
   * Not meant to be called directly; use
   * ObjectInspectorFactory.getStandardPrimitiveObjectInspector instead.
   */
  protected StandardPrimitiveObjectInspector(Class<?> primitiveClass) {
    this.primitiveClass = primitiveClass;
  }

  /** Always reports PRIMITIVE. */
  public final Category getCategory() {
    return Category.PRIMITIVE;
  }

  /** Returns whatever ObjectInspectorUtils.getClassShortName yields for the stored class. */
  public String getTypeName() {
    final String shortName = ObjectInspectorUtils.getClassShortName(primitiveClass);
    return shortName;
  }

  /** Returns the class supplied at construction time. */
  public Class<?> getPrimitiveClass() {
    return primitiveClass;
  }
}
ListObjectInspector
/**
 * ObjectInspector for list-typed data.
 */
public interface ListObjectInspector extends ObjectInspector {

  // ** Methods that do not need a data object **

  /** Returns the inspector describing the elements of the list. */
  ObjectInspector getListElementObjectInspector();

  // ** Methods that need a data object **

  /** Returns null for a null list or an out-of-range index. */
  Object getListElement(Object data, int index);

  /** Returns -1 when data is null. */
  int getListLength(Object data);

  /** Returns null when data is null. */
  List<?> getList(Object data);
}
StandardListObjectInspector
StandardListObjectInspector 是 ListObjectInspector 的通用实现。数据可以存储为 List 或者 Array 对象。构建对象,需要提供元素的 ObjectInspector。
/**
 * ListObjectInspector for data held either as a java.util.List or as an
 * Object array.
 */
class StandardListObjectInspector implements ListObjectInspector {

  /** Inspector for the elements of the list. */
  ObjectInspector listElementObjectInspector;

  /**
   * Not meant to be called directly; use
   * ObjectInspectorFactory.getStandardListObjectInspector instead.
   */
  protected StandardListObjectInspector(ObjectInspector listElementObjectInspector) {
    this.listElementObjectInspector = listElementObjectInspector;
  }

  /** Always reports LIST. */
  public final Category getCategory() {
    return Category.LIST;
  }

  /** Builds the name as the list type-name constant followed by "<elementTypeName>". */
  public String getTypeName() {
    return org.apache.hadoop.hive.serde.Constants.LIST_TYPE_NAME
        + "<" + listElementObjectInspector.getTypeName() + ">";
  }

  // ---- no data object required ----

  public ObjectInspector getListElementObjectInspector() {
    return listElementObjectInspector;
  }

  // ---- data object required ----

  /** Returns the element at index, or null for null data or an out-of-range index. */
  public Object getListElement(Object data, int index) {
    List<?> elements = getList(data);
    boolean inRange = elements != null && index >= 0 && index < elements.size();
    return inRange ? elements.get(index) : null;
  }

  /** Returns the number of elements, or -1 for null data. */
  public int getListLength(Object data) {
    List<?> elements = getList(data);
    return elements == null ? -1 : elements.size();
  }

  /**
   * Views data as a List. Arrays are wrapped with Arrays.asList; anything
   * else is cast to List directly. Returns null for null data.
   */
  public List<?> getList(Object data) {
    if (data == null) {
      return null;
    }
    // NOTE(review): the Object[] cast below rejects primitive arrays such as int[].
    Object listLike = data.getClass().isArray()
        ? java.util.Arrays.asList((Object[]) data)
        : data;
    return (List<?>) listLike;
  }
}
MapObjectInspector
/**
 * ObjectInspector for map-typed data.
 */
public interface MapObjectInspector extends ObjectInspector {

  // ** Methods that do not need a data object **

  /** Returns the inspector describing the map keys. */
  ObjectInspector getMapKeyObjectInspector();

  /** Returns the inspector describing the map values. */
  ObjectInspector getMapValueObjectInspector();

  // ** Methods that need a data object **

  /**
   * Looks up the value stored under key. The key has to be of the same
   * structure as the map expects. In most cases the key is a primitive
   * type, so this is not an issue. In the rare case of a non-primitive
   * key, the user is responsible for defining hashCode() and equals() on
   * the key class.
   */
  Object getMapValueElement(Object data, Object key);

  /** Returns null when data is null. */
  Map<?, ?> getMap(Object data);
}
StandardMapObjectInspector
StandardMapObjectInspector 需要提供 key 和 value 的 ObjectInspector。
/**
 * MapObjectInspector for data held as a java.util.Map, described by one
 * inspector for the keys and one for the values.
 */
class StandardMapObjectInspector implements MapObjectInspector {

  ObjectInspector mapKeyObjectInspector;
  ObjectInspector mapValueObjectInspector;

  /**
   * Not meant to be called directly; use
   * ObjectInspectorFactory.getStandardMapObjectInspector instead.
   */
  protected StandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
    this.mapKeyObjectInspector = mapKeyObjectInspector;
    this.mapValueObjectInspector = mapValueObjectInspector;
  }

  /** Always reports MAP. */
  public final Category getCategory() {
    return Category.MAP;
  }

  /** Builds the name as the map type-name constant followed by "<keyTypeName,valueTypeName>". */
  public String getTypeName() {
    return org.apache.hadoop.hive.serde.Constants.MAP_TYPE_NAME
        + "<" + mapKeyObjectInspector.getTypeName() + ","
        + mapValueObjectInspector.getTypeName() + ">";
  }

  // ---- no data object required ----

  public ObjectInspector getMapKeyObjectInspector() {
    return mapKeyObjectInspector;
  }

  public ObjectInspector getMapValueObjectInspector() {
    return mapValueObjectInspector;
  }

  // ---- data object required ----

  /**
   * Returns the value stored under key, or null when either argument is null.
   * TODO: for now the key object is assumed to support hashCode and equals.
   */
  public Object getMapValueElement(Object data, Object key) {
    if (data != null && key != null) {
      return ((Map<?, ?>) data).get(key);
    }
    return null;
  }

  /** Returns the number of entries, or -1 for null data. */
  int getMapSize(Object data) {
    return data == null ? -1 : ((Map<?, ?>) data).size();
  }

  /** Casts data to Map; returns null for null data. */
  public Map<?, ?> getMap(Object data) {
    return data == null ? null : (Map<?, ?>) data;
  }
}
StructObjectInspector
/**
 * ObjectInspector for struct-typed data, i.e. data made of named fields.
 */
public interface StructObjectInspector extends ObjectInspector {

  // ** Methods that do not need a data object **

  /** Returns all the fields. */
  List<? extends StructField> getAllStructFieldRefs();

  /** Looks up a field by name. */
  StructField getStructFieldRef(String fieldName);

  // ** Methods that need a data object **

  /** Returns null when data is null. */
  Object getStructFieldData(Object data, StructField fieldRef);

  /** Returns null when data is null. */
  List<Object> getStructFieldsDataAsList(Object data);
}
- StructField
StructField 提供字段名和该字段的 ObjectInspector。
/**
* StructField describes one field of a struct: it exposes the field's name
* and the ObjectInspector for the field's value.
*
* Classes implementing this interface are considered to represent
* a field of a struct for this serde package.
*/
public interface StructField {
/**
* Get the name of the field. The name should be always in lower-case.
*/
String getFieldName();
/**
* Get the ObjectInspector for the field.
*/
ObjectInspector getFieldObjectInspector();
}
StructObjectInspector 有四个子类,分别是 LazySimpleStructObjectInspector、StandardStructObjectInspector、MetadataListStructObjectInspector 和 UnionStructObjectInspector。
LazySimpleStructObjectInspector
/**
 * StructObjectInspector whose data objects are LazyStruct instances; field
 * values are fetched from the LazyStruct by positional id.
 */
public class LazySimpleStructObjectInspector implements StructObjectInspector {

  public static final Log LOG = LogFactory.getLog(LazySimpleStructObjectInspector.class.getName());

  /** StructField implementation that also carries the field's position. */
  protected static class MyField implements StructField {
    protected int fieldID;
    protected String fieldName;
    protected ObjectInspector fieldObjectInspector;

    /** Stores the position and inspector as given and the name lower-cased. */
    public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) {
      this.fieldID = fieldID;
      this.fieldName = fieldName.toLowerCase();
      this.fieldObjectInspector = fieldObjectInspector;
    }

    public int getFieldID() {
      return fieldID;
    }

    public String getFieldName() {
      return fieldName;
    }

    public ObjectInspector getFieldObjectInspector() {
      return fieldObjectInspector;
    }

    /** Renders as "id:name". */
    public String toString() {
      return fieldID + ":" + fieldName;
    }
  }

  /** The fields of the struct, in positional order. */
  protected List<MyField> fields;

  /**
   * Not meant to be called directly; use
   * ObjectInspectorFactory.getLazySimpleStructObjectInspector instead.
   */
  protected LazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    init(structFieldNames, structFieldObjectInspectors);
  }

  /** Builds the inspector by copying name and inspector from existing fields. */
  protected LazySimpleStructObjectInspector(List<StructField> fields) {
    init(fields);
  }

  /** Creates one MyField per name/inspector pair; both lists are expected to have equal size. */
  protected void init(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    assert(structFieldNames.size() == structFieldObjectInspectors.size());
    final int count = structFieldNames.size();
    fields = new ArrayList<MyField>(count);
    int position = 0;
    while (position < structFieldNames.size()) {
      String name = structFieldNames.get(position);
      ObjectInspector inspector = structFieldObjectInspectors.get(position);
      fields.add(new MyField(position, name, inspector));
      position++;
    }
  }

  /** Re-creates each given field as a MyField numbered by its list position. */
  protected void init(List<StructField> fields) {
    this.fields = new ArrayList<MyField>(fields.size());
    for (int position = 0; position < fields.size(); position++) {
      StructField source = fields.get(position);
      String name = source.getFieldName();
      ObjectInspector inspector = fields.get(position).getFieldObjectInspector();
      this.fields.add(new MyField(position, name, inspector));
    }
  }

  /** Always reports STRUCT. */
  public final Category getCategory() {
    return Category.STRUCT;
  }

  public String getTypeName() {
    return ObjectInspectorUtils.getStandardStructTypeName(this);
  }

  // ---- no data object required ----

  public StructField getStructFieldRef(String fieldName) {
    return ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields);
  }

  public List<? extends StructField> getAllStructFieldRefs() {
    return fields;
  }

  // ---- data object required ----

  /**
   * Returns the value of one field from a LazyStruct, or null for null data.
   * fieldRef must be a MyField of this class.
   */
  @SuppressWarnings("unchecked")
  public Object getStructFieldData(Object data, StructField fieldRef) {
    if (data != null) {
      LazyStruct lazy = (LazyStruct) data;
      MyField field = (MyField) fieldRef;
      int position = field.getFieldID();
      assert(position >= 0 && position < fields.size());
      return lazy.getField(position);
    }
    return null;
  }

  /** Returns the values of all fields in positional order, or null for null data. */
  @Override
  public List<Object> getStructFieldsDataAsList(Object data) {
    if (data == null) {
      return null;
    }
    List<Object> values = new ArrayList<Object>(fields.size());
    LazyStruct lazy = (LazyStruct) data;
    int position = 0;
    while (position < fields.size()) {
      values.add(lazy.getField(position));
      position++;
    }
    return values;
  }
}
StandardStructObjectInspector
/**
 * StructObjectInspector for data held as a java.util.List or an Object
 * array, where the value of field i sits at index i.
 */
public class StandardStructObjectInspector implements StructObjectInspector {

  public static final Log LOG = LogFactory.getLog(StandardStructObjectInspector.class.getName());

  /** StructField implementation that also carries the field's position. */
  protected static class MyField implements StructField {
    protected int fieldID;
    protected String fieldName;
    protected ObjectInspector fieldObjectInspector;

    /** Stores the position and inspector as given and the name lower-cased. */
    public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) {
      this.fieldID = fieldID;
      this.fieldName = fieldName.toLowerCase();
      this.fieldObjectInspector = fieldObjectInspector;
    }

    public int getFieldID() {
      return fieldID;
    }

    public String getFieldName() {
      return fieldName;
    }

    public ObjectInspector getFieldObjectInspector() {
      return fieldObjectInspector;
    }

    /** Renders as "id:name". */
    public String toString() {
      return fieldID + ":" + fieldName;
    }
  }

  /** The fields of the struct, in positional order. */
  protected List<MyField> fields;

  /** Set once the size-mismatch warning has been logged, so it is logged only once per instance. */
  boolean warned = false;

  /**
   * Not meant to be called directly; use
   * ObjectInspectorFactory.getStandardStructObjectInspector instead.
   */
  protected StandardStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    init(structFieldNames, structFieldObjectInspectors);
  }

  /** Builds the inspector by copying name and inspector from existing fields. */
  protected StandardStructObjectInspector(List<StructField> fields) {
    init(fields);
  }

  /** Creates one MyField per name/inspector pair; both lists are expected to have equal size. */
  protected void init(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
    assert(structFieldNames.size() == structFieldObjectInspectors.size());
    final int count = structFieldNames.size();
    fields = new ArrayList<MyField>(count);
    int position = 0;
    while (position < structFieldNames.size()) {
      String name = structFieldNames.get(position);
      ObjectInspector inspector = structFieldObjectInspectors.get(position);
      fields.add(new MyField(position, name, inspector));
      position++;
    }
  }

  /** Re-creates each given field as a MyField numbered by its list position. */
  protected void init(List<StructField> fields) {
    this.fields = new ArrayList<MyField>(fields.size());
    for (int position = 0; position < fields.size(); position++) {
      StructField source = fields.get(position);
      String name = source.getFieldName();
      ObjectInspector inspector = fields.get(position).getFieldObjectInspector();
      this.fields.add(new MyField(position, name, inspector));
    }
  }

  /** Always reports STRUCT. */
  public final Category getCategory() {
    return Category.STRUCT;
  }

  public String getTypeName() {
    return ObjectInspectorUtils.getStandardStructTypeName(this);
  }

  // ---- no data object required ----

  public StructField getStructFieldRef(String fieldName) {
    return ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields);
  }

  public List<? extends StructField> getAllStructFieldRefs() {
    return fields;
  }

  // ---- data object required ----

  /**
   * Returns the value of one field, or null when data is null or the data
   * list is too short to contain that field. fieldRef must be a MyField of
   * this class. A size mismatch between the declared fields and the data is
   * logged the first time it is seen.
   */
  @SuppressWarnings("unchecked")
  public Object getStructFieldData(Object data, StructField fieldRef) {
    List<Object> values = getStructFieldsDataAsList(data);
    if (values == null) {
      return null;
    }
    MyField field = (MyField) fieldRef;
    boolean sizeMismatch = fields.size() != values.size();
    if (sizeMismatch && !warned) {
      // TODO: remove this
      warned = true;
      LOG.warn("Trying to access " + fields.size() + " fields inside a list of " + values.size()
          + " elements: " + values);
      LOG.warn("ignoring similar errors.");
    }
    int position = field.getFieldID();
    assert(position >= 0 && position < fields.size());
    if (position < values.size()) {
      return values.get(position);
    }
    return null;
  }

  /**
   * Views data as a List of field values. Arrays are wrapped with
   * Arrays.asList; anything else is cast to List directly. Returns null for
   * null data.
   */
  @SuppressWarnings("unchecked")
  public List<Object> getStructFieldsDataAsList(Object data) {
    if (data == null) {
      return null;
    }
    Object listLike = data;
    if (data.getClass().isArray()) {
      listLike = java.util.Arrays.asList((Object[]) data);
    }
    List<Object> values = (List<Object>) listLike;
    assert(values.size() == fields.size());
    return values;
  }
}
MetadataListStructObjectInspector
/**
 * StandardStructObjectInspector whose fields all use the standard primitive
 * inspector for String, and which unwraps ColumnSet data objects to their
 * col member before delegating to the superclass.
 */
public class MetadataListStructObjectInspector extends StandardStructObjectInspector {

  /** Instances already created, keyed by their column-name list. */
  static HashMap<List<String>, MetadataListStructObjectInspector> cached
      = new HashMap<List<String>, MetadataListStructObjectInspector>();

  /** Returns the instance whose column names are those produced by ObjectInspectorUtils.getIntegerArray(fields). */
  public static MetadataListStructObjectInspector getInstance(int fields) {
    List<String> columnNames = ObjectInspectorUtils.getIntegerArray(fields);
    return getInstance(columnNames);
  }

  /** Returns the cached instance for columnNames, creating and caching it on first use. */
  public static MetadataListStructObjectInspector getInstance(List<String> columnNames) {
    MetadataListStructObjectInspector inspector = cached.get(columnNames);
    if (inspector != null) {
      return inspector;
    }
    inspector = new MetadataListStructObjectInspector(columnNames);
    cached.put(columnNames, inspector);
    return inspector;
  }

  /** Returns a list holding the standard String primitive inspector repeated fields times. */
  static ArrayList<ObjectInspector> getFieldObjectInspectors(int fields) {
    ArrayList<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(fields);
    int added = 0;
    while (added < fields) {
      inspectors.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
      added++;
    }
    return inspectors;
  }

  MetadataListStructObjectInspector(List<String> columnNames) {
    super(columnNames, getFieldObjectInspectors(columnNames.size()));
  }

  /** Unwraps a ColumnSet to its col member, then delegates to the superclass. */
  public Object getStructFieldData(Object data, StructField fieldRef) {
    Object unwrapped = (data instanceof ColumnSet) ? ((ColumnSet) data).col : data;
    return super.getStructFieldData(unwrapped, fieldRef);
  }

  /** Unwraps a ColumnSet to its col member, then delegates to the superclass. */
  public List<Object> getStructFieldsDataAsList(Object data) {
    Object unwrapped = (data instanceof ColumnSet) ? ((ColumnSet) data).col : data;
    return super.getStructFieldsDataAsList(unwrapped);
  }
}
UnionStructObjectInspector
UnionStructObjectInspector 实现了 Struct 嵌套Struct。
/**
 * StructObjectInspector that presents several struct inspectors as one flat
 * struct. The data object is a List (or Object array) holding one sub-struct
 * per wrapped inspector; the fields of this struct are the fields of all
 * wrapped inspectors concatenated in order.
 */
public class UnionStructObjectInspector implements StructObjectInspector {

  /** A field of the flattened struct: the wrapped field plus the index of the inspector that owns it. */
  public static class MyField implements StructField {
    public int structID;
    StructField structField;

    public MyField(int structID, StructField structField) {
      this.structID = structID;
      this.structField = structField;
    }

    public String getFieldName() {
      return structField.getFieldName();
    }

    public ObjectInspector getFieldObjectInspector() {
      return structField.getFieldObjectInspector();
    }
  }

  /** The wrapped inspectors, one per sub-struct of the data object. */
  List<StructObjectInspector> unionObjectInspectors;
  /** All fields of all wrapped inspectors, in order. */
  List<MyField> fields;

  protected UnionStructObjectInspector(List<StructObjectInspector> unionObjectInspectors) {
    init(unionObjectInspectors);
  }

  /** Stores the wrapped inspectors and builds the flattened field list. */
  void init(List<StructObjectInspector> unionObjectInspectors) {
    this.unionObjectInspectors = unionObjectInspectors;

    // First pass only sizes the list; second pass fills it.
    int totalSize = 0;
    for (int i = 0; i < unionObjectInspectors.size(); i++) {
      totalSize += unionObjectInspectors.get(i).getAllStructFieldRefs().size();
    }

    fields = new ArrayList<MyField>(totalSize);
    for (int i = 0; i < unionObjectInspectors.size(); i++) {
      StructObjectInspector oi = unionObjectInspectors.get(i);
      for (StructField sf : oi.getAllStructFieldRefs()) {
        fields.add(new MyField(i, sf));
      }
    }
  }

  /** Always reports STRUCT. */
  public final Category getCategory() {
    return Category.STRUCT;
  }

  public String getTypeName() {
    return ObjectInspectorUtils.getStandardStructTypeName(this);
  }

  // Without Data

  public StructField getStructFieldRef(String fieldName) {
    return ObjectInspectorUtils.getStandardStructFieldRef(fieldName, fields);
  }

  public List<? extends StructField> getAllStructFieldRefs() {
    return fields;
  }

  // With Data

  /**
   * Returns the value of one field by delegating to the inspector that owns
   * it. Returns null for null data. fieldRef must be a MyField of this class.
   */
  @SuppressWarnings("unchecked")
  public Object getStructFieldData(Object data, StructField fieldRef) {
    if (data == null) {
      return null;
    }
    if (data.getClass().isArray()) {
      data = java.util.Arrays.asList((Object[]) data);
    }
    MyField f = (MyField) fieldRef;
    List<Object> list = (List<Object>) data;
    assert(list.size() == unionObjectInspectors.size());
    return unionObjectInspectors.get(f.structID).getStructFieldData(list.get(f.structID), f.structField);
  }

  /**
   * Returns the values of all fields of all sub-structs as one flat list, or
   * null for null data.
   *
   * A sub-inspector may return null for its part (StructObjectInspector
   * documents this for a null sub-struct). In that case one null is emitted
   * per field of that sub-inspector, so the result stays aligned with
   * getAllStructFieldRefs() instead of failing in addAll.
   */
  @SuppressWarnings("unchecked")
  public List<Object> getStructFieldsDataAsList(Object data) {
    if (data == null) {
      return null;
    }
    if (data.getClass().isArray()) {
      data = java.util.Arrays.asList((Object[]) data);
    }
    List<Object> list = (List<Object>) data;
    assert(list.size() == unionObjectInspectors.size());
    // Explode
    ArrayList<Object> result = new ArrayList<Object>(fields.size());
    for (int i = 0; i < unionObjectInspectors.size(); i++) {
      StructObjectInspector oi = unionObjectInspectors.get(i);
      List<Object> part = oi.getStructFieldsDataAsList(list.get(i));
      if (part == null) {
        int missing = oi.getAllStructFieldRefs().size();
        for (int k = 0; k < missing; k++) {
          result.add(null);
        }
      } else {
        result.addAll(part);
      }
    }
    return result;
  }
}
ObjectInspectorFactory
ObjectInspectorFactory 提供了创建 ObjectInspector 的各种方法。
/**
 * Creates ObjectInspector instances and caches them in static HashMaps so
 * that asking twice with an equal key returns the same instance.
 * NOTE(review): the caches are plain HashMaps and no synchronization is
 * visible in this class - confirm how concurrent callers are handled.
 */
public class ObjectInspectorFactory {
/**
* ObjectInspectorOptions describes what ObjectInspector to use.
* JAVA is to use pure JAVA reflection. THRIFT is to use JAVA reflection and filter out __isset fields.
* New ObjectInspectorOptions can be added here when available.
*
* We choose to use a single HashMap objectInspectorCache to cache all situations for efficiency and code
* simplicity. And we don't expect a case that a user need to create 2 or more different types of
* ObjectInspectors for the same Java type.
*/
public enum ObjectInspectorOptions {
JAVA,
THRIFT
};
// Reflection-built inspectors, keyed by the Java type only (not by the options).
private static HashMap<Type, ObjectInspector> objectInspectorCache = new HashMap<Type, ObjectInspector>();
/**
 * Returns the reflection-based inspector for type t, building and caching
 * it on first use.
 *
 * @throws RuntimeException if the inspector found or built for t is exactly
 *         a ThriftStructObjectInspector while JAVA was requested, or exactly
 *         a ReflectionStructObjectInspector while THRIFT was requested
 */
public static ObjectInspector getReflectionObjectInspector(Type t, ObjectInspectorOptions options) {
ObjectInspector oi = objectInspectorCache.get(t);
if (oi == null) {
oi = getReflectionObjectInspectorNoCache(t, options);
objectInspectorCache.put(t, oi);
}
// The cache ignores options, so a hit may have been built with the other option; reject that.
if ((options.equals(ObjectInspectorOptions.JAVA) && oi.getClass().equals(ThriftStructObjectInspector.class))
|| (options.equals(ObjectInspectorOptions.THRIFT) && oi.getClass().equals(ReflectionStructObjectInspector.class))) {
throw new RuntimeException("Cannot call getObjectInspectorByReflection with both JAVA and THRIFT !");
}
return oi;
}
/**
 * Builds the inspector for type t: generic arrays and parameterized Lists
 * become list inspectors, parameterized Maps become map inspectors,
 * classes accepted by ObjectInspectorUtils.isPrimitiveClass become
 * primitive inspectors, and any other class becomes a struct inspector
 * over its declared fields.
 *
 * @throws RuntimeException if t is not a Class after unwrapping, or if
 *         options is neither JAVA nor THRIFT
 */
private static ObjectInspector getReflectionObjectInspectorNoCache(Type t, ObjectInspectorOptions options) {
if (t instanceof GenericArrayType) {
GenericArrayType at = (GenericArrayType)t;
return getStandardListObjectInspector(
getReflectionObjectInspector(at.getGenericComponentType(), options));
}
if (t instanceof ParameterizedType) {
ParameterizedType pt = (ParameterizedType)t;
// List?
if (List.class.isAssignableFrom((Class<?>)pt.getRawType())) {
return getStandardListObjectInspector(
getReflectionObjectInspector(pt.getActualTypeArguments()[0], options));
}
// Map?
if (Map.class.isAssignableFrom((Class<?>)pt.getRawType())) {
return getStandardMapObjectInspector(
getReflectionObjectInspector(pt.getActualTypeArguments()[0], options),
getReflectionObjectInspector(pt.getActualTypeArguments()[1], options));
}
// Otherwise convert t to RawType so we will fall into the following if block.
t = pt.getRawType();
}
// Must be a class.
if (!(t instanceof Class)) {
throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error.");
}
Class<?> c = (Class<?>)t;
// Primitive?
if (ObjectInspectorUtils.isPrimitiveClass(c)) {
return getStandardPrimitiveObjectInspector(c);
}
// Must be struct because List and Map need to be ParameterizedType
assert(!List.class.isAssignableFrom(c));
assert(!Map.class.isAssignableFrom(c));
// Create StructObjectInspector
ReflectionStructObjectInspector oi;
switch(options) {
case JAVA:
oi = new ReflectionStructObjectInspector();
break;
case THRIFT:
oi = new ThriftStructObjectInspector();
break;
default:
throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error.");
}
// put it into the cache BEFORE it is initialized to make sure we can catch recursive types.
objectInspectorCache.put(t, oi);
Field[] fields = c.getDeclaredFields();
ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(fields.length);
// Recurse into every declared field the inspector does not ask to ignore.
for(int i=0; i<fields.length; i++) {
if (!oi.shouldIgnoreField(fields[i].getName())) {
structFieldObjectInspectors.add(getReflectionObjectInspector(fields[i].getGenericType(), options));
}
}
oi.init(c, structFieldObjectInspectors);
return oi;
}
// Primitive inspectors, keyed by the class after ObjectInspectorUtils.generalizePrimitive.
private static HashMap<Class<?>, StandardPrimitiveObjectInspector> cachedStandardPrimitiveInspectorCache = new HashMap<Class<?>, StandardPrimitiveObjectInspector>();
/**
 * Returns the cached primitive inspector for c, creating it on first use.
 * c is first passed through ObjectInspectorUtils.generalizePrimitive.
 */
public static StandardPrimitiveObjectInspector getStandardPrimitiveObjectInspector(Class<?> c) {
c = ObjectInspectorUtils.generalizePrimitive(c);
StandardPrimitiveObjectInspector result = cachedStandardPrimitiveInspectorCache.get(c);
if (result == null) {
result = new StandardPrimitiveObjectInspector(c);
cachedStandardPrimitiveInspectorCache.put(c, result);
}
return result;
}
// List inspectors, keyed by their element inspector.
static HashMap<ObjectInspector, StandardListObjectInspector> cachedStandardListObjectInspector =
new HashMap<ObjectInspector, StandardListObjectInspector>();
/** Returns the cached list inspector for the given element inspector, creating it on first use. */
public static StandardListObjectInspector getStandardListObjectInspector(ObjectInspector listElementObjectInspector) {
StandardListObjectInspector result = cachedStandardListObjectInspector.get(listElementObjectInspector);
if (result == null) {
result = new StandardListObjectInspector(listElementObjectInspector);
cachedStandardListObjectInspector.put(listElementObjectInspector, result);
}
return result;
}
// Map inspectors, keyed by the two-element list [keyInspector, valueInspector].
static HashMap<List<ObjectInspector>, StandardMapObjectInspector> cachedStandardMapObjectInspector =
new HashMap<List<ObjectInspector>, StandardMapObjectInspector>();
/** Returns the cached map inspector for the given key and value inspectors, creating it on first use. */
public static StandardMapObjectInspector getStandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
ArrayList<ObjectInspector> signature = new ArrayList<ObjectInspector>(2);
signature.add(mapKeyObjectInspector);
signature.add(mapValueObjectInspector);
StandardMapObjectInspector result = cachedStandardMapObjectInspector.get(signature);
if (result == null) {
result = new StandardMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector);
cachedStandardMapObjectInspector.put(signature, result);
}
return result;
}
// Struct inspectors, keyed by the two-element list [fieldNames, fieldInspectors].
static HashMap<ArrayList<List<?>>, StandardStructObjectInspector> cachedStandardStructObjectInspector =
new HashMap<ArrayList<List<?>>, StandardStructObjectInspector>();
/**
 * Returns the cached struct inspector for the given field names and field
 * inspectors, creating it on first use. The caller's lists themselves are
 * stored inside the cache key.
 */
public static StandardStructObjectInspector getStandardStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
ArrayList<List<?>> signature = new ArrayList<List<?>>();
signature.add(structFieldNames);
signature.add(structFieldObjectInspectors);
StandardStructObjectInspector result = cachedStandardStructObjectInspector.get(signature);
if (result == null) {
result = new StandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
cachedStandardStructObjectInspector.put(signature, result);
}
return result;
}
// Lazy struct inspectors, keyed by the two-element list [fieldNames, fieldInspectors].
static HashMap<ArrayList<List<?>>, LazySimpleStructObjectInspector> cachedLazySimpleStructObjectInspector =
new HashMap<ArrayList<List<?>>, LazySimpleStructObjectInspector>();
/**
 * Returns the cached lazy struct inspector for the given field names and
 * field inspectors, creating it on first use. The caller's lists themselves
 * are stored inside the cache key.
 */
public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
ArrayList<List<?>> signature = new ArrayList<List<?>>();
signature.add(structFieldNames);
signature.add(structFieldObjectInspectors);
LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector.get(signature);
if (result == null) {
result = new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors);
cachedLazySimpleStructObjectInspector.put(signature, result);
}
return result;
}
// Union struct inspectors, keyed by the list of wrapped struct inspectors.
static HashMap<List<StructObjectInspector>, UnionStructObjectInspector> cachedUnionStructObjectInspector =
new HashMap<List<StructObjectInspector>, UnionStructObjectInspector>();
/** Returns the cached union inspector for the given list of struct inspectors, creating it on first use. */
public static UnionStructObjectInspector getUnionStructObjectInspector(List<StructObjectInspector> structObjectInspectors) {
UnionStructObjectInspector result = cachedUnionStructObjectInspector.get(structObjectInspectors);
if (result == null) {
result = new UnionStructObjectInspector(structObjectInspectors);
cachedUnionStructObjectInspector.put(structObjectInspectors, result);
}
return result;
}
}
ObjectInspectorUtils
ObjectInspectorUtils 提供了各种工具方法
isPrimitiveClass 判断 class 是否是基本类型。
/**
 * Tells whether c counts as a primitive for inspection purposes: String,
 * Boolean, Character, java.sql.Date, Void, any subclass of Number, or a
 * Java primitive type.
 */
public static boolean isPrimitiveClass(Class<?> c) {
  if (c == String.class || c == Boolean.class || c == Character.class) {
    return true;
  }
  if (c == java.sql.Date.class) {
    return true;
  }
  if (java.lang.Number.class.isAssignableFrom(c)) {
    return true;
  }
  return c == Void.class || c.isPrimitive();
}
generalizePrimitive 用于得到泛化的基本类型
/**
 * Maps a Java primitive type (including void) to its wrapper class.
 * Any other argument, including null, is returned unchanged.
 */
public static Class<?> generalizePrimitive(Class<?> primitiveClass) {
  if (primitiveClass == Boolean.TYPE) {
    return Boolean.class;
  } else if (primitiveClass == Byte.TYPE) {
    return Byte.class;
  } else if (primitiveClass == Character.TYPE) {
    return Character.class;
  } else if (primitiveClass == Short.TYPE) {
    return Short.class;
  } else if (primitiveClass == Integer.TYPE) {
    return Integer.class;
  } else if (primitiveClass == Long.TYPE) {
    return Long.class;
  } else if (primitiveClass == Float.TYPE) {
    return Float.class;
  } else if (primitiveClass == Double.TYPE) {
    return Double.class;
  } else if (primitiveClass == Void.TYPE) {
    return Void.class;
  }
  return primitiveClass;
}
classToTypeName 提供了 className 和 hive type 的对应关系
/**
 * The mapping from a Java class to the corresponding type-name constant.
 * Only the wrapper classes, String and java.sql.Date listed below have an entry.
 */
public static final Map<Class<?>, String> classToTypeName = new HashMap<Class<?>, String>();
static {
classToTypeName.put(Boolean.class, org.apache.hadoop.hive.serde.Constants.BOOLEAN_TYPE_NAME);
classToTypeName.put(Byte.class, org.apache.hadoop.hive.serde.Constants.TINYINT_TYPE_NAME);
classToTypeName.put(Short.class, org.apache.hadoop.hive.serde.Constants.SMALLINT_TYPE_NAME);
classToTypeName.put(Integer.class, org.apache.hadoop.hive.serde.Constants.INT_TYPE_NAME);
classToTypeName.put(Long.class, org.apache.hadoop.hive.serde.Constants.BIGINT_TYPE_NAME);
classToTypeName.put(Float.class, org.apache.hadoop.hive.serde.Constants.FLOAT_TYPE_NAME);
classToTypeName.put(Double.class, org.apache.hadoop.hive.serde.Constants.DOUBLE_TYPE_NAME);
classToTypeName.put(String.class, org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME);
classToTypeName.put(java.sql.Date.class, org.apache.hadoop.hive.serde.Constants.DATE_TYPE_NAME);
}
typeNameToClass 提供 hive type 和 className 的对应关系,与 classToTypeName 相反
/**
* The mapping from type name in DDL to the Java class.
* It holds the same nine pairs as classToTypeName, in the opposite direction.
*/
public static final Map<String, Class<?>> typeNameToClass = new HashMap<String, Class<?>>();
static {
typeNameToClass.put(Constants.BOOLEAN_TYPE_NAME, Boolean.class);
typeNameToClass.put(Constants.TINYINT_TYPE_NAME, Byte.class);
typeNameToClass.put(Constants.SMALLINT_TYPE_NAME, Short.class);
typeNameToClass.put(Constants.INT_TYPE_NAME, Integer.class);
typeNameToClass.put(Constants.BIGINT_TYPE_NAME, Long.class);
typeNameToClass.put(Constants.FLOAT_TYPE_NAME, Float.class);
typeNameToClass.put(Constants.DOUBLE_TYPE_NAME, Double.class);
typeNameToClass.put(Constants.STRING_TYPE_NAME, String.class);
typeNameToClass.put(Constants.DATE_TYPE_NAME, java.sql.Date.class);
// These types are not supported yet, so they have no entry:
// TypeNameToClass.put(Constants.DATETIME_TYPE_NAME);
// TypeNameToClass.put(Constants.TIMESTAMP_TYPE_NAME);
}
getClassShortName 返回一个类的短名称
/**
 * Returns the display name for a class: the entry in classToTypeName when
 * there is one; otherwise the class name with a leading "java.lang."
 * removed, after logging a warning that the class is unsupported.
 */
public static String getClassShortName(Class<?> classObject) {
  String mapped = classToTypeName.get(classObject);
  if (mapped != null) {
    return mapped;
  }
  String fullName = classObject.getName();
  LOG.warn("unsupported class: " + fullName);
  // Remove prefix
  final String javaLangPrefix = "java.lang.";
  return fullName.startsWith(javaLangPrefix)
      ? fullName.substring(javaLangPrefix.length())
      : fullName;
}
getIntegerArray 返回长度等于参数值的 list,元素为从 "0" 开始的整数字符串。例如参数为 2 时,返回的 list 大小为 2,包含 "0"、"1" 两个元素。
/** Lists already built by getIntegerArray; the slot index is the list size, null means not built yet. */
static ArrayList<ArrayList<String>> integerArrayCache = new ArrayList<ArrayList<String>>();
/**
 * Returns the list "0", "1", ..., "size-1" of decimal strings.
 * The list is built once per size and the same instance is handed out on
 * every later call with that size.
 */
public static ArrayList<String> getIntegerArray(int size) {
  // Grow the cache with empty slots until index `size` exists.
  for (int slot = integerArrayCache.size(); slot <= size; slot++) {
    integerArrayCache.add(null);
  }
  ArrayList<String> cachedList = integerArrayCache.get(size);
  if (cachedList != null) {
    return cachedList;
  }
  ArrayList<String> fresh = new ArrayList<String>();
  int next = 0;
  while (next < size) {
    fresh.add(String.valueOf(next));
    next++;
  }
  integerArrayCache.set(size, fresh);
  return fresh;
}
getIntegerCSV 返回由 0 到 size-1 的整数以逗号连接而成的 CSV 字符串。例如参数为 3 时,返回 "0,1,2"。
/** Strings already built by getIntegerCSV; the slot index is the size, null means not built yet. */
static ArrayList<String> integerCSVCache = new ArrayList<String>();
/**
 * Returns the integers 0 .. size-1 joined by commas, e.g. "0,1,2" for 3 and
 * the empty string for 0. The string is built once per size and then cached.
 */
public static String getIntegerCSV(int size) {
  // Grow the cache with empty slots until index `size` exists.
  for (int slot = integerCSVCache.size(); slot <= size; slot++) {
    integerCSVCache.add(null);
  }
  String cachedCsv = integerCSVCache.get(size);
  if (cachedCsv != null) {
    return cachedCsv;
  }
  StringBuilder joined = new StringBuilder();
  int next = 0;
  while (next < size) {
    if (next != 0) {
      joined.append(",");
    }
    joined.append(next);
    next++;
  }
  String built = joined.toString();
  integerCSVCache.set(size, built);
  return built;
}
getStandardObject
得到 o 的标准对象,如果 o 不是原子类型,则进行递归调用。
/**
 * Converts o, as described by oi, into plain Java collections: primitives
 * are returned as-is, lists become ArrayList, maps become HashMap and
 * structs become an ArrayList of their field values. Nested values are
 * converted recursively. Returns null for null input.
 *
 * @throws RuntimeException if oi reports a category other than
 *         PRIMITIVE, LIST, MAP or STRUCT
 */
public static Object getStandardObject(Object o, ObjectInspector oi) {
  if (o == null) {
    return null;
  }

  switch (oi.getCategory()) {
    case PRIMITIVE:
      return o;

    case LIST: {
      ListObjectInspector listInspector = (ListObjectInspector) oi;
      int count = listInspector.getListLength(o);
      ArrayList<Object> copy = new ArrayList<Object>(count);
      int idx = 0;
      while (idx < count) {
        Object element = listInspector.getListElement(o, idx);
        copy.add(getStandardObject(element, listInspector.getListElementObjectInspector()));
        idx++;
      }
      return copy;
    }

    case MAP: {
      MapObjectInspector mapInspector = (MapObjectInspector) oi;
      HashMap<Object, Object> copy = new HashMap<Object, Object>();
      Map<? extends Object, ? extends Object> source = mapInspector.getMap(o);
      for (Map.Entry<? extends Object, ? extends Object> pair : source.entrySet()) {
        Object standardKey =
            getStandardObject(pair.getKey(), mapInspector.getMapKeyObjectInspector());
        Object standardValue =
            getStandardObject(pair.getValue(), mapInspector.getMapValueObjectInspector());
        copy.put(standardKey, standardValue);
      }
      return copy;
    }

    case STRUCT: {
      StructObjectInspector structInspector = (StructObjectInspector) oi;
      List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
      ArrayList<Object> copy = new ArrayList<Object>(fieldRefs.size());
      for (StructField fieldRef : fieldRefs) {
        Object fieldValue = structInspector.getStructFieldData(o, fieldRef);
        copy.add(getStandardObject(fieldValue, fieldRef.getFieldObjectInspector()));
      }
      return copy;
    }

    default:
      throw new RuntimeException("Unknown ObjectInspector category!");
  }
}