Spark UDF 使用 map 作为形参时,需要继承 Hive 的 GenericUDF:
class TestUDF extends GenericUDF {
// 0. ObjectInspector,通常以成员变量的形式被创建
MapObjectInspector mapOI;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
// 1. 检查该记录是否传过来正确的参数数量
if (arguments.length != 1) {
throw new UDFArgumentLengthException("arguments only one");
}
// 2. 检查该条记录是否传过来正确的参数类型
ObjectInspector a = arguments[0];
if (!(a instanceof MapObjectInspector)) {
throw new UDFArgumentException("argument must be a map<string,string>");
}
// 3. 检查通过后,将参数赋值给成员变量ObjectInspector,为了在evaluate()中使用
this.mapOI= (MapObjectInspector) a;
// 4. 检查map的值和value是否均为string
if(!(this.mapOI.getMapKeyObjectInspector() instanceof StringObjectInspector)) {
throw new UDFArgumentException("argument must be a map<string,string>");
}
if(!(this.mapOI.getMapValueObjectInspector() instanceof StringObjectInspector)) {
throw new UDFArgumentException("argument must be a map<string,string>");
}
// 5. 用工厂类生成用于表示返回值的ObjectInspector,这里返回字符串
return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
// 获取map
Map<String,String> map= (Map<String,String>) this.mapOI.getMap(arguments[0].get());
// 检查null
if (map == null) {
return null;
}
return map.toString();
}
@Override
public String getDisplayString(String[] args) {
return args[0];
}
参考文档
GenericUDF使用详解