需求:拆分 outresult 字段,生成多个列
代码:
1.udtf部分
import com.alibaba.fastjson.JSONException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
/**
* 根据 outresult 进行解析
* 继承org.apache.hadoop.hive.ql.udf.generic.GenericUDTF,
* 实现initialize, process, close三个方法。
*/
public class ExplodeUDTF extends GenericUDTF {
//用于规定列名
private ArrayList<String> colName = new ArrayList();
//用于指定列类型
private ArrayList<ObjectInspector> resType = new ArrayList<>();
@Override
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
if (argOIs.length != 1) {
throw new UDFArgumentLengthException(
"有且只能传入一个参数");
}
if (argOIs[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentException(
"参数类型不匹配");
}
//新增字段的列名
add_colName("col_1");
add_colName("col_2");
add_colName("col_3");
add_colName("col_4");
//新增字段的类型
resType.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
resType.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
resType.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
resType.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
//定义返回的列名和列类型
return ObjectInspectorFactory.getStandardStructObjectInspector(colName,resType);
}
// 此方法即为输入字段的处理逻辑
@Override
public void process(Object[] obj) throws HiveException {
String input = obj[0].toString();
if (StringUtils.isBlank(input)){
return;
}else try{
//创建JSONObject数组,用于接收拿到的json对象
Object[] res = new Object[4];
//因业务需求,不多赘述
res[0] = ''
res[1] = ''
//写出结果
forward(res);
}catch (JSONException e){
e.printStackTrace();
}
}
//对需要清理的方法进行清理,未进行任何操作
@Override
public void close() throws HiveException {
}
}
2.调用部分
/**
 * Driver that registers {@code ExplodeUDTF} as a temporary SQL function and
 * runs a sequence of SQL statements loaded from the {@code /pid2.sql}
 * classpath resource (statements separated by {@code ;}).
 */
public class ExplodeTest {
    public static void main(String[] args) throws IOException {
        SparkSession spark = SparkSession.builder()
                //.master("local[*]")
                .appName("OutResultTest")
                .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .config("spark.sql.parquet.writeLegacyFormat", true)
                .enableHiveSupport()
                .getOrCreate();

        // Load the SQL script from the classpath and split it into statements.
        // FIX: use ExplodeTest.class (the original referenced an unrelated
        // Run2Pid class) and close the reader via try-with-resources so the
        // stream is not leaked.
        String[] pidsql;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                ExplodeTest.class.getResourceAsStream("/pid2.sql"), StandardCharsets.UTF_8))) {
            pidsql = reader.lines().collect(Collectors.joining("\n")).split(";");
        }

        Dataset<Row> test = spark.sql(pidsql[1]);
        // createOrReplaceTempView replaces the registerTempTable API,
        // deprecated since Spark 2.0 (same semantics).
        test.createOrReplaceTempView("test");

        // Register the UDTF through SQL so it can be used in subsequent queries.
        spark.sql("CREATE TEMPORARY FUNCTION myudtf as 'com.aaa.udf.ExplodeUDTF'");

        Dataset<Row> test2 = spark.sql(pidsql[2]);
        test2.createOrReplaceTempView("test2");
        spark.sql(pidsql[3]);
    }
}
若后续想实现列转行,可以与 lateral view 联用。
可参考:
Lateral View语法应用