Flink SQL 在解析 JSON 字符串时存在性能瓶颈:每次解析都需要新建一次解析对象,而 UDTF 函数只需要创建一次对象即可复用,从而解决这个问题。网上关于 UDTF 的样例很少,这里写一个真实场景的样例供大家学习。
需求是解析 JSON 字符串中的某些字段:通过传入一个以逗号分隔的字段名字符串,来指定需要解析哪些字段。
代码如下
package part_11;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
@FunctionHint(output = @DataTypeHint("ROW<l1 STRING, l2 STRING,l3 STRING,l4 STRING,l5 STRING,l6 STRING,l7 STRING,l8 STRING,l9 STRING,l10 STRING,l11 STRING, l12 STRING,l13 STRING,l14 STRING,l15 STRING,l16 STRING,l17 STRING,l18 STRING,l19 STRING,l20 STRING,l21 STRING, l22 STRING,l23 STRING,l24 STRING,l25 STRING,l26 STRING,l27 STRING,l28 STRING,l29 STRING,l30 STRING,l31 STRING, l32 STRING,l33 STRING,l34 STRING,l35 STRING,l36 STRING,l37 STRING,l38 STRING,l39 STRING,l40 STRING,l41 STRING, l42 STRING,l43 STRING,l44 STRING,l45 STRING,l46 STRING,l47 STRING,l48 STRING,l49 STRING,l50 STRING>"))
public class Udtf extends TableFunction<Row> {

    // A single shared ObjectMapper: it is thread-safe for read operations and
    // expensive to construct. Creating one per eval() call — as the original
    // did — defeats the whole point of using a UDTF to avoid per-record
    // object creation.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    // Number of output columns, fixed by the ROW<...> hint above.
    private static final int FIELD_COUNT = 50;

    /**
     * Parses {@code json} and emits one Row of {@value #FIELD_COUNT} STRING
     * columns, one per field name listed in {@code line}.
     *
     * @param json the JSON document to parse
     * @param line comma-separated JSON field names, in output-column order;
     *             if fewer than 50 names are given, the remaining columns
     *             are emitted as empty strings (the original code threw
     *             ArrayIndexOutOfBoundsException in that case)
     * @throws Exception if {@code json} is not parseable JSON
     */
    public void eval(String json, String line) throws Exception {
        JsonNode jsonNode = OBJECT_MAPPER.readTree(json);
        String[] col = line.split(",");
        String[] values = new String[FIELD_COUNT];
        for (int i = 0; i < FIELD_COUNT; i++) {
            // path().asText() returns "" for keys missing from the JSON, so
            // absent fields become empty strings rather than nulls.
            values[i] = (i < col.length) ? jsonNode.path(col[i]).asText() : "";
        }
        // Row.of takes Object...; the String[] is spread into the varargs.
        collect(Row.of((Object[]) values));
    }
}
这里固定传入 50 个字段;对于不需要的字段,后续落表时直接去掉即可。
下面是调用样例,后续需要打成 jar 包到服务器上运行。
package part_11;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
/**
* TODO
*
* @author cjp
* @version 1.0
*/
/**
 * Example driver: registers {@link JsonFunction} as a temporary function and
 * calls it from SQL via a cross (lateral table) join, exploding each JSON
 * string into 50 STRING columns. Package as a jar to run on a cluster.
 *
 * @author cjp
 * @version 1.0
 */
public class Udtf1 {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Three sample records; the last one ("{}") demonstrates that keys
        // absent from the JSON come back as empty strings, not nulls.
        DataStreamSource<String> strDS = env.fromElements(
                "{\"name\":\"John\", \"age\":\"30\", \"city\":\"NewYork\"}",
                "{\"name\":\"sdksk\", \"age\":\"90\", \"city\":\"lkdklsalk\"}",
                "{}"
        );
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // Comma-separated list of the 50 JSON keys to extract, in output-column order.
        String line = "name,age,city,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17,a18,a19,a20,a21,a22,a23,a24,a25,a26,a27,a28,a29,a30,a31,a32,a33,a34,a35,a36,a37,a38,a39,a40,a41,a42,a43,a44,a45,a46,a47";
        Table sensorTable = tableEnv.fromDataStream(strDS, $("words"));
        tableEnv.createTemporaryView("str", sensorTable);
        // Register the UDTF under the name used in the SQL below.
        tableEnv.createTemporaryFunction("JsonFunction", JsonFunction.class);
        // Cross join each source row with the lateral table produced by the UDTF.
        tableEnv
                .sqlQuery("select l1 as name,l2 as age,l3 as city,l4,l5,l6,l7,l8,l9,l10,l11 ,l12 ,l13 ,l14,l15,l16,l17,l18,l19,l20,l21 ,l22 ,l23 ,l24,l25,l26,l27,l28,l29,l30,l31 ,l32 ,l33 ,l34,l35,l36,l37,l38,l39,l40,l41 ,l42 ,l43 ,l44,l45,l46,l47,l48,l49,l50 from str,lateral table (JsonFunction(words, '" + line + "' ))")
                .execute()
                .print();
    }

    /**
     * UDTF that parses a JSON string and emits one Row of 50 STRING columns,
     * one per field name in the comma-separated {@code line} argument.
     * Extends {@code TableFunction<Row>}; results are emitted via
     * {@code collect} rather than returned.
     */
    @FunctionHint(output = @DataTypeHint("ROW<l1 STRING, l2 STRING,l3 STRING,l4 STRING,l5 STRING,l6 STRING,l7 STRING,l8 STRING,l9 STRING,l10 STRING,l11 STRING, l12 STRING,l13 STRING,l14 STRING,l15 STRING,l16 STRING,l17 STRING,l18 STRING,l19 STRING,l20 STRING,l21 STRING, l22 STRING,l23 STRING,l24 STRING,l25 STRING,l26 STRING,l27 STRING,l28 STRING,l29 STRING,l30 STRING,l31 STRING, l32 STRING,l33 STRING,l34 STRING,l35 STRING,l36 STRING,l37 STRING,l38 STRING,l39 STRING,l40 STRING,l41 STRING, l42 STRING,l43 STRING,l44 STRING,l45 STRING,l46 STRING,l47 STRING,l48 STRING,l49 STRING,l50 STRING>"))
    public static class JsonFunction extends TableFunction<Row> {

        // Shared, thread-safe for reads, expensive to build: constructing one
        // per eval() call — as the original did — defeats the point of the UDTF.
        private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

        // Number of output columns, fixed by the ROW<...> hint above.
        private static final int FIELD_COUNT = 50;

        /**
         * @param json the JSON document to parse
         * @param line comma-separated JSON field names, in output-column
         *             order; if fewer than 50 names are given, the remaining
         *             columns are emitted as empty strings (the original code
         *             threw ArrayIndexOutOfBoundsException in that case)
         * @throws Exception if {@code json} is not parseable JSON
         */
        public void eval(String json, String line) throws Exception {
            JsonNode root = OBJECT_MAPPER.readTree(json);
            String[] keys = line.split(",");
            String[] values = new String[FIELD_COUNT];
            for (int i = 0; i < FIELD_COUNT; i++) {
                // path().asText() yields "" for keys missing from the JSON.
                values[i] = (i < keys.length) ? root.path(keys[i]).asText() : "";
            }
            // Row.of takes Object...; the String[] is spread into the varargs.
            collect(Row.of((Object[]) values));
        }
    }
}
运行结果如下:



对于解析不出来的字段,结果直接为空串,可以避免 null 值带来的影响。
本案例供大家学习,网上这方面的资料确实很少。
1965

被折叠的 条评论
为什么被折叠?



