数据集,Json数组(每行三个字段,字段之间以制表符 \t 分隔:机构编号、创建时间、Json 字符串)
NJBK 2022-04-07 17:13:52 {"inst_id":"NJBK","info_map":"[{\"foreign_loan_prod_code_name\":\"单期贷\",\"ant_credit_prod_code\":\"A91\"},{\"foreign_loan_prod_code_name\":\"分期贷\",\"ant_credit_prod_code\":\"B92\"}]"}
BJBK 2022-04-01 15:11:45 {"inst_id":"BJBK","info_map":"[{\"foreign_loan_prod_code_name\":\"单期贷\",\"ant_credit_prod_code\":\"A91\"},{\"foreign_loan_prod_code_name\":\"分期贷\",\"ant_credit_prod_code\":\"B92\"}]"}
Scala-Json解析代码
import java.util
import org.apache.flink.api.common.state._
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import com.alibaba.fastjson.{JSON, JSONArray, JSONObject, parser}
import org.apache.flink.table.descriptors.Json
import org.apache.flink.util.{Collector,CollectionUtil}
import org.apache.flink.streaming.api.functions.sink.{SinkFunction}
/**
 * One parsed input line.
 *
 * @param orgId      first tab-separated column of the line
 * @param timeCreate first ten characters of the second column (the date part of the timestamp)
 * @param jsonAarray array parsed from the `info_map` string of the JSON column
 */
case class JsonData(
  orgId: String,
  timeCreate: String,
  jsonAarray: JSONArray
)
/**
 * Flink streaming job that reads tab-separated lines from a text file, parses the JSON
 * object in the third column and prints one tuple per element of its `info_map` array.
 *
 * Each line is split on `\t` into `orgId`, a timestamp and a JSON object. The `info_map`
 * field of that object is a string that itself contains a JSON array, so it is parsed a
 * second time. The pipeline first maps every line to a [[JsonData]] and then flattens the
 * array into `(orgId, date, foreign_loan_prod_code_name, ant_credit_prod_code)` tuples.
 */
object FlinkJson {

  /**
   * Builds the pipeline, prints the resulting tuples and runs the job.
   *
   * Lines with fewer than three columns, or a second column shorter than ten characters,
   * still fail with an exception; only a missing `info_map` is tolerated.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val inputPath = "/Users/IdeaProjects/flinktest1/src/main/resources/json.txt"
    val inputStream = env.readTextFile(inputPath)

    val dataStream = inputStream
      .map { (line: String) =>
        val fields = line.split("\t")
        // Keep only the leading yyyy-MM-dd part of the timestamp column.
        val timeCreate = fields(1).substring(0, 10)
        val payload: JSONObject = JSON.parseObject(fields(2))
        // info_map holds a JSON array serialized as a string. parseArray yields null when
        // the field is absent, so fall back to an empty array instead of failing later.
        val infoMap: JSONArray =
          Option(JSON.parseArray(payload.getString("info_map"))).getOrElse(new JSONArray())
        JsonData(fields(0), timeCreate, infoMap)
      }
      .flatMap { (in: JsonData, out: Collector[(String, String, String, String)]) =>
        // getJSONObject replaces the repeated asInstanceOf[JSONObject] downcasts.
        for (i <- 0 until in.jsonAarray.size()) {
          val item = in.jsonAarray.getJSONObject(i)
          out.collect((
            in.orgId,
            in.timeCreate,
            item.getString("foreign_loan_prod_code_name"),
            item.getString("ant_credit_prod_code")
          ))
        }
      }

    dataStream.print()
    env.execute("state test")
  }
}
import org.apache.flink.api.common.state._
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import com.alibaba.fastjson.{JSON, JSONArray, JSONObject, parser}
import org.apache.flink.util.Collector
/**
 * Flink streaming job that reads tab-separated lines from a text file and, in a single
 * `flatMap`, emits one tuple per element of the `info_map` array found in the JSON column.
 *
 * Each line is split on `\t` into `orgId`, a timestamp and a JSON object whose `info_map`
 * field is a string containing a JSON array. The emitted tuples are
 * `(orgId, date, foreign_loan_prod_code_name, ant_credit_prod_code)`.
 */
object FlinkJson {

  /**
   * Builds the pipeline, prints the resulting tuples and runs the job.
   *
   * Lines with fewer than three columns, or a second column shorter than ten characters,
   * still fail with an exception; only a missing `info_map` is tolerated.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val inputPath = "/Users/IdeaProjects/flinktest1/src/main/resources/json.txt"
    val inputStream = env.readTextFile(inputPath)

    val dataStream = inputStream
      .flatMap { (in: String, out: Collector[(String, String, String, String)]) =>
        val fields = in.split("\t")
        // Keep only the leading yyyy-MM-dd part of the timestamp column.
        val timeCreate = fields(1).substring(0, 10)
        val payload: JSONObject = JSON.parseObject(fields(2))
        // info_map is a JSON array serialized as a string; parseArray yields null when the
        // field is absent, in which case the line simply produces no output.
        Option(JSON.parseArray(payload.getString("info_map"))).foreach { items =>
          for (i <- 0 until items.size()) {
            // Read the element directly instead of serializing and re-parsing it.
            val item = items.getJSONObject(i)
            out.collect((
              fields(0),
              timeCreate,
              item.getString("foreign_loan_prod_code_name"),
              item.getString("ant_credit_prod_code")
            ))
          }
        }
      }

    dataStream.print()
    env.execute("state test")
  }
}
Java-Json解析代码
import com.alibaba.fastjson.*;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import java.lang.Integer;
import java.sql.Time;
import java.text.SimpleDateFormat;
/**
 * Flink streaming job that reads tab-separated lines from a text file, expands the
 * {@code info_map} JSON array found in the third column into one tuple per element, and
 * prints the tuples whose date is on or after 2022-05-01.
 *
 * <p>Each line is split on {@code \t} into an organisation id, a timestamp and a JSON object
 * whose {@code info_map} field is a string containing a JSON array. The emitted tuples are
 * {@code (orgId, date, foreign_loan_prod_code_name, ant_credit_prod_code)}.
 */
public class FlinkJson {

    /**
     * Builds the pipeline, prints the filtered tuples and runs the job.
     *
     * <p>Lines with fewer than three columns, or a second column shorter than ten
     * characters, still fail with an exception; only a missing {@code info_map} is tolerated.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // getExecutionEnvironment() is a static factory; call it on the class, not an instance.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String inputPath = "/Users/IdeaProjects/flinktest1/src/main/resources/json.txt";
        DataStreamSource<String> inputStream = env.readTextFile(inputPath);

        SingleOutputStreamOperator<Tuple4<String, String, String, String>> dataStream =
            inputStream.flatMap((String line, Collector<Tuple4<String, String, String, String>> out) -> {
                String[] words = line.split("\t");
                // Keep only the leading yyyy-MM-dd part of the timestamp column.
                String timeCreate = words[1].substring(0, 10);
                JSONObject jsonObj = JSON.parseObject(words[2]);
                // info_map is a JSON array serialized as a string, hence the second parse.
                JSONArray jsonArray = JSON.parseArray(jsonObj.getString("info_map"));
                if (jsonArray == null) {
                    // No info_map on this line: nothing to emit.
                    return;
                }
                // Emit each element exactly once; iterating the array a second time would
                // duplicate every output record.
                for (int i = 0; i < jsonArray.size(); i++) {
                    JSONObject item = jsonArray.getJSONObject(i);
                    out.collect(Tuple4.of(
                        words[0],
                        timeCreate,
                        item.getString("foreign_loan_prod_code_name"),
                        item.getString("ant_credit_prod_code")));
                }
            })
            // The lambda's generic output type is erased, so it is declared explicitly.
            .returns(Types.TUPLE(Types.STRING, Types.STRING, Types.STRING, Types.STRING))
            // yyyy-MM-dd strings order lexicographically the same way as the dates they denote.
            .filter(r -> r.f1.compareTo("2022-05-01") >= 0);

        dataStream.print();
        env.execute();
    }
}