一、需求分析
订单场景,订单状态分为
// WHEN oms_order.order_state = 11 THEN '待支付'
// WHEN oms_order.order_state = 12 THEN '订购成功'
// WHEN oms_order.order_state = 21 THEN '订单已提交到厂家'
// WHEN oms_order.order_state = 22 THEN '厂家已接受订单'
// WHEN oms_order.order_state = 31 THEN '生产中'
// WHEN oms_order.order_state = 32 THEN '生产完成'
// WHEN oms_order.order_state = 41 THEN '车辆已发出'
// WHEN oms_order.order_state = 42 THEN '车辆已到店'
// WHEN oms_order.order_state = 51 THEN '已完成'
// WHEN oms_order.order_state = 52 THEN '已取消'
// WHEN oms_order.order_state = 53 THEN '已关闭'
// WHEN oms_order.order_state = 62 THEN '已退款'
需求是向业务部门实时推送订单的状态,且每人每订单每个状态只能推送一次
二、技术背景
实时数据存储在阿里云DataHub上。由于实时数据是通过DTS获取的业务库binlog,所以会有大量重复数据,且包含历史数据。由于要对用户进行短信触达,数据必须保证不重复且及时,因此需要舍弃历史数据,并对操作数据去重;然后拼装成业务侧需要的JSON格式,再推送到指定的消息队列,由下游订阅消费。
技术路线:Mysql=>DTS=>DataHub=>Flink(Blink,MapState,Enum,Ontimer)=>DatahubSink
三、技术实现
1、pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.gwm</groupId>
    <artifactId>blink_ae</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <scala.version>2.11.12</scala.version>
        <scala.binary.version>2.11</scala.binary.version>
        <blink.version>blink-3.3.0</blink.version>
        <java.version>1.8</java.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <sdk.version>0.38.3-public</sdk.version>
    </properties>

    <!-- Dead commented-out dependency/build sections removed; see VCS history if needed. -->
    <dependencies>
        <!-- Supplied by the Blink runtime; switch to <scope>provided</scope> when building
             the deployment jar so it does not end up in the fat jar (see deploy notes). -->
        <dependency>
            <groupId>com.alibaba.blink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${blink.version}</version>
            <!-- <scope>provided</scope> -->
        </dependency>
        <!-- DataHub connector is NOT on the cluster classpath, so it is packaged
             (jar-with-dependencies classifier). -->
        <dependency>
            <groupId>com.alibaba.flink</groupId>
            <artifactId>datahub-connector</artifactId>
            <version>0.1-SNAPSHOT</version>
            <classifier>jar-with-dependencies</classifier>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <!-- SECURITY: 1.2.34 is affected by known fastjson deserialization RCEs;
                 1.2.83 is the patched line and is API-compatible for JSON.toJSONString. -->
            <version>1.2.83</version>
        </dependency>
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>42.1.1</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Fat-jar packaging; relocates jersey to avoid classpath clashes with the runtime. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.0</version>
                <configuration>
                    <createDependencyReducedPom>false</createDependencyReducedPom>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <!-- FIX: strip signature files from shaded dependencies; leaving
                                     them in a fat jar causes SecurityException at runtime. -->
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <manifestEntries>
                                        <!-- NOTE(review): the job described in this file lives in
                                             com.gwm.driver.OmsEventInfo — confirm this Main-Class. -->
                                        <Main-Class>com.gwm.driver.AeCollectorData</Main-Class>
                                        <X-Compile-Source-JDK>${maven.compiler.source}</X-Compile-Source-JDK>
                                        <X-Compile-Target-JDK>${maven.compiler.target}</X-Compile-Target-JDK>
                                    </manifestEntries>
                                </transformer>
                            </transformers>
                            <relocations combine.self="override">
                                <relocation>
                                    <pattern>org.glassfish.jersey</pattern>
                                    <shadedPattern>com.alibaba.blink.shaded.datahub.org.glassfish.jersey</shadedPattern>
                                </relocation>
                            </relocations>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
2、Enum类
package com.gwm.pojo;
/**
 * Order-state dictionary: maps oms_order.order_state codes (embedded in each constant's
 * name, e.g. OrderStage12 for code "12") to their Chinese display names.
 *
 * @author yangyingchun
 * @version 1.0
 * @date 2023/2/24 8:53
 */
public enum OrderStage {
    OrderStage11("待支付"), OrderStage12("订购成功"),
    OrderStage21("订单已提交到厂家"), OrderStage22("厂家已接受订单"),
    OrderStage31("生产中"), OrderStage32("生产完成"),
    OrderStage41("车辆已发出"), OrderStage42("车辆已到店"),
    OrderStage51("已完成"), OrderStage52("已取消"),
    OrderStage53("已关闭"), OrderStage62("已退款");

    // Chinese display name for this state.
    private final String chinese;

    // Enum constructors are implicitly private; constants are built at class load.
    OrderStage(String chinese) {
        this.chinese = chinese;
    }

    /** @return the Chinese display name of this order state */
    public String getChinese() {
        return chinese;
    }

    /**
     * Looks up the constant for a raw order_state code.
     *
     * @param code numeric state code as delivered in the binlog record, e.g. "12"
     * @return the matching constant, or null for unknown codes
     */
    public static OrderStage fromCode(String code) {
        for (OrderStage stage : values()) {
            if (stage.name().equals("OrderStage" + code)) {
                return stage;
            }
        }
        return null;
    }
}
3、Class对象
package com.gwm.pojo;
import com.gwm.utils.getString;
import java.io.Serializable;
import java.math.BigInteger;
import java.util.Date;
/**
 * Serializable payload describing one order-state change; serialized to JSON
 * (via fastjson in the driver) and pushed downstream. Field names deliberately
 * use snake_case because they become the JSON keys consumed by the business side.
 *
 * @author yangyingchun
 * @version 1.0
 * @date 2023/2/20 13:31
 */
public class EventSuccessInfo implements Serializable {

    // Pin the serial form so checkpoint/restore survives recompilation.
    private static final long serialVersionUID = 1L;

    private String phone;
    private String order_sn;
    private String brand;
    private String car_model;
    private String action_time;
    private Double paid_amount;
    private String name;
    private String operation_flag;
    private String order_time;
    private String order_state;

    public EventSuccessInfo() {
    }

    public EventSuccessInfo(String phone, String order_sn, String brand, String car_model, String action_time, Double paid_amount, String name, String operation_flag, String order_time, String order_state) {
        this.phone = phone;
        this.order_sn = order_sn;
        this.brand = brand;
        this.car_model = car_model;
        this.action_time = action_time;
        this.paid_amount = paid_amount;
        this.name = name;
        this.operation_flag = operation_flag;
        this.order_time = order_time;
        this.order_state = order_state;
    }

    public String getPhone() {
        return phone;
    }

    public void setPhone(String phone) {
        this.phone = phone;
    }

    public String getOrder_sn() {
        return order_sn;
    }

    public void setOrder_sn(String order_sn) {
        this.order_sn = order_sn;
    }

    public String getBrand() {
        return brand;
    }

    public void setBrand(String brand) {
        this.brand = brand;
    }

    public String getCar_model() {
        return car_model;
    }

    public void setCar_model(String car_model) {
        this.car_model = car_model;
    }

    public String getAction_time() {
        return action_time;
    }

    public void setAction_time(String action_time) {
        this.action_time = action_time;
    }

    public Double getPaid_amount() {
        return paid_amount;
    }

    public void setPaid_amount(Double paid_amount) {
        this.paid_amount = paid_amount;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getOperation_flag() {
        return operation_flag;
    }

    public void setOperation_flag(String operation_flag) {
        this.operation_flag = operation_flag;
    }

    public String getOrder_time() {
        return order_time;
    }

    public void setOrder_time(String order_time) {
        this.order_time = order_time;
    }

    public String getOrder_state() {
        return order_state;
    }

    public void setOrder_state(String order_state) {
        this.order_state = order_state;
    }

    @Override
    public String toString() {
        return "EventSuccessInfo{" +
                "phone='" + phone + '\'' +
                ", order_sn='" + order_sn + '\'' +
                ", brand='" + brand + '\'' +
                ", car_model='" + car_model + '\'' +
                ", action_time='" + action_time + '\'' +
                ", paid_amount=" + paid_amount +
                ", name='" + name + '\'' +
                ", operation_flag='" + operation_flag + '\'' +
                ", order_time='" + order_time + '\'' +
                // BUG FIX: order_state is a String; quote it like the other String fields.
                ", order_state='" + order_state + '\'' +
                '}';
    }
}
4、Main
package com.gwm.driver;
import com.alibaba.fastjson.JSON;
import com.alibaba.flink.connectors.datahub.datastream.source.DatahubSourceFunction;
import com.aliyun.datahub.client.model.RecordEntry;
import com.gwm.pojo.EventSuccessInfo;
import com.gwm.pojo.OrderStage;
import com.gwm.pojo.OrderState;
import com.gwm.utils.TimeToStampUtil;
import com.gwm.utils.getString;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichFilterFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.StringUtils;
import scala.Tuple4;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.UUID;
/**
 * Flink (Blink) job: consumes oms order binlog records from DataHub, keeps only today's
 * after-image rows, de-duplicates by phone + order state + order number (each person is
 * notified at most once per order per state), converts each record to JSON and writes it
 * to a DataHub sink topic.
 *
 * Pipeline: MySQL => DTS => DataHub => Flink (MapState dedup, enum mapping, daily timer) => DataHub.
 *
 * @author yangyingchun
 * @version 1.0
 * @date 2022/11/14 16:26
 */
public class OmsEventInfo {

    // Default connection settings; each can be overridden with a --blink.job.* argument (see main).
    private static String endPoint = "http://datahub.cn-XXXXXXXX-d01.dh.res.cloud.gwm.cn";
    //private static String endPoint = "public endpoint"; // public endpoint (unneeded when the intranet endpoint is set)
    private static String projectName = "oms";
    private static String topicSourceName = "dts_oms_ols_order";
    // private static String topicSourceName = "dts_oms_uat_ols_order";
    private static String accessId = "5jNE06XXXXXleYil";
    private static String accessKey = "Ehsl9n3XXXXXXXCFAaTbm47uHtWQP";
    // Consumption start position; defaults to "now" so historical binlog data is skipped.
    // private static Long datahubStartInMs = TimeToStampUtil.timeToStamp("2023-02-20");
    private static Long datahubStartInMs = System.currentTimeMillis();
    private static Long datahubEndInMs = Long.MAX_VALUE;
    // NOTE(review): SimpleDateFormat is not thread-safe and these static instances are
    // shared by all parallel subtasks in one TaskManager JVM - consider ThreadLocal
    // copies. TODO confirm whether corruption has been observed at parallelism 5.
    private static SimpleDateFormat sd = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    private static SimpleDateFormat sd1 = new SimpleDateFormat("yyyy-MM-dd");
    // Business-time cutoff: only records whose order_time lies after this instant pass the
    // filter. Initialized to today's midnight; reset daily by the processing-time timer.
    private static Date startDate;

    static {
        try {
            // sd1 parses just the leading "yyyy-MM-dd" portion, yielding today's local midnight.
            startDate = sd1.parse(sd.format(new Date()));
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    // getString returns the literal "null" for absent fields; treat that and "" as blank.
    private static boolean isBlank(String s) {
        return "null".equals(s) || "".equals(s);
    }

    public static void main(String[] args) throws Exception {
        ParameterTool params = ParameterTool.fromArgs(args);
        // BUG FIX: the original looked every override up in a Properties object that was
        // never loaded (its load call was commented out), so each --blink.job.* argument
        // silently resolved to null. Overrides are now read from the parsed arguments.
        if (params.has("blink.job.startTimer")) {
            datahubStartInMs = TimeToStampUtil.timeToStamp(params.get("blink.job.startTimer"));
        } else {
            System.out.println("datahubStartInMs not set , use default parameter : System.currentTimeMillis");
        }
        if (params.has("blink.job.endTimer")) {
            datahubEndInMs = TimeToStampUtil.timeToStamp(params.get("blink.job.endTimer"));
        } else {
            System.out.println("datahubEndInMs not set , use default parameter : Long.MAX_VALUE");
        }
        if (params.has("blink.job.endPoint")) {
            endPoint = params.get("blink.job.endPoint");
        } else {
            System.out.println("endPoint not set , use default parameter : http://datahub.cn-baoding-gwmcloud-d01.dh.res.cloud.gwm.cn");
        }
        if (params.has("blink.job.projectName")) {
            projectName = params.get("blink.job.projectName");
        } else {
            System.out.println("projectName not set , use default parameter : OMS");
        }
        if (params.has("blink.job.topicSourceName")) {
            topicSourceName = params.get("blink.job.topicSourceName");
        } else {
            System.out.println("topicSourceName not set , use default parameter : dts_oms_uat_ols_order");
        }
        if (params.has("blink.job.accessId")) {
            accessId = params.get("blink.job.accessId");
        } else {
            System.out.println("accessId not set , use default parameter : 5jNE06TXXXXXXXXXXXXXX8SPHleYil");
        }
        if (params.has("blink.job.accessKey")) {
            accessKey = params.get("blink.job.accessKey");
        } else {
            System.out.println("accessKey not set , use default parameter : Ehsl9n3xhk0XXXXXXXXXXXWvdxCFAaTbm47uHtWQP");
        }

        // Environment setup.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(3600000L);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));
        // Parallelism matches the number of DataHub shards.
        env.setParallelism(5);

        // Source: batches of raw binlog records from DataHub.
        DataStreamSource<List<RecordEntry>> aedata = env.addSource(
                new DatahubSourceFunction(
                        endPoint,
                        projectName,
                        topicSourceName,
                        accessId,
                        accessKey,
                        datahubStartInMs,
                        datahubEndInMs,
                        20L,
                        1000L,
                        1000
                ));

        // Parse each binlog record into EventSuccessInfo, keeping only fresh after-images.
        DataStream<Tuple4<String, EventSuccessInfo, String, Long>> aecollectordataDataStream = aedata.flatMap(
                new FlatMapFunction<List<RecordEntry>, Tuple4<String, EventSuccessInfo, String, Long>>() {
            @Override
            public void flatMap(List<RecordEntry> value, Collector<Tuple4<String, EventSuccessInfo, String, Long>> out) throws Exception {
                for (RecordEntry recordEntry : value) {
                    String phone = getString.getString(recordEntry, "customer_phone");
                    String order_sn = getString.getString(recordEntry, "order_no");
                    String brand_name = getString.getString(recordEntry, "brand");
                    String car_model = getString.getString(recordEntry, "car_model");
                    // Timestamps arrive as microseconds (divided by 1000 below for millis)
                    // - TODO confirm against the DTS topic schema.
                    String rawUpdateTime = getString.getString(recordEntry, "update_time");
                    String action_time = isBlank(rawUpdateTime) ? null
                            : sd.format(new Date(Long.parseLong(rawUpdateTime) / 1000));
                    String rawPaidAmount = getString.getString(recordEntry, "paid_amount");
                    Double paid_amount = "null".equals(rawPaidAmount) ? null
                            : Double.parseDouble(rawPaidAmount);
                    String name = getString.getString(recordEntry, "customer_name");
                    // Binlog flag semantics (per original notes):
                    //   I: N->Y    U: N->Y and Y->N    D: N->Y and Y->N
                    // "Y" marks the after-image row, which is the one we want.
                    String operation_flag = getString.getString(recordEntry, "new_dts_sync_dts_after_flag");
                    String rawOrderTime = getString.getString(recordEntry, "order_time");
                    String order_time = isBlank(rawOrderTime) ? null
                            : sd.format(new Date(Long.parseLong(rawOrderTime) / 1000));
                    String order_state = getString.getString(recordEntry, "order_state");
                    // Map the numeric state code to its Chinese label via the OrderStage enum
                    // (replaces the original 12-branch switch); unknown codes stay null.
                    String trans_order_state;
                    try {
                        trans_order_state = OrderStage.valueOf("OrderStage" + order_state).getChinese();
                    } catch (IllegalArgumentException e) {
                        trans_order_state = null;
                    }
                    // Business time used for the "today only" comparison.
                    Date add_time = isBlank(rawOrderTime) ? null
                            : new Date(Long.parseLong(rawOrderTime) / 1000);
                    System.out.println(order_state + "====startDate:" + startDate + "====paid_at:" + order_time + "=====phone:" + phone);
                    // Keep only after-images with a usable business time falling after today's
                    // midnight; this discards historical rows and before-images. The null
                    // check on order_time short-circuits before add_time is dereferenced.
                    if ("Y".equals(operation_flag)
                            && !StringUtils.isNullOrWhitespaceOnly(order_time)
                            && add_time.after(startDate)) {
                        EventSuccessInfo eventSuccessInfo = new EventSuccessInfo(
                                phone, order_sn, brand_name, car_model, action_time,
                                paid_amount, name, operation_flag, order_time, trans_order_state);
                        out.collect(new Tuple4<String, EventSuccessInfo, String, Long>(
                                "test_event_order_success",
                                eventSuccessInfo,
                                UUID.randomUUID().toString().replace("-", ""),
                                System.currentTimeMillis()));
                    }
                }
            }
        });

        // Key by phone so dedup state is scoped per customer.
        KeyedStream<Tuple4<String, EventSuccessInfo, String, Long>, String> tuple4StringKeyedStream =
                aecollectordataDataStream.keyBy(x -> x._2().getPhone());

        // De-duplicate on phone + order_state + order_sn using keyed MapState.
        // NOTE(review): entries are never removed, so this state grows without bound -
        // consider state TTL or clearing it in the daily timer. TODO confirm retention needs.
        SingleOutputStreamOperator<Tuple4<String, EventSuccessInfo, String, Long>> map =
                tuple4StringKeyedStream.filter(new RichFilterFunction<Tuple4<String, EventSuccessInfo, String, Long>>() {
            private MapState<String, String> phoneState;

            @Override
            public void open(Configuration parameters) throws Exception {
                phoneState = getRuntimeContext().getMapState(
                        new MapStateDescriptor<String, String>("phonestate", String.class, String.class));
            }

            @Override
            public boolean filter(Tuple4<String, EventSuccessInfo, String, Long> value) throws Exception {
                String dedupKey = value._2().getPhone() + value._2().getOrder_state() + value._2().getOrder_sn();
                if (!phoneState.contains(dedupKey)) {
                    phoneState.put(dedupKey, value._2().getOrder_sn());
                    return true;
                }
                return false;
            }
        });

        // Daily processing-time timer: at the next Beijing-time midnight reset startDate so
        // the "today only" comparison keeps working across day boundaries.
        SingleOutputStreamOperator<Tuple4<String, EventSuccessInfo, String, Long>> process =
                map.keyBy(x -> x._2().getPhone()).process(new KeyedProcessFunction<String, Tuple4<String, EventSuccessInfo, String, Long>, Tuple4<String, EventSuccessInfo, String, Long>>() {
            @Override
            public void processElement(Tuple4<String, EventSuccessInfo, String, Long> value, Context ctx, Collector<Tuple4<String, EventSuccessInfo, String, Long>> out) throws Exception {
                // Next local (UTC+8) midnight. Registering the same timestamp repeatedly
                // is a no-op, so the timer fires once per day per key.
                long ts = (ctx.timerService().currentProcessingTime() / (1000 * 60 * 60 * 24) + 1) * (1000 * 60 * 60 * 24) - 8 * 60 * 60 * 1000;
                ctx.timerService().registerProcessingTimeTimer(ts);
                out.collect(value);
            }

            @Override
            public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple4<String, EventSuccessInfo, String, Long>> out) throws Exception {
                System.out.println("定时器执行了:" + timestamp);
                // Re-anchor the cutoff to the (new) current day's midnight.
                startDate = sd1.parse(sd.format(new Date()));
            }
        });

        // Serialize the payload to the JSON string the business side consumes.
        SingleOutputStreamOperator<Tuple4<String, String, String, Long>> jsonString =
                process.map(new MapFunction<Tuple4<String, EventSuccessInfo, String, Long>, Tuple4<String, String, String, Long>>() {
            @Override
            public Tuple4<String, String, String, Long> map(Tuple4<String, EventSuccessInfo, String, Long> value) throws Exception {
                return new Tuple4<String, String, String, Long>(
                        value._1(),
                        JSON.toJSONString(value._2()),
                        value._3(),
                        value._4());
            }
        });

        // Custom sink writes into the DataHub sink topic.
        jsonString.addSink(new EventOmsSuccessSink());
        env.execute("EventOrderSuccess===>");
    }
}
5、自定义sink
package com.gwm.sink;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.http.HttpConfig;
import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.datahub.client.model.RecordSchema;
import com.aliyun.datahub.client.model.TupleRecordData;
import com.gwm.utils.ConfigPropUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Record;
import scala.Tuple4;
import java.util.ArrayList;
import java.util.List;
/**
 * Custom DataHub sink: writes each Tuple4(event_name, json_message, unique_id, etl_time)
 * as one tuple record into the configured sink topic.
 *
 * @author yangyingchun
 * @version 1.0
 * @date 2023/2/20 14:00
 */
public class EventOmsSuccessSink extends RichSinkFunction<Tuple4<String, String, String, Long>> {
    // Connection settings, loaded from config.properties on the classpath.
    // (Endpoint example targets one specific region; adjust per deployment.)
    static String endpoint = ConfigPropUtils.get("gwm_datahub_endpoint");
    static String projectName = ConfigPropUtils.get("datahub_sink_projectname");
    static String topicSinkName = ConfigPropUtils.get("datahub_sink_topic");
    static String accessId = ConfigPropUtils.get("gwm_accessId");
    static String accessKey = ConfigPropUtils.get("gwm_accessKey");

    // BUG FIX: the client and schema were static, so every parallel sink subtask in the
    // same JVM shared (and open() re-assigned) one client. They are now per-instance and
    // transient, rebuilt in open() on each subtask. Unused retryTimes field removed.
    private transient DatahubClient datahubClient;
    private transient RecordSchema recordSchema;

    /**
     * Builds the DataHub client and fetches the sink topic's schema once per subtask.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        // The boolean enables binary transfer (server >= 2.12); set it to
                        // false if the dedicated-cloud environment rejects it.
                        new DatahubConfig(endpoint,
                                new AliyunAccount(accessId, accessKey), true))
                // HttpConfig is optional; LZ4 compression is recommended for read/write.
                .setHttpConfig(new HttpConfig()
                        .setCompressType(HttpConfig.CompressType.LZ4)
                        .setConnTimeout(10000))
                .build();
        recordSchema = datahubClient.getTopic(projectName, topicSinkName).getRecordSchema();
    }

    /**
     * Writes one tuple as a single-record batch. Field names must match the topic schema.
     */
    @Override
    public void invoke(Tuple4<String, String, String, Long> record, Context context) throws Exception {
        List<RecordEntry> recordEntries = new ArrayList<>();
        RecordEntry recordEntry = new RecordEntry();
        // Optional per-record attribute (e.g. host/ip); not required for the write.
        recordEntry.addAttribute("key2", "value2");
        TupleRecordData data = new TupleRecordData(recordSchema);
        data.setField("event_name", record._1());
        data.setField("json_message", record._2());
        data.setField("unique_id", record._3());
        data.setField("etl_time", record._4());
        recordEntry.setRecordData(data);
        recordEntries.add(recordEntry);
        datahubClient.putRecords(projectName, topicSinkName, recordEntries);
    }
}
6、工具类
package com.gwm.utils;
import java.io.InputStream;
import java.util.Properties;
/**
 * Loads config.properties from the classpath once (at class load) and exposes key lookups.
 */
public class ConfigPropUtils {
    private static final Properties props = new Properties();

    static {
        // FIX: the original leaked the InputStream and NPE'd inside the catch-all when the
        // resource was missing. try-with-resources closes the stream; a null stream is
        // reported instead of crashing, leaving props empty (all keys resolve to null).
        try (InputStream in = ConfigPropUtils.class.getClassLoader().getResourceAsStream("config.properties")) {
            if (in == null) {
                System.err.println("config.properties not found on classpath; all keys will resolve to null");
            } else {
                props.load(in);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * @param key property name
     * @return the configured value, or null when the key is absent or loading failed
     */
    public static String get(String key) {
        return props.getProperty(key, null);
    }
}
package com.gwm.utils;
import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.datahub.client.model.TupleRecordData;
/**
 * Helper for reading a single field of a DataHub tuple record as text.
 *
 * @author yangyingchun
 * @version 1.0
 * @date 2022/9/21 17:02
 */
public class getString {
    /**
     * Returns the named field's value as a string, or the literal string "null" when the
     * field is absent. (Callers compare against the "null" sentinel, so it is preserved.)
     */
    public static String getString(RecordEntry recordEntry, String field) {
        TupleRecordData recordData = (TupleRecordData) recordEntry.getRecordData();
        Object fieldValue = recordData.getField(field);
        return fieldValue == null ? "null" : fieldValue.toString();
    }
}
package com.gwm.utils;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Date-string to epoch-millis conversion.
 *
 * @author yangyingchun
 * @version 1.0
 * @date 2021/12/14 15:35
 */
public class TimeToStampUtil {
    /**
     * Converts a "yyyy-MM-dd" string into epoch milliseconds at local midnight.
     * On a parse failure the error is printed and the CURRENT time is returned
     * (existing fallback behavior, preserved for callers).
     */
    public static Long timeToStamp(String timers) {
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        Date parsed;
        try {
            parsed = dayFormat.parse(timers);
        } catch (ParseException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            parsed = new Date();
        }
        return parsed.getTime();
    }
}
7、配置文件config
# --- DataHub connection & topics used by this job ---
gwm_datahub_endpoint=https://datahub.cn-XXXXXX-d01.dh.res.cloud.gwm.cn
datahub_sink_projectname=itsl
datahub_source_topic=event_info
datahub_source_topic1=event_info1
istl.datahub.oms_event=oms_event_info
datahub_scrm_topic=blink_scrm_new_user
# Sink topic written by EventOmsSuccessSink
datahub_sink_topic=oms_event_info
# --- OneID project topics & credentials ---
oneid.datahub_projectname=oneid
oneid.sink_topic_name.warehousing=etl_dts_vms_tt_warehousing_entry
oneid.sink_topic_name.vehicle=etl_dts_vms_tm_vehicle
oneid.accessId=CCxvVNXXXXmBLJEEKuSO
oneid.accessKey=0NMiKPoQTXXXX8OafeKssmUEzN1nahBOc7
# --- GWM DataHub / Hologres / MaxCompute (ODPS) access ---
gwm_accessId=5jNE06T8SPHXXXXXXleYil
gwm_accessKey=Ehsl9n3xhk0WvXXXXXXXdxCFAaTbm47uHtWQP
gwm_holo_url=jdbc:postgresql://holo-cn-XXXXXX-cn-baoding-gwmcloud-d01-internal.hologres.ops.cloud.gwm.cn:80/itsl_dev?tcpKeepAlive=true
postgresdriver=org.postgresql.Driver
gwm_odps_endpoint=http://service.cn-XXXXXXX-d01.odps.ops.cloud.gwm.cn/api
gwm_odps_project=itsl_dev
gwm_odps_driver=com.aliyun.odps.jdbc.OdpsDriver
gwm_odps_url=jdbc:odps:http://service.cn-XXXXXXX-d01.odps.ops.cloud.gwm.cn/api?project=ITSL_dev&charset=UTF-8&interactiveMode=true
# --- Kafka / ZooKeeper (local testing) ---
kafka.topic=topic_name
bootstrap.servers=localhost:9092
zookeeper.connect=localhost:2181
group.id001=customer-001
## Data-warehouse related
###############################ODS###############################
ods.datahub_projectname=itsl
ods.datahub_source_topic.event_info=event_info
###############################ODS###############################
###############################DWD###############################
dwd.datahub_projectname=itsl
dwd.datahub_source_topic.datahubsink=datahub_sink
###############################DWD###############################
## Test related
testfile=E:\\software\\workspace\\blink_udx_3x-master\\src\\main\\resources\\testfile
四、测试运行
五、打包上线
1、打包
因为是 Blink,打包时注意不要将以下依赖打入包里(标记为 provided):
<dependency> <groupId>com.alibaba.blink</groupId> <artifactId>flink-streaming-java_${scala.binary.version}</artifactId> <version>${blink.version}</version> <scope>provided</scope> </dependency> <!-- https://mvnrepository.com/artifact/com.alibaba.blink/flink-table-common --> <!-- datahub依赖要打入包里,具体获取方式参看:Blink-DataHub connector Maven依赖转化_大数据00的博客-CSDN博客 --> <dependency> <groupId>com.alibaba.flink</groupId> <artifactId>datahub-connector</artifactId> <version>0.1-SNAPSHOT</version> <classifier>jar-with-dependencies</classifier> <!-- <scope>provided</scope>--> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.34</version> <scope>provided</scope> </dependency>