Flink SQL Real-Time Computing Case 2

Key points: joining with a dimension table, windowed aggregation

Message simulator

import com.alibaba.fastjson.JSONObject;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;
import java.util.Random;

/**
 * @description IoT solution: multi-dimensional sensor data analysis.
 * Simulates the sensor collection/upload service.
 * @author: ZhiWen
 * @create: 2020-05-25 11:26
 **/
public class MessageGenerator03 {
    /**
     * Scenario
     * The industrial customer has over 1,000 devices spread across plants in different cities.
     * Each device carries 10 sensors of different types, and each sensor collects and uploads a
     * record to the log service (Log/SLS) roughly every 5 seconds. Each record looks like:
     *
     *   s_id        s_value                s_ts
     *   sensor ID   current sensor value   send time
     *
     * The sensors belong to devices and plants. The customer keeps a dimension table in RDS that
     * maps each sensor to its device and plant:
     *
     *   s_id        s_type                   device_id   factory_id
     *   sensor ID   sensor monitoring type   device ID   plant ID
     *
     * The customer wants the uploaded sensor data joined with this dimension table, then grouped
     * by device and flattened into a wide table once per minute:
     *
     *   ts     device_id   factory_id   device_temp          device_pres
     *   time   device ID   plant ID     device temperature   device pressure
     *
     * To keep the logic simple we assume only two sensor types, temperature and pressure.
     * The downstream logic is:
     * 1. Filter devices whose temperature exceeds 80 and trigger an alert downstream. The customer
     *    uses MQ as the trigger source: the streaming job filters devices with temperature > 80
     *    and delivers them to MQ, which drives the user-defined alerting system.
     * 2. Write the data out to an online OLAP system.
     */

    /*
      Sample message:
      {
        "s_id": "t_xxsfdsad",
        "s_value": "85.5",
        "s_ts": "1515228763"
      }
    */



    /** Sensor IDs */
    private static String[] sid = new String[]{"t_china_10001","t_china_10002","t_china_10003","p_china_20001","p_china_20002","p_china_20003"};

    /** Candidate sensor values */
    private static double[] sValue = new double[]{85.5,75.6,67.4,78.6,80.5,75.9,83.7,82.3,81.6,83.2,84.4};


    public static void main(String[] args) {

        // Producer configuration
        Properties props = new Properties();
        // Kafka broker address
        props.put("bootstrap.servers", "172.24.103.8:9092");
        // Serializers for the record key and value
        props.put("key.serializer", StringSerializer.class);
        props.put("value.serializer", StringSerializer.class);
        // Create the producer instance
        KafkaProducer<String, String> producer = new KafkaProducer<>(props);

        Random random = new Random();
        while (true) {

            // Build a random sensor reading
            Message03 message = new Message03();
            message.setS_id(sid[random.nextInt(sid.length)]);
            message.setS_value(sValue[random.nextInt(sValue.length)]);
            message.setS_ts(System.currentTimeMillis() / 1000);

            String json = JSONObject.toJSONString(message);
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }

            // Send the record to the source topic
            ProducerRecord<String, String> record = new ProducerRecord<>("topic_sensor", json);
            producer.send(record);
            System.out.println(json);
        }
    }

}
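
The simulator serializes a Message03 POJO that the original post does not show. A minimal sketch, assuming plain fields whose names and types match how the setters are called above (the class layout is inferred, not confirmed by the source):

/**
 * Minimal sketch of the Message03 POJO referenced by MessageGenerator03 (not shown in the post);
 * field types are assumed from the setter calls, and fastjson derives the JSON keys from the getters.
 */
public class Message03 {
    private String s_id;    // sensor ID
    private double s_value; // current sensor value
    private long s_ts;      // send time, Unix seconds

    public String getS_id() { return s_id; }
    public void setS_id(String s_id) { this.s_id = s_id; }

    public double getS_value() { return s_value; }
    public void setS_value(double s_value) { this.s_value = s_value; }

    public long getS_ts() { return s_ts; }
    public void setS_ts(long s_ts) { this.s_ts = s_ts; }
}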

Processing logic

import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * @description IoT solution: multi-dimensional sensor data analysis.
 * @author: ZhiWen
 * @create: 2020-05-25 11:15
 **/
public class MainAppSensor {
    /**
     * Scenario: see MessageGenerator03. Sensor readings (s_id, s_value, s_ts) arrive from Kafka,
     * are joined with the RDS dimension table (s_id, s_type, device_id, factory_id), and are
     * flattened per device into a 1-minute wide table (ts, device_id, factory_id, device_temp,
     * device_pres). Devices with temperature above 80 are pushed to MQ to trigger alerts, and the
     * wide table is written to a DB/OLAP system for later analysis.
     */

    public static final String KAFKA_SOURCE_SQL = "CREATE TABLE s_sensor_data (\n" +
            "    s_id VARCHAR,--要与Kafka中的名称保持一致\n" +
            "    s_ts VARCHAR,\n" +
            "    s_value VARCHAR,\n" +
            "    pt AS PROCTIME(),--join维表时要用到\n" +
            "    ts AS CAST(FROM_UNIXTIME(CAST(s_ts AS BIGINT)) AS TIMESTAMP),\n" +
            "    WATERMARK FOR ts AS ts - INTERVAL '10' SECOND\n" +
            ") WITH (\n" +
            "\t'connector.type' = 'kafka',\n" +
            "\t'connector.version' = 'universal',\n" +
            "\t'connector.topic' = 'topic_sensor',\n" +
            "\t'update-mode' = 'append',\n" +
            "\t'connector.properties.group.id' = 'testGroup',\n" +
            "\t'connector.properties.zookeeper.connect' = '172.24.103.8:2181',\n" +
            "\t'connector.properties.bootstrap.servers' = '172.24.103.8:9092',\n" +
            "\t'connector.startup-mode' = 'latest-offset',\n" +
            "\t'format.type' = 'json'\n" +
            ")";

    public static final String MYSQL_DIM_SQL = "CREATE TABLE d_sensor_device_data (\n" +
            "\ts_id                  VARCHAR,\n" +
            "\ts_type                VARCHAR,\n" +
            "\tdevice_id             BIGINT,\n" +
            "\tfactory_id            BIGINT\n" +
            ") WITH (\n" +
            "\t'connector.type' = 'jdbc',\n" +
            "\t'connector.url' = 'jdbc:mysql://172.24.103.3:3306/flink',\n" +
            "\t'connector.table' = 'sensor_device_data',\n" +
            "\t'connector.username' = 'root',\n" +
            "\t'connector.password' = '123456',\n" +
            "\t'connector.lookup.cache.max-rows' = '100',--维表必须参数\n" +
            "\t'connector.lookup.cache.ttl' = '10s'--维表必须参数\n" +
            ")";
    public static final String KAFKA_SINK_SQL = "CREATE TABLE r_monitor_data (\n" +
            "    ts    VARCHAR,\n" +
            "    device_id    BIGINT,\n" +
            "    factory_id    BIGINT,\n" +
            "    device_temp    DOUBLE,\n" +
            "    device_pres DOUBLE\n" +
            ") WITH (\n" +
            "\t'connector.type' = 'kafka',\n" +
            "\t'connector.version' = 'universal',\n" +
            "\t'connector.topic' = 'topic_sensor_output',\n" +
            "\t'update-mode' = 'append',\n" +
            "\t'connector.properties.zookeeper.connect' = '172.24.103.8:2181',\n" +
            "\t'connector.properties.bootstrap.servers' = '172.24.103.8:9092',\n" +
            "\t'connector.sink-partitioner' = 'round-robin',\n" +
            "\t'format.type' = 'json'\n" +
            ")";
    public static final String MYSQL_SINK_SQL = "CREATE TABLE r_device_data (\n" +
            "\tts\tVARCHAR,\n" +
            "\tdevice_id BIGINT,\n" +
            "\tfactory_id BIGINT,\n" +
            "\tdevice_temp\tDOUBLE,\n" +
            "\tdevice_pres DOUBLE\n" +
            "--\tPRIMARY KEY(ts, device_id)\n" +
            ") WITH (\n" +
            "\t'connector.type' = 'jdbc',\n" +
            "\t'connector.url' = 'jdbc:mysql://172.24.103.3:3306/flink',\n" +
            "\t'connector.table' = 'device_data',\n" +
            "\t'connector.username' = 'root',\n" +
            "\t'connector.password' = '123456',\n" +
            "\t'connector.write.flush.max-rows' = '10',\n" +
            "\t'connector.write.flush.interval' = '5s' --指定刷新间隔\n" +
            ")";
    public static final String JOIN_SQL = "SELECT\n" +
            "    s.ts,\n" +
            "    s.s_id,\n" +
            "    s.s_value,\n" +
            "    d.s_type,\n" +
            "    d.device_id,\n" +
            "    d.factory_id\n" +
            "FROM\n" +
            "    s_sensor_data s\n" +
            "JOIN\n" +
            "    d_sensor_device_data FOR SYSTEM_TIME AS OF s.pt AS d --固定写法,仅支持带有处理时间时态表的内部联接和左联接\n" +
            "ON\n" +
            "    s.s_id = d.s_id";
    public static final String MAP_SQL = "SELECT\n" +
            "    -- use the tumbling window start time as the record's timestamp\n" +
            "    CAST(TUMBLE_START(v.ts, INTERVAL '1' MINUTE) AS VARCHAR) AS ts,\n" +
            "    v.device_id,\n" +
            "    v.factory_id,\n" +
            "    -- average temperature for the minute: sum of TEMP readings / number of TEMP readings\n" +
            "    SUM(CAST(IF(v.s_type = 'TEMP', v.s_value, '0') AS DOUBLE)) / SUM(CAST(IF(v.s_type = 'TEMP', '1', '0') AS DOUBLE)) AS device_temp,\n" +
            "    -- average pressure for the minute: sum of PRES readings / number of PRES readings\n" +
            "    SUM(CAST(IF(v.s_type = 'PRES', v.s_value, '0') AS DOUBLE)) / SUM(CAST(IF(v.s_type = 'PRES', '1', '0') AS DOUBLE)) AS device_pres\n" +
            "FROM\n" +
            "    v_sensor_device_data v\n" +
            "GROUP BY\n" +
            "    TUMBLE(v.ts, INTERVAL '1' MINUTE), v.device_id, v.factory_id -- non-aggregated select fields must appear in GROUP BY";
    public static final String INSERT_MQ_SQL = "INSERT INTO r_monitor_data\n" +
            "SELECT\n" +
            "    ts,\n" +
            "    device_id,\n" +
            "    factory_id,\n" +
            "    device_temp,\n" +
            "    device_pres\n" +
            "FROM\n" +
            "    v_device_data\n" +
            "WHERE\n" +
            "    device_temp > 80.0";
    public static final String INSERT_MYSQL_SQL = "INSERT INTO r_device_data\n" +
            "SELECT\n" +
            "\tts,\n" +
            "\tdevice_id,\n" +
            "\tfactory_id,\n" +
            "\tdevice_temp,\n" +
            "\tdevice_pres\n" +
            "FROM\n" +
            "\tv_device_data";

    public static void main(String[] args) throws Exception {
        // Create the streaming environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Use the blink planner
        EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        // Create the stream table environment
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env, settings);

        // Set the job parallelism and time characteristic
        env.setParallelism(3).setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        /*--*********************** Create the source, dimension and result tables ***********************--*/

        // Source table (Kafka)
        tEnv.sqlUpdate(KAFKA_SOURCE_SQL);
        // Dimension table (MySQL)
        tEnv.sqlUpdate(MYSQL_DIM_SQL);
        // Two result tables (the Kafka/MQ sink is left commented out here)
        //tEnv.sqlUpdate(KAFKA_SINK_SQL);
        tEnv.sqlUpdate(MYSQL_SINK_SQL);

        /*--*********************** Processing logic ***********************--*/

        // Join the dimension table to resolve the device and plant for each sensor
        Table view11 = tEnv.sqlQuery(JOIN_SQL);
        tEnv.createTemporaryView("v_sensor_device_data", view11);
        //tEnv.toAppendStream(view11, Row.class).print();

        // Flatten into the 1-minute wide table
        Table view12 = tEnv.sqlQuery(MAP_SQL);
        tEnv.createTemporaryView("v_device_data", view12);
        tEnv.toRetractStream(view12, Row.class).print();

        // Filter devices whose temperature exceeds 80 and push them to MQ to trigger alerts
        //tEnv.sqlUpdate(INSERT_MQ_SQL);
        // Write the wide table to the DB for later analysis
        tEnv.sqlUpdate(INSERT_MYSQL_SQL);

        /*--*********************** Execute ***********************--*/
        System.out.println(env.getParallelism());
        tEnv.execute("sensor");
    }
}
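
The job assumes the MySQL tables behind MYSQL_DIM_SQL and MYSQL_SINK_SQL already exist in RDS, but the post never shows how they are created or populated. A minimal JDBC sketch, assuming the schemas implied by those DDL strings; the seed rows and the TEMP/PRES mapping of the t_/p_ sensor IDs are purely illustrative:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

/** One-off helper that prepares the MySQL tables used by the Flink job (hypothetical seed data). */
public class MysqlTableSetup {

    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://172.24.103.3:3306/flink", "root", "123456");
             Statement stmt = conn.createStatement()) {

            // Dimension table: sensor -> type / device / plant (matches MYSQL_DIM_SQL)
            stmt.executeUpdate("CREATE TABLE IF NOT EXISTS sensor_device_data (" +
                    "s_id VARCHAR(64) PRIMARY KEY, s_type VARCHAR(16), " +
                    "device_id BIGINT, factory_id BIGINT)");

            // Result table: the 1-minute wide table written by the job (matches MYSQL_SINK_SQL)
            stmt.executeUpdate("CREATE TABLE IF NOT EXISTS device_data (" +
                    "ts VARCHAR(32), device_id BIGINT, factory_id BIGINT, " +
                    "device_temp DOUBLE, device_pres DOUBLE)");

            // Illustrative mapping: t_* sensors report temperature, p_* sensors report pressure
            stmt.executeUpdate("INSERT IGNORE INTO sensor_device_data VALUES " +
                    "('t_china_10001','TEMP',1001,1), ('t_china_10002','TEMP',1001,1), " +
                    "('t_china_10003','TEMP',1002,1), ('p_china_20001','PRES',1001,1), " +
                    "('p_china_20002','PRES',1001,1), ('p_china_20003','PRES',1002,1)");
        }
    }
}

This needs mysql-connector-java on the classpath; the device and factory IDs are made up only so that the temporal join and the per-device averages produce output.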