Accessing MySQL from a Flink streaming job

The business scenario:

    Overview: collect data from factory equipment.

    Flink connects to EMQ (an MQTT broker) and ingests industrial IoT data as a stream. The raw device messages carry no machID, so the job must look up the machID for each gateMac in the device master table in MySQL.

   Approach 1 to accessing MySQL

   Load all device master data from MySQL when the Flink job initializes. The problem: whenever a new device is added, the job has to be restarted to fetch the full set of machIDs again. That works poorly, so this approach was rejected.

    Approach 2

   Read MySQL as a separate stream that re-queries every 5 minutes (a periodic refresh) and write the result into a map, as in the snippet below.

        private static Map<String, String> deviceMap = new Hashtable<String, String>();

        /**
         * Fetch machIDs from MySQL, refreshed every five minutes -- the part this post focuses on.
         */
        DataStream<Map<String, String>> deviceStream = env.addSource(new JdbcReader());
        deviceStream.broadcast().map(new MapFunction<Map<String, String>, Object>() {
            @Override
            public Object map(Map<String, String> value) {
                deviceMap = value;  // side effect: replace the static lookup map
                return null;
            }
        });
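Note that the MapFunction above exists purely for its side effect: it overwrites the static deviceMap on whichever subtasks execute it, and the stream it returns is never used (hence the null return). The broadcast() in front of it is what makes every parallel subtask receive the snapshot; more on that further down.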

 

   EmqttFlinkMain.java (the main Flink job that processes the data)

package com.flink;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.flink.config.flinkConstants;
import com.flink.model.DeviceAlarm;
import com.flink.model.DeviceData;
import com.flink.utils.emqtt.EmqttSource;
import com.flink.utils.mysql.JdbcReader;
import com.flink.utils.mysql.JdbcWriter;
import com.flink.utils.opentsdb.OpnetsdbWriter;
import com.flink.utils.redis.RedisWriter;
import com.google.gson.Gson;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.util.*;


public class EmqttFlinkMain {
    private static Map<String, String> deviceMap = new Hashtable<String, String>();

    public static void main(String[] args) throws Exception {
        flinkConstants fc = flinkConstants.getInstance();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        /**
         * Fetch machIDs from MySQL, refreshed every five minutes -- the part this post focuses on.
         */
        DataStream<Map<String, String>> deviceStream = env.addSource(new JdbcReader());
        deviceStream.broadcast().map(new MapFunction<Map<String, String>, Object>() {
            @Override
            public Object map(Map<String, String> value) {
                deviceMap = value;  // side effect: replace the static lookup map
                return null;
            }
        });
            
        // ========================================================================


        //EMQ MQTT source
        DataStream<Tuple2<String, String>> inputStream = env.addSource(new EmqttSource());

        /**
         *  "Data"-type messages
         */
        DataStream<DeviceData> dataStream = inputStream
                .rebalance()
                .flatMap(new FlatMapFunction<Tuple2<String, String>, DeviceData>() {
                    @Override
                    public void flatMap(Tuple2<String, String> value, Collector<DeviceData> out) {
                        String message = value.f0;
                        String topic = value.f1;
                        List<DeviceData> d = DataHandle(message, topic);
                        for (DeviceData line : d) {
                            out.collect(line);
                        }
                    }
                });


        //sink to OpenTSDB
        dataStream.addSink(new OpnetsdbWriter()).name("opentsdb");

        //sink to Redis: keep only the latest value per key in each 3-second window
        SingleOutputStreamOperator<Tuple2<String, String>> keyedStream = dataStream
                .map(new MapFunction<DeviceData, Tuple2<String, String>>() {
                    @Override
                    public Tuple2<String, String> map(DeviceData value) {
                        String key = value.getCompID() + "/" + value.getMachID() + "/" + value.getOperationValue();
                        return Tuple2.of(key, value.getOperationData());
                    }
                })
                .keyBy(0)
                .timeWindow(Time.seconds(3))
                .process(new ProcessWindowFunction<Tuple2<String, String>, Tuple2<String, String>, Tuple, TimeWindow>() {
                    @Override
                    public void process(Tuple tuple, Context context, Iterable<Tuple2<String, String>> elements, Collector<Tuple2<String, String>> out) throws Exception {
                        // iterate to the last element so that only the most recent
                        // value per key in the window is emitted (downsampling)
                        Iterator<Tuple2<String, String>> iter = elements.iterator();
                        while (iter.hasNext()) {
                            Tuple2<String, String> temp = iter.next();
                            if (!iter.hasNext()) {
                                out.collect(temp);
                            }
                        }
                    }
                });
        keyedStream.addSink(new RedisWriter()).name("redis");

        /**
         *  "Alarm"-type messages
         */
        //sink to MySQL
        DataStream<List<DeviceAlarm>> alarmStream = inputStream.filter(new FilterFunction<Tuple2<String, String>>() {
            @Override
            public boolean filter(Tuple2<String, String> value) throws Exception {
                JSONObject AlarmObject = JSON.parseObject(value.f0);
                String dataType = (String) AlarmObject.get("type");
                return "alarm".equalsIgnoreCase(dataType);  // null-safe; matches "Alarm"/"alarm"
            }
        }).map(new MapFunction<Tuple2<String, String>, List<DeviceAlarm>>() {
            @Override
            public List<DeviceAlarm> map(Tuple2<String, String> s) throws Exception {
                // alarmDnalysis (not shown here) parses an alarm message into DeviceAlarm records
                return alarmDnalysis(s.f0, s.f1);
            }
        });
        //write via JdbcWriter
        alarmStream.addSink(new JdbcWriter()).name("mysql").setParallelism(3);

        //alternative: write via JdbcWriterAsyncFunction (Flink async I/O)
//        // create async function, which will *wait* for a while to simulate the process of async i/o
//        AsyncFunction<List<DeviceAlarm>, String> function = new JdbcWriterAsyncFunction();
//
//        // add async operator to streaming job
//        AsyncDataStream.orderedWait(
//                alarmStream,
//                function,
//                10000L,
//                TimeUnit.MILLISECONDS,
//                20).name("async write mysql").setParallelism(3);

        env.execute("EmqttFlinkMain");
    }

    private static List<DeviceData> DataHandle(String message, String topic) {
        List<DeviceData> d = new ArrayList<>();
        try {
            JSONObject DataObject = JSON.parseObject(message);
            String dataType = (String) DataObject.get("type");
            if ("data".equalsIgnoreCase(dataType)) {  // null-safe; matches "Data"/"data"
                String[] array = topic.split("/");

                JSONArray dataList = JSON.parseArray(DataObject.get("values").toString());

                String machID = deviceMap.get(array[1]);  // gateMac -> machID lookup
                if (machID != null) {
                    for (int i = 0; i < dataList.size(); i++) {
                        DeviceData d1 = new DeviceData();
                        JSONObject dataDict = dataList.getJSONObject(i);
                        d1.setMachID(machID);
                        d1.setCompID(array[0]);
                        d1.setGateMac(array[1]);
                        d1.setOperationValue(dataDict.get("name").toString());
                        d1.setOperationData(dataDict.get("data").toString());
                        d1.setGatherTime(dataDict.get("time").toString());
                        d.add(d1);
                    }
                } else {
                    System.out.println("Unable to parse data: no machID found for gateMac " + array[1]);
                }
            }
        } catch (Throwable t) {
            t.printStackTrace();
        }
        return d;
    }
}
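
For reference, DataHandle assumes topics of the form compID/gateMac/... and a JSON payload whose fields match the parsing above. A hypothetical example (the concrete values are invented for illustration):

    topic:   COMP01/A1-B2-C3-D4-E5-F6/data
    payload: {"type": "Data", "values": [{"name": "temperature", "data": "36.5", "time": "2019-01-01 08:00:00"}]}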

 

A custom Flink data source for MySQL


JdbcReader.java (reads MySQL)

package com.flink.utils.mysql;

import com.flink.config.flinkConstants;
import com.mysql.jdbc.Driver;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.Hashtable;
import java.util.Map;

//RichSourceFunction (not RichParallelSourceFunction): this source runs with parallelism 1
public class JdbcReader extends RichSourceFunction<Map<String, String>> {
    private static final Logger logger = LoggerFactory.getLogger(JdbcReader.class);

    private Connection connection = null;
    private PreparedStatement ps = null;
    private volatile boolean isRunning = true;

    //open() establishes the database connection; flinkConstants supplies the JDBC configuration
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        flinkConstants fc = flinkConstants.getInstance();
        DriverManager.registerDriver(new Driver());
        String db_url = "jdbc:mysql://" + fc.JDBC_HOST + ":" + fc.JDBC_PORT + "/" + fc.JDBC_DATABASE;
        connection = DriverManager.getConnection(db_url, fc.JDBC_USERNAME, fc.JDBC_PASSWORD);//获取连接
        ps = connection.prepareStatement("select machID, gateMac from dac_machinestatus where operationFlag=9");
    }

    //run the query periodically and emit a fresh snapshot
    @Override
    public void run(SourceContext<Map<String, String>> ctx) throws Exception {
        try {
            while (isRunning) {
                // build a fresh map on every cycle: collect() hands the object to the
                // downstream operator by reference, so reusing one map and calling
                // clear() on it could empty it before downstream operators read it
                Map<String, String> deviceMap = new Hashtable<String, String>();
                ResultSet resultSet = ps.executeQuery();
                while (resultSet.next()) {
                    String gateMac = resultSet.getString("gateMac");
                    String machID = resultSet.getString("machID");
                    if (gateMac != null && !gateMac.isEmpty() && machID != null && !machID.isEmpty()) {
                        deviceMap.put(gateMac, machID);
                    }
                }
                resultSet.close();
                System.out.println("deviceMap>>>>>>" + deviceMap);
                ctx.collect(deviceMap);  // emit the snapshot
                Thread.sleep(5 * 60 * 1000);  // refresh every 5 minutes
            }
        } catch (Exception e) {
            logger.error("runException:{}", e);
        }
    }
        } catch (Exception e) {
            logger.error("runException:{}", e);
        }
    }

    //stop the polling loop and close the database connection
    @Override
    public void cancel() {
        isRunning = false;  // stop the loop first so run() can exit
        try {
            if (ps != null) {
                ps.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (Exception e) {
            logger.error("cancelException:{}", e);
        }
    }
}
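
One design note: because JdbcReader extends RichSourceFunction rather than RichParallelSourceFunction, Flink runs this source with parallelism 1, so exactly one subtask polls MySQL and emits the snapshot. That is precisely why the broadcast() discussed next is needed to get the snapshot to every node.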

 

Interestingly, a problem surfaced during development:

The RichSourceFunction read MySQL and stored the result in public static Map<String,String> DeviceMap = new Hashtable<String,String>();. In the cluster, only one server ended up with a populated DeviceMap; on all the others it stayed empty.

The fix: after the RichSourceFunction reads MySQL, add a broadcast() before the map operator, which replicates the one node's data to all nodes. After that change, DeviceMap was populated on every server in the cluster.
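
A minimal sketch of the difference (DeviceMapUpdater is just an illustrative name for the anonymous MapFunction shown earlier):

    // without broadcast(): each snapshot emitted by the parallelism-1 source is
    // routed to a single downstream subtask, so only that TaskManager's static
    // map is updated on a given refresh cycle
    deviceStream.map(new DeviceMapUpdater());

    // with broadcast(): every snapshot is replicated to all parallel subtasks of
    // the map operator, so the static map is refreshed on every TaskManager
    deviceStream.broadcast().map(new DeviceMapUpdater());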

 

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>FlinkDataHandle</groupId>
    <artifactId>FlinkDataHandle</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>FlinkDataHandle</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.version>2.11.11</scala.version>
        <scala.compat.version>2.11</scala.compat.version>
        <spark.version>2.2.0</spark.version>
        <c3p0.version>0.9.1.1</c3p0.version>
        <mysql.version>5.1.26</mysql.version>
        <fastjson.version>1.1.41</fastjson.version>
        <hbase.version>1.2.0</hbase.version>
        <flink.version>1.4.2</flink.version>
    </properties>

    <repositories>
        <!-- repositories from which dependencies can be downloaded -->
        <repository>
            <id>aliyun</id>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
        </repository>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <repository>
            <id>jboss</id>
            <url>http://repository.jboss.org/nexus/content/groups/public</url>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.compat.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-twitter_${scala.compat.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.10_${scala.compat.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.compat.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table_${scala.compat.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.compat.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hbase_${scala.compat.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.1.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-pool2</artifactId>
            <version>2.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20180130</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.eclipse.paho</groupId>
            <artifactId>org.eclipse.paho.client.mqttv3</artifactId>
            <version>1.0.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-rabbitmq_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.fusesource.mqtt-client</groupId>
            <artifactId>mqtt-client</artifactId>
            <version>1.12</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.35</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-dbcp2</artifactId>
            <version>2.1.1</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/java</sourceDirectory>
        <testSourceDirectory>src/test/java</testSourceDirectory>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass>com.flink.EmqttFlinkMain</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>1.2.1</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>exec</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <executable>java</executable>
                    <includeProjectDependencies>true</includeProjectDependencies>
                    <includePluginDependencies>false</includePluginDependencies>
                    <classpathScope>compile</classpathScope>
                    <mainClass>com.flink.EmqttFlinkMain</mainClass>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>

        </plugins>
    </build>
</project>

 

 

 

 

 
