Flink 读取 Kafka 数据写入 MySQL

创建flink环境:

// Obtain the streaming runtime and pin the default operator parallelism to 1.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);

// Table API on top of the DataStream environment: Blink planner, streaming mode.
EnvironmentSettings settings = EnvironmentSettings
        .newInstance()
        .useBlinkPlanner()
        .inStreamingMode()
        .build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);

创建连接kafka表:

// DDL for the Kafka-backed source table: comma-separated CSV records read from
// topic 'pa' on broker hadoop111:9092. 'csv.ignore-parse-errors' is enabled so
// that malformed input does not fail the job.
String sql = String.join("\n",
        "CREATE TABLE kafka_job (",
        "  tab STRING,",
        "  job_name STRING,",
        "  job_salary int,",
        "  job_address STRING,",
        "  job_exp STRING,",
        "  education STRING,",
        "  company_name STRING,",
        "  demand STRING,",
        "  welfare STRING",
        ")",
        "WITH (",
        "  'connector' = 'kafka',",
        "  'topic' = 'pa',",
        "  'properties.bootstrap.servers' = 'hadoop111:9092',",
        "  'format' = 'csv',",
        "  'csv.field-delimiter' = ',',",
        "  'csv.ignore-parse-errors' = 'true'",
        ")");

// Register the table in the catalog, then obtain a Table handle for it.
tableEnv.executeSql(sql);
Table t_user = tableEnv.from("kafka_job");

创建写入mysql表:

// DDL for the MySQL sink table, written through Flink's JDBC connector.
//
// Fix: the URL parameter was misspelled 'zeroDaeTimeBehavior'; the Connector/J
// property is 'zeroDateTimeBehavior', so the intended setting never took effect.
//
// NOTE(review): the declared primary key is unix_t (a second-resolution timestamp)
// while the query feeding this table groups by tab. Results for different tabs
// produced within the same second would share a key -- confirm this matches the
// key of the physical MySQL table and the intended history/upsert semantics.
String sql2 = String.join("\n",
        "CREATE TABLE t1_avg (",
        "  tab VARCHAR(255),",
        "  avg_salary DECIMAL(10, 2),",
        "  unix_t BIGINT,",
        "  PRIMARY KEY (unix_t) NOT ENFORCED",
        ")",
        "WITH (",
        "  'connector' = 'jdbc',",
        "  'url' = 'jdbc:mysql://localhost:3306/flaskdb?serverTimezone=Asia/Shanghai&zeroDateTimeBehavior=convertToNull&useSSL=false',",
        "  'driver' = 'com.mysql.jdbc.Driver',",
        "  'username' = 'root',",
        "  'password' = 'root',",
        "  'table-name' = 't1_avg',",
        "  'lookup.cache.max-rows' = '1000',",
        "  'lookup.cache.ttl' = '60000'",
        ")");
tableEnv.executeSql(sql2);

编写插入语句:

// Continuous INSERT: average salary per tab, stamped with the current Unix time.
//
// Fix: job_salary is declared INT, and Flink SQL evaluates AVG over an INT column
// with integer semantics, so the fractional part was lost before ROUND(..., 2)
// ever ran. The column is cast to DOUBLE before averaging, and the rounded result
// is cast to DECIMAL(10, 2) so it matches the sink column type exactly.
String insert = String.join("\n",
        "insert into t1_avg",
        "SELECT *",
        "FROM (",
        "  SELECT tab,",
        "         CAST(ROUND(AVG(CAST(job_salary AS DOUBLE)), 2) AS DECIMAL(10, 2)) AS avg_salary,",
        "         UNIX_TIMESTAMP() AS unix_t",
        "  FROM kafka_job",
        "  GROUP BY tab",
        ") AS subquery");
tableEnv.executeSql(insert);

最终代码:

package flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Flink streaming job: reads CSV job postings from the Kafka topic {@code pa},
 * computes the average salary per {@code tab}, and writes the running result to
 * the MySQL table {@code t1_avg} through the JDBC connector. The raw Kafka rows
 * are additionally printed to stdout for inspection.
 */
public class t1 {
    private static final Logger log = LoggerFactory.getLogger(t1.class);

    /** DDL of the Kafka source table (comma-separated CSV; parse errors are ignored). */
    private static final String KAFKA_SOURCE_DDL = String.join("\n",
            "CREATE TABLE kafka_job (",
            "  tab STRING,",
            "  job_name STRING,",
            "  job_salary int,",
            "  job_address STRING,",
            "  job_exp STRING,",
            "  education STRING,",
            "  company_name STRING,",
            "  demand STRING,",
            "  welfare STRING",
            ")",
            "WITH (",
            "  'connector' = 'kafka',",
            "  'topic' = 'pa',",
            "  'properties.bootstrap.servers' = 'hadoop111:9092',",
            "  'format' = 'csv',",
            "  'csv.field-delimiter' = ',',",
            "  'csv.ignore-parse-errors' = 'true'",
            ")");

    /**
     * DDL of the MySQL sink table.
     *
     * <p>The URL parameter was previously misspelled {@code zeroDaeTimeBehavior};
     * it is now {@code zeroDateTimeBehavior} so the setting actually applies.
     *
     * <p>NOTE(review): the primary key is {@code unix_t} (second resolution) while
     * the query groups by {@code tab}; rows for different tabs produced in the same
     * second would share a key. Confirm against the physical MySQL table.
     */
    private static final String MYSQL_SINK_DDL = String.join("\n",
            "CREATE TABLE t1_avg (",
            "  tab VARCHAR(255),",
            "  avg_salary DECIMAL(10, 2),",
            "  unix_t BIGINT,",
            "  PRIMARY KEY (unix_t) NOT ENFORCED",
            ")",
            "WITH (",
            "  'connector' = 'jdbc',",
            "  'url' = 'jdbc:mysql://localhost:3306/flaskdb?serverTimezone=Asia/Shanghai&zeroDateTimeBehavior=convertToNull&useSSL=false',",
            "  'driver' = 'com.mysql.jdbc.Driver',",
            "  'username' = 'root',",
            "  'password' = 'root',",
            "  'table-name' = 't1_avg',",
            "  'lookup.cache.max-rows' = '1000',",
            "  'lookup.cache.ttl' = '60000'",
            ")");

    /**
     * Continuous INSERT computing the per-tab average salary.
     *
     * <p>{@code job_salary} is INT, and Flink SQL evaluates AVG over INT with
     * integer semantics, so the column is cast to DOUBLE before averaging; the
     * rounded result is cast to DECIMAL(10, 2) to match the sink column.
     */
    private static final String INSERT_AVG_SQL = String.join("\n",
            "insert into t1_avg",
            "SELECT *",
            "FROM (",
            "  SELECT tab,",
            "         CAST(ROUND(AVG(CAST(job_salary AS DOUBLE)), 2) AS DECIMAL(10, 2)) AS avg_salary,",
            "         UNIX_TIMESTAMP() AS unix_t",
            "  FROM kafka_job",
            "  GROUP BY tab",
            ") AS subquery");

    /**
     * Entry point: builds the table environment, registers source and sink tables,
     * submits the INSERT pipeline and runs the debug print pipeline.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        EnvironmentSettings settings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);

        tableEnv.executeSql(KAFKA_SOURCE_DDL);
        Table t_user = tableEnv.from("kafka_job");

        // Debug output: echo every raw Kafka row to stdout and show the table schema.
        DataStream<Row> rowDataStream = tableEnv.toAppendStream(t_user, Row.class);
        rowDataStream.print();
        t_user.printSchema();

        tableEnv.executeSql(MYSQL_SINK_DDL);

        // executeSql on an INSERT statement submits its own job; the env.execute
        // call below is what runs the DataStream print pipeline defined above.
        tableEnv.executeSql(INSERT_AVG_SQL);

        try {
            env.execute("flink_running");
        } catch (Exception e) {
            // Log at ERROR with the exception attached so the stack trace and
            // root cause are preserved rather than only the message text.
            log.error("抛出异常!", e);
        }
    }
}

  • 12
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Flink是一个分布式流处理框架,能够处理和分析实时数据流。Kafka是一个分布式流式数据处理平台,能够实时地收集、存储和处理大规模数据流。 在Flink中读取Kafka数据并将其写入MySQL数据库需要以下步骤: 1. 配置Kafka Consumer:配置Kafka Consumer相关的属性,如bootstrap.servers(Kafka的地址)、group.id(消费者组标识)、topic(要读取的主题名称)等。 2. 创建Flink Execution Environment:通过创建Flink执行环境,可以定义Flink作业的运行模式和相关配置。 3. 创建Kafka Data Source:使用Flink的Kafka Consumer API创建一个Kafka数据源,通过指定Kafka Consumer的配置和要读取的主题,可以从Kafka中获取数据。 4. 定义数据转换逻辑:根据需要,可以使用Flink提供的转换算子对Kafka数据进行处理,如map、filter、reduce等。 5. 创建MySQL Sink:通过配置MySQL数据库的连接信息,如URL、用户名、密码等,创建一个MySQL Sink。 6. 将数据写入MySQL:通过使用Flink的MySQL Sink API,将经过转换后的数据写入MySQL数据库。可以指定要写入的表名、字段映射关系等。 7. 设置并执行作业:将Kafka数据源和MySQL Sink绑定在一起,并设置作业的并行度,然后执行Flink作业。 通过以上步骤,我们可以将Kafka中的数据读取出来,并经过转换后写入MySQL数据库,实现了从Kafka到MySQL的数据传输。 需要注意的是,在配置Kafka Consumer和MySQL数据库时,要确保其正确性和可用性,以确保数据的正确读取和写入。同时,在处理大规模数据流时,还需要考虑分布式部署、容错性和高可用性等方面的问题,以保证系统的稳定性和性能。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值