The Kafka-Flink-MySQL pipeline

1. First, add the Maven dependencies

<properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <log4j.version>2.12.1</log4j.version>
        <flink.version>1.13.0</flink.version>
        <java.version>1.8</java.version>
        <scala.binary.version>2.12</scala.binary.version>
        <slf4j.version>1.7.30</slf4j.version>
        <kafka.version>2.4.1</kafka.version>
        <mysql.version>8.0.20</mysql.version>
        <zookeeper.version>3.5.7</zookeeper.version>
        <druid.version>1.1.20</druid.version>
        <lombok.version>1.18.20</lombok.version>
    </properties>
    <dependencies>
        <!-- Flink dependencies -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Logging dependencies -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-to-slf4j</artifactId>
            <version>2.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.1</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>${druid.version}</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>${lombok.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
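With the assembly plugin bound to the package phase, running mvn clean package builds a fat jar (the file under target/ ending in -jar-with-dependencies.jar) that bundles all dependencies and can be submitted to the Flink cluster.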

2. Write a POJO

package Pojo;/*
@author Serenity
@create 2022-07-31-23:23
*/

public class Person {
    public int id;
    public String name;


    public Person() {
    }

    public Person(int id, String name) {
        this.id = id;
        this.name = name;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String toString() {
        return "Person{" +
                "id=" + id +
                ", name=" + name + "}";
    }
}
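
Incidentally, since Lombok is already declared in the pom (it is also used for @Slf4j below), the same POJO could be generated with annotations. A minimal sketch, keeping the fields public because WriteMysqlSink accesses them directly:

package Pojo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data                 // generates getters, setters, equals/hashCode and toString
@NoArgsConstructor    // generates the no-argument constructor
@AllArgsConstructor   // generates Person(int id, String name)
public class Person {
    public int id;
    public String name;
}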

3. Create a producer Util class

package com.sunyb.test1.Util;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Properties;

public class ProducerUtil {
    public static KafkaProducer<String, String> createProducer(String bootstrapServers){
        Properties props = new Properties();
        // Kafka broker address
        props.put("bootstrap.servers", bootstrapServers);
        // Serializer classes for record keys and values
        props.put("key.serializer", StringSerializer.class);
        props.put("value.serializer", StringSerializer.class);
        // Note: the producer talks to the brokers directly, so no ZooKeeper address is needed
        return new KafkaProducer<>(props);
    }
}

4. Read a local file with an I/O stream and send each line to the topic with the producer

package com.sunyb.test1;/*
@author Serenity
@create 2022-07-28-21:54
*/

import com.sunyb.test1.Util.ProducerUtil;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;

public class WriteToKafka {
    public static void main(String[] args){
        int i = 0;
        boolean isFlag = true;
        while(isFlag) {
            try {
                // Re-send the file once per second, four times in total
                TimeUnit.SECONDS.sleep(1);
                writeToKafka();
                i++;
                if ( i >= 4 ){
                    isFlag = false;
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void writeToKafka() {
        // Read the local file test.txt
        String srcFile = "D:\\test.txt";
        File file = new File(srcFile);
        // try-with-resources closes both the reader and the producer when done
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
             // Create a producer
             KafkaProducer<String, String> producer = ProducerUtil.createProducer("hadoop102:9092")) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                // Send each line to the "test" topic
                producer.send(new ProducerRecord<>("test", line));
                System.out.println(line);
            }
            // Flush so the records are sent out immediately
            producer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
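
The contents of test.txt are not shown here; based on the split("\\|\\|") in the map function of step 5, each line is assumed to hold an id and a name separated by ||, for example (made-up sample data):

11||Tom
22||Jerry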

5. Consume the producer's data with FlinkKafkaConsumer

package com.sunyb.test1;/*
@author Serenity
@create 2022-07-29-16:24
*/

import Pojo.Person;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import java.util.LinkedList;
import java.util.Properties;

@Slf4j
public class FlinkToKafka {
    // Consumer-side settings
    private static final Properties prop = new Properties();
    private static final String BOOTSTRAP = "hadoop102:9092";

    static {
        prop.put("bootstrap.servers", BOOTSTRAP);
        // Consumer group id used by the Flink Kafka connector (the group name itself is arbitrary);
        // the new consumer API needs no ZooKeeper address
        prop.put("group.id", "flink-test-group");
        prop.put("key.deserializer", StringDeserializer.class);
        prop.put("value.deserializer", StringDeserializer.class);
    }

    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
        env.setParallelism(1);

        // Topics to subscribe to
        LinkedList<String> topics = new LinkedList<>();
        topics.add("test");

        // Create the Flink Kafka consumer
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(topics, new SimpleStringSchema(), prop);

        consumer.setStartFromLatest();

        DataStream<String> stream = env.addSource(consumer);
        env.enableCheckpointing(5000);

        // Print the messages received from Kafka
        log.info("messages received from kafka");
        stream.print("received from kafka");

        // A map operator that transforms each record of the stream one-to-one
        SingleOutputStreamOperator<Person> energyMySQL = stream.map(new MyMapFunction());
        energyMySQL.print();
        // Connect to MySQL and run the corresponding SQL through the custom sink
        log.info("opening the database connection");
        energyMySQL.addSink(new WriteMysqlSink());

        try {
            env.execute("flink parsing to mysql job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // Parsing / cleansing function
    private static class MyMapFunction implements MapFunction<String, Person> {
        @Override
        public Person map(String value) throws Exception {
            String[] splits = value.split("\\|\\|");
            if (splits[0].equals("11")){
                splits[0] = "111";
            }else if (splits[0].equals("22")){
                splits[0] = "222";
            }
            Person person = new Person(Integer.valueOf(splits[0]), splits[1]);
            return person;
        }
    }
}

6. Write a SQL connection Util class and a custom sink

package com.sunyb.test1.Util;/*
@author Serenity
@create 2022-07-31-21:49
*/

import com.alibaba.druid.pool.DruidDataSource;
import java.sql.Connection;

public class DatabasesUtil {
    // Create the Druid connection pool once and reuse it for every connection request
    private static final DruidDataSource dataSource = new DruidDataSource();

    static {
//        dataSource.setDriverClassName("com.mysql.jdbc.Driver"); // deprecated driver class
        dataSource.setDriverClassName("com.mysql.cj.jdbc.Driver");
//        dataSource.setUrl("jdbc:mysql://hadoop102:3306/test?useUnicode=true&characterEncoding=utf-8");
        dataSource.setUrl("jdbc:mysql://localhost:3306/test?serverTimezone=GMT");
        dataSource.setUsername("root");
        dataSource.setPassword("your_password");
    }

    public static Connection getConnection() throws Exception {
        return dataSource.getConnection();
    }
}

package com.sunyb.test1;/*
@author Serenity
@create 2022-07-31-21:56
*/

import Pojo.Person;
import com.sunyb.test1.Util.DatabasesUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
import java.sql.PreparedStatement;

@Slf4j
public class WriteMysqlSink extends RichSinkFunction<Person> {
    private Connection connection = null;
    private PreparedStatement ps = null;

    @Override
    public void open(Configuration parameters) throws Exception {
        log.info("opening the database connection");
        // Open the database connection once when the sink is initialized
        connection = DatabasesUtil.getConnection();
        // Disable auto-commit so each record is committed explicitly in invoke()
        connection.setAutoCommit(false);
        // Prepare the INSERT statement once and reuse it for every record
        ps = connection.prepareStatement("INSERT INTO employee(id, name) VALUES (?, ?)");
    }

    @Override
    public void invoke(Person value, Context ctx) throws Exception {
        // Write the incoming record to MySQL
        if (value == null){
            log.info("received an empty record, skipping");
            return;
        }
        ps.setInt(1, value.id);
        ps.setString(2, value.name);
        ps.execute();
        connection.commit();

        log.info("record written to MySQL");
    }

    @Override
    public void close() throws Exception {
        // Release resources; from creation to teardown this runs exactly once
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
    }
}
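
As a side note, since flink-connector-jdbc is already declared in the pom, the hand-written sink could also be replaced by Flink's built-in JdbcSink. A minimal sketch, reusing the connection settings from DatabasesUtil (the password placeholder and batch size are assumptions):

import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;

// In FlinkToKafka.main(), instead of energyMySQL.addSink(new WriteMysqlSink()):
energyMySQL.addSink(JdbcSink.sink(
        "INSERT INTO employee(id, name) VALUES (?, ?)",
        (ps, person) -> {                      // fill the prepared statement for each Person
            ps.setInt(1, person.id);
            ps.setString(2, person.name);
        },
        JdbcExecutionOptions.builder()
                .withBatchSize(1)              // flush every record, matching the custom sink's behavior
                .build(),
        new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .withUrl("jdbc:mysql://localhost:3306/test?serverTimezone=GMT")
                .withDriverName("com.mysql.cj.jdbc.Driver")
                .withUsername("root")
                .withPassword("your_password") // placeholder, same as DatabasesUtil
                .build()));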

Problems encountered while practicing:
1. Previously I modified the data inside the Kafka producer before sending it, but it is faster to leave the computation to Flink. Now the producer just sends the raw lines as Strings, and all of the data processing is handled by Flink.

2. The custom sink extends the RichSinkFunction abstract class. open() mainly sets up the database connection and must disable auto-commit; if auto-commit is left on, the job gets stuck on a single result. invoke() writes each incoming record to the database, and close() releases the resources.

3. The name field in MySQL on the cluster displayed garbled characters, and switching to UTF-8 did not fix it. Solution: check the Linux system's default locale (use zh_CN.utf8), check IDEA's default charset, and check the database's character set.
In the end my actual problem was that the character set was wrong when the database was created, so I dropped the database and tables and recreated them:

CREATE DATABASE test DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;

DROP TABLE IF EXISTS employee;
CREATE TABLE IF NOT EXISTS employee(
id INT,
`name` VARCHAR(100)
)ENGINE=INNODB DEFAULT CHARSET=utf8;

SELECT * FROM employee;

TRUNCATE TABLE employee;

4. Error when connecting to the database:
java.sql.SQLException: The server time zone value '中国标准时间'
is unrecognized or represents more than one time zone. You
must configure either the server or JDBC driver (via the
'serverTimezone' configuration property) to use a more
specific time zone value if you want to utilize time zone
support.

Cause: the time zone configured in the database differs from the system time zone. Appending ?serverTimezone=GMT to the JDBC URL fixes it.
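
Combining this with the useUnicode/characterEncoding parameters from the commented-out URL in step 6 gives a URL like the following (both fixes applied; host and database name as used above):

jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf-8&serverTimezone=GMT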
