flume 导数据从 kafka 到 mysql (二)

Flume 版本:1.8.0
Mysql 版本:8.0
Kafka 版本:1.0.1

一、创建 maven,编辑 连接 mysql 的插件

创建 maven 工程,自定义插件,供 flume 的sink 使用:

Pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <modelVersion>4.0.0</modelVersion>

<groupId>com.jz.flume</groupId>
<artifactId>flumeMysql</artifactId>
<version>1.0-SNAPSHOT</version>
   <properties>        
   <maven.compiler.target>1.8</maven.compiler.target>
    <maven.compiler.source>1.8</maven.compiler.source>
    <version.flume>1.8.0</version.flume>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.flume</groupId>
        <artifactId>flume-ng-core</artifactId>
        <version>1.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flume</groupId>
        <artifactId>flume-ng-configuration</artifactId>
        <version>1.8.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.15</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.json/json -->
    <dependency>
        <groupId>org.json</groupId>
        <artifactId>json</artifactId>
        <version>20180813</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>compile</scope>
    </dependency>

    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.61</version>
    </dependency>


    <dependency>
        <groupId>net.sf.json-lib</groupId>
        <artifactId>json-lib</artifactId>
        <version>2.4</version>
        <classifier>jdk15</classifier>
    </dependency>

</dependencies>



</project>

MysqlSink.java

package com.jz.flume;


import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.URI;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;

import static java.lang.Thread.sleep;


/**
 * Flume sink that drains JSON events from the channel and batch-inserts them
 * into a MySQL table via a parameterized INSERT statement.
 *
 * <p>All connection and mapping settings (host, port, database, column names,
 * column types, the INSERT template, and a local dump file for bad batches)
 * are supplied through the agent's .conf file — see {@link #configure}.
 */
public class MysqlSink extends AbstractSink implements Configurable {

    private static final Logger LOG = LoggerFactory.getLogger(MysqlSink.class);

    // --- settings injected from the flume agent configuration ---
    private String hostname;
    private String port;
    private String databaseName;
    private String tableName;
    private String column;       // comma-separated column names, e.g. "content,create_by"
    private String column_type;  // comma-separated column types, parallel to `column`
    private String sqlMsg;       // parameterized INSERT with one '?' per column
    private String user;
    private String password;
    private PreparedStatement preparedStatement;
    private Connection conn;
    private int batchSize;       // max events taken from the channel per process() round
    String[] columns;
    String[] columnTypes;
    private String path;         // local file that receives the events of a failed batch
    private static Configuration configuration = null;

    public MysqlSink() {
        LOG.info("MysqlSink start...");
    }

    /**
     * Reads and validates the mandatory sink properties from the agent
     * configuration. Fails fast when any of them is missing.
     *
     * @param context flume-supplied view of this sink's properties
     */
    public void configure(Context context) {
        hostname = context.getString("hostname");
        Preconditions.checkNotNull(hostname, "hostname must be set!!");
        port = context.getString("port");
        Preconditions.checkNotNull(port, "port must be set!!");
        databaseName = context.getString("databaseName");
        Preconditions.checkNotNull(databaseName, "databaseName must be set!!");
        tableName = context.getString("tableName");
        Preconditions.checkNotNull(tableName, "tableName must be set!!");
        column = context.getString("column");
        Preconditions.checkNotNull(column, "column must be set!!");
        column_type = context.getString("column_type");
        Preconditions.checkNotNull(column_type, "column_type must be set!!");
        sqlMsg = context.getString("sqlMsg");
        Preconditions.checkNotNull(sqlMsg, "sqlMsg must be set!!");
        user = context.getString("user");
        Preconditions.checkNotNull(user, "user must be set!!");
        password = context.getString("password");
        Preconditions.checkNotNull(password, "password must be set!!");
        batchSize = context.getInteger("batchSize", 1000);
        // BUGFIX: checkNotNull on a boxed boolean never fails; checkArgument
        // is what actually enforces a positive batch size.
        Preconditions.checkArgument(batchSize > 0, "batchSize must be a positive number!!");
        path = context.getString("path");
        Preconditions.checkNotNull(path, "path must be set!!");
    }

    @Override
    public void start() {
        super.start();
        columns = column.split(",");
        columnTypes = column_type.split(",");
        try {
            // Connector/J 8.x driver class; the legacy com.mysql.jdbc.Driver
            // only survives in 8.x as a deprecated redirecting shim.
            Class.forName("com.mysql.cj.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            LOG.error("MySQL JDBC driver not found on the classpath", e);
        }
        try {
            openConnection();
        } catch (SQLException e) {
            // Without a working connection the sink is useless; keep the
            // original fail-hard behaviour.
            LOG.error("Unable to open the initial MySQL connection", e);
            System.exit(1);
        }
    }

    /**
     * (Re)opens the JDBC connection and prepares the batch INSERT statement.
     * Must be called whenever {@code conn} is created, because the
     * PreparedStatement is bound to a specific connection.
     */
    private void openConnection() throws SQLException {
        String url = "jdbc:mysql://" + hostname + ":" + port + "/" + databaseName;
        conn = DriverManager.getConnection(url, user, password);
        conn.setAutoCommit(false); // commit manually, once per flume transaction
        preparedStatement = conn.prepareStatement(sqlMsg, Statement.RETURN_GENERATED_KEYS);
    }

    @Override
    public void stop() {
        super.stop();
        if (preparedStatement != null) {
            try {
                preparedStatement.close();
            } catch (SQLException e) {
                LOG.error("Error closing PreparedStatement", e);
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                LOG.error("Error closing MySQL connection", e);
            }
        }
    }

    /**
     * Takes up to {@code batchSize} events from the channel, binds each JSON
     * event onto the prepared INSERT and executes the batch inside one MySQL
     * and one flume transaction. On failure the batch is dumped to a local
     * file and the flume transaction is committed so malformed events do not
     * poison the channel.
     */
    public Status process() throws EventDeliveryException {
        Status result = Status.READY;
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        List<JSONObject> jsonTake = new ArrayList<JSONObject>();

        transaction.begin();
        try {
            if (conn == null || conn.isClosed()) {
                // BUGFIX: re-opening must also recreate the PreparedStatement
                // and restore autocommit=false — the old statement belonged to
                // the dead connection. Back off this round after reconnecting.
                openConnection();
                sleep(10);
                transaction.rollback();
                return result;
            }

            preparedStatement.clearBatch();
            for (int i = 0; i < batchSize; i++) {
                Event event = channel.take();
                if (event == null) {
                    // Channel drained; deliver what we have and back off.
                    result = Status.BACKOFF;
                    break;
                }
                String body = new String(event.getBody());
                JSONObject message = JSONObject.parseObject(body);
                jsonTake.add(message);
                bindRow(message);
                preparedStatement.addBatch();
            }

            preparedStatement.executeBatch();
            conn.commit();
            transaction.commit();

            // Echo each inserted row together with its auto-increment key,
            // matching generated keys to events by position in the batch.
            if (jsonTake.size() > 0) {
                ResultSet rs = preparedStatement.getGeneratedKeys();
                int num = -1;
                while (rs.next()) {
                    num++;
                    int id = rs.getInt(1);
                    JSONObject msg = jsonTake.get(num);
                    msg.put("id :", id);
                    System.out.println(msg);
                }
                rs.close();
            }
        } catch (Exception e) {
            // Deliberate: commit (not roll back) the flume transaction so a
            // malformed batch is dropped from the channel instead of being
            // redelivered forever; the events are preserved in a local file.
            LOG.error("Failed to deliver batch to MySQL; dumping events to " + path, e);
            transaction.commit();
            dumpBatchToFile(jsonTake);
        } finally {
            transaction.close();
        }
        return result;
    }

    /**
     * Binds one JSON event onto the prepared INSERT, column by column,
     * converting values according to the configured column_type list.
     * Absent fields (and literal "null" values) get a per-type default:
     * "" for varchar, 0 for integer/float, SQL NULL otherwise.
     */
    private void bindRow(JSONObject message) throws SQLException {
        for (int j = 0; j < columns.length; j++) {
            Object obj = message.get(columns[j]);
            boolean isNull = (obj == null) || obj.toString().equalsIgnoreCase("null");
            String type = columnTypes[j];
            if (type.equalsIgnoreCase("varchar")) {
                preparedStatement.setString(j + 1, isNull ? "" : message.getString(columns[j]));
            } else if (type.equalsIgnoreCase("integer")) {
                preparedStatement.setInt(j + 1, isNull ? 0 : message.getInteger(columns[j]));
            } else if (type.equalsIgnoreCase("float")) {
                preparedStatement.setFloat(j + 1, isNull ? 0 : message.getFloat(columns[j]));
            } else if (type.equalsIgnoreCase("date")) {
                preparedStatement.setDate(j + 1, isNull ? null : message.getSqlDate(columns[j]));
            } else {
                // Unknown types (e.g. "datetime") are passed through as strings.
                preparedStatement.setString(j + 1, isNull ? null : message.getString(columns[j]));
            }
        }
    }

    /**
     * Appends the events of a failed batch to the configured local file,
     * one JSON object per line, so no data is lost when MySQL rejects them.
     */
    private void dumpBatchToFile(List<JSONObject> jsonTake) {
        try {
            File file = new File(path);
            file.createNewFile(); // no-op if the file already exists
            try (FileWriter fw = new FileWriter(file, true)) {
                for (JSONObject msg : jsonTake) {
                    if (msg != null) {
                        fw.write(msg.toJSONString());
                        fw.write("\r\n");
                    }
                }
                fw.flush();
            }
        } catch (IOException ex) {
            LOG.error("Failed to dump failed batch to " + path, ex);
        }
    }
}

然后把 maven 工程打成 jar 包,丢到 flume 的 lib 目录;同时还要把 mysql 的连接驱动包 mysql-connector-java 也放入 lib 目录

二、flume 编辑 conf

Kafka_mysql.conf

agent1.sources = s1
agent1.sinks = mysqlSink
agent1.channels = c1


agent1.sources.s1.type = org.apache.flume.source.kafka.KafkaSource
agent1.sources.s1.channels = c1
agent1.sources.s1.batchSize = 100
agent1.sources.s1.batchDurationMillis = 1000
agent1.sources.s1.kafka.bootstrap.servers = 127.0.0.1:6667
agent1.sources.s1.kafka.topics = katest
agent1.sources.s1.kafka.consumer.group.id = ka1
agent1.sources.s1.inputCharset = GBK

agent1.sinks.mysqlSink.type=com.jz.flume.MysqlSink
#agent1.sinks.mysqlSink.type=logger
#agent1.sinks.mysqlSink.path=jdbc:mysql://127.0.0.1:6645/etltest/flume_test
agent1.sinks.mysqlSink.hostname=127.0.0.1
agent1.sinks.mysqlSink.port=6645
agent1.sinks.mysqlSink.databaseName=etltest
agent1.sinks.mysqlSink.tableName=flume_test
agent1.sinks.mysqlSink.column=content,create_by,recv_time
agent1.sinks.mysqlSink.column_type=varchar,varchar,datetime
agent1.sinks.mysqlSink.sqlMsg =insert into flume_test(content,create_by,recv_time) values (?,?,?)
agent1.sinks.mysqlSink.user=etl01
agent1.sinks.mysqlSink.password=*********
agent1.sinks.mysqlSink.channel = c1



agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
# Fixed typo: "transactionCapactiy" was silently ignored by flume,
# leaving the channel on its default transaction capacity.
agent1.channels.c1.transactionCapacity = 100
# NOTE: checkpointDir/dataDirs only apply to the *file* channel; they are
# inert on a memory channel and kept here only for easy switching.
agent1.channels.c1.checkpointDir = /usr/flume/checkpoint
agent1.channels.c1.dataDirs = /usr/flume/data
agent1.channels.c1.keep-alive = 30

Conf 文件里面的 sink 列要和 maven 工程里面 MysqlSink.java 类定义的参数相对应,sqlMsg 因为是在 conf 文件写,所以 java 代码里面不需要定义,不同的表直接写不同的 conf 文件就好

三、mysql 建表

Mysql 建表,列内容要和 conf 文件里面的 column 定义的相同

create table flume_test(id integer primary key auto_increment,content varchar(20),create_by text,recv_time datetime);

四、启动 flume

启动flume

./bin/flume-ng agent -c /data/flume/apache-flume-1.8.0-bin/conf -f /data/flume/apache-flume-1.8.0-bin/conf/kafka_mysql.conf -n agent1  -Dflume.root.logger=INFO,console

向 kafka 中传递数据,查mysql 表中的数据

在这里插入图片描述

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Flume 是一个分布式、可靠和高可用的海量日志采集、聚合和传输系统。它可以将来自多个数据源的数据采集到 Hadoop 生态系统中,其中包括 HDFS、HBase、Kafka 等。 下面是一个将本地数据 CSV 文件上传至 MySQL 的 employee 表的 Flume 配置示例: 1. 首先,创建一个名为 `csv_to_mysql.conf` 的 Flume 配置文件,内容如下: ```properties # 定义 Flume agent 名称和组件 csv_to_mysql.sources = csv_source csv_to_mysql.sinks = mysql_sink csv_to_mysql.channels = memory_channel # 配置数据源 csv_to_mysql.sources.csv_source.type = spooldir csv_to_mysql.sources.csv_source.spoolDir = /path/to/csv/files csv_to_mysql.sources.csv_source.fileHeader = true csv_to_mysql.sources.csv_source.fileSuffix = .csv csv_to_mysql.sources.csv_source.batchSize = 100 # 配置数据传输管道 csv_to_mysql.channels.memory_channel.type = memory csv_to_mysql.channels.memory_channel.capacity = 1000 csv_to_mysql.channels.memory_channel.transactionCapacity = 100 # 配置数据传输目的地 csv_to_mysql.sinks.mysql_sink.type = org.apache.flume.sink.jdbc.JDBCSink csv_to_mysql.sinks.mysql_sink.jdbc.url = jdbc:mysql://localhost:3306/your_database csv_to_mysql.sinks.mysql_sink.jdbc.user = your_username csv_to_mysql.sinks.mysql_sink.jdbc.password = your_password csv_to_mysql.sinks.mysql_sink.jdbc.driver.class = com.mysql.jdbc.Driver csv_to_mysql.sinks.mysql_sink.batchSize = 100 csv_to_mysql.sinks.mysql_sink.channel = memory_channel csv_to_mysql.sinks.mysql_sink.sql = insert into employee (id, name, age, gender) values (?, ?, ?, ?) csv_to_mysql.sinks.mysql_sink.channel = memory_channel ``` 2. 接下来,使用以下命令启动 Flume: ```bash $ bin/flume-ng agent --conf conf --conf-file csv_to_mysql.conf --name csv_to_mysql -Dflume.root.logger=INFO,console ``` 3. 最后,将 CSV 文件复制到 `/path/to/csv/files` 目录中,Flume 将自动将其上传到 MySQL 的 employee 表中。 在上传过程中,Flume 将会读取 CSV 文件中的数据,将其转换为插入语句,然后将其批量提交到 MySQL 数据库中。 注意:在执行此操作之前,请确保已经创建了名为 employee 的表,并正确配置了 MySQL 数据库连接信息。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值