1.自定义MysqlSink类
package flume;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
public class MysqlSink extends AbstractSink implements Configurable {
// 获取Logger对象
Logger logger = LoggerFactory.getLogger(MysqlSink.class);
private Connection connect;
private Statement stmt;
private String columnName;
private String url;
private String user;
private String password;
private String tableName;
// 从配置文件中读取各种属性,并进行一些非空验证
public void configure(Context context) {
// 检查列名
columnName = context.getString("column_name");
Preconditions.checkNotNull(columnName, "column_name must be set!");
// 检查url
url = context.getString("url");
Preconditions.checkNotNull(url, "url must be set!");
// 检查用户名
user = context.getString("user");
Preconditions.checkNotNull(user, "user must be set!");
// 检查密码
password = context.getString("password");
Preconditions.checkNotNull(password, "password must be set!");
// 检查表名
tableName = context.getString("tableName");
Preconditions.checkNotNull(tableName, "tableName must be set!!");
}
// 在整个sink开始时执行一遍,用来初始化数据库连接
@Override
public synchronized void start() {
super.start();
try {
Class.forName("com.mysql.cj.jdbc.Driver");
} catch (ClassNotFoundException e1) {
e1.printStackTrace();
}
try {
connect = DriverManager.getConnection(url, user, password);
// URL为 jdbc:mysql//服务器地址/数据库名
stmt = connect.createStatement();
} catch (SQLException e) {
e.printStackTrace();
}
}
public Status process() throws EventDeliveryException {
//获取Channel
Channel channel = getChannel();
//从Channel获取事务以及数据
Transaction transaction = channel.getTransaction();
Event event = null;
transaction.begin();
while (true) {
//从Channel获取数据
event = channel.take();
if (event != null) {
break;
}
}
try {
String rawbody = new String(event.getBody());
String[] bodys = rawbody.split("\t");
String value = "";
for(int i = 0;i < bodys.length;i++) {
if(!bodys[i].matches("[0-9]*\\.?[0-9]+")) {
bodys[i] = "\"" + bodys[i] + "\"";
}
if(i == bodys.length - 1) {
value += bodys[i];
} else {
value += bodys[i] + ",";
}
}
if (bodys.length == columnName.split(",").length) {
String sql = "insert into " + tableName + "(" + columnName + ") values(" + value + ")";
stmt.executeUpdate(sql);
transaction.commit();
return Status.READY;
} else {
transaction.rollback();
return null;
}
} catch (Throwable t) {
transaction.rollback();
if (t instanceof Error) {
throw (Error) t;
} else {
throw new EventDeliveryException(t);
}
} finally {
transaction.close();
}
}
// 在整个sink结束时执行一遍
@Override
public synchronized void stop() {
super.stop();
}
}
将以上代码打成jar包后,上传到flume安装目录下的lib文件夹中,同时需要上传对应MySQL版本的mysql驱动jar包(如mysql-connector-java-5.1.49.jar)。
2.编写flume配置文件
在flume安装目录下的conf文件夹中编写配置文件file-flume-mysql.conf,代码如下:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /root/data/.*0
a1.sources.r1.positionFile = /usr/flume/flume-1.9.0/position/ffl_position.json
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
#Describe the sink
a1.sinks.k1.type = flume.MysqlSink
a1.sinks.k1.url = jdbc:mysql://101.34.32.156:3306/db_hadoop?useUnicode=true&characterEncoding=UTF-8
a1.sinks.k1.tableName = tb_task13
a1.sinks.k1.user = root
a1.sinks.k1.password = 9525878
a1.sinks.k1.column_name = cityName,sales
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
3.启动flume
回到flume安装目录,linux命令行使用如下命令:
flume-ng agent -c conf/ -f conf/file-flume-mysql.conf -n a1