环境准备
集群情况
10.40.10.246 dbtest1
10.40.10.247 dbtest2
10.40.10.248 dbtest3
配置mysql
修改my.cnf,加入如下配置:
[mysqld]
server_id=1
log-bin=master
binlog_format=row
重启mysql: service mysql restart
修改数据库,支持maxwell
mysql> set global binlog_format=ROW;
mysql> set global binlog_row_image=FULL;
mysql> GRANT ALL on maxwell.* to 'maxwell'@'%' identified by 'XXXXXX';
mysql> GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE on *.* to 'maxwell'@'%';
# or for running maxwell locally:
mysql> GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE on *.* to 'maxwell'@'localhost' identified by 'XXXXXX';
mysql> GRANT ALL on maxwell.* to 'maxwell'@'localhost';
安装并启动maxwell
wget https://github.com/zendesk/maxwell/releases/download/v1.17.1/maxwell-1.17.1.tar.gz
tar -zxvf maxwell-1.17.1.tar.gz
cd maxwell-1.17.1
bin/maxwell --user='maxwell' --password='XXXXXX' --host='127.0.0.1' --producer=stdout
安装zookeeper和kafka
zookeeper
https://blog.csdn.net/yuandiyzy1987/article/details/81564267
kafka
https://blog.csdn.net/yuandiyzy1987/article/details/81564292
安装flume
flume
https://blog.csdn.net/yuandiyzy1987/article/details/81564322
启动
启动maxwell
启动maxwell将mysql实时数据写入kafka
- 创建kafka topic
bin/kafka-topics.sh --create --zookeeper dbtest1:2181,dbtest2:2181,dbtest3:2181/kafka --replication-factor 3 --partitions 5 --topic my-replicated-topic5
- 查询topic信息
bin/kafka-topics.sh --describe --zookeeper dbtest1:2181,dbtest2:2181,dbtest3:2181/kafka --topic my-replicated-topic5
- 启动maxwell
bin/maxwell --user='maxwell' --password='XXXXXX' --host='127.0.0.1' \
--producer=kafka --kafka.bootstrap.servers=localhost:9092 --kafka_topic=my-replicated-topic5
验证mysql实时数据写入kafka
在另一个终端,启动Consumer,并订阅我们上面创建的名称为my-replicated-topic5的Topic中生产的消息,执行如下脚本:
bin/kafka-console-consumer.sh --zookeeper dbtest1:2181,dbtest2:2181,dbtest3:2181/kafka --from-beginning --topic my-replicated-topic5
往mysql中写入数据,查看Consumer是否有数据输出。
关闭Consumer终端
启动flume
通过flume实时消费kafka中的数据,并写入HDFS
bin/flume-ng agent --conf conf --conf-file conf/flume-agent.properties --name a1 -Dflume.root.logger=INFO,console
conf/flume-agent.properties 的配置如下:
#定义agent名, source、channel、sink的名称
agent.sources = r1
agent.channels = c1
agent.sinks = k1
#具体定义source
# 定义消息源类型
agent.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
# 定义kafka所在zk的地址
agent.sources.r1.zookeeperConnect = dbtest1:2181,dbtest2:2181,dbtest3:2181/kafka
agent.sources.r1.kafka.bootstrap.servers = dbtest1:9092,dbtest2:9092,dbtest3:9092
agent.sources.r1.brokerList = dbtest1:9092,dbtest2:9092,dbtest3:9092
# 配置消费的kafka topic
agent.sources.r1.topic = my-replicated-topic5
#agent.sources.r1.kafka.consumer.timeout.ms = 100
# 配置消费者组的id
agent.sources.r1.kafka.consumer.group.id = flume
#自定义拦截器
#agent.sources.r1.interceptors=i1
#agent.sources.r1.interceptors.i1.type=com.hadoop.flume.FormatInterceptor$Builder
#具体定义channel
# channel类型
agent.channels.c1.type = memory
# channel存储的事件容量
agent.channels.c1.capacity = 10000
# 事务容量
agent.channels.c1.transactionCapacity = 100
#具体定义sink
agent.sinks.k1.type = hdfs
agent.sinks.k1.hdfs.path = hdfs://dbtest1:8020/test/%Y%m%d
agent.sinks.k1.hdfs.fileType = DataStream
agent.sinks.k1.hdfs.writeFormat = Text
agent.sinks.k1.hdfs.rollInterval = 3
agent.sinks.k1.hdfs.rollSize = 1024000
agent.sinks.k1.hdfs.rollCount = 0
#配置前缀和后缀
agent.sinks.k1.hdfs.fileSuffix=.data
agent.sinks.k1.hdfs.filePrefix = localhost-%Y-%m-%d
agent.sinks.k1.hdfs.useLocalTimeStamp = true
agent.sinks.k1.hdfs.idleTimeout = 60
#配置写入过程中临时文件的前缀/后缀(正确的属性名为inUsePrefix/inUseSuffix)
#agent.sinks.k1.hdfs.inUsePrefix=_
#agent.sinks.k1.hdfs.inUseSuffix=
#组装channels
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1
验证
写入mysql数据,查看HDFS下数据是否实时生成
[root@dbtest1 hadoop-2.6.0-cdh5.10.2]# bin/hdfs dfs -ls -R /
18/08/10 09:44:45 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
drwxr-xr-x - root supergroup 0 2018-08-10 09:43 /test
drwxr-xr-x - root supergroup 0 2018-08-09 18:58 /test/20180809
-rw-r--r-- 3 root supergroup 440 2018-08-09 18:58 /test/20180809/localhost-2018-08-09.1533812290352
drwxr-xr-x - root supergroup 0 2018-08-10 09:44 /test/20180810
-rw-r--r-- 3 root supergroup 440 2018-08-10 09:43 /test/20180810/localhost-2018-08-10.1533865430145
-rw-r--r-- 3 root supergroup 330 2018-08-10 09:44 /test/20180810/localhost-2018-08-10.1533865472399