Environment
OS: CentOS 6.5
Canal: 1.1.4
Elasticsearch: 6.8.10
MySQL: 5.6
System settings
1. Raise the maximum number of open file handles
# Append these two lines to the end of the file
vi /etc/security/limits.conf
* hard nofile 65536
* soft nofile 65536
2. Raise the maximum number of processes/threads per user
# Append these two lines to the end of the file
vi /etc/security/limits.d/90-nproc.conf
* soft nproc 4096
root soft nproc unlimited
3. Raise the limit on the number of VMAs (virtual memory areas) a process may own
# Append this setting
vim /etc/sysctl.conf
vm.max_map_count=655360
# Reload the kernel parameters
sysctl -p
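The two limits files only take effect for new login sessions. After logging in again, a quick optional sanity check looks like this:
# Verify the new limits and kernel parameter
ulimit -n                  # expect 65536
ulimit -u                  # expect 4096 (unlimited for root)
sysctl vm.max_map_count    # expect vm.max_map_count = 655360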
4. Create the elasticsearch user
# Create the user
useradd elasticsearch
# Set a password for the user; you can skip this, but then you must switch from root to the elasticsearch user with su
passwd elasticsearch
Install Elasticsearch
# Download the package
wget https://xxxxx.aliyuncs.com/elasticsearch-6.8.10.tar.gz
tar xf elasticsearch-6.8.10.tar.gz -C /usr/local
chown -R elasticsearch.elasticsearch /usr/local/elasticsearch-6.8.10
# Switch to the elasticsearch user
su - elasticsearch
# Enter the Elasticsearch directory
cd /usr/local/elasticsearch-6.8.10
# Edit the configuration
vim config/elasticsearch.yml
# Cluster name
cluster.name: my-application
# Name of this node; the hostname must have an entry in /etc/hosts
node.name: es-node-1
# IP address to bind to; 0.0.0.0 makes the node reachable on every network interface
network.host: 0.0.0.0
# Hostnames of the cluster nodes; list every hostname if there are multiple ES nodes
discovery.zen.ping.unicast.hosts: ["es-node-1"]
# System-related settings
bootstrap.memory_lock: false
bootstrap.system_call_filter: false
# Elasticsearch needs at least 1 GB of heap by default; adjust config/jvm.options to change it
vim config/jvm.options
-Xms1g
-Xmx1g
# Start in the foreground the first time so errors show up in the console immediately
./bin/elasticsearch
# Once the foreground start succeeds, restart it as a daemon
./bin/elasticsearch -d
# Check that it is running
curl localhost:9200
{
  "name" : "jdk8",
  "cluster_name" : "my-application",
  "cluster_uuid" : "I9J3ZwFWRY2tXB7MKEDoLw",
  "version" : {
    "number" : "6.8.10",
    "build_flavor" : "default",
    "build_type" : "tar",
    "build_hash" : "537cb22",
    "build_date" : "2020-05-28T14:47:19.882936Z",
    "build_snapshot" : false,
    "lucene_version" : "7.7.3",
    "minimum_wire_compatibility_version" : "5.6.0",
    "minimum_index_compatibility_version" : "5.0.0"
  },
  "tagline" : "You Know, for Search"
}
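Optionally, also check cluster health; a healthy single-node cluster normally reports green or yellow status:
curl localhost:9200/_cluster/health?pretty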
Install Kibana
# Download the package
wget https://xxxxx.aliyuncs.com/kibana-6.8.0-linux-x86_64.tar.gz
tar xf kibana-6.8.0-linux-x86_64.tar.gz -C /usr/local
chown -R elasticsearch.elasticsearch /usr/local/kibana-6.8.0-linux-x86_64
# Switch to the elasticsearch user
su - elasticsearch
cd /usr/local/kibana-6.8.0-linux-x86_64
# Edit the configuration
vim config/kibana.yml
# IP address to bind to; 0.0.0.0 makes Kibana reachable on every network interface
server.host: "0.0.0.0"
# Elasticsearch node addresses; list every node when there is a cluster
elasticsearch.hosts: ["http://10.0.0.77:9200"]
# Start Kibana in the background
nohup ./bin/kibana &
Access Kibana
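Kibana listens on port 5601 by default, so open http://<host>:5601 in a browser. As a quick check from the shell (a sketch assuming the default port), the status API should respond:
curl -s http://localhost:5601/api/status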
Create a test database and table in MySQL
create database mytest;
use mytest;
CREATE TABLE `user` (
`id` int(10) NOT NULL,
`name` varchar(100) DEFAULT NULL,
`role_id` int(10) NOT NULL,
`c_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
`c_utime` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Create the target index in Elasticsearch
Create the index in the Kibana console:
PUT /mytest_user
{
  "mappings": {
    "_doc": {
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword"
            }
          }
        },
        "role_id": {
          "type": "long"
        },
        "c_time": {
          "type": "date"
        }
      }
    }
  }
}
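To confirm the index and its mapping were created as expected, you can read them back from the same Kibana console:
GET /mytest_user/_mapping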
Install Kafka
1. Install
Install a single-node Kafka; a minimal sketch is shown below.
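A minimal sketch of a single-node setup using Kafka's bundled ZooKeeper. The download URL and version are only examples; adjust them to your environment. The later commands assume Kafka's bin directory is on the PATH.
# Download and unpack (example version)
wget https://archive.apache.org/dist/kafka/2.2.0/kafka_2.12-2.2.0.tgz
tar xf kafka_2.12-2.2.0.tgz -C /usr/local
cd /usr/local/kafka_2.12-2.2.0
# Start the bundled ZooKeeper first, then the broker
nohup bin/zookeeper-server-start.sh config/zookeeper.properties &
nohup bin/kafka-server-start.sh config/server.properties &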
2. Create the topic that Canal will publish messages to
kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic canal-mytest-user
Install Canal-server
Canal-server poses as a MySQL slave to pull the binlog from the MySQL master and, depending on the configured server mode, stores the binlog in a message queue or in local files.
Before installing, the account Canal uses to connect to MySQL must have the privileges required to act as a MySQL slave. An existing account with those privileges can be reused; on RDS the default account already has slave privileges and can be used as-is.
# Create a MySQL account with replication privileges
CREATE USER canal IDENTIFIED BY 'canal';
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%';
-- GRANT ALL PRIVILEGES ON *.* TO 'canal'@'%' ;
FLUSH PRIVILEGES;
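As an optional check, confirm the grants took effect and that the master writes its binlog in ROW format, which Canal relies on:
SHOW GRANTS FOR 'canal'@'%';
SHOW VARIABLES LIKE 'log_bin';        -- should be ON
SHOW VARIABLES LIKE 'binlog_format';  -- should be ROW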
# Download the package
wget https://xxxxx.aliyuncs.com/canal.deployer-1.1.4.tar.gz
mkdir /usr/local/canal-server
tar xf canal.deployer-1.1.4.tar.gz -C /usr/local/canal-server
# Edit the configuration files
cd /usr/local/canal-server
# Edit the main Canal configuration file
vim conf/canal.properties
# Canal supports three server modes: tcp, kafka, and RocketMQ; here we use kafka
canal.serverMode = kafka
# MQ broker addresses
canal.mq.servers = 10.0.0.77:9092,10.0.0.77:9093
# Edit the instance configuration
vim conf/example/instance.properties
# IP and port of the MySQL master
canal.instance.master.address=10.0.0.99:3306
# Topic the messages are published to
canal.mq.topic=canal-mytest-user
# On first start Canal pulls from position 0 by default, but binlogs are purged periodically, so that position may no longer exist and the pull fails. If that happens, the setting below lets you start pulling the binlog from a given timestamp
canal.instance.master.timestamp=
# The MySQL account with replication privileges created earlier
canal.instance.dbUsername=canal
canal.instance.dbPassword=canal
# Filter for the databases and tables to watch; the expression below matches every database and table
#canal.instance.filter.regex=.*\\..*
# Here we explicitly filter for just our test database and test table
canal.instance.filter.regex=mytest.user
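For reference, the filter accepts a comma-separated list of schema.table regular expressions; the extra table name below is only an illustration:
#canal.instance.filter.regex=mytest.user,mytest.role
#canal.instance.filter.regex=mytest\\..*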
# Start Canal
./bin/startup.sh
# Check whether it started
jps |grep CanalLauncher
20457 CanalLauncher
# Check that the log output is normal
tail -500 ./logs/canal/canal.log
2020-06-05 12:37:58.427 [main] INFO com.alibaba.otter.canal.deployer.CanalController - ## start the canal server[10.0.0.77(10.0.0.77):11111]
2020-06-05 12:37:59.303 [main] INFO com.alibaba.otter.canal.deployer.CanalStarter - ## the canal server is running now ......
tail -500 ./logs/example/example.log
2020-06-07 00:51:22.452 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties]
2020-06-07 00:51:22.455 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [example/instance.properties]
2020-06-07 00:51:22.587 [main] WARN o.s.beans.GenericTypeAwarePropertyDescriptor - Invalid JavaBean property 'connectionCharset' being accessed! Ambiguous write methods found next to actually used [public void com.alibaba.otter.canal.parse.inbound.mysql.AbstractMysqlEventParser.setConnectionCharset(java.nio.charset.Charset)]: [public void com.alibaba.otter.canal.parse.inbound.mysql.AbstractMysqlEventParser.setConnectionCharset(java.lang.String)]
2020-06-07 00:51:22.614 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties]
2020-06-07 00:51:22.615 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [example/instance.properties]
2020-06-07 00:51:22.920 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start CannalInstance for 1-example
2020-06-07 00:51:22.926 [main] WARN c.a.o.canal.parse.inbound.mysql.dbsync.LogEventConvert - --> init table filter : ^.*\..*$
2020-06-07 00:51:22.927 [main] WARN c.a.o.canal.parse.inbound.mysql.dbsync.LogEventConvert - --> init table black filter :
2020-06-07 00:51:23.021 [destination = example , address = /10.0.0.99:3306 , EventParser] WARN c.a.o.c.p.inbound.mysql.rds.RdsBinlogEventParserProxy - ---> begin to find start position, it will be long time for reset or first position
2020-06-07 00:51:23.079 [main] INFO c.a.otter.canal.instance.core.AbstractCanalInstance - start successful....
2020-06-07 00:51:23.128 [destination = example , address = /10.0.0.99:3306 , EventParser] WARN c.a.o.c.p.inbound.mysql.rds.RdsBinlogEventParserProxy - prepare to find start position just last position
Verify that Canal-server delivers binlog messages to Kafka
1. Start a consumer listening on the canal-mytest-user topic
# Consume the latest messages on the topic
kafka-console-consumer.sh --bootstrap-server 10.0.0.77:9092 --topic canal-mytest-user
2. Insert test data
INSERT INTO `mytest`.`user` (`id`, `name`, `role_id`, `c_time`, `c_utime`) VALUES ('1', '1', '2', '2020-06-07 21:14:52', '2020-06-07 21:14:52');
3. Verification succeeded: the incremental data arrived in the topic
[root@jdk8 ~]# kafka-console-consumer.sh --bootstrap-server 10.0.0.77:9092 --topic canal-mytest-user
{"data":[{"id":"1","name":"1","role_id":"2","c_time":"2020-06-07 21:14:52","c_utime":"2020-06-07 21:14:52"}],"database":"mytest","es":1591535692000,"id":2,"isDdl":false,"mysqlType":{"id":"int(10)","name":"varchar(100)","role_id":"int(10)","c_time":"timestamp","c_utime":"timestamp"},"old":null,"pkNames":["id"],"sql":"","sqlType":{"id":4,"name":12,"role_id":4,"c_time":93,"c_utime":93},"table":"user","ts":1591535693361,"type":"INSERT"}
Install Canal-adapter
Canal-adapter consumes the binlog messages and, according to the configured mappings, parses them and syncs the data to HBase, a relational database, or Elasticsearch.
# Download the package
wget https://xxxx.aliyuncs.com/canal.adapter-1.1.4.tar.gz
# Unpack
mkdir /usr/local/canal-adapter
tar xf canal.adapter-1.1.4.tar.gz -C /usr/local/canal-adapter
cd /usr/local/canal-adapter
# Edit the main configuration file
vim conf/application.yml
server:
  port: 8081
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8
    default-property-inclusion: non_null
canal.conf:
  mode: kafka # kafka rocketMQ
  canalServerHost: 127.0.0.1:11111
#  zookeeperHosts: slave1:2181
  mqServers: 10.0.0.77:9092 #or rocketmq
#  flatMessage: true
  batchSize: 500
  syncBatchSize: 1000
  retries: -1 # retry count; -1 means Integer.MAX_VALUE
  timeout:
  accessKey:
  secretKey:
  srcDataSources:
    defaultDS:
      url: jdbc:mysql://10.0.0.99:3306/mytest?useUnicode=true
      username: root
      password: 123456
  canalAdapters:
  - instance: canal-mytest-user # canal instance Name or mq topic name
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
#      - name: rdb
#        key: mysql1
#        properties:
#          jdbc.driverClassName: com.mysql.jdbc.Driver
#          jdbc.url: jdbc:mysql://127.0.0.1:3306/mytest2?useUnicode=true
#          jdbc.username: root
#          jdbc.password: 121212
#      - name: rdb
#        key: oracle1
#        properties:
#          jdbc.driverClassName: oracle.jdbc.OracleDriver
#          jdbc.url: jdbc:oracle:thin:@localhost:49161:XE
#          jdbc.username: mytest
#          jdbc.password: m121212
#      - name: rdb
#        key: postgres1
#        properties:
#          jdbc.driverClassName: org.postgresql.Driver
#          jdbc.url: jdbc:postgresql://localhost:5432/postgres
#          jdbc.username: postgres
#          jdbc.password: 121212
#        threads: 1
#        commitSize: 3000
#      - name: hbase
#        properties:
#          hbase.zookeeper.quorum: 127.0.0.1
#          hbase.zookeeper.property.clientPort: 2181
#          zookeeper.znode.parent: /hbase
      - name: es
        hosts: 10.0.0.77:9300
        properties:
          # mode: transport
          # # security.auth: test:123456 # only used for rest mode
          cluster.name: my-application
# Edit the ES sync (mapping) configuration file
vim conf/es/mytest_user.yml
dataSourceKey: defaultDS
destination: canal-mytest-user
groupId: g1
esMapping:
  _index: mytest_user
  _type: _doc
  _id: _id
  upsert: true
#  pk: id
  # SQL executed for a full (ETL) import
  sql: "select a.id as _id, a.name, a.role_id, a.c_time from user a"
#  objFields:
#    _labels: array:;
  # Condition appended to the ETL SQL; {} is replaced with the parameter passed to the ETL endpoint
  etlCondition: "where a.c_time<={}"
  commitBatch: 3000
# Start the adapter
./bin/startup.sh
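To confirm the adapter came up cleanly, tail its log; the path below assumes the default layout of the 1.1.4 adapter package:
tail -f logs/adapter/adapter.log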
Test a full (ETL) import
[root@jdk8 ~]# curl --request POST \
> --url http://localhost:8081/etl/es/mytest_user.yml \
> --form 'params=2020-06-07 10:56:51'
{"succeeded":true,"resultMessage":"导入ES 数据:1 条"}
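You can also confirm from the Kibana console that the imported document actually landed in the index with a quick count check:
GET mytest_user/_count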
Test incremental sync
1. Insert a new row into the table
INSERT INTO `mytest`.`user` (`id`, `name`, `role_id`, `c_time`, `c_utime`) VALUES ('3', 'third', '4', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP);
2. Check in Kibana whether the incremental data has arrived in ES
# Run a search
GET mytest_user/_search
{
  "query": {
    "match_all": {}
  }
}
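To look up the row inserted above by name, a term query on the keyword sub-field also works (a sketch, assuming the row with name 'third' has been synced):
GET mytest_user/_search
{
  "query": {
    "term": {
      "name.keyword": "third"
    }
  }
}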