起因
由于业务的特点有时候数据会需要复杂的查询逻辑才能得到,由于数据库本身对于复杂查询的支持不足,那么把数据同步到能力中间件就成了常用的解决方案,而同步到Elasticsearch就是其中一种。
方案选型
我们使用的数据源是MySql,选择同步到ES,想法是通过订阅Binlog实现,减少侵入性。 解决方案其实也有一些,比如:go-mysql-elasticsearch、canal、gravity
然而这些方案通常只支持1对1建索引,也就是一张表一个索引,而数据表中的1对1和1对多关系则无法在索引中体体现。而业务中又常需要这种关系。因此综合考虑还是基于gravity做个插件。
gravity是摩拜开源的数据同步中间件,目前数据源支持:MySql和Mongo,TiDB和PostgreSQL在开发中,同步目标支持:MySQL/TiDB和Kafka,Elasticsearch还在bate阶段,并且支持6.
EsModel同步插件
项目地址:gravity 欢迎star :)。
同步策略
- 支持主表及一对一和一对多字表的同步,可以同步到一个索引结构中。
- 一对一关系支持以平铺或子对象形式同步。
- 支持ES版本:6、7
例如有四张表:student
、student_class
、student_detail
、student_parent
其中student
是学生表(主表),student_class
学生班级(一对一子表),student_detail
学生详情(一对一子表),student_parent
学生父母(一对多子表)。student_class
使用子对象形式同步,student_detail
使用平铺形式同步。
Sql脚本如下:
DROP TABLE IF EXISTS `student`;
CREATE TABLE `student` (
`id` bigint(20) NOT NULL,
`name` varchar(64) NOT NULL DEFAULT '',
`birthday` date NOT NULL DEFAULT '1970-01-01',
`high` int(11) NOT NULL DEFAULT '0',
`sex` tinyint(4) NOT NULL DEFAULT '1',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of student
-- ----------------------------
BEGIN;
INSERT INTO `student` VALUES (1, '张三', '2010-02-05', 156, 1, '2019-08-29 19:55:36');
INSERT INTO `student` VALUES (2, '李四', '2010-03-05', 176, 2, '2019-08-29 19:55:36');
INSERT INTO `student` VALUES (3, '王平', '2010-03-05', 176, 2, '2019-08-29 20:09:03');
COMMIT;
-- ----------------------------
-- Table structure for student_class
-- ----------------------------
DROP TABLE IF EXISTS `student_class`;
CREATE TABLE `student_class` (
`id` bigint(20) NOT NULL,
`student_id` bigint(20) NOT NULL DEFAULT '0',
`name` varchar(255) NOT NULL DEFAULT '',
`student_count` int(11) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of student_class
-- ----------------------------
BEGIN;
INSERT INTO `student_class` VALUES (1, 1, '一(1)班', 32);
INSERT INTO `student_class` VALUES (2, 2, '二(2)班', 12);
COMMIT;
-- ----------------------------
-- Table structure for student_detail
-- ----------------------------
DROP TABLE IF EXISTS `student_detail`;
CREATE TABLE `student_detail` (
`id` bigint(20) NOT NULL,
`student_id` bigint(20) NOT NULL DEFAULT '0',
`introduce` varchar(255) NOT NULL DEFAULT '',
`mobile` varchar(64) NOT NULL DEFAULT '',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of student_detail
-- ----------------------------
BEGIN;
INSERT INTO `student_detail` VALUES (1, 1, '张三介绍', '18888888888');
INSERT INTO `student_detail` VALUES (2, 2, '李四介绍', '13333333333');
COMMIT;
-- ----------------------------
-- Table structure for student_parent
-- ----------------------------
DROP TABLE IF EXISTS `student_parent`;
CREATE TABLE `student_parent` (
`id` bigint(20) NOT NULL,
`student_id` bigint(20) NOT NULL DEFAULT '0',
`name` varchar(255) NOT NULL DEFAULT '',
`birthday` date NOT NULL DEFAULT '1970-01-01',
`sex` tinyint(11) NOT NULL DEFAULT '1',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of student_parent
-- ----------------------------
BEGIN;
INSERT INTO `student_parent` VALUES (1, 1, '张三父亲', '1980-02-02', 1, '2019-08-29 20:00:58');
INSERT INTO `student_parent` VALUES (2, 1, '张三母亲', '1982-07-07', 2, '2019-08-29 20:00:58');
INSERT INTO `student_parent` VALUES (3, 2, '李四父亲', '1979-03-03', 1, '2019-08-29 20:00:58');
INSERT INTO `student_parent` VALUES (4, 2, '李四母亲', '1981-06-06', 2, '2019-08-29 20:00:58');
COMMIT;
同步配置如下:
# name 必填
name = "mysql2esmodelDemo"
# 内部用于保存位点、心跳等事项的库名,默认为 _gravity
internal-db-name = "_gravity"
#
# Input 插件的定义,此处定义使用 mysql
#
[input]
type = "mysql"
mode = "replication"
[input.config.source]
host = "192.168.1.148"
username = "root"
password = "mysqldev"
port = 3306
max-idle = 10
max-open = 10
[output]
type = "esmodel"
[output.config]
# 忽略 400(bad request)返回
# 当索引名不规范、解析错误时,Elasticsearch 会返回 400 错误
# 默认为 false,即遇到失败时会抛出异常,必须人工处理。设置为 true 时会忽略这些请求
ignore-bad-request = true
#
# 目标端 Elasticsearch 配置
# - 必选
#
[output.config.server]
# 连接的 Elasticsearch 地址,必选
urls = ["http://192.168.1.152:9200"]
# 是否进行节点嗅探,默认为 false
sniff = false
# 超时时间,默认为 1000ms
timeout = 500
#
# 目标端鉴权配置
# - 可选
#
[output.config.server.auth]
username = ""
password = ""
[[output.config.routes]]
match-schema = "test"
# 主表
match-table = "student"
#索引名
index-name="student_index"
#类型名,es7该项无效
type-name="student"
#分片数
shards-num=1
#副本数
replicas-num=0
#失败重试次数
retry-count=3
#包含的列,默认全部
include-column = []
#排除的列,默认没有
exclude-column = []
# 列名转义策略
[output.config.routes.convert-column]
name = "studentName"
[[output.config.routes.one-one]]
match-schema = "test"
match-table = "student_detail"
#外键列
fk-column = "student_id"
#包含的列,默认全部
include-column = []
#排除的列,默认没有
exclude-column = []
# 模式,1:子对象,2索引平铺
mode = 2
# 属性对象名,模式为1时有效,默认为源表名驼峰结构
property-name = "studentDetail"
# 属性前缀,模式为2时有效,默认为源表名驼峰结构
property-pre = "sd_"
[output.config.routes.one-one.convert-column]
introduce = "introduceInfo"
[[output.config.routes.one-one]]
match-schema = "test"
match-table = "student_class"
#外键列
fk-column = "student_id"
#包含的列,默认全部
include-column = []
#排除的列,默认没有
exclude-column = []
# 模式,1:子对象,2索引平铺
mode = 1
# 属性对象名,模式为1时有效,默认为源表名驼峰结构
property-name = "studentClass"
# 属性前缀,模式为2时有效,默认为源表名驼峰结构
property-pre = "sc_"
[output.config.routes.one-one.convert-column]
name = "className"
[[output.config.routes.one-more]]
match-schema = "test"
match-table = "student_parent"
#外键列
fk-column = "student_id"
#包含的列,默认全部
include-column = []
#排除的列,默认没有
exclude-column = []
# 属性对象名,默认为源表名驼峰结构
property-name = "studentParent"
[output.config.routes.one-more.convert-column]
name = "parentName"
到项目根目录make
编译,执行同步命令
./bin/gravity -config ./docs/2.0/example-mysql2esmodel.toml
同步后索引结构为:
{
"state": "open",
"settings": {
"index": {
"creation_date": "1567160065596",
"number_of_shards": "1",
"number_of_replicas": "0",
"uuid": "noe_V-RdTr6QaFDy4fPRjA",
"version": {
"created": "7030199"
},
"provided_name": "student_index"
}
},
"mappings": {
"_doc": {
"properties": {
"birthday": {
"type": "date"
},
"studentParent": {
"type": "nested",
"properties": {
"birthday": {
"type": "date"
},
"parentName": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"create_time": {
"type": "date"
},
"sex": {
"type": "long"
},
"student_id": {
"type": "long"
},
"id": {
"type": "long"
}
}
},
"high": {
"type": "long"
},
"create_time": {
"type": "date"
},
"sex": {
"type": "long"
},
"studentName": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"sd_student_id": {
"type": "long"
},
"introduceInfo": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"sd_id": {
"type": "long"
},
"id": {
"type": "long"
},
"sd_mobile": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"studentClass": {
"properties": {
"sc_id": {
"type": "long"
},
"className": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"sc_student_count": {
"type": "long"
},
"sc_student_id": {
"type": "long"
}
}
}
}
}
}
}
数据样例为:
{
"_index": "student_index",
"_type": "_doc",
"_id": "2",
"_version": 5,
"_score": 1,
"_source": {
"studentClass": {
"className": "二(2)班",
"sc_id": 2,
"sc_student_count": 12,
"sc_student_id": 2
},
"sd_student_id": 2,
"introduceInfo": "李四介绍",
"sd_id": 2,
"sd_mobile": "13333333333",
"studentParent": [
{
"birthday": "1981-06-06T00:00:00+08:00",
"parentName": "李四母亲",
"create_time": "2019-08-29T20:00:58+08:00",
"sex": 2,
"student_id": 2,
"id": 4
},
{
"birthday": "1979-03-03T00:00:00+08:00",
"parentName": "李四父亲",
"create_time": "2019-08-29T20:00:58+08:00",
"sex": 1,
"student_id": 2,
"id": 3
}
],
"birthday": "2010-03-05T00:00:00+08:00",
"high": 176,
"create_time": "2019-08-29T19:55:36+08:00",
"sex": 2,
"studentName": "李四",
"id": 2
}
}