使用 DataX 将 MySQL 数据同步到 Elasticsearch(基于 elasticsearchwriter 插件)


一、DataX介绍

DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL、Oracle、SqlServer、PostgreSQL、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、DRDS 等各种异构数据源之间高效的数据同步功能。
项目地址: https://github.com/alibaba/DataX

二、使用步骤

1.datax job

job.json文件如下(示例):

{
    "job": {
        "setting": {
            "speed": {
                 "channel": 3
            },
            "errorLimit": {
                "record": 0,
                "percentage": 0.02
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "123456",
                        "column": ["*"],
                        "splitPk": "id",
                        "connection": [
                            {
                                "table": [
                                    "student"
                                ],
                                "jdbcUrl": [
     "jdbc:mysql://localhost:3306/graduation?useUnicode=true&characterEncoding=UTF-8&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=Asia/Shanghai"
                                ]
                            }
                        ]
                    }
                },
               "writer": {
				  "name": "elasticsearchwriter",
				  "parameter": {
					"endpoint": "http://localhost:9200",
					"accessId": "elastic",
					"accessKey": "123456",
					"index": "graduation",
					"type": "_doc",
					"settings": {"index" :{"number_of_shards": 1, "number_of_replicas": 0}},
					"cleanup": false,
					"discovery": false,
					"batchSize": 1000,
					"column": [
					  {"name": "id", "type": "id"},
					  {"name": "teacher_id","type": "long" },
					  {"name": "student_no","type": "keyword" },
					  {"name": "name", "type": "text" },
					  {"name": "birthdate", "type": "date", "format": "yyyy-MM-dd"},
					  {"name": "status", "type": "keyword" },
					  {"name": "del_flag", "type": "keyword" },
					  {"name": "create_time", "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },
					  {"name": "remark", "type": "text" }
					]
				  }
				}
            }
        ]
    }
}

2.elasticsearchwriter插件

DataX 官方文档中有 elasticsearchwriter 的介绍,但插件文件夹下却没有 elasticsearchwriter 插件,因此需要按以下步骤自行编译并安装该插件。

a.下载源码手动编译。

$ git clone git@github.com:alibaba/DataX.git

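b.修改项目根目录下的 pom.xml,在 <modules> 中只保留需要的模块,然后打包(打包命令可参考 DataX 官方 README:mvn -U clean package assembly:assembly -Dmaven.test.skip=true)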

<modules>
    <module>common</module>
    <module>core</module>
    <module>transformer</module>

    <!-- reader -->
    <module>mysqlreader</module>

    <!-- writer -->
    <module>elasticsearchwriter</module>

    <!-- common support module -->
    <module>plugin-rdbms-util</module>
    <module>plugin-unstructured-storage-util</module>
    <module>hbase20xsqlreader</module>
    <module>hbase20xsqlwriter</module>
    <module>kuduwriter</module>
</modules>

打包完成后,将生成的 elasticsearchwriter 插件文件放至 DataX 插件文件夹下的 elasticsearchwriter 目录内(即 plugin/writer/elasticsearchwriter)

c.在 elasticsearchwriter 目录下新建 plugin.json 文件

代码如下(示例):

{
    "name": "elasticsearchwriter",
    "class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ESWriter",
    "description": "xxx",
    "developer": "alibaba"
}

d.将插件依赖的 lib 包放至 elasticsearchwriter 目录下的 libs 文件夹内

插件文件目录

<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.3.2" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.1.46.sec10" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.10" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.0.13" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.0.13" level="project" />
<orderEntry type="library" name="Maven: io.searchbox:jest-common:2.4.0" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:19.0" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.6.2" level="project" />
<orderEntry type="library" name="Maven: io.searchbox:jest:2.4.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.2" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.9" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.1" level="project" />
<orderEntry type="library" name="Maven: joda-time:joda-time:2.9.7" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.13.1" level="project" />
<orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />

需要的lib包

三、附件

elasticsearchwriter插件: 下载地址.
elasticsearchwriter插件lib包: 下载地址.

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值