文章目录
一、DataX介绍
DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL、Oracle、SQL Server、PostgreSQL、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、DRDS 等各种异构数据源之间高效的数据同步功能。
项目地址: https://github.com/alibaba/DataX
二、使用步骤
1.datax job
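job.json 文件如下(示例:将 MySQL 中 graduation 库的 student 表同步到 Elasticsearch 的 graduation 索引)。注意:reader 的 column 配置为 "*" 时,读取的列顺序取决于表结构,需要与 writer 中 column 的顺序一一对应;更稳妥的做法是在 reader 中显式列出列名。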
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": ["*"],
"splitPk": "id",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://localhost:3306/graduation?useUnicode=true&characterEncoding=UTF-8&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=Asia/Shanghai"
]
}
]
}
},
"writer": {
"name": "elasticsearchwriter",
"parameter": {
"endpoint": "http://localhost:9200",
"accessId": "elastic",
"accessKey": "123456",
"index": "graduation",
"type": "_doc",
"settings": {"index" :{"number_of_shards": 1, "number_of_replicas": 0}},
"cleanup": false,
"discovery": false,
"batchSize": 1000,
"column": [
{"name": "id", "type": "id"},
{"name": "teacher_id","type": "long" },
{"name": "student_no","type": "keyword" },
{"name": "name", "type": "text" },
{"name": "birthdate", "type": "date", "format": "yyyy-MM-dd"},
{"name": "status", "type": "keyword" },
{"name": "del_flag", "type": "keyword" },
{"name": "create_time", "type": "date", "format": "yyyy-MM-dd HH:mm:ss" },
{"name": "remark", "type": "text" }
]
}
}
}
]
}
}
2.elasticsearchwriter插件
DataX 官方文档中有 elasticsearchwriter 的介绍,但插件文件夹(plugin/writer)下却没有 elasticsearchwriter 插件,需要按以下步骤从源码编译后手动安装。
a.下载源码手动编译。
$ git clone git@github.com:alibaba/DataX.git
b.修改项目根目录 pom.xml 中的 <modules>,只保留需要的模块,然后再打包
<modules>
<module>common</module>
<module>core</module>
<module>transformer</module>
<!-- reader -->
<module>mysqlreader</module>
<!-- writer -->
<module>elasticsearchwriter</module>
<!-- common support module -->
<module>plugin-rdbms-util</module>
<module>plugin-unstructured-storage-util</module>
<module>hbase20xsqlreader</module>
<module>hbase20xsqlwriter</module>
<module>kuduwriter</module>
</modules>
将打包生成的 elasticsearchwriter 插件文件放至 DataX 插件文件夹下的 plugin/writer/elasticsearchwriter 目录内(目录不存在时需手动创建)。
c.在 elasticsearchwriter 目录下新建 plugin.json 文件
代码如下(示例):
{
"name": "elasticsearchwriter",
"class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ESWriter",
"description": "xxx",
"developer": "alibaba"
}
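d.将需要的 lib 包放至 elasticsearchwriter 目录下的 libs 文件夹。所需依赖如下(摘自 IDE 的依赖清单;其中 scope 为 TEST 的 junit 及其依赖 hamcrest-core 仅用于测试,运行时可不放入):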
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.3.2" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.1.46.sec10" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.10" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.0.13" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.0.13" level="project" />
<orderEntry type="library" name="Maven: io.searchbox:jest-common:2.4.0" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:19.0" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.6.2" level="project" />
<orderEntry type="library" name="Maven: io.searchbox:jest:2.4.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.2" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.9" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.1" level="project" />
<orderEntry type="library" name="Maven: joda-time:joda-time:2.9.7" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.13.1" level="project" />
<orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
三、附件
elasticsearchwriter插件: 下载地址.
elasticsearchwriter插件lib包: 下载地址.