官方文档地址:https://github.com/alibaba/DataX (GitHub - alibaba/DataX:DataX 是阿里云 DataWorks 数据集成的开源版本。)
安装
-
直接下载DataX工具包:DataX下载地址(下载链接见 DataX GitHub 仓库 README 的 Quick Start 部分)
下载后解压至本地某个目录,进入bin目录,即可运行同步作业:
$ cd {YOUR_DATAX_HOME}/bin
$ python datax.py {YOUR_JOB.json}
自检脚本(任意目录下执行): python {YOUR_DATAX_HOME}/bin/datax.py {YOUR_DATAX_HOME}/job/job.json
如果当前已在 bin 目录下,可以使用相对路径: python datax.py ../job/job.json
-
读取Mysql
{
  "job": {
    "setting": {
      "speed": {
        "channel": 3
      },
      "errorLimit": {
        "record": 0,
        "percentage": 0.02
      }
    },
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "root",
            "password": "123456",
            "column": ["id", "name", "age", "gender", "clazz", "last_mod"],
            "splitPk": "age",
            "connection": [
              {
                "table": ["student"],
                "jdbcUrl": ["jdbc:mysql://master:3306/student"]
              }
            ]
          }
        },
        "writer": {
          "name": "streamwriter",
          "parameter": {
            "print": true
          }
        }
      }
    ]
  }
}
MysqlToHDFS
-
{
  "job": {
    "setting": {
      "speed": {
        "channel": 3
      },
      "errorLimit": {
        "record": 0,
        "percentage": 0.02
      }
    },
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "root",
            "password": "123456",
            "column": ["id", "name", "age", "gender", "clazz", "last_mod"],
            "splitPk": "age",
            "connection": [
              {
                "table": ["student"],
                "jdbcUrl": ["jdbc:mysql://master:3306/bigdata22"]
              }
            ]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://master:9000",
            "fileType": "text",
            "path": "/datax/stu01",
            "fileName": "stu_data",
            "column": [
              { "name": "id", "type": "BIGINT" },
              { "name": "name", "type": "STRING" },
              { "name": "age", "type": "BIGINT" },
              { "name": "gender", "type": "STRING" },
              { "name": "clazz", "type": "STRING" },
              { "name": "last_mod", "type": "STRING" }
            ],
            "writeMode": "append",
            "fieldDelimiter": "\t"
          }
        }
      }
    ]
  }
}
HbasetoMysql
-
注意:hbase11xreader 的 column 中,行键对应列的 name 必须写为 rowkey,其余列使用"列族:列名"的格式(如 cf1:name)。
{
  "job": {
    "setting": {
      "speed": {
        "channel": 1
      }
    },
    "content": [
      {
        "reader": {
          "name": "hbase11xreader",
          "parameter": {
            "hbaseConfig": {
              "hbase.zookeeper.quorum": "master:2181,node1:2181,node2:2181"
            },
            "table": "stu",
            "encoding": "utf-8",
            "mode": "normal",
            "column": [
              { "name": "rowkey", "type": "string" },
              { "name": "cf1:name", "type": "string" },
              { "name": "cf1:age", "type": "string" },
              { "name": "cf1:gender", "type": "string" },
              { "name": "cf1:clazz", "type": "string" }
            ],
            "range": {
              "startRowkey": "",
              "endRowkey": "",
              "isBinaryRowkey": false
            }
          }
        },
        "writer": {
          "name": "mysqlwriter",
          "parameter": {
            "writeMode": "insert",
            "username": "root",
            "password": "123456",
            "column": ["id", "name", "age", "gender", "clazz"],
            "preSql": ["truncate student"],
            "connection": [
              {
                "jdbcUrl": "jdbc:mysql://master:3306/ETL?useUnicode=true&characterEncoding=gbk",
                "table": ["student"]
              }
            ]
          }
        }
      }
    ]
  }
}
其他读写插件的配置,请参考官方文档中的示例仿照编写。