DataX官网
GitHub - alibaba/DataX: DataX是阿里云DataWorks数据集成的开源版本。 https://github.com/alibaba/DataX
每日脚本（同步前一天的分区数据）
#!/bin/bash
#
# Daily DataX launcher.
#
# Computes yesterday's date (YYYY-MM-DD) and passes it to DataX as the job
# variable "day" via -p "-Dday=...", then runs the job described by test.json.
#
# NOTE(review): test.json is a relative path, so it is looked up in the
# current working directory of whoever launches this script (e.g. cron).
# Confirm the caller changes into the right directory first.

# Fail fast: without this, a failing `date` would leave do_date empty and the
# job would still be launched with "-Dday=".
set -euo pipefail

# Yesterday's date in %F (YYYY-MM-DD) form; `-d '-1 day'` is GNU date syntax.
do_date=$(date -d '-1 day' +%F)

# Guard against an empty value even when `date` exits successfully.
: "${do_date:?failed to compute the previous day}"

/home/bigdata/module/datax/bin/datax.py -p "-Dday=${do_date}" test.json
配置文件 test.json（其中的 $day 由上面脚本通过 -p "-Dday=..." 传入）
{
  "job": {
    "setting": {
      "speed": {
        "channel": 3
      }
    },
    "content": [
      {
        "reader": {
          "name": "hdfsreader",
          "parameter": {
            "path": "/user/hive/bigdata/default/表/load_date=$day/*",
            "defaultFS": "hdfs://master:8020",
            "column": [
              {
                "index": 0,
                "type": "string"
              }
            ],
            "fileType": "orc",
            "encoding": "UTF-8"
          }
        },
        "writer": {
          "name": "mysqlwriter",
          "parameter": {
            "writeMode": "insert",
            "username": "账号",
            "password": "密码",
            "column": [
              "mysql字段"
            ],
            "connection": [
              {
                "jdbcUrl": "jdbc:mysql://ip:端口/库名?useSSL=true&useUnicode=true&characterEncoding=utf8",
                "table": [
                  "mysql对应的表名"
                ]
              }
            ]
          }
        }
      }
    ]
  }
}