准备适用于clickhouse的reader(rdbmsreader)
1、把clickhousewriter/libs下的所有jar包复制到rdbmsreader/libs下,同名jar包直接替换
3、删掉rdbmsreader/libs/guava-r05.jar这个包(版本过旧,会与clickhouse驱动依赖的新版guava冲突)
4、在rdbmsreader/plugin.json文件的"drivers"数组中添加"ru.yandex.clickhouse.ClickHouseDriver"
4、使用rdbmsreader进行数据读取
注:RDBMS Reader是一个通用的关系数据库读插件,可以通过添加、注册数据库驱动等方式增加各种关系型数据库的读支持。
整理datax的json脚本(注意:starrockswriter的jdbcUrl填写FE的query_port,默认9030;loadUrl填写FE的http_port,默认8030)
{
  "job": {
    "setting": {
      "speed": {
        "channel": 3
      },
      "errorLimit": {
        "record": 0,
        "percentage": 0
      }
    },
    "content": [
      {
        "reader": {
          "name": "rdbmsreader",
          "parameter": {
            "username": "**",
            "password": "**",
            "column": ["**"],
            "connection": [
              {
                "jdbcUrl": ["jdbc:clickhouse://***.**.**.**:8123/data_backup"],
                "table": ["**"]
              }
            ],
            "where": "id>=${start} and id<${end}"
          }
        },
        "writer": {
          "name": "starrockswriter",
          "parameter": {
            "username": "**",
            "password": "**",
            "database": "mth_beacon",
            "table": "beacon_request_meta_data_parse_result_bak3",
            "column": ["**"],
            "preSql": [],
            "postSql": [],
            "jdbcUrl": "jdbc:mysql://***.**.***.***:9030",
            "loadUrl": [
              "***.**.***.***:8030",
              "***.**.***.***:8030",
              "***.**.***.***:8030"
            ],
            "loadProps": {
              "column_separator": "\\x03",
              "row_delimiter": "\\x02"
            }
          }
        }
      }
    ]
  }
}
调用json脚本的shell脚本
#!/bin/bash
# Copy rows from ClickHouse to StarRocks with DataX, one id range per run.
#
# Each iteration runs the ck2strs.json job for ids in [i, i + pace) and passes
# the bounds as -Dstart/-Dend, which fill ${start}/${end} in the job's "where"
# clause. The output of every run is appended to a log file named after the
# start of its range.
#
# Only POSIX constructs are used (no brace expansion), so the script behaves
# the same when it is started as `sh ck2strs.sh` on a system where sh is not
# bash.

pace=5000000          # width of one id range
last_start=600000000  # start of the final range (inclusive)
datax_py=/bigdata/datax/datax/bin/datax.py
job_json=/bigdata/datax/datax/job/ck2strs.json
log_dir=/bigdata/datax/datax/log/ck2strs

# The per-range redirection below fails if the directory does not exist.
mkdir -p "$log_dir" || exit 1

failed=0
i=0
while [ "$i" -le "$last_start" ]; do
  end=$((i + pace))
  # Capture stderr too, so error output lands next to the job output.
  if ! python "$datax_py" --jvm="-Xms1G -Xmx1G" -p"-Dstart=$i -Dend=$end" \
    "$job_json" >>"$log_dir/${i}.log" 2>&1; then
    # Keep going so one bad range does not block the rest, but report it
    # instead of silently leaving a gap in the target table.
    printf 'datax failed for id range [%s, %s), see %s\n' \
      "$i" "$end" "$log_dir/${i}.log" >&2
    failed=$((failed + 1))
  fi
  i=$end
done

# Surface failures to the caller through the exit status.
if [ "$failed" -gt 0 ]; then
  printf '%s id range(s) failed\n' "$failed" >&2
  exit 1
fi
调用shell脚本
# Run with bash to match the script's #!/bin/bash shebang: naming an
# interpreter on the command line overrides the shebang, and sh is not bash on
# every system. stdout and stderr of the whole run go to a.log.
nohup bash ck2strs.sh >a.log 2>&1 &