工作中经常使用 DataX 同步数据,下面直接给出示例配置。
- 同步mysql 到 doris
python datax.py xxx.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"connection": [
{
"querySql": [
"SELECT xx,xx,xx from xx where xx"
],
"jdbcUrl": [
"jdbc:mysql://host:port/db"
]
}
],
"password": "pwd",
"username": "user",
"where": ""
}
},
"writer": {
"name": "doriswriter",
"parameter": {
"column": [
"xx",
"xx",
"xx",
"xx",
"xx",
"xx",
"xx"
],
"loadUrl":[
"doris_host:doris_http_port"
],
"loadProps":{
"format":"json",
"strip_outer_array":"true"
},
"username":"doris_user",
"password":"doris_pwd",
"postSql":[
],
"preSql":[
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://doris_host:doris_mysql_port/db",
"table":[
"table"
],
"selectedDatabase":"db"
}
],
"maxBatchRows":10000000,
"batchSize":536870912000,
"writeMode": "truncate"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
- 同步mongo 到 doris
python datax.py xxx.json
如需按条件过滤数据,在 mongodbreader 的 parameter 中配置查询条件,例如:"query": "{ \"col\": 'xx' }"
{
"job": {
"content": [
{
"reader": {
"name": "mongodbreader",
"parameter": {
"address": ["mongodb1:mongo_port","mongodb2:mongo_port"],
"userName": "user",
"userPassword": "pwd",
"dbName": "db",
"authDb": "admin",
"collectionName": "table",
"query": "{ \"col\": 'value' }",
"column": [
{
"name": "xx",
"type": "xx"
},
{
"name": "xx",
"type": "xx"
},
{
"name": "xx",
"type": "xx"
}
]
}
},
"writer": {
"name": "doriswriter",
"parameter": {
"column": [
"xx",
"xx",
"xx"
],
"loadUrl":[
"doris_host:doris_http_port"
],
"loadProps":{
"format":"json",
"strip_outer_array":"true"
},
"username":"doris_user",
"password":"doris_pwd",
"postSql":[
],
"preSql":[
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://doris_host:doris_mysql_port/db",
"table":[
"table"
],
"selectedDatabase":"db"
}
],
"maxBatchRows":100000,
"batchSize":536870912
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
datax动态传参
python datax.py test.json -p "-Dyesterday=$(date -d '0 days ago' +%Y%m%d)"
# mongo 同步到 doris 时,在 json 配置中通过 ${yesterday} 引用上面传入的参数
"query": "{ \"day\": '${yesterday}' }"
DataX 同步 MongoDB 的时间类型数据时,同步后的时间与源数据之间存在时区差(相差若干小时)
解决办法:DataX 启动时通过 JVM 参数指定时区:-Duser.timezone=xx时区(例如 -Duser.timezone=Asia/Shanghai)
或者直接修改 DataX 的配置文件:
/datax/conf/core.json
将其中的时区配置项改为所需时区,例如:"timeZone": "GMT"