使用 DataX 同步数据
安装 DataX
- 环境要求:JDK 1.8 及以上、Python 2.6 及以上、Maven 3.x 及以上
- 相关说明文档:https://github.com/alibaba/DataX
下载 DataX 源码并编译
$ git clone https://github.com/alibaba/DataX.git
$ cd DataX
# Build command documented in the DataX user guide; the assembly step
# produces the runnable distribution under target/datax/datax.
$ mvn -U clean package assembly:assembly -Dmaven.test.skip=true
# Return to the parent directory so the DataX/... paths used below resolve.
$ cd ..
# The DataX home directory is DataX/target/datax/datax
$ ls DataX/target/datax/datax
bin conf job lib plugin script tmp
同步数据
# Create the job JSON file first (e.g. postgresql2es.json), then pass it to datax.py
DATAX_HOME=DataX/target/datax/datax
python "$DATAX_HOME/bin/datax.py" postgresql2es.json
JSON 文件示例
- PostgreSQL 同步到 Elasticsearch
{
  "job": {
    "setting": {
      "speed": {
        "byte": 4096,
        "channel": 32 // NOTE(review): DataX appears to derive the channel count from "byte" when it is set, so "channel" may be ignored - confirm
      },
      "errorLimit": {
        "record": 0,
        "percentage": 0.02
      }
    },
    "content": [
      {
        "reader": {
          "name": "postgresqlreader",
          "parameter": {
            "username": "postgresXL",
            "password": "xxxxxxx",
            "connection": [
              {
                "querySql": [
                  "SELECT * FROM \"WRYPC-test\"" // the selected fields may be narrowed; they must correspond to the writer's "column" list below
                ],
                "jdbcUrl": ["jdbc:postgresql://192.168.1.1:5432/tag_test"]
              }
            ]
          }
        },
        "writer": {
          "name": "elasticsearchwriter",
          "parameter": {
            "endpoint": "http://192.168.1.1:13451",
            "accessId": "elastic",
            "accessKey": "xxxxxx",
            "index": "tag_test", // index name
            "type": "doc", // document type
            "cleanup": false,
            "aliasMode": "exclusive",
            "dynamic": false,
            "settings": {
              "index": {
                "number_of_shards": 1, // number of primary shards; Elasticsearch requires at least 1
                "number_of_replicas": 1 // number of replicas
              }
            },
            "batchSize": 10000,
            "splitter": ",",
            "column": [
              {
                "name": "id",
                "type": "keyword"
              },
              {
                "name": "unit_name",
                "type": "keyword"
              },
              {
                "name": "community",
                "type": "keyword"
              },
              {
                "name": "address",
                "type": "keyword"
              },
              {
                "name": "construction_type",
                "type": "keyword"
              },
              {
                "name": "location",
                "type": "keyword"
              }
            ]
          }
        }
      }
    ]
  }
}