1.准备增量导入脚本incrSyncTask.sh
按日期增量导入
#!/bin/bash
. /etc/profile
#开始日期格式20210101
begin_date=$1
#结束日期
end_date=$2
# 读库的IP
r_ip=" 读库的ip"
# 读库的端口
r_port="读库的port"
# 服务名称
r_dbname="服务名称"
# 读库的账号
r_username="账号"
# 读库的密码
r_password="密码"
#分区
#r_partition=$1
# 写库的IP
w_ip="写库的IP"
# 写库的端口
w_port="写库的端口"
# 写库的数据库名称
w_dbname="写库的数据库名称"
# 写库的账号
w_username="写库的账号"
# 写库的密码
w_password="写库的密码"
while [ "$begin_date" -le "$end_date" ];
do
act_date=$(date -d "${begin_date}" +%Y-%m-%d)
python /opt/datax/bin/datax.py --jvm="-Xms8G -Xmx8G" /opt/datax/job/incr_job.json -p "-Dr_ip=$r_ip -Dr_port=$r_port -Dr_dbname=$r_dbname -Dr_username=$r_username -Dr_password=$r_password -Dr_partition=$act_date -Dw_ip=$w_ip -Dw_port=$w_port -Dw_dbname=$w_dbname -Dw_username=$w_username -Dw_password=$w_password "
begin_date=$(date -d "${begin_date}+1days" +%Y%m%d)
done
2.准备DataX的incr_job.json文件
{
"job": {
"setting": {
"speed": {
"channel": 5
}
},
"content": [
{
"reader": {
"name": "oraclereader",
"parameter": {
"username": "${r_username}",
"password": "${r_password}",
"connection": [
{
"querySql": ["SELECT ID,ROWKEY,。。。。。。PASS_TIME FROM 表 PARTITION FOR ( to_date('${r_partition}','yyyy-MM-dd'))" ],
"jdbcUrl": [
"jdbc:oracle:thin:@//${r_ip}:${r_port}/${r_dbname}"
]
}
]
}
},
"writer": {
"name": "starrockswriter",
"parameter": {
"username": "${w_username}",
"password": "",
"database": "${w_dbname}",
"table": "表名",
"column": [
"ID",
"ROWKEY",
。。。。。。
"PASS_TIME"
],
"preSql": [ ],
"postSql": [ ],
"jdbcUrl": "jdbc:mysql://${w_ip}:${w_port}/",
"loadUrl": [
"ip地址:8030",
"ip地址:8030"
],
"loadProps": {
"column_separator": "\\x01",
"row_delimiter": "\\x02"
}
}
}
}
]
}
}
3.执行脚本incrSyncTask.sh 开始日期 结束日期