背景
临时需求:全量同步1亿 数据到 hive。
中间遇到的问题
1、查询mysql账户受限制,每次查询超过5秒,断开链接
2、dba给一个不受限制的账号,测试发现受数据库每次查询数据超过1G限制
3、经尝试各种datax参数不起作用后,写循环同步
#! /bin/bash
mkdir /tmp/
m=1
n=1000000
for ((i=1; i<=103; i++))
do
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>循环第 $i 次"
cat>/tmp/mysql_2_hive_test_$i.json<<EOF
{
"job":{
"content":[
{
"reader":{
"name":"mysqlreader",
"parameter":{
"username":"user",
"password":"password",
"readSize": 1000,
"connection":[
{
"jdbcUrl":[
"jdbc:mysql://ip:3306/db?useSSL=false"
],
"querySql":[
"SELECT * FROM table where id between $m and $n;"
]
}
]
},
"setting": {
"speed": 1000,
"fetchSize": 1000
}
},
"writer":{
"name":"hdfswriter",
"parameter":{
"column":[
{
"name":"id",
"type":"bigint"
},
{
"name":"userinfoid",
"type":"bigint"
},
{
"name":"createtime",
"type":"bigint"
},
{
"name":"modifytime",
"type":"string"
}
],
"defaultFS":"hdfs://prod-emr-110",
"path":"/usr/hive/warehouse/mid.db/pay_transfer",
"fieldDelimiter":"\t",
"fileType":"text",
"fileName":"pay_transfer",
"writeMode":"append"
}
}
}
],
"setting":{
"speed":{
"channel":"1"
}
}
}
}
EOF
echo "end create file <<<<<<"
((m=$m+1000000))
((n=$n+1000000))
python /data/DataX/bin/datax.py /tmp/mysql_2_hive_test_$i.json
echo "endcreatefile <<<<<<"
sleep 10 # 根据每次同步时间设定一定间隔,防止mysql或执行机负载过高
done
另外一个没有select 的
{
"job":{
"content":[
{
"reader":{
"name":"mysqlreader",
"parameter":{
"column":[
"id",
"userinfoId",
"createTime",
"modifyTime"
],
"username":"user",
"password":"password",
"readSize":1000,
"connection":[
{
"jdbcUrl":[
"jdbc:mysql://ip:3306/db?useSSL=false"
],
"table":[
"transfer"
]
}
]
},
"setting":{
"speed":1000,
"fetchSize":1000
}
},
"writer":{
"name":"hdfswriter",
"parameter":{
"column":[
{
"name":"id",
"type":"bigint"
},
{
"name":"userinfoid",
"type":"bigint"
},
{
"name":"createtime",
"type":"bigint"
},
{
"name":"modifytime",
"type":"string"
}
],
"defaultFS":"hdfs://prod-emr-110",
"path":"/usr/hive/warehouse/mid.db/pay_transfer",
"fieldDelimiter":"\t",
"fileType":"text",
"fileName":"pay_transfer",
"writeMode":"append"
}
}
}
],
"setting":{
"speed":{
"channel":"1"
}
}
}
}
如有问题,请指正