#!/bin/bash
# Export a list of Hive tables into SQL Server tables via Sqoop.
# Flow per table: Hive -> HDFS temp dir -> truncate SQL Server target
#                 -> sqoop export -> delete temp dir.
# NOTE: the script uses arrays and (( )) arithmetic loops, which are
# bashisms; the original '#! /bin/sh' shebang was wrong for this content.
set -euo pipefail

parallel_count=16                 # sqoop export mapper count (-m)
mapred_job_queue_name=queuetest   # YARN queue for the sqoop eval job
#sqlserver prd
data_db_url="jdbc:sqlserver://localhost:1433;DatabaseName=test"
data_db_username=sa
data_db_password=123456           # NOTE(review): plaintext credential — prefer --password-file or env
# Space-separated, position-aligned lists: mssql_table_array[i] receives
# the contents of hive_table_array[i].
mssql_table="dbo.test"
hive_table="test"
mssql_table_array=($mssql_table)  # intentional word splitting into an array
hive_table_array=($hive_table)
length=${#mssql_table_array[@]}
echo "#########################[total has ${length} table ]#########################"
for ((i = 0; i < length; i++)); do
  mtable=${mssql_table_array[$i]}
  htable=${hive_table_array[$i]}
  echo "#########################[ $((i + 1)) ${mtable}, ${htable} begin]#########################"
  data_dir="/apps-data/user/sqlexp/${mtable}"
  echo "#########################[export data to temp dir ${data_dir}]#########################"
  # Dump the Hive table as delimited text files into the temp HDFS dir
  # (Hive's default field delimiter for 'insert overwrite directory' is \001).
  hive -e "
insert overwrite directory '${data_dir}'
select * from ${htable} ;
"
  echo "#########################[truncate table]#########################"
  # Empty the SQL Server target first so re-runs are idempotent.
  sqoop eval \
    -D mapred.job.queue.name="$mapred_job_queue_name" \
    --connect "$data_db_url" \
    --username "$data_db_username" \
    --password "$data_db_password" \
    --verbose \
    --e " truncate table ${mtable};"
  echo "#########################[export data to buffer table]#########################"
  # '\001' matches Hive's default field delimiter; '\N' is Hive's NULL marker.
  sqoop export \
    -D mapred.task.timeout=0 \
    --connect "$data_db_url" \
    --username "$data_db_username" \
    --password "$data_db_password" \
    --table "${mtable}" \
    --export-dir "${data_dir}" \
    --input-null-string '\\N' \
    --input-null-non-string '\\N' \
    --input-fields-terminated-by '\001' \
    -m "${parallel_count}"
  echo "#########################[delete temp dir ${data_dir}]#########################"
  # 'hadoop fs -rmr' is deprecated and removed in Hadoop 3; use -rm -r.
  hadoop fs -rm -r "${data_dir}"
  echo "#########################[ $((i + 1)) ${mtable}, ${htable} finished ]#########################"
done
# NOTE(review): this entire section duplicates the export pass above
# verbatim (same tables, same settings) — consider extracting a function
# or deleting one copy after confirming the double run is unintentional.
parallel_count=16                 # sqoop export mapper count (-m)
mapred_job_queue_name=queuetest   # YARN queue for the sqoop eval job
#sqlserver prd
data_db_url="jdbc:sqlserver://localhost:1433;DatabaseName=test"
data_db_username=sa
data_db_password=123456           # NOTE(review): plaintext credential — prefer --password-file or env
# Space-separated, position-aligned lists: mssql_table_array[i] receives
# the contents of hive_table_array[i].
mssql_table="dbo.test"
hive_table="test"
mssql_table_array=($mssql_table)  # intentional word splitting into an array
hive_table_array=($hive_table)
length=${#mssql_table_array[@]}
echo "#########################[total has ${length} table ]#########################"
for ((i = 0; i < length; i++)); do
  mtable=${mssql_table_array[$i]}
  htable=${hive_table_array[$i]}
  echo "#########################[ $((i + 1)) ${mtable}, ${htable} begin]#########################"
  data_dir="/apps-data/user/sqlexp/${mtable}"
  echo "#########################[export data to temp dir ${data_dir}]#########################"
  # Dump the Hive table as delimited text files into the temp HDFS dir.
  hive -e "
insert overwrite directory '${data_dir}'
select * from ${htable} ;
"
  echo "#########################[truncate table]#########################"
  # Empty the SQL Server target first so re-runs are idempotent.
  sqoop eval \
    -D mapred.job.queue.name="$mapred_job_queue_name" \
    --connect "$data_db_url" \
    --username "$data_db_username" \
    --password "$data_db_password" \
    --verbose \
    --e " truncate table ${mtable};"
  echo "#########################[export data to buffer table]#########################"
  # '\001' matches Hive's default field delimiter; '\N' is Hive's NULL marker.
  sqoop export \
    -D mapred.task.timeout=0 \
    --connect "$data_db_url" \
    --username "$data_db_username" \
    --password "$data_db_password" \
    --table "${mtable}" \
    --export-dir "${data_dir}" \
    --input-null-string '\\N' \
    --input-null-non-string '\\N' \
    --input-fields-terminated-by '\001' \
    -m "${parallel_count}"
  echo "#########################[delete temp dir ${data_dir}]#########################"
  # 'hadoop fs -rmr' is deprecated and removed in Hadoop 3; use -rm -r.
  hadoop fs -rm -r "${data_dir}"
  echo "#########################[ $((i + 1)) ${mtable}, ${htable} finished ]#########################"
done
echo "#########################[${length} table export finished ]#########################"
# Sqoop cross-cluster import: on cluster 1, an Oozie-scheduled Sqoop job
# imports data into cluster 2's HBase. (This line was bare prose — bash
# would have tried to execute it as a command; it must be a comment.)
# Import MySQL table 'virtual_link' into HBase table 'virtual_link'
# (row key = 'inode' column, all columns under family 'cf').
# The -D properties point the job at cluster 2's ZooKeeper/HMaster so the
# write lands on the remote cluster.
# NOTE(review): --target-dir is normally unused when --hbase-table is set
# (rows go straight to HBase, not HDFS) — confirm it is intentional.
# NOTE(review): \$CONDITIONS is required by sqoop's free-form --query even
# with a single mapper (-m 1).
sqoop import \
-D hbase.zookeeper.quorum=n3 \
-D hbase.zookeeper.property.clientPort=2181 \
-D hbase.master=n1:60000 \
--connect "jdbc:mysql://localhost:3306/dotcms2" \
--username root \
--password 123456 \
--verbose \
--target-dir /home/user/root/20150316/ \
--fields-terminated-by '\001' \
--lines-terminated-by '\n' \
-m 1 \
--e "select * from virtual_link where 1=1 and \$CONDITIONS" \
--hbase-table virtual_link \
--hbase-row-key inode \
--hbase-create-table \
--column-family cf \
--null-string '\\N' \
--null-non-string '\\N'