从oracle导出数据为文本格式
代码
https://github.com/reader-sword/oracle-transform-TDH
驱动包位置:
oracle-drives.rar
https://download.csdn.net/my
#!/bin/bash
touch /mnt/oracle_transport/sqoop.log
echo "" > /mnt/oracle_transport/sqoop.log
i=0
cat table.txt |while read line
do
#db=`echo $line | awk -F "." '{print $1}'`
table=`echo $line `
echo " -------------------------------------------begin hdfs dfs -rm ${table}------------------------------------------"
#>> ./sqoop/${table}.log
hdfs dfs -rm -r /tmp/data/${table}
# --fields-terminated-by "指定分隔符"
#在原有sqoop语句中添加 --hive-drop-import-delims 可以将如mysql中取到的\n, \r, and \01等特殊字符丢弃
# 在从mysql(或者别的RDBMS)导入数据到hdfs后会发现原来在mysql中字段值明明是NULL, 为什么到hive查询后 where field is null 会没有结果呢,然后通过检查一看 居然NULL值都变成了字段串'null'。
# 其实你在导入的时候加上以下两个参数就可以解决了,
# --null-string '\\N'
# --null-non-string '\\N'
#这里要注意一点。 在hive里面。NULL是用\N来表示的。
echo " ---------------------------------------------begin sqoop import ${table} --------------------------------------------------"
sqoop import --connect jdbc:oracle:thin:@10.20.1.140:1521:orcl --username *** --password **** --query " select * from ${table} where \$CONDITIONS" --target-dir /tmp/data/${table} -m 1 --fields-terminated-by "\001" --hive-drop-import-delims --null-string '\\N' --null-non-string '\\N'
let i++
echo "${table} $i finished" >>/mnt/oracle_transport/sqoop.log
echo " -------------------------------------- sqoop import finished ${table} $i--------------------------------------------"
done
创建外表
#!/bin/bash
cat table.txt |while read line
do
echo "DROP TABLE IF EXISTS JIAXING_PRE_MEACH.${line}_EXT;
CREATE EXTERNAL TABLE JIAXING_PRE_MEACH.${line}_EXT (
ROWKEY STRING, NUM INT, COUNTRY INT, RD INT
)
row format delimited fields terminated by \"\\001\" location '/tmp/data/${line}';" >/mnt/oracle_transport/create_ext/create_ext_${line}.sql
echo "-------------------------------------create_ext_${line}.sql finished ----------------------------------------------"
done
执行建外表命令并创建orc表
#!/bin/bash
i=0
rm /mnt/oracle_transport/create_ext_orc_all.log
touch /mnt/oracle_transport/create_ext_orc_all.log
cat table.txt |while read line
do
beeline -u jdbc:hive2://10.20.1.144:10000 -f /mnt/oracle_transport/create_ext/create_ext_${line}.sql
statement="DROP TABLE IF EXISTS JIAXING_PRE_MEACH.${line}_ORC;
CREATE TABLE JIAXING_PRE_MEACH.${line}_ORC STORED AS ORC AS SELECT * FROM JIAXING_PRE_MEACH.${line}_EXT;select count(*) from JIAXING_PRE_MEACH.${line}_ORC;"
beeline -u jdbc:hive2://10.20.1.144:10000 -e "$statement"
let i++
echo "JIAXING_PRE_MEACH.${line}_ORC $i finished" >>/mnt/oracle_transport/create_ext_orc_all.log
echo "--------------------------------JIAXING_PRE_MEACH.${line}_ORC $i finished---------------------------------------------"
done