# 通过 hbase shell 或脚本删除数据时,一般都是根据 rowkey 进行操作。因此,这里仍然需要先根据时间戳范围找出 rowkey,再执行 deleteall 操作。以下简单分四步进行:
#第一步:通过时间戳找到要删除的数据
#第二步:构建删除数据的shell
#第三步:给delete_all_by_rowkey.sh 加可执行权限 执行删除shell
#第四步:修改hbase的TTL值
#!/bin/bash -l
echo '--------------程序从这里开始------------'
# Positional arguments:
#   ${1} table_name  table to operate on, written as ns:table_name
#   ${2} columns     value placed in COLUMNS=> of the scan
#   ${3} ttl         value placed in TTL=> of the generated alter statement
#   ${4} stop_date   second element of the scan TIMERANGE
#   ${5} start_date  first element of the scan TIMERANGE; optional,
#                    defaults to 0 when omitted or empty

#######################################
# Copy the positional arguments into the script-wide variables.
# Globals:   table_name, columns, ttl, stop_date, start_date (written)
# Arguments: the script's own arguments, normally "$@"
# Outputs:   a warning on stderr when fewer than four arguments are given
#######################################
parse_args() {
  table_name=${1-}
  columns=${2-}
  ttl=${3-}
  stop_date=${4-}
  # Any non-empty fifth argument is honoured, even if further arguments
  # follow; an empty or missing one falls back to 0.
  start_date=${5:-0}
  if [ "$#" -lt 4 ]; then
    # Warn only: this script just generates files, and the generated
    # scripts are not executed automatically.
    echo "warning: expected <ns:table_name> <columns> <ttl> <stop_date> [start_date], got $# argument(s)" >&2
  fi
}

parse_args "$@"

# Echo the effective settings (blank line, five lines, blank line).
printf '\ntable_name : %s\ncolumns : %s\nttl : %s\nstop_date : %s\nstart_date : %s\n\n' \
  "${table_name}" "${columns}" "${ttl}" "${stop_date}" "${start_date}"
# Absolute path of the directory that contains this script; the cache tree
# is created next to it. Abort rather than silently using the current
# directory when that location cannot be resolved.
base_path=$(cd "$(dirname "$0")" && pwd) || {
  echo "error: cannot resolve the directory of $0" >&2
  exit 1
}
echo '---------------正在创建缓存文件夹--------------'
# One cache directory per table; quoted so paths with spaces stay intact.
mkdir -p "${base_path}/cache_of_delete/${table_name}/" || exit 1
# Make sure both work files exist before the later steps write to them.
touch "${base_path}/cache_of_delete/${table_name}/rowkey.txt" \
  "${base_path}/cache_of_delete/${table_name}/delete_all_by_rowkey.sh"
# ------- Step 1: find the rows to delete via a timestamp-range scan -------
# Note: only the row key and one column are requested, because the sole
# purpose of this scan is to collect row keys.

#######################################
# Filter `hbase shell` scan output down to row keys.
# Keeps lines containing both "column" and "timestamp", takes their first
# whitespace-separated field, and prints each distinct key once (first
# occurrence order) so no duplicate deleteall statements are generated.
# Inputs:  scan output on stdin
# Outputs: one row key per line on stdout
#######################################
extract_rowkeys() {
  awk '/column/ && /timestamp/ && !seen[$1]++ { print $1 }'
}

echo " scan '${table_name}',{COLUMNS=>'${columns}', TIMERANGE=>[${start_date},${stop_date}]}" \
  | hbase shell \
  | extract_rowkeys > "${base_path}/cache_of_delete/${table_name}/rowkey.txt"
# ------- Step 2: build the shell script that deletes the rows -------

#######################################
# Generate a script that pipes one deleteall statement per row key into
# `hbase shell`, and mark it executable (step 3).
# Arguments:
#   $1 - table name
#   $2 - file holding one row key per line
#   $3 - path of the script to generate (overwritten)
# Returns: non-zero if the script cannot be written or made executable
#######################################
build_delete_script() {
  local table=$1 keys_file=$2 out_file=$3
  {
    # The here-doc delimiter is quoted so that bash, when running the
    # generated script, passes row keys through literally instead of
    # expanding `$` or backticks inside them.
    printf '%s\n' '#!/bin/bash -l ' "exec hbase shell <<'EOF'"
    # Table name and quote character are handed to awk as variables rather
    # than spliced into the program text. Blank lines are skipped.
    # NOTE(review): a row key that itself contains a single quote would
    # still break the generated statement.
    awk -v tbl="${table}" -v q="'" \
      'NF { print "deleteall " q tbl q " , " q $1 q }' "${keys_file}"
    # The terminator must be exactly "EOF" with no trailing blank, otherwise
    # bash does not recognise it and forwards the line to hbase shell.
    printf '%s\n' 'EOF'
  } > "${out_file}" || return
  chmod +x "${out_file}"
}

# ------- Step 3: make delete_all_by_rowkey.sh executable and run it -------
# (The executable bit is set inside build_delete_script; running the script
# is left commented out below.)
build_delete_script "${table_name}" \
  "${base_path}/cache_of_delete/${table_name}/rowkey.txt" \
  "${base_path}/cache_of_delete/${table_name}/delete_all_by_rowkey.sh"
#sh ${base_path}/cache_of_delete/${table_name}/delete_all_by_rowkey.sh >> ${base_path}/cache_of_delete/${table_name}/delete.log
# ------- Step 4: change the table's TTL -------

#######################################
# Generate a script that feeds desc / disable / alter / enable / desc
# statements for one table into `hbase shell`, and mark it executable.
# The column family name is hard-coded to 'f'.
# Arguments:
#   $1 - table name
#   $2 - TTL value, written unquoted after TTL=>
#   $3 - path of the script to generate (overwritten)
# Returns: non-zero if the script cannot be written or made executable
#######################################
build_alter_ttl_script() {
  local table=$1 ttl_value=$2 out_file=$3
  {
    # Quoted delimiter: the generated script hands the statements to
    # hbase shell without any shell expansion.
    printf '%s\n' '#!/bin/bash -l ' "exec hbase shell <<'EOF'"
    # Table and family names are emitted inside single quotes so the
    # statements are string literals for hbase shell.
    printf "desc '%s'\n" "${table}"
    printf "disable '%s'\n" "${table}"
    printf "alter '%s', { NAME=>'f', TTL=>%s }\n" "${table}" "${ttl_value}"
    printf "enable '%s'\n" "${table}"
    printf "desc '%s'\n" "${table}"
    # Exactly "EOF", no trailing blank, so bash ends the here-doc here.
    printf '%s\n' 'EOF'
  } > "${out_file}" || return
  chmod +x "${out_file}"
}

build_alter_ttl_script "${table_name}" "${ttl}" \
  "${base_path}/cache_of_delete/${table_name}/alter_ttl.sh"
# Running the generated script is left commented out:
#sh ${base_path}/cache_of_delete/${table_name}/alter_ttl.sh
# Announce the cache cleanup phase.
printf '%s\n' '---------------正在删除缓存文件夹--------------'
# The actual removal is disabled, so the generated delete script stays on disk:
#rm -rf ${base_path}/cache_of_delete/${table_name}/delete_all_by_rowkey.sh
# Final marker: end of the program.
printf '%s\n' '--------------程序到这里结束------------'