Background
- 作为时许数据库,从设计原则上来说,为了提高写入和查询的性能,在数据删除、更新等操作上进行了严格的限制,influxdb人家也是不提倡对数据进行删除修改操作的,大数据Hadoop上亦是如此。具体就不深入探讨,这里写删除的脚本确实业务上有这样的要求(那个数据有问题啊、当时传感器受到波动、客户不想看到这样的数据等等,当然这些问题通过数据预处理可以解决,但是怎么解决也需要提前说啊,通常是看到数据有问题才说有问题。。),官方的相关命令又不太友善,所以开发个脚本一劳永逸(其实是上个脚本的改进版哈)。
- Influxdb官方是支持根据tag或时间进行批量删除,不支持根据field进行批量删除,这里field条件的批量删除实际也是根据时间一条一条删除的【速度大概50条/秒】,所以建议最多几千条的field条件,太多时间久还耗服务性能。
- 该脚本每次删除数据会在数据库
db_backup
备份数据,表名为【原表名_有删除权限的操作人_时间戳】,恢复数据时需要给这个表名。 - 这个脚本自己用的,可能通用性不那么好,有bug提,尽量改进。
influxdb-delete.sh
#!/bin/bash
:<<!
【脚本说明】
1、此脚本适用根据field条件批量删除数据,有两个必传参数;
2、参数一:操作的数据库;
3、参数二:条件sql;
4、注意:
1)第二个参数sql请用双引号;
2)sql中不能用*号;
3)第二个参数中若有时间格式请用标准格式(yyyy-MM-dd HH:mm:ss.SSS)
4)官方支持根据时间或tag进行批量删除,不支持field条件批量删除,这里其实是根据时间一条条删除的。
!
script_name=$0
type=$1
db=$2
sql=$3
s1="【Delete Function】"
s2="\n If there are fields in the condition, please input params 【field <db> <\"select_sql\">】"
s3="\n If there are no fields in the condition, please input params 【timetag <db> <\"delete_sql\">】"
s4="\n eg.【sh $script_name field db_gkc_mix \"select time,v1 from raw_yb where pid = '49' and time >= '2021-06-04T07:40:00+08:00' and abs(v1) < 16\"】"
s5="\n eg.【sh $script_name timetag db_gkc_mix \"delete from raw_yb where pid = '49' and time >= '2021-06-04T07:40:00+08:00'\"】"
s6="\n ps.【The sql time format must be [yyyy-MM-ddTHH:mm:ss.SSS+08:00]】"
s7="\n ps.【This script will automatically back up the data to the db_backup database before deleting it】"
s8="\n\n 【Restore Function】"
s9="\n If restore data, please input params 【restore <srctbl> <dstdb>】"
s10="\n eg.【sh $script_name restore raw_yb_wlf_20210707110522 db_gkc_mix】"
msg="$s1$s2$s3$s4$s5$s6$s7$s8$s9$s10"
function custom_print(){
echo -e "\033[5;34m ***** \033[0m"
echo -e "\033[32m $@ ! \033[0m"
echo -e "\033[5;34m ***** \033[0m"
}
function get_tbl(){
t1="${sql#*FROM}"
t2=${t1%WHERE*}
echo $t2
}
function get_utc(){
suffix='+08:00'
chz_time=$@
utc_time=${chz_time/[[:space:]]/T}$suffix
echo $utc_time
}
function auth(){
read -r -p "你是谁?请输入你的姓名首字母:" input
case $input in
[dhq|lsc|wlf|xca|zcy])
msg="Auth success"
custom_print $msg
;;
*)
msg="Sorry, you don't have the permission"
custom_print $msg
exit 1
;;
esac
}
function get_bk_tbl(){
now=`date +'%Y%m%d%H%M%S'`
mid="_"
tbl_bk=$(get_tbl)$mid$1$mid$now
echo $tbl_bk
}
function backup_data(){
username=$1
dstdb="db_backup"
influx -username admin -password '123456' -execute "CREATE DATABASE $dstdb"
dsttbl=$(get_bk_tbl $username)
srcdb=$db
srctbl=$(get_tbl)
wer="WHERE "
sql_suffix="${sql#*WHERE}"
condition=$wer$sql_suffix
sql_bk="SELECT * INTO \"$dstdb\".\"autogen\".\"$dsttbl\" FROM \"$srcdb\".\"autogen\".\"$srctbl\" $condition GROUP BY *"
influx -username admin -password '123456' -execute "$sql_bk" > /dev/null
}
function get_dst_tbl(){
srctbl=$db
usertbl=${srctbl%_*}
dsttbl=${usertbl%_*}
echo $dsttbl
}
function delete_by_time(){
starttime=`date +'%Y-%m-%d %H:%M:%S'`
declare -i num=0
declare -i remain_count=0
echo -e "\033[?25l\c"
for time in $(cat aa.txt)
do
influx -username admin -password '123456' -database "$db" -execute "delete from $tbl where time = $time"
num=$((num+1))
remain_count=$((record_size-num))
echo -e "\r\033[32m$remain_count \033[0m\c"
done
endtime=`date +'%Y-%m-%d %H:%M:%S'`
start_seconds=$(date --date="$starttime" +%s)
end_seconds=$(date --date="$endtime" +%s)
echo -e "\033[?25h"
echo "耗时:"$((end_seconds-start_seconds))" s"
}
if [[ -z $type || -z $db || -z $sql || $type = "help" ]]; then
custom_print $msg
elif [[ $type = "field" ]]; then
tbl=$(get_tbl)
influx -username admin -password '123456' -database "$db" -execute "$sql" > a.txt
awk '/16/ {print $1}' a.txt > aa.txt
record_size=$(cat aa.txt | wc -l)
msg="查询到符合条件的记录数为:$record_size"
custom_print $msg
if [[ $record_size -gt 0 ]]; then
while true
do
read -r -p "确认删除这$record_size条记录么? [Y/n] " input
case $input in
[yY][eE][sS]|[yY])
auth
backup_data $input
echo -e "deleting\033[5;32m... \033[0m"
delete_by_time
rm -rf a.txt aa.txt b.txt bb.txt
echo "Finshed !"
exit 1
;;
[nN][oO]|[nN])
echo "Cancel delete !"
rm -rf a.txt aa.txt b.txt bb.txt
exit 1
;;
*)
echo "Invalid input..."
;;
esac
done
else
rm -rf a.txt aa.txt
fi
elif [[ $type = "timetag" ]]; then
delete_sql=$sql
select_sql=${delete_sql/delete/select}
influx -username admin -password '123456' -database "$db" -execute "$select_sql" > a.txt
record_size=`awk '/16/ {print $2}' a.txt`
msg="查询到符合条件的记录数为:$record_size"
custom_print $msg
if [[ $record_size -gt 0 ]]; then
while true
do
read -r -p "确认删除这$record_size条记录么? [Y/n] " input
case $input in
[yY][eE][sS]|[yY])
auth
backup_data $input
influx -username admin -password '123456' -database "$db" -execute "$delete_sql"
echo "Finshed !"
exit 1
;;
[nN][oO]|[nN])
echo "Cancel delete !"
rm -rf a.txt
exit 1
;;
*)
echo "Invalid input..."
;;
esac
done
else
rm -rf a.txt
fi
elif [[ $type = "restore" ]]; then
srcdb="db_backup"
srctbl=$db
dstdb=$sql
dsttbl=$(get_dst_tbl)
sql_rs="SELECT * INTO \"$dstdb\".\"autogen\".\"$dsttbl\" FROM \"$srcdb\".\"autogen\".\"$srctbl\" GROUP BY *"
influx -username admin -password '123456' -execute "$sql_rs" > /dev/null
msg='Restore success'
custom_print $msg
fi