今天,咱来说说 awk 的用法。在此之前先加几句,说说一般写 shell 脚本都会干的事。
#!/bin/sh  # 脚本第一行一般得加这句
# Command search path: the directories the shell looks in to find executables.
PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin
export PATH

# awk usage:
# Date of one hour ago as YYMMDD: date prints YYYYMMDD and awk substr()
# keeps everything from the third character on, dropping the century digits.
day=$(date -d '-1 hours' +%Y%m%d | awk '{ print substr($0, 3) }')
# Rotate the whitespace-separated fields of every line right by one position
# (the last field becomes the first), then sort the lines numerically in
# descending order and write the result to "$send_file".
awk '{ printf("%s ", $NF); for (i = 1; i < NF; i++) printf("%s ", $i); printf("\n") }' < "$res_file" | sort -nr > "$send_file"
# Aggregate the channel logs with awk and generate the SQL needed to load the
# result into the database:
#   - "$sqlFile" receives a DELETE for the day "$sDate" followed by a
#     LOAD DATA statement that reads "$sqlLoadFile";
#   - "$sqlLoadFile" receives one "field1,field2,count,date" row for every
#     distinct (field 1, field 2) pair found in "$tmpDir"/channel*.log.
# Shell values are handed to awk with -v instead of being spliced into the
# program text, so whitespace or quotes in them cannot break the awk program.
# \047 is the awk octal escape for a single quote.
awk -v sDate="$sDate" -v sqlFile="$sqlFile" -v sqlLoadFile="$sqlLoadFile" '
BEGIN {
  # Both statements are appended; existing content of the SQL file is kept.
  printf("delete from tmp_table where date=%s;\n", sDate) >> sqlFile
  printf("load data local infile \047%s\047 into table `tmp_table` fields terminated by \047,\047 (client_name, `channel`, `count`, `date`)\n", sqlLoadFile) >> sqlFile
}
{
  # Count only records whose first field is purely alphanumeric and whose
  # second field is alphanumeric plus dots. Commas are not accepted because
  # the comma is the column separator of the load file.
  if (($1 ~ /^[0-9a-zA-Z]+$/) && ($2 ~ /^[0-9a-zA-Z.]+$/)) {
    pv[$1, $2]++
  }
}
END {
  for (item in pv) {
    # Multi-subscript keys are joined with SUBSEP; split them back apart.
    split(item, strs, SUBSEP)
    appid = strs[1]
    channelid = strs[2]
    printf("%s,%s,%d,%s\n", appid, channelid, pv[item], sDate) >> sqlLoadFile
  }
}' "$tmpDir"/channel*.log
# Feed the generated SQL file to the mysql client (database greenskin_tmp,
# port 3307, socket /opt/data/mysql/3307/my3307.sock).
# NOTE(review): the password is given on the command line, where other local
# users may be able to see it; consider a client option file instead.
mysql -P3307 -ucmtadmin -pcmtadmin -S /opt/data/mysql/3307/my3307.sock -Dgreenskin_tmp < "$sqlFile"
# Run the awk pipeline on the remote host. The whole remote command is one
# double-quoted string, so every awk field reference must be written as \$N
# (otherwise the local shell expands it) and every double quote meant for the
# remote awk must be written as \" (otherwise it ends the local string and
# awk receives a bare variable name instead of a string literal).
# $ngxlog and $channellog are expanded locally before the command is sent;
# the output redirection itself happens on the remote side.
ssh -o StrictHostKeyChecking=no "nginx@$host" \
" cat $ngxlog | awk '!/mobile/&&/uvstat/&&!/cy2ziwlQkxIt/&&!/cyr5vMK8b/&&!/=_/&&/category/' | \
awk -F '&' '{\
if(substr(\$2,1,5) == \"ntime\") { print \$3, \$4 }
else { print \$2, \$3} }' | \
awk -F '[ =]' '{print \$2, \$4}' | \
awk -F '[ ;|]' '{for(i=2;i<=NF;i++) print \$1, \$i}' > $channellog "
awk -F '[=:&]' 可以同时指定多个不同的分隔符(方括号里的每个字符都会被当作分隔符)。