awk字符串、常用选项、数组、脚本

 1、以:为分隔符,返回/etc/passwd中每行每个字段的长度。
    awk -f example_1.awk /etc/passwd
    cat example_1.awk 
    BEGIN{
            FS=":"
    }
    
    {
            i=1
            while(i<=NF)
            {
                    if(i==NF)
                            printf "%d",length($i)
                    else
                            printf "%d:",length($i)
                    i++
            }
            print ""
    }
2、搜索字符串"I have a dream"中出现"ea"字符串的位置。
    awk 'BEGIN{str="I have a dream";location=index(str,"ea");print location}'
    awk 'BEGIN{str="I have a dream";location=match(str,"ea");print location}'
3、将字符串"Hadoop is a bigdata Framework"全部转换为小写。
    awk 'BEGIN{str="Hadoop is a bigdata Framework";print tolower(str)}'    
4、将字符串"Hadoop is a bigdata Framework"全部转换为大写。
    awk 'BEGIN{str="Hadoop is a bigdata Framework";print toupper(str)}'
5、将字符串"Hadoop Kafka Spark Storm HDFS YARN Zookeeper",按照空格为分隔符,分隔每部分保存到数组arr中,并逐个打印
    awk 'BEGIN{str="Hadoop Kafka Spark Storm HDFS YARN Zookeeper";split(str,arr," ");for(a in arr) print arr[a]}'
6、搜索字符串"Tranction 2345 Start:Select * from master"第一个数字出现的位置。
    awk 'BEGIN{str="Tranction 2345 Start:Select * from master";location=match(str,/[0-9]/);print location}'
7、截取字符串"Tranction 2345 Start:Select * from master"的子串,截取条件从第4个字符开始,截取5位。
    awk 'BEGIN{str="Tranction 2345 Start:Select * from master";print substr(str,4,5)}'
8、替换字符串"Tranction 2345 Start,Event ID:9002"中第一个匹配到的数字串为$符号。
    awk 'BEGIN{str="Tranction 2345 Start,Event ID:9002";count=sub(/[0-9]+/,"$",str);print count,str}'

awk中常用选项
    -v    定义或引用变量
    -f    指定awk命令文件
    -F    指定分隔符
    -V    查看awk的版本号
    
    num1=20    var="hello world"
    awk -v num2=$num1 -v var1="$var" 'BEGIN{print num2,var1}'
    推荐:通过-v向awk传入shell变量时,用双引号把变量括起来(如 -v var1="$var"),避免变量值含空格时被shell拆分
    
    BEGIN{
    str="Tranction 2345 Start,Event ID:9002"
    count=sub(/[0-9]+/,"$",str)
    print count,str
    }
    awk -f 1.awk
    
    awk -F ":" '{print $7}' /etc/passwd
    
    awk -V

shell中数组的用法:
    arrary=("Allen","Lousie","Messi","Jerry","Zhangsan","Lisi")
    
    打印元素:        echo ${arrary[0]}
    打印元素个数:    echo ${#arrary[@]}
    打印元素长度:    echo ${#arrary[3]}
    给元素赋值:    arrary[3]="Wang"
    删除元素:        unset arrary[2] 删除下标为2的元素;unset arrary 删除整个数组
    分片访问:        echo ${arrary[@]:1:3}
    元素内容替换:    ${arrary[@]/e/E} 只替换每个元素中的第一个e;${arrary[@]//e/E} 替换每个元素中所有的e
    数组的遍历:
        for a in ${arrary[@]}
        do
            echo $a

awk中数组用法:
1、统计主机上所有TCP连接状态数,按照每个TCP状态分类。
# Count sockets per connection state: field 6 of every netstat line containing "tcp".
netstat -an | grep tcp | awk '{ states[$6]++ } END { for (s in states) print s, states[s] }'

2、计算横向数据总和,计算纵向数据总和;
# student.txt
allen   80  90  87  91
mike    78  86  93  96
Kobe    66  92  82  78
Jerry   98  74  66  54
Wang    87  21  100 43


# Print each student's four scores plus their row total. The header has six
# columns and the row format has exactly one conversion per argument
# (a surplus conversion makes gawk abort with "not enough arguments").
awk 'BEGIN{printf "%-10s%-10s%-10s%-10s%-10s%-10s\n","Name","Chinese","Math","English","Physical","Total"}{total=$2+$3+$4+$5;printf "%-10s%-10d%-10d%-10d%-10d%-10d\n",$1,$2,$3,$4,$5,total}' student.txt

#或者
vim student.awk

# Per-student row totals, followed by one footer line with per-subject sums.
BEGIN{
    # Six headings, six %s conversions, so "Total" is no longer dropped.
    printf "%-10s%-10s%-10s%-10s%-10s%-10s\n","Name","Chinese","Math","English","Physical","Total"
}

{
    # Row total across the four score columns.
    total=$2+$3+$4+$5
    # Running column sums for the footer.
    chinese_sum+=$2
    math_sum+=$3
    english_sum+=$4
    physical_sum+=$5
    # One conversion per argument: name, four scores, row total.
    printf "%-10s%-10d%-10d%-10d%-10d%-10d\n",$1,$2,$3,$4,$5,total
}
END {
    # Footer: empty name column, then the four column sums.
    printf "%-10s%-10d%-10d%-10d%-10d\n","",chinese_sum,math_sum,english_sum,physical_sum
}

3、打印平均分超过90分同学各科目的成绩。
cat student.txt 
Name    Chinese    English      Math    Physical        Average
Allen   80         90           96      98
Mike    93         98           92      91
Zhang   78         76           87      92
Jerry   86         89           68      92
Han     85         95           75      90
Li      78         88           98      100

cat student.awk 
# List the students whose four-subject average is above 90, then print the
# per-subject sums of just those students.
BEGIN{
        printf "%-10s%-10s%-10s%-10s%-10s%-10s\n","NAME","Chinese","English","Math","Physical","Average"
}
{
        total=$2+$3+$4+$5
        avg=total/4
        # A non-numeric heading row evaluates to an average of 0 and is
        # therefore skipped by this test as well.
        if(avg>90)
        {
                # Average is the last column, so it needs no padding width.
                printf "%-10s%-10d%-10d%-10d%-10d%.2f\n",$1,$2,$3,$4,$5,avg
                score_chinese+=$2
                score_english+=$3
                score_math+=$4
                score_physical+=$5
        }
}
END{
        # The first column is an empty label: %s keeps it blank, whereas %d
        # would convert "" to the number 0 and print it.
        printf "%-10s%-10d%-10d%-10d%-10d\n","",score_chinese,score_english,score_math,score_physical
}

需求描述:利用awk处理日志,并生成结果报告。
生成数据脚本insert.sh,内容如下:
#!/bin/bash
#

#######################################
# Print a pseudo-random integer in the inclusive range [$1, $2].
# The value is derived from the clock reading of `date +%s%N`, reduced
# modulo the width of the range.
# Arguments: $1 - lower bound, $2 - upper bound
# Outputs:   the number on stdout; a diagnostic on stderr for an empty range
# Returns:   0 on success, 1 when $2 is smaller than $1
#######################################
function create_random()
{
    # Locals keep the helper from overwriting same-named caller variables.
    local min=$1
    local span=$(($2 - min + 1))
    local num

    # An empty range would otherwise end in a division-by-zero error.
    if (( span <= 0 )); then
        echo "create_random: empty range [$1, $2]" >&2
        return 1
    fi

    num=$(date +%s%N)
    # Some date implementations do not expand %N and emit it literally;
    # keep the digits only so the arithmetic below still works.
    num=${num//[!0-9]/}
    echo $((num % span + min))
}

# Running batch number written as the third field of every log line.
INDEX=1

# Endless generator: appends one log line per user per pass to
# ./db.log.<YYYYMMDD> until the script is interrupted.
while true
do
    for user in Allen Mike Jerry Tracy Hanmeimei Lilei
    do
        # RANDOM may be 0, which would ask create_random for the empty
        # range 1..0; shift it so at least one record is always inserted.
        COUNT=$((RANDOM + 1))
        NUM1=$(create_random 1 "$COUNT")
        # Failed = attempted - succeeded. The original subtracted the
        # undefined $NUM, which left NUM2 empty.
        NUM2=$((COUNT - NUM1))
        # No space after "successfully," so the failure count is field 17
        # for awk, which is the field the reports in this document read.
        echo "$(date '+%Y-%m-%d %H:%M:%S') $INDEX Batches: user $user INSERT $COUNT records into database:product table:detail, Insert $NUM1 records successfully,failed $NUM2 record" >> "./db.log.$(date +%Y%m%d)"
        INDEX=$((INDEX + 1))
    done
done

数据格式如下:
2019-01-29 00:58:30 1 Batches: user allen insert 22498 records into database:product table:detail, insert 20771 records successfully,failed 1727 records
2019-01-29 00:58:30 2 Batches: user mike insert 29378 records into database:product table:detail, insert 21426 records successfully,failed 7952 records
2019-01-29 00:58:30 3 Batches: user jerry insert 22779 records into database:product table:detail, insert 9397 records successfully,failed 13382 records
2019-01-29 00:58:30 4 Batches: user tracy insert 25232 records into database:product table:detail, insert 21255 records successfully,failed 3977 records

1、统计每个人员分别插入了多少条record进数据库
#输出结果:
USER    Total_Records
allen   493082
mike    349287

vim exam_1.awk

# Sum the number of records each user tried to insert.
BEGIN{
    printf "%-10s%-10s\n","User","Total Records"
}

{
    # Field 6 is the user name and field 8 the attempted record count in
    # the lines written by insert.sh.
    USER[$6]+=$8
}

# The opening brace must sit on the same line as END; a bare END followed
# by a newline is a syntax error.
END {
    for(u in USER)
        # The comma after the format string matters: without it awk
        # concatenates u onto the format instead of passing it as an argument.
        printf "%-10s%-20d\n",u,USER[u]
}

#脚本文件执行命令
awk -f exam_1.awk db.log.20190129

2、统计每个人分别插入成功了多少record,失败了多少record;
#输出结果:
User    Success_Record  Failed_Records
jerry   3472738         283737
mike    2738237         28373

# Per-user totals of successfully inserted and failed records.
BEGIN {
    printf "%-10s%-20s%-20s\n","User","Success_Records","Failed_Records"
}

{
    # Field 6 is the user name and field 14 the success count.
    # NOTE(review): assumes the failure count is field 17; confirm against
    # the actual log layout.
    SUCCESS[$6]+=$14
    FAIL[$6]+=$17
}

END {
    for(u in SUCCESS)
        # Three arguments, three conversions (a fourth %d has no argument
        # and aborts gawk).
        printf "%-10s%-20d%-20d\n",u,SUCCESS[u],FAIL[u]
}

3、将例子1和例子2结合起来,一起输出,输出每个人分别插入多少数据,多少成功,多少失败,并且格式化输出,加上标题;
vim exam_3.awk

# Per-user report: attempted, successful and failed record counts.
BEGIN {
    printf "%-10s%-20s%-20s%-20s\n","User","Total","Success","Failed"
}

{
    # Field 6 is the user name, field 8 the attempted count and field 14
    # the success count.
    # NOTE(review): assumes the failure count is field 17; confirm against
    # the actual log layout.
    TOTAL[$6]+=$8
    SUCCESS[$6]+=$14
    FAIL[$6]+=$17
}

END {
    for(u in SUCCESS)
        # Four arguments, four conversions, matching the header columns.
        printf "%-10s%-20d%-20d%-20d\n",u,TOTAL[u],SUCCESS[u],FAIL[u]
}

#脚本文件执行命令
awk -f exam_3.awk db.log.20190129

4、在例子3的基础上,加上结尾,统计全部插入记录数,成功记录数,失败记录数;
vim exam_4.awk

# Per-user report (attempted / successful / failed) with a grand-total footer.
BEGIN {
    printf "%-10s%-20s%-20s%-20s\n","User","Total","Success","Failed"
}

{
    # Field 6 is the user name, field 8 the attempted count and field 14
    # the success count.
    # NOTE(review): assumes the failure count is field 17; confirm against
    # the actual log layout.
    TOTAL[$6]+=$8
    SUCCESS[$6]+=$14
    FAIL[$6]+=$17
}

END {
    for(u in SUCCESS)
    {
        # Accumulate grand totals while printing each user's row.
        total+=TOTAL[u]
        success+=SUCCESS[u]
        fail+=FAIL[u]
        # Four arguments, four conversions, matching the header columns.
        printf "%-10s%-20d%-20d%-20d\n",u,TOTAL[u],SUCCESS[u],FAIL[u]
    }

    # Footer row: blank user column followed by the grand totals.
    printf "%-10s%-20d%-20d%-20d\n","",total,success,fail
}

5、查找丢失数据的现象,也就是成功+失败的记录数,不等于一共插入的记录数。找出这些数字并显示行号和对应行的日志信息;
# Print the line number and full text of every log line where field 8 differs from field 14 + field 17.
awk '$8 != $14 + $17 { print NR, $0 }' db.log.20190129

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值