AWK Programming Language | 史上最全 awk 一本通

在这里插入图片描述

博客原文

awk 工作模式

与 sed 相同, 都是逐行处理

语法格式

  1. awk 'BEGIN{}pattern{commands}END{}' filename
  2. stdout | awk 'BEGIN{}pattern{commands}END{}'
语法格式说明
BEGIN{}处理文本前执行
pattern匹配模式
{commands}处理命令, ;隔开
END{}处理文本后执行

BEGIN{}, pattern, END{} 都可省略

内置变量

内置变量含义
$0整行内容
$1 - $n按分隔符的第 1-n 个字段
NF (Number of Fields)当前行的字段个数(多少列)
NR (Number of Records)当前行行号, 从 1 开始计数
FNR (File Number of Records)多文件处理时, 每个文件单独计数, 从 1 开始
FS (Field Separator)输入字段分隔符, 不指定为空格或 tab
RS (Record Separator)输入行分隔符, 默认换行符
OFS (Output Field Separator)输出字段分隔符, 默认空格
ORS (Output Record Separator)输出行分隔符, 默认换行符
FILENAME当前输入文件名
ARGC命令行参数个数
ARGV命令行参数数组

基本使用:


$ awk '{print $0}' passwd
$ awk '{print $1,$3}' list
$ awk '{print NF}' list
$ awk '{print NR}' list
$ awk '{print FNR}' list awk.txt

$ awk 'BEGIN{FS=":"}{print $1}' passwd
$ awk 'BEGIN{RS="--"}{print $0}' list
$ awk 'BEGIN{FS=":";OFS="|"}{print $1,$3}' passwd  # 每行输出字段
$ awk 'BEGIN{ORS="--"}{print $0}' passwd

$ awk '{print FILENAME}' passwd
$ awk '{print ARGC}' passwd list  # 3 个参数 awk, passwd, list

格式化输出 printf

格式符含义
%s字符串
%d十进制
%f浮点数
%x十六进制
%o八进制
%e科学计数法
%c单个字符

修饰符:

修饰符含义
-左对齐
+显示数值的正负号 (指定宽度时默认即为右对齐)
#打印 十六进制与八进制时使用, 在前打印进制标识

示例

# %s: 默认左对齐
# %10s: 默认右对齐
$ awk 'BEGIN{FS=":"}{printf "%s",$7}' passwd
$ awk 'BEGIN{FS=":"}{printf "%10s",$7}' passwd
$ awk 'BEGIN{FS=":"}{printf "%-10s",$7}' passwd

$ awk 'BEGIN{FS=":"}{printf "%d",$3}' passwd
$ awk 'BEGIN{FS=":"}{printf "%0.3f",$3}' passwd
$ awk 'BEGIN{FS=":"}{printf "%x",$3}' passwd
$ awk 'BEGIN{FS=":"}{printf "%#x",$3}' passwd  # 显示 16 进制标识
$ awk 'BEGIN{FS=":"}{printf "%o",$3}' passwd  # octal: the conversion is the letter o, not the digit 0
$ awk 'BEGIN{FS=":"}{printf "%e",$3}' passwd

模式匹配

  1. 正则表达式 (固定写法//)
  2. 关系运算匹配

正则表达式:

# 含有 root 的行
$ awk 'BEGIN{FS=":"}/root/{print $0}' passwd
# 以 nginx 开头
$ awk 'BEGIN{FS=":"}/^nginx/{print $0}' passwd

关系运算符:

关系运算符含义
< 数值小于
> 数值大于
<= 数值小于等于
>= 数值大于等于
==等于
!=不等于
~匹配正则
!~不匹配正则
# 第 3 个字段小于 50
$ awk 'BEGIN{FS=":"}$3<50{print $0}' passwd

# 第 7 个字段为 /bin/bash
$ awk 'BEGIN{FS=":"}$7=="/bin/bash"{print $0}' passwd
$ awk 'BEGIN{FS=":"}$7!="/bin/bash"{print $0}' passwd

# 第 3 个字段包含 3 个及以上数字
$ awk 'BEGIN{FS=":"}$3~/[0-9]{3,}/{print $0}' passwd

逻辑运算符:

逻辑运算符含义
||
&&
!
$ awk 'BEGIN{FS=":"}$1=="root"||$1=="nginx"{print $0}' passwd

$ awk 'BEGIN{FS=":"}$3<50 && $3 >30 {print $0}' passwd

awk 算数运算

运算符含义
+
-
*
/
%取余
^ 或 **乘方
x++ ; x--先返回 x, 后 +/- x
++x ; --x先 +/- x, 后 返回 x

练习计算课程平均值

# 右对齐
$ awk 'BEGIN{printf "%10s%10s%10s%10s%10s\n","Name", "YuWen","ShuXue", "English", "AVG"}{total=$2+$3+$4;AVG=total/3;printf "%10s%10d%10d%10d%10.2f\n",$1,$2,$3,$4,AVG}' list

$ awk 'BEGIN{printf "%-10s%-10s%-10s%-10s%-10s\n","Name", "YuWen","ShuXue", "English", "AVG"}{total=$2+$3+$4;AVG=total/3;printf "%-10s%-10d%-10d%-10d%-10.2f\n",$1,$2,$3,$4,AVG}' list

条件语句

if-else

示例: script.awk

# Classify each ':'-separated record by the numeric value of field 3 (the UID
# when run on /etc/passwd) and print one aligned row per record:
# bucket label, field 1 (login name), field 3.
BEGIN{
	FS=":"
}

{
	# The three buckets are contiguous, so the boundary values 50 and 100
	# land in a bucket whose label actually describes them.
	if($3<50)
	{
		printf "%-20s%-10s%10d\n","UID<50",$1,$3
	}
	else if($3<100)
	{
		printf "%-20s%-10s%10d\n","50<=UID<100",$1,$3
	}
	else
	{
		printf "%-20s%-10s%10d\n","UID>=100",$1,$3
	}
}
$ awk -f script.awk /etc/passwd

循环语句

  1. do-while
  2. while
  3. for

计算 1+2+…100

  1. do-while
# Sum the integers 0..100 with a do-while loop (body runs before the test)
# and print the total.
BEGIN {
    n = 0      # same starting value awk gives an unset variable
    acc = 0
    do {
        acc = acc + n
        n = n + 1
    } while (n <= 100)
    print acc
}
  2. while
# Add up 0 through 100 using a pre-tested while loop, then print the result.
BEGIN {
    k = 0      # same starting value awk gives an unset variable
    result = 0
    while (k <= 100) {
        result += k
        k += 1
    }
    print result
}
  3. for
# Accumulate 0..100 with a counting for loop and print the sum.
BEGIN {
    running = 0
    for (num = 0; num <= 100; ++num)
        running += num
    print running
}

练习: 打印平均分大于 70的, 并计算平均分

# Print every record whose three-score average is above 70, then a summary
# row holding the per-column means of the records that were printed.
# Fields used: $1 name, $2 YuWen, $3 Math, $4 English.
BEGIN{
    printf "%-10s%-10s%-10s%-10s%-10s\n","Name","YuWen","Math","English","AVG"
}

{
    # Average over all three scores ($2, $3, $4); Math must not be counted
    # twice in place of English.
    total = $2 + $3 + $4
    avg = total / 3
    if (avg > 70)
    {
        printf "%-10s%-10d%-10d%-10d%.2f\n",$1,$2,$3,$4,avg
        score_yuwen += $2
        score_math += $3
        score_english += $4
        score_avg += avg
        count++
    }

}

END{
    # When no record qualified, count is 0: skip the summary row instead of
    # aborting with a division-by-zero error.
    if (count > 0)
    {
        printf "%-10s%-10.2f%-10.2f%-10.2f%.2f\n","",score_yuwen/count,score_math/count,score_english/count,score_avg/count
    }
}

字符串函数

函数名解释返回值
length(str)计算字符串长度长度值
index(str1,str2)在 str1 中查找 str2返回位置索引, 从 1 计数
tolower(str)转小写转小写后的字符串
toupper(str)转大写转大写后的字符串
substr(str,m,n)从 str 的第 m 个字符开始, 截取 n 位(n 可省略, 省略时截取到末尾)截取后的子串
split(str,arr,fs)按 fs 切割字符串, 结果保存到 arr切割后的子串个数
match(str,RE)与 index() 类似, 但支持正则(RE)返回索引位置
sub(RE,RepStr,str)在 str 中搜索符合 RE 的子串将其替换为 RepStr; 只替换第一个替换个数
gsub(RE,RepStr,str)在 str 中搜索符合 RE 的子串将其替换为 RepStr; 替换全部替换个数

1. 打印 passwd 每个字段长度:

# For each ':'-separated record, print the length of every field,
# joined by ':' on a single output line.
BEGIN {
    FS = ":"
}

{
    for (col = 1; col <= NF; col++) {
        # the last field is written without a trailing ':' separator
        sep = (col == NF) ? "" : ":"
        printf "%d%s", length($col), sep
    }
    print ""
}

2. 查询"I have a dream"中"ea"索引

$ awk 'BEGIN{str="I have a dream";localtion=index(str,"ea");print localtion}'
# 12
$ awk 'BEGIN{str="I have a dream";localtion=match(str,"ea");print localtion}'
# 12

3. 大小写转换

$ awk 'BEGIN{str="I have a dream";print tolower(str)}'
# i have a dream
$ awk 'BEGIN{str="I have a dream";print toupper(str)}'
# I HAVE A DREAM

4. 切分数组

$ awk 'BEGIN{str="I have a dream";split(str,arr," ");print arr[2]}'
$ awk 'BEGIN{str="I have a dream";split(str,arr);print arr[2]}'  # 默认空格分隔
# have

# 遍历, 不是顺序遍历
$ awk 'BEGIN{str="I have a dream";split(str,arr);for(a in arr) print arr[a]}'
# dream
# I
# have
# a

5. 搜索第一个出现的数字

# 正则必须用 //
$ awk 'BEGIN{str="I have a 123 dream"; print match(str, /[0-9]/)}'
# 10

6. 截取子串

$ awk 'BEGIN{str="I have a 123 dream"; print substr(str,3,7)}'
# have a 

$ awk 'BEGIN{str="I have a 123 dream"; print substr(str,3)}'
# have a 123 dream

7. 替换数字

$ awk 'BEGIN{str="I have a 123 dream 324 hello"; print sub(/[0-9]+/,"$",str); print str}'
# 1
# I have a $ dream 324 hello

$ awk 'BEGIN{str="I have a 123 dream 324 hello"; print gsub(/[0-9]+/,"$",str); print str}'
# 2
# I have a $ dream $ hello

awk 常用选项

选项说明
-v参数传递
-f指定脚本文件
-F指定分隔符
-V查看版本

如果变量有空格, 要使用""

$ num=13
$ var="hello world"
$ awk -v num1=$num -v var1=$var 'BEGIN{print num1,var1}'
# awk: fatal: cannot open file `BEGIN{print num1,var1}' for reading (No such file or directory)

$ awk -v num1="$num" -v var1="$var" 'BEGIN{print num1,var1}'
# 13 hello world


$ awk -F ":" '{print $0}' /etc/passwd

awk 与 shell 中数组

shell 中数组

下标从 0 开始

打印数组:

$ arr=("kubernetes" "etcd" "time" "redis")

# 打印数组
$ echo ${arr[@]}
$ echo ${arr[*]}
# kubernetes etcd time redis

# 打印元素
$ echo ${arr[2]}
# time

打印数组/元素长度; 分片访问; 元素操作; 删除元素:

$ arr=("kubernetes" "etcd" "time" "redis")

# 打印数组长度 (元素个数)
$ echo ${#arr[@]}
$ echo ${#arr[*]}
# 4

# 打印元素长度 (字符数)
$ echo ${#arr[3]}
# 5

# 分片访问
$ echo ${arr[@]:1:3}
# etcd time redis

# 元素赋值
$ arr[2]=mysqlserver
$ echo ${arr[@]}
# kubernetes etcd mysqlserver redis

# 元素内容替换
$ echo ${arr[@]/e/E}
# kubErnetes Etcd mysqlsErver rEdis
$ echo ${arr[@]//e/E}
# kubErnEtEs Etcd mysqlsErvEr rEdis

# 元素删除  *** 通过下标删除后, 被删除的下标的元素为空, 原数组的其他元素下标不变
$ unset arr[0]
$ echo ${arr[@]}
# etcd mysqlserver redis
$ unset arr[0]
$ echo ${arr[@]}
# etcd mysqlserver redis
$ unset arr[1]
$ echo ${arr[@]}
# mysqlserver redis

# 删除数组
$ unset arr

通过下标删除后, 被删除的下标的元素为空, 原数组的其他元素下标不变

数组的遍历

$ arr=("kubernetes" "etcd" "time" "redis")
$ for a in ${arr[@]}; do echo $a; done
# kubernetes
# etcd
# time
# redis

awk 中数组

脚本练习

数据生成脚本:

#!/bin/bash
#

#######################################
# Print a pseudo-random integer in the inclusive range [min, max].
# The value is derived from the current time in nanoseconds, so it is only
# suitable for generating test data.
# Arguments: $1 - lower bound (integer)
#            $2 - upper bound (integer)
# Outputs:   the number on stdout; a diagnostic on stderr for an empty range
# Returns:   0 on success, 1 when max < min
#######################################
create_random() {
    # Locals keep min/max/seed from clobbering same-named caller variables.
    local min=$1
    local max=$2
    local span=$(( max - min + 1 ))
    local seed

    # An empty range would otherwise end in a modulo-by-zero arithmetic error.
    if (( span <= 0 )); then
        printf 'create_random: empty range %s..%s\n' "$min" "$max" >&2
        return 1
    fi

    seed=$(date +%s%N)
    # A date without %N support leaves a literal "N" in the output, which is
    # not a number; fall back to two $RANDOM draws in that case.
    if [[ ! $seed =~ ^[0-9]+$ ]]; then
        seed=$(( (RANDOM << 15) | RANDOM ))
    fi

    # 10# forces base 10 so a leading zero is never read as octal.
    echo $(( 10#$seed % span + min ))
}

INDEX=1

# Append synthetic batch-insert log lines to ./db.log.<YYYYMMDD>.
# The loop has no exit condition: it runs until the script is interrupted.
while true
do
    for user in allen mike jerry tracy han lilei
    do
        COUNT=$RANDOM
        # Lower bound is 0, not 1: $RANDOM can be 0, and the range 1..0 is
        # empty, which made create_random divide by zero and left NUM1 blank.
        NUM1=$(create_random 0 "$COUNT")
        NUM2=$(( COUNT - NUM1 ))
        echo "$(date '+%Y-%m-%d %H:%M:%S') $INDEX Batches: user $user insert $COUNT records into database:product table:detail, insert $NUM1 records successfully, failed $NUM2 records" >> "./db.log.$(date +%Y%m%d)"
        INDEX=$(( INDEX + 1 ))
    done
done

数据格式

2023-12-12 02:49:31 1 Batches: user allen insert 25719 records into database:product table:detail, insert 24482 records successfully, failed 1237 records
2023-12-12 02:49:31 2 Batches: user mike insert 32653 records into database:product table:detail, insert 26055 records successfully, failed 6598 records
2023-12-12 02:49:31 3 Batches: user jerry insert 16986 records into database:product table:detail, insert 11636 records successfully, failed 5350 records
2023-12-12 02:49:31 4 Batches: user tracy insert 31899 records into database:product table:detail, insert 9250 records successfully, failed 22649 records
2023-12-12 02:49:31 5 Batches: user han insert 24256 records into database:product table:detail, insert 24033 records successfully, failed 223 records

1. 统计所有成功, 失败, 总共记录数

count.awk:

# Aggregate per-user insert statistics from the batch log.
# In each log line, field 6 is the user name and fields 8, 14 and 18 are the
# attempted, successful and failed record counts of that batch.
BEGIN {
    printf "%-10s%-20s%-20s%-20s\n", "User", "Total", "Sucess", "Failed"
}

{
    who = $6
    attempted[who] += $8
    succeeded[who] += $14
    rejected[who]  += $18
}

END {
    row = "%-10s%-20s%-20s%-20s\n"

    # for-in visits the users in an unspecified order
    for (who in attempted) {
        printf row, who, attempted[who], succeeded[who], rejected[who]
        all_attempted += attempted[who]
        all_succeeded += succeeded[who]
        all_rejected  += rejected[who]
    }

    # grand totals over every user, with an empty name column
    printf row, "", all_attempted, all_succeeded, all_rejected
}
$ awk -f count.awk db.log.20231212
User      Total               Sucess              Failed
tracy     6096344             2963340             3133004
allen     6293470             3182865             3110605
mike      5845083             2912982             2932101
jerry     5996178             3080723             2915455
lilei     6217104             3028971             3188133
han       5923975             3089899             2834076
          36372154            18258780            18113374

2. 打印丢失记录的行数

一条记录行中, 总记录数 != 成功记录数 + 失败记录数

$ awk '{if($8 != $14 + $18) print NR}' db.log.20231212
  • 44
    点赞
  • 21
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

M·K·T

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值