上一篇讲了方法,这里提供例子~~~
1 命令行使用awk
[root@A10050362_Ztmp]# last -n 5
root pts/0 10.43.110.12 Wed Feb 11 09:03 still logged in
root pts/0 10.43.110.158 Tue Feb 10 04:11 - 07:46 (1+03:34)
root pts/0 10.43.110.158 Tue Feb 10 02:34 - 04:11 (01:37)
root pts/1 10.43.110.12 Mon Feb 9 12:10 - 08:39 (1+20:29)
root pts/0 10.43.110.12 Mon Feb 9 06:54 - 13:49 (06:55)
wtmp begins Wed Jul 16 08:51:10 2014
如果只是显示最近登录的5个IP
[root@A10050362_Ztmp]# last -n 5| awk '{print $3}'
10.43.110.12
10.43.110.158
10.43.110.158
10.43.110.12
10.43.110.12
2 awk脚本
脚本内容如下,打印文件中包含rabbit字段的行
#!/bin/awk -f
#example3.awk
{if($0~/rabbit/)print}
修改example3.awk的权限,增加可执行权限(例如 chmod +x example3.awk),然后执行脚本
使用方法:
[root@A10050362_Ztmp]# ./example3.awk nova.conf
# Deprecated group;name - DEFAULT;rabbit_durable_queues
#rabbit_host=localhost
rabbit_host=10.43.175.83
#rabbit_port=5672
rabbit_port=5672
#rabbit_hosts=$rabbit_host:$rabbit_port
rabbit_hosts=10.43.175.83:5672
#rabbit_use_ssl=false
rabbit_use_ssl=False
#rabbit_userid=guest
rabbit_userid=guest
#rabbit_password=guest
rabbit_password=guest
#rabbit_login_method=AMQPLAIN
#rabbit_virtual_host=/
rabbit_virtual_host=/
#rabbit_retry_interval=1
#rabbit_retry_backoff=2
#rabbit_max_retries=0
#rabbit_ha_queues=false
rabbit_ha_queues=False
#fake_rabbit=false
# The messaging driver to use, defaults to rabbit. Other
#rpc_backend=rabbit
[root@A10050362_Ztmp]#
3 文本方式使用awk
将下面awk语句写入文档example2.txt
[root@A10050362_Ztmp]# cat example2.txt
{if($0~/rabbit/)print}
使用方法,如下两种方式
[root@A10050362_Zawktest]# awk -f example2.txt nova.conf
# Deprecated group;name - DEFAULT;rabbit_durable_queues
#rabbit_host=localhost
rabbit_host=10.43.175.83
#rabbit_port=5672
rabbit_port=5672
#rabbit_hosts=$rabbit_host:$rabbit_port
rabbit_hosts=10.43.175.83:5672
#rabbit_use_ssl=false
rabbit_use_ssl=False
#rabbit_userid=guest
rabbit_userid=guest
#rabbit_password=guest
rabbit_password=guest
#rabbit_login_method=AMQPLAIN
#rabbit_virtual_host=/
rabbit_virtual_host=/
#rabbit_retry_interval=1
#rabbit_retry_backoff=2
#rabbit_max_retries=0
#rabbit_ha_queues=false
rabbit_ha_queues=False
#fake_rabbit=false
# The messaging driver to use, defaults to rabbit. Other
#rpc_backend=rabbit
[root@A10050362_Zawktest]#
[root@A10050362_Zawktest]# cat nova.conf |awk -f example2.txt
……
4 域
[root@A10050362_Zawktest]# cat test2.txt
1 2 3 4 5 6 7 8
one two three four five six seven eight
11 12 13 14 15 16 17
[root@A10050362_Zawktest]# awk '{print $1,$4}' test2.txt
1 4
one four
11 14
[root@A10050362_Zawktest]# awk '{print $1"\t"$4}' test2.txt
1	4
one	four
11	14
[root@A10050362_Zawktest]# awk '{print $1":"$4}' test2.txt
1:4
one:four
11:14
[root@A10050362_Zawktest]# awk '{print NR,NF,$1,$NF}' e.txt
1 2 1 2
2 3 3 5
3 0
4 2 6 7
5 3 8 10
6 2 11 12
7 0
8 1 hello hello
9 0
[root@A10050362_Zawktest]#
[root@A10050362_Zawktest]# cat address.awk
#!/bin/awk -f
#address.awk
BEGIN{FS="\n";RS=""}
{
print $1 "," $2 ", " $3
}
[root@A10050362_Zawktest]# ./address.awk address.txt
Jimmy the Weasel,100 Pleasant Drive, San Francisco, CA 12345
Big Tony,200 Incognito Ave., Suburbia, WA 67890
[root@A10050362_Zawktest]#
也可以如下实现:
[root@A10050362_Zawktest]# ./address2.awk address.txt
Jimmy the Weasel,100 Pleasant Drive,San Francisco, CA 12345
Big Tony,200 Incognito Ave.,Suburbia, WA 67890
[root@A10050362_Zawktest]# cat address2.awk
#!/bin/awk -f
#address2.awk
BEGIN {
FS="\n"
RS=""
OFS=","}
{
print $1 ,$2, $3
}
[root@A10050362_Zawktest]#
说明:本节前面的命令 awk '{print NR,NF,$1,$NF}' e.txt 用于显示文件的当前记录号、域数以及每一行的第一个和最后一个域。
5 文件内容提取
提取文件nova.conf中不包含#的非空行:
[root@A10050362_Ztmp]# awk '{if ( $0!~"#" && /./)print $0}' nova.conf>test.txt
[root@A10050362_Ztmp]# cat test.txt
[DEFAULT]
amqp_durable_queues=False
rabbit_host=10.43.175.83
rabbit_port=5672
rabbit_hosts=10.43.175.83:5672
rabbit_use_ssl=False
rabbit_userid=guest
rabbit_password=guest
rabbit_virtual_host=/
rabbit_ha_queues=False
notification_driver=
……
其中:
" ~ " 用来在记录或者域内匹配正则表达式," !~ " 则表示不匹配该正则表达式
" && " 运算符用来对两个条件进行与操作
[root@A10050362_Ztmp]# awk '/rabbit/,/host/' nova.conf
# Deprecated group;name - DEFAULT;rabbit_durable_queues
#amqp_durable_queues=false
amqp_durable_queues=False
上面使用范围模式:从包含rabbit的行开始,到其后第一个包含host的行为止,打印这之间的所有行
[root@A10050362_Ztmp]# awk '/rabbit/&&/host/' nova.conf
#rabbit_host=localhost
rabbit_host=10.43.175.83
#rabbit_hosts=$rabbit_host:$rabbit_port
rabbit_hosts=10.43.175.83:5672
#rabbit_virtual_host=/
rabbit_virtual_host=/
[root@A10050362_Ztmp]#
上面匹配包含rabbit 和 host的行
6 分隔符
[root@A10050362_Ztmp]# ll
total 244
-rwxr-xr-x 1root root 27 Feb 12 02:18 awktest
-rwxr-xr-x 1root root 397 Feb 12 03:25 example1.awk
-rwxr-xr-x 1root root 38 Feb 12 03:26example2.awk
-rwxrw-rw- 1root root 108459 Feb 11 17:51 nova.conf
-rw-r--r-- 1root root 2549 Feb 11 09:59nova_new2.conf
-rw-r--r-- 1root root 107360 Feb 11 09:59 nova_new.conf
drwxr-xr-x 2root root 4096 Feb 3 12:44 plugins
-rw-r--r-- 1root root 25 Feb 12 03:23 test2.txt
-rw-r--r-- 1root root 2549 Feb 12 05:58 test.txt
[root@A10050362_Ztmp]# ls -l | awk -F ':' '{print $1}'
total 244
-rwxr-xr-x 1root root 27 Feb 12 02
-rwxr-xr-x 1 rootroot 397 Feb 12 03
-rwxr-xr-x 1root root 38 Feb 12 03
-rwxrw-rw- 1root root 108459 Feb 11 17
-rw-r--r-- 1root root 2549 Feb 11 09
-rw-r--r-- 1root root 107360 Feb 11 09
drwxr-xr-x 2root root 4096 Feb 3 12
-rw-r--r-- 1root root 25 Feb 12 03
-rw-r--r-- 1root root 2549 Feb 12 05
以上ls -l | awk -F ':' '{print $1}' 查看以冒号作为分隔符的域1
[root@A10050362_Ztmp]# ls -l | awk -F ':' '{print $1,$2}'
total 244
-rwxr-xr-x 1root root 27 Feb 12 02 18 awktest
-rwxr-xr-x 1root root 397 Feb 12 03 25example1.awk
-rwxr-xr-x 1root root 38 Feb 12 03 26example2.awk
-rwxrw-rw- 1root root 108459 Feb 11 17 51 nova.conf
-rw-r--r-- 1root root 2549 Feb 11 09 59nova_new2.conf
-rw-r--r-- 1root root 107360 Feb 11 09 59 nova_new.conf
drwxr-xr-x 2root root 4096 Feb 3 12 44 plugins
-rw-r--r-- 1root root 25 Feb 12 03 23 test2.txt
-rw-r--r-- 1root root 2549 Feb 12 05 58 test.txt
ls -l | awk -F ':' '{print $1,$2}' 查看以冒号作为分隔符的域1和域2
[root@A10050362_Ztmp]# ls -l | awk '{print $1"..."$3}'
total...
-rwxr-xr-x...root
-rwxr-xr-x...root
-rwxr-xr-x...root
-rwxrw-rw-...root
-rw-r--r--...root
-rw-r--r--...root
drwxr-xr-x...root
-rw-r--r--...root
-rw-r--r--...root
查看第一列和第三列,以“...”作为分隔符
[root@A10050362_Ztmp]# ls -l | awk '{print $1"..."$3"***"$7}'
total...***
-rwxr-xr-x...root***12
-rwxr-xr-x...root***12
-rwxr-xr-x...root***12
-rwxrw-rw-...root***11
-rw-r--r--...root***11
-rw-r--r--...root***11
drwxr-xr-x...root***3
-rw-r--r--...root***12
-rw-r--r--...root***12
[root@A10050362_Ztmp]#
查看第一列、第三列和第七列:第一列和第三列之间以“...”作为分隔符,第三列和第七列之间以“***”作为分隔符
7 BEGIN和END
[root@A10050362_Zawktest]# awk 'BEGIN{print NR FNR} {if(NR==FNR){print "处理文件a.txt"} if(NR > FNR){print "处理文件b.txt"}} END{print NR FNR}' a.txt b.txt
00
处理文件a.txt
处理文件b.txt
21
[root@A10050362_Zawktest]#
awk的一般语法格式为:
awk [-参数 变量] 'BEGIN{初始化}条件类型1{动作1}条件类型2{动作2}……END{后处理}'
其中:BEGIN和END中的语句分别在开始读取文件(in_file)之前和读取完文件之后发挥作用,可以理解为初始化和扫尾。
awk脚本方式中BEGIN和END的使用方法:
[root@A10050362_Zawktest]# ./example1.awk a.txt
test begin
enter a
filename : a.txt
test finish
[root@A10050362_Zawktest]# cat example1.awk
#!/bin/awk -f
#example1.awk
BEGIN{print"test begin"}
{print $0,"\nfilename : ", FILENAME}
END{print"test finish"}
8 内置变量
[root@A10050362_Zawktest]# awk '{if(ARGIND==1){print "处理a文件"} if(ARGIND==2){print "处理b文件"}}' a.txt b.txt
处理a文件
处理b文件
[root@A10050362_Zawktest]#
ARGIND:当前被处理文件在ARGV数组中的下标(gawk的扩展变量,其他awk实现可能不支持)
[root@A10050362_Zawktest]# awk 'NR==FNR{print "处理文件a.txt"} NR > FNR{print "处理文件b.txt"}' a.txt b.txt
处理文件a.txt
处理文件b.txt
[root@A10050362_Zawktest]#
NR 已经读出的总记录数(行数),包含所有文件
FNR 当前文件的记录数
[root@A10050362_Zawktest]# awk '{ print ARGIND NR FNR}' a.txt b.txt c.txt
111
221
232
243
351
[root@A10050362_Zawktest]#
[root@A10050362_Zawktest]# awk 'NR==10,NR==15{print}' nova.conf
amqp_durable_queues=False
# Auto-delete queues in amqp. (boolean value)
#amqp_auto_delete=false
# Size of RPC connection pool. (integer value)
[root@A10050362_Zawktest]#
打印10-15行
[root@A10050362_Zawktest]# awk -F ':' '{print $2}' b.txt
b.txt
2
3
[root@A10050362_Zawktest]# awk 'BEGIN{FS=":"}{print $2}' b.txt
b.txt
2
3
[root@A10050362_Zawktest]#
上面两个命令功能相同,其中FS是用于设置输入字段分隔符的内置变量
[root@A10050362_Zawktest]# awk -F ':' '{print NF}' b.txt
2
2
2
[root@A10050362_Zawktest]# awk '{print NF}' b.txt
3
3
3
[root@A10050362_Zawktest]# cat b.txt
enter file:b.txt
test line: 2
test line: 3
[root@A10050362_Zawktest]#
NF为当前记录中的字段个数
[root@A10050362_Zawktest]# awk -F ':' 'BEGIN{OFS="="}{print $1,$2}' b.txt
enter file=b.txt
test line= 2
test line= 3
[root@A10050362_Zawktest]#
OFS为输出字段分隔符(缺省为一个空格),也可以用如下命令实现:
[root@A10050362_Zawktest]# awk -F ':' '{print $1"="$2}' b.txt
enter file=b.txt
test line= 2
test line= 3
[root@A10050362_Zawktest]#
[root@A10050362_Zawktest]# awk 'BEGIN{ RS=";" } {print}' d.txt
hello world
today is 2015.02.25
it's the first workday
happy new year!!
[root@A10050362_Zawktest]#
RS:输入记录分隔符,缺省为"\n"
缺省情况下,awk把一行看作一个记录;如果设置了RS,那么awk按照RS来分割记录
[root@A10050362_Zawktest]# awk -F ':' '{print "filename:" FILENAME",linenumber:" NR ",columns:" NF",linecontent:"$0}' e.txt
filename:e.txt,linenumber:1,columns:1,linecontent:1 2
filename:e.txt,linenumber:2,columns:1,linecontent:3 4 5
filename:e.txt,linenumber:3,columns:0,linecontent:
filename:e.txt,linenumber:4,columns:1,linecontent:6 7
filename:e.txt,linenumber:5,columns:1,linecontent:8 9 10
filename:e.txt,linenumber:6,columns:1,linecontent:11 12
filename:e.txt,linenumber:7,columns:0,linecontent:
filename:e.txt,linenumber:8,columns:1,linecontent:hello
filename:e.txt,linenumber:9,columns:0,linecontent:
[root@A10050362_Zawktest]#
9 自定义变量
打印文件的行数
[root@A10050362_Z awktest]# awk 'BEGIN{count=0;print "[start]user count is ", count} {count=count+1}END{print "[end]user count is ", count}' e.txt
10 从shell中输入变量
[root@A10050362_Zawktest]# a=2
[root@A10050362_Zawktest]# cat d.txt
hello world;today is 2015.02.25;it's the first workday;happy new year!!
[root@A10050362_Zawktest]# awk -F ";" -v var=$a '{print $var}' d.txt
today is 2015.02.25
[root@A10050362_Zawktest]#
11 awk中调用shell命令
使用管道的方式
[root@A10050362_Zawktest]# cat d.txt | awk -F ";" -v var=$a '{print $var}'
today is 2015.02.25
[root@A10050362_Zawktest]#
[root@A10050362_Zawktest]# awk -F ";" -v var=$a '{print $var}' d.txt
today is 2015.02.25
[root@A10050362_Zawktest]#
上面两个命令功能完全相同
awk中使用管道有两种语法,分别是:
awk output | shell input
shell output | awk input
在 shell output | awk input 中,awk 的 input 只能是 getline 函数。shell 执行的结果缓存于 pipe 中,再传送给 awk 处理;如果有多行数据,awk 的 getline 命令可能需要调用多次。
awk 'BEGIN {
for(j=1;j<=12;j++)
{ flag=0;
printf "\n%d月份\n",j;
for(i=1;i<=31;i++)
{
if(j==2&&i>28) flag=1;
if((j==4||j==6||j==9||j==11)&&i>30) flag=1;
if (flag==0){printf "%02d%02d ",j,i}
}
}
}'
12 字符串化变量
awk 的优点之一就是“简单和字符串化”。 awk 变量“字符串化”是因为所有 awk 变量在内部都是按字符串形式存储的。同时,awk 变量是“简单的”,因为可以对它执行数学操作,且只要变量包含有效数字字符串,awk 会自动处理字符串到数字的转换步骤。
[root@A10050362_Zawktest]# awk '{ print ($1^2)+1 }' test3.txt
2
5
10
17
26
37
50
[root@A10050362_Zawktest]# cat test3.txt
1
2
3
4
5
6
7
[root@A10050362_Zawktest]#
[root@A10050362_Zawktest]# awk '{ print $1+1 }' test4.txt
2.01
3.02
4
5
6
7
8
[root@A10050362_Zawktest]# cat test4.txt
1.01
2.02
3
4
5
6
7
[root@A10050362_Zawktest]#