1. 一个奇怪的rand()
[root@hadoop ~]# awk '{fr=int(100*rand());print fr}' tour.txt
23
29
84
15
58
19
81
17
如果在每一行的动作里都调用srand(),由于srand()默认以当前时间(秒)作为种子,同一秒内每一行都会用相同的种子重新播种,rand()返回的值也就全部相同(srand()应该放在BEGIN块中只调用一次):
[root@hadoop ~]# awk '{srand();fr=int(100*rand());print fr}' tour.txt
59
59
59
59
59
59
59
59
2.gsub 和sub
[root@hadoop ~]# awk '{go="this is 2019!";gsub(/[0-9]/,"*");sub("this","that",go);print $0,":",go}' tour.txt
air:**;hotel:**;nation:CHINA : that is 2019!
air:**;hotel:**;nation:USA : that is 2019!
air:**;hotel:**;nation:USA : that is 2019!
air:**;hotel:**;nation:CHINA : that is 2019!
air:**;hotel:**;nation:USA : that is 2019!
air:**;hotel:**;nation:USA : that is 2019!
air:**;hotel:**;nation:CHINA : that is 2019!
air:**;hotel:**;nation:CHINA : that is 2019!
3. index
[root@hadoop ~]# awk '{po=index($0,"air");print po,$0}' tour.txt
1 air:23;hotel:34;nation:CHINA
1 air:35;hotel:46;nation:USA
1 air:36;hotel:47;nation:USA
1 air:26;hotel:37;nation:CHINA
1 air:33;hotel:44;nation:USA
1 air:34;hotel:45;nation:USA
1 air:25;hotel:36;nation:CHINA
1 air:24;hotel:35;nation:CHINA
index的开始编号为1,找不到返回0
4. match 匹配
[root@hadoop ~]# awk '{po=match($0,/[0-9]/);print po,$0}' tour.txt
5 air:23;hotel:34;nation:CHINA
5 air:35;hotel:46;nation:USA
5 air:36;hotel:47;nation:USA
5 air:26;hotel:37;nation:CHINA
5 air:33;hotel:44;nation:USA
5 air:34;hotel:45;nation:USA
5 air:25;hotel:36;nation:CHINA
5 air:24;hotel:35;nation:CHINA
match同样以1为开始编号,只返回第一个匹配的起始位置,找不到返回0
5. blength (字节)和length(字符)(注:从下面输出中bl一列为空可以看出,这里使用的awk似乎并不支持blength函数,blength被当成了未初始化的变量)
[root@hadoop ~]# awk '{l=length;bl=blength;print l,";",bl,";",$0}' tour.txt
28 ; ; air:23;hotel:34;nation:CHINA
26 ; ; air:35;hotel:46;nation:USA
26 ; ; air:36;hotel:47;nation:USA
28 ; ; air:26;hotel:37;nation:CHINA
26 ; ; air:33;hotel:44;nation:USA
26 ; ; air:34;hotel:45;nation:USA
28 ; ; air:25;hotel:36;nation:CHINA
28 ; ; air:24;hotel:35;nation:CHINA
6. substr
[root@hadoop ~]# awk '{su=substr($0,3,5);print su}' tour.txt
r:23;
r:35;
r:36;
r:26;
r:33;
r:34;
r:25;
r:24;
7.split
[root@hadoop ~]# awk 'BEGIN{go="my name is wei hong rao";split(go,names," ");for( i in names) {print names[i]}}'
wei
hong
rao
my
name
is
由上可见一个耐人寻味的现象:
for in 遍历数组的顺序是不确定的,可以改用按下标递增的循环使它按顺序输出
[root@hadoop ~]# awk 'BEGIN{go="my name is wei hong rao";split(go,names," ");for(i=1; i<=length( names);i++) {print names[i]}}'
my
name
is
wei
hong
rao
8. getline
[root@hadoop ~]# awk 'BEGIN{while("cat tour.txt"|getline){print $0}}'
air:23;hotel:34;nation:CHINA
air:35;hotel:46;nation:USA
air:36;hotel:47;nation:USA
air:26;hotel:37;nation:CHINA
air:33;hotel:44;nation:USA
air:34;hotel:45;nation:USA
air:25;hotel:36;nation:CHINA
air:24;hotel:35;nation:CHINA
[root@hadoop ~]# awk 'BEGIN{while(getline< "tour.txt"){print $0}}'
air:23;hotel:34;nation:CHINA
air:35;hotel:46;nation:USA
air:36;hotel:47;nation:USA
air:26;hotel:37;nation:CHINA
air:33;hotel:44;nation:USA
air:34;hotel:45;nation:USA
air:25;hotel:36;nation:CHINA
air:24;hotel:35;nation:CHINA
[root@hadoop ~]# awk 'BEGIN{while("ls"|getline){print $0}}'
anaconda-ks.cfg
a.txt
derby.log
Desktop
Documents
Downloads
install.log
install.log.syslog
[root@hadoop usr]# head -n 3 a.txt
AIR 2013-09-01 4403 1348662.0
AIR 2013-09-02 26449 6701268.0
AIR 2013-09-03 45262 1.3755425E7
[root@hadoop usr]# head -n 3 b.txt
AIR 23
AIR 29
AIR 84
想要根据b.txt 来匹配a.txt里的行。getline读到b.txt末尾后文件仍然保持打开状态,如果循环之后没有close("b.txt"),那么只有a.txt的第一行能读到b.txt的内容,之后的行getline都会直接返回0;close之后,下一次getline才会重新从b.txt的第一行读起
awk '{while(getline line < "b.txt"){print $0" "line};close("b.txt")}' a.txt
9.使用system(system()的返回值是命令的退出状态,命令本身的输出直接打印到终端,所以下面最后一行打印的files是0)
[root@hadoop ~]# awk 'BEGIN{files=system("ls");print files}'
anaconda-ks.cfg install.log ok test tour.txt
a.txt install.log.syslog ok2 test1 Videos
derby.log mapper.sh Pictures test2 workspace
Desktop metastore_db Public tour2.txt
Documents Music reducer.sh tour3.txt
Downloads newtour.txt Templates tour4.txt
0
10.日期
[root@hadoop ~]# awk 'BEGIN{D=systime();print d}'
[root@hadoop ~]# awk 'BEGIN{d=systime();print d}'
1378355219
[root@hadoop ~]# awk 'BEGIN{d=systime();print strftime("%D",d)}'
09/05/13
[root@hadoop ~]# awk 'BEGIN{d=systime();print strftime("%D",mktime(2013 09 05 12 20 20))}'
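12/31/69
注意:mktime的参数必须是一个字符串,例如mktime("2013 09 05 12 20 20");上面没有加引号,awk把这些数字拼接成了一个无法解析的字符串,mktime返回-1,所以显示成了1969年12月31日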