参考: http://www.gnu.org/software/gawk/
1. Numeric
int(x): return the integer part of x, truncated toward zero (it does not round: int(1.8) is 1 and int(-1.8) is -1)
rand(): return a random number r, uniformly distributed in the range 0 <= r < 1
srand([x]): set the seed of the random number generator to x; if x is omitted, the current date and time are used as the seed. Returns the previous seed.
$ awk 'BEGIN{print int(1.8), int(-1.8), int(010), int(0x0A), int(3a), int(b2)}'
1 -1 8 10 3 0
# rand()
# roll --- simulate one throw of an n-sided die.
# rand() yields a fraction below 1, so int(n * rand()) lies in 0..n-1;
# adding 1 shifts the result into 1..n.
function roll(n)
{
    return int(n * rand()) + 1
}
# Main rule: for each input record, roll three six-sided dice and print the
# total. The trailing newline keeps each record's result on its own line
# instead of running all results together.
{
    printf("%d points\n", roll(6) + roll(6) + roll(6))
}
# with a fixed seed, the same random number is produced on every run
$ awk 'BEGIN{srand(1); print rand()}'
0.237788
$ awk 'BEGIN{srand(1); print rand()}'
0.237788
# the current date and time as a seed
$ awk 'BEGIN{srand(); print rand()}'
0.367979
$ awk 'BEGIN{srand(); print rand()}'
0.291809
2. String
2.1 sort
2.1.1 sorted by values.
len = asort(a): a is sorted in place by value; its original indices are discarded and replaced by sequential integers starting at 1. Returns the number of elements.
len = asort(a, b): a is left unchanged; b becomes a new array holding the sorted values of a under sequential integer indices starting at 1.
2.1.2 sorted by indices.
len = asorti(a): a is changed. The original indices, sorted, become the values, and sequential integers starting at 1 become the indices (the original values are lost).
len = asorti(a, b): a is left unchanged; b becomes a new array whose values are the sorted indices of a, under sequential integer indices starting at 1.
$ cat afile
last de
first sac
middle cul
$ awk '{a[$1]=$2} END{for(i in a) print i, a[i]}' afile
first sac
middle cul
last de
$ awk '{a[$1]=$2} END{len=asort(a); for(i=1;i<=len;i++) print i, a[i]}' afile
1 cul
2 de
3 sac
$ awk '{a[$1]=$2} END{len=asort(a,b); for(i=1;i<=len;i++) print i, b[i]}' afile
1 cul
2 de
3 sac
$ awk '{a[$1]=$2} END{len=asorti(a); for(i=1;i<=len;i++) print i, a[i]}' afile
1 first
2 last
3 middle
$ awk '{a[$1]=$2} END{len=asorti(a,b); for(i=1;i<=len;i++) print i, b[i], a[b[i]]}' afile
1 first sac
2 last de
3 middle cul
2.2 substitution
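result = gensub(regexp, replacement, how [, target]): "how" is "g" or "G" to replace every match, or a number n to replace only the n-th match; target defaults to $0 and is not modified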
$ gawk 'BEGIN{a="abc def"; b=gensub(/(.+) (.+)/, "\\2 \\1", "g", a); print b}'
def abc
$ echo "a b c a b c" | gawk '{s=gensub(/a/, "AA", 2); print s}'
a b c AA b c
sub(regexp, replacement [, target])
gsub(regexp, replacement [, target]): global sub()
$ awk 'BEGIN{s="water, water, everywhere"; sub(/at/,"ith",s); print s}'
wither, water, everywhere
# an & in the replacement stands for the exact text that was matched
$ awk 'BEGIN{s="daabaaaa"; sub(/a+/, "C&C", s); print s}'
dCaaCbaaaa
$ cat bfile
American
Britain
China
Germany
France
$ awk '{gsub(/Britain/, "United Kingdom"); print}' bfile
American
United Kingdom
China
Germany
France
2.3 substring
substr(string, start [, length ])
split(string, array [, fieldsep [, seps ] ])
$ awk 'BEGIN{a="abcdefg";b=substr(a,1,2)"CDE"substr(a,6); print b}'
abCDEfg
$ awk 'BEGIN{split("cul-de-sac",a,"-",seps); for(i in a) print "a["i"]="a[i]; for(j in seps) print "seps["j"]="seps[j]}'
a[1]=cul
a[2]=de
a[3]=sac
seps[1]=-
seps[2]=-
2.4 others
pos = index(string, find)
len = length([string])
match(string, regexp [, array])
str = sprintf(format, expr-list)
num = strtonum(str)
tolower(str)
toupper(str)
$ awk 'BEGIN{print index("peanut", "an")}'
3
$ awk 'BEGIN{print length("peanut")}'
6
# In older versions of awk, length could be called without parentheses, meaning length($0). Avoid this form: below it yields 0 (there is no input record in BEGIN), which is then concatenated with "peanut".
$ awk 'BEGIN{len=length "peanut"; print len}'
0peanut
$ echo "foooobazbarrrrrr" | awk '{match($0, /(fo+).+(bar*)/, arr); print arr[1], arr[2]}'
foooo barrrrrr
$ awk 'BEGIN{pi=sprintf("%.2f (approx.)", 22/7); print pi}'
3.14 (approx.)
$ awk 'BEGIN{s="0x11"; printf("%d\n", strtonum(s))}'
17
$ awk 'BEGIN{print toupper("cest la vie")}'
CEST LA VIE
$ awk 'BEGIN{print tolower("Welcome You")}'
welcome you
3. IO
close(filename [, how]): close a file or pipe; "how" is "from" or "to" and is used only to close one end of a two-way pipe
fflush(filename)
system(command)
4. Time
mktime(“YYYY MM DD HH MM SS [DST]”)
strftime([format [, timestamp [, utc-flag] ] ])
systime(): return the current time as the number of seconds since the system epoch
# default time format
$ awk 'BEGIN{print PROCINFO["strftime"]}'
%a %b %e %H:%M:%S %Z %Y
$ awk 'BEGIN{print strftime()}'
Mon Jul 6 09:29:13 EDT 2015
5. Bit-Manipulation
and(v1, v2 [, …])
or(v1, v2 [, …])
xor(v1, v2 [, …])
compl(val): the bitwise complement
lshift(val, count): shifted left by count bits
rshift(val, count): shifted right by count bits
$ cat bits2str.awk
# bits2str --- render a number as a string of binary digits.
#
#   bits        the value to convert
#   data, mask  local scratch variables; callers pass only bits
#
# Returns "0" when bits is zero. Otherwise returns the binary digits of bits,
# most significant first, left-padded with zeros until the length is a
# multiple of 8.
function bits2str(bits, data, mask)
{
    # Zero has no set bits; the loop below would produce an empty string.
    if (bits == 0)
        return "0"

    mask = 1

    # Test the lowest bit, prepend its digit, then shift right, so the
    # digit produced last (the highest bit) ends up at the front.
    while (bits != 0) {
        if (and(bits, mask))
            data = "1" data
        else
            data = "0" data
        bits = rshift(bits, 1)
    }

    # Pad on the left to a whole number of 8-digit groups.
    while (length(data) % 8 != 0)
        data = "0" data

    return data
}
# Demonstration driver: print a few values in binary via bits2str(), then
# show the results of compl(), lshift() and rshift() on 0x99 in both
# hexadecimal and binary.
BEGIN {
    # The same conversion applied to decimal-, octal- and hex-looking constants.
    printf("123 = %s\n", bits2str(123))
    printf("0123 = %s\n", bits2str(0123))
    printf("0x99 = %s\n", bits2str(0x99))

    # Bitwise complement.
    comp = compl(0x99)
    printf("compl(0x99) = %#x = %s\n", comp, bits2str(comp))

    # Shift left by two bits.
    shift = lshift(0x99, 2)
    printf("lshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift))

    # Shift right by two bits.
    shift = rshift(0x99, 2)
    printf("rshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift))
}
$ awk -f bits2str.awk
123 = 01111011
0123 = 01010011
0x99 = 10011001
compl(0x99) = 0x1fffffffffff66 = 00011111111111111111111111111111111111111111111101100110
lshift(0x99, 2) = 0x264 = 0000001001100100
rshift(0x99, 2) = 0x26 = 00100110
6. Data Type (gawk 4.0+)
isarray(x): distinguish an array from a scalar variable
$ awk 'BEGIN{a[1]="one"; print isarray(a)}'
1
$ awk 'BEGIN{a="one"; print isarray(a)}'
0