awk&sed精要

最新推荐文章于 2024-08-21 16:58:05 发布

pianzif

最新推荐文章于 2024-08-21 16:58:05 发布

阅读量3.3k

点赞数

分类专栏：文件处理文章标签： awk Linux 例子

本文链接：https://blog.csdn.net/fuwencaho/article/details/25540011

版权

文件处理专栏收录该内容

19 篇文章 0 订阅

订阅专栏

[root@centos-fuwenchao ~]# awk /root/ /etc/passwd
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin
[root@centos-fuwenchao ~]# awk -F: /root/print $1 /etc/passwd
awk: /root/print
awk:       ^ syntax error
[root@centos-fuwenchao ~]# awk -F: /root/{print $1} /etc/passwd
awk: cmd. line:1: /root/{print
awk: cmd. line:1:             ^ unexpected newline or end of string
[root@centos-fuwenchao ~]# awk -F: '/root/{print $1}' /etc/passwd
root
operator
[root@centos-fuwenchao ~]# awk -F: 'begin {print "wenchao"}/root/{print $1}' /etc/passwd
root
operator
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1}' /etc/passwd
wenchao
root
operator
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1}end{print "wenchaojieshufu"}' /etc/passwd
wenchao
root
operator
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1}END{print "wenchaojieshufu"}' /etc/passwd
wenchao
root
operator
wenchaojieshufu
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1$7}END{print "wenchaojieshufu"}' /etc/passwd
wenchao
root/bin/bash
operator/sbin/nologin
wenchaojieshufu
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1\T$7}END{print "wenchaojieshufu"}' /etc/passwd
awk: BEGIN {print "wenchao"}/root/{print $1\T$7}END{print "wenchaojieshufu"}
awk:                                       ^ backslash not last character on line
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print $1\t$7}END{print "wenchaojieshufu"}' /etc/passwd
awk: BEGIN {print "wenchao"}/root/{print $1\t$7}END{print "wenchaojieshufu"}
awk:                                       ^ backslash not last character on line
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wenchao"}/root/{print "$1\t$7"}END{print "wenchaojieshufu"}' /etc/passwd
wenchao
$1      $7
$1      $7
wenchaojieshufu
[root@centos-fuwenchao ~]# awk -F: 'BEGIN {print "wen\tchao"}/root/{print "$1\t$7"}END{print "wenchao\tjieshufu"}' /etc/passwd
wen     chao
$1      $7
$1      $7
wenchao jieshufu
[root@centos-fuwenchao ~]#

参见我的另外博客：http://fuwenchao.blog.51cto.com/6008712/1341500

[root@centos-fuwenchao tmp]# awk -f blank num.txt 
this is blank line
this is blank line
this is blank line
this is blank line
this is blank line
[root@centos-fuwenchao tmp]# more blank 
/^$/{print "this is blank line"}
[root@centos-fuwenchao tmp]#

# test for integer, string or empty line.
/[0-9]+/ { print "That is an integer" }
/[A-Za-z]+/ { print "This is a string" }
/^$/ { print "This is a blank line." }

[root@centos-fuwenchao tmp]# awk -f blankint 
4
That is an integer
5
That is an integer
t
This is a string
g
This is a string

This is a blank line.
^C
[root@centos-fuwenchao tmp]# more blankint 
# test for integer, string or empty line.
/[0-9]+/ { print "That is an integer" }
/[A-Za-z]+/ { print "This is a string" }
/^$/ { print "This is a blank line." }
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# echo a b c d | awk 'BEGIN { one = 1; two = 2 } { print $(one + two) }'
c
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f nameaddr.awk  nameaddr

John
Robinson,Koren
Inc.,978
4th Ave.,Boston,MA

Phyllis
Chapman,GVE
Corp.,34
Sea Drive,Amesbury,MA01881,879-0900
[root@centos-fuwenchao tmp]# more nameaddr.awk 
{ print "" # output blank line
print $1 # name
print $2 # company
print $3 # street
print $4, $5 # city, state zip
}
[root@centos-fuwenchao tmp]# more nameaddr
John Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987
Phyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900
[root@centos-fuwenchao tmp]# awk -F , -f nameaddr.awk  nameaddr

John Robinson
Koren Inc.
978 4th Ave.
Boston MA 01760

Phyllis Chapman
GVE Corp.
34 Sea Drive
Amesbury MA01881
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f nameaddr.awk nameaddr

John Robinson
Koren Inc.
978 4th Ave.
Boston MA 01760

Phyllis Chapman
GVE Corp.
34 Sea Drive
Amesbury MA01881
[root@centos-fuwenchao tmp]# more nameaddr.awk 
BEGIN {FS=","}
{ print "" # output blank line
print $1 # name
print $2 # company
print $3 # street
print $4, $5 # city, state zip
}
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk '/John/' nameaddr
John Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987
[root@centos-fuwenchao tmp]# awk '~/John/' nameaddr
awk: ~/John/
awk: ^ syntax error
[root@centos-fuwenchao tmp]# awk '$1 ~ /John/' nameaddr
John Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987
[root@centos-fuwenchao tmp]# awk '$1 !~ /John/' nameaddr
Phyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900
[root@centos-fuwenchao tmp]# awk '$2 !~ /John/' nameaddr
John Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987
Phyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900
[root@centos-fuwenchao tmp]# awk '$2 ~ /John/' nameaddr
[root@centos-fuwenchao tmp]# more name
name: No such file or directory
[root@centos-fuwenchao tmp]# more nameaddr
John Robinson,Koren Inc.,978 4th Ave.,Boston,MA 01760,696-0987
Phyllis Chapman,GVE Corp.,34 Sea Drive,Amesbury,MA01881,879-0900
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk '/^$/{x++} END{print x}' num.txt 
5
[root@centos-fuwenchao tmp]# more num.txt 
1


2
3
4
5



6
7
8
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f culcu.awk culfile 
john 87.4
andrea 86
jasper 85.6
[root@centos-fuwenchao tmp]# more culcu.awk 
{ total = $2 + $3 + $4 + $5 + $6
avg = total / 5
print $1, avg }
[root@centos-fuwenchao tmp]# more culfile 
john 85 92 78 94 88
andrea 89 90 75 90 86
jasper 84 88 80 92 84
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao home]# ll
total 16
drwx------.  6 oracle      oinstall 4096 Mar 12 01:50 oracle
drwx------.  6 testuser    testuser 4096 Nov  5  2013 testuser
drwx------.  8 userwenchao groupq   4096 Nov  5  2013 userwenchao
drwx------. 27         500      500 4096 Nov  5  2013 wenchao
[root@centos-fuwenchao home]# ls -l $* | awk '{print $5, "\t", $9}'
         
4096     oracle
4096     testuser
4096     userwenchao
4096     wenchao
[root@centos-fuwenchao home]#

[root@centos-fuwenchao home]# ll |awk -f /tmp/filesize.awk 
BYTES    FILE
         
oracle   4096
testuser         4096
userwenchao      4096
wenchao          4096
Total:  16384 bytes (5 files)
[root@centos-fuwenchao home]# more /tmp/filesize.awk 
BEGIN { print "BYTES", "\t", "FILE" }
{
sum += $5
++filenum
print $9, "\t", $5
}
END { print "Total: ", sum, "bytes (" filenum " files)" }
[root@centos-fuwenchao home]# ll
total 16
drwx------.  6 oracle      oinstall 4096 Mar 12 01:50 oracle
drwx------.  6 testuser    testuser 4096 Nov  5  2013 testuser
drwx------.  8 userwenchao groupq   4096 Nov  5  2013 userwenchao
drwx------. 27         500      500 4096 Nov  5  2013 wenchao

[root@centos-fuwenchao home]# ll |awk -f /tmp/filesize.awk 
BYTES    FILE
         
oracle   4096
testuser         4096
userwenchao      4096
wenchao          4096
Total:  16384 bytes (5 files)
[root@centos-fuwenchao home]# ll
total 16
drwx------.  6 oracle      oinstall 4096 Mar 12 01:50 oracle
drwx------.  6 testuser    testuser 4096 Nov  5  2013 testuser
drwx------.  8 userwenchao groupq   4096 Nov  5  2013 userwenchao
drwx------. 27         500      500 4096 Nov  5  2013 wenchao
[root@centos-fuwenchao home]# more /tmp/filesize.awk 
BEGIN { print "BYTES", "\t", "FILE" }
{
sum += $5
filenum++
print $9, "\t", $5
}
END { print "Total: ", sum, "bytes (" filenum " files)" }
[root@centos-fuwenchao home]#

[root@centos-fuwenchao oracle]# ll
total 56
-rw-r--r--.  1 oracle oinstall    44 Mar 12 00:32 afiedt.buf
-rw-r--r--.  1 oracle oinstall  2093 Mar 11 11:12 demobld.sql
-rw-r--r--.  1 oracle oinstall   783 Feb 26 11:16 file_spool.lst
-rw-r--r--.  1 oracle oinstall   479 Mar 11 11:32 login.sql
drwxr-xr-x. 19 root   root      4096 Mar 11 11:29 oracleDSTom
drwxr-x---.  3 oracle oinstall  4096 Feb 19 15:32 oradiag_oracle
-rw-r--r--.  1 oracle oinstall   524 Feb 26 13:37 save.sql
-rw-r-----.  1 oracle oinstall 16890 Feb 19 16:07 sum
-rw-r--r--.  1 oracle oinstall   491 Feb 26 12:39 tempwenchaodb.txt
-rw-r--r--.  1 oracle oinstall    10 Feb 26 10:33 wen.sql
[root@centos-fuwenchao oracle]# ls -l $* | awk '
> # filesum: list files and total size in bytes
> # input: long listing produced by "ls -l"
> #1 output column headers
> BEGIN { print "BYTES", "\t", "FILE" }
> #2 test for 9 fields; files begin with "-"
> NF == 9 && /^-/ {
> sum += $5 # accumulate size of file
> ++filenum # count number of files
> print $5, "\t", $9 # print size and filename
> }
> #3 test for 9 fields; directory begins with "d"
> NF == 9 && /^d/ {
> print "<dir>", "\t", $9 # print <dir> and name
> }
> #4 test for ls -lR line ./dir:
> $1 ~ /^\..*:$/ {
> print "\t" $0 # print that line preceded by tab
> }
> #5 once all is done,
> END {
> # print total file size and number of files
> print "Total: ", sum, "bytes (" filenum " files)"
> }'
BYTES    FILE
44       afiedt.buf
2093     demobld.sql
783      file_spool.lst
479      login.sql
<dir>    oracleDSTom
<dir>    oradiag_oracle
524      save.sql
16890    sum
491      tempwenchaodb.txt
10       wen.sql
Total:  21314 bytes (8 files)
[root@centos-fuwenchao oracle]#

ls -l $* | awk '
# filesum: list files and total size in bytes
# input: long listing produced by "ls -l"
#1 output column headers
BEGIN { print "BYTES", "\t", "FILE" }
#2 test for 9 fields; files begin with "-"
NF == 9 && /^-/ {
sum += $5 # accumulate size of file
++filenum # count number of files
print $5, "\t", $9 # print size and filename
}
#3 test for 9 fields; directory begins with "d"
NF == 9 && /^d/ {
print "<dir>", "\t", $9 # print <dir> and name
}
#4 test for ls -lR line ./dir:
$1 ~ /^\..*:$/ {
print "\t" $0 # print that line preceded by tab
}
#5 once all is done,
END {
# print total file size and number of files
print "Total: ", sum, "bytes (" filenum " files)"
}'

printf ( format-expression [, arguments] )

c ASCII character
d Decimal integer
i Decimal integer. (Added in POSIX)
e Floating-point format ([-]d.precisione[+-]dd)
E Floating-point format ([-]d.precisionE[+-]dd)
f Floating-point format ([-]ddd.precision)
g e or f conversion, whichever is shortest, with trailing zeros removed
G E or f conversion, whichever is shortest, with trailing zeros removed
o Unsigned octal value
s String
x Unsigned hexadecimal number. Uses a-f for 10 to 15
X Unsigned hexadecimal number. Uses A-F for 10 to 15
% Literal %

printf("%d\t%s\n", $5, $9)

%-width.precision format-specifier

[root@centos-fuwenchao tmp]# awk '{printf("|%10s|\n", "hello")}' num.txt
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
|     hello|
[root@centos-fuwenchao tmp]# awk '{printf("%10s\n", "hello")}' num.txt
     hello
     hello
     hello
     hello
     hello
     hello
     hello
     hello
     hello
     hello
     hello
     hello
     hello
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk '{printf("|%-10s|\n", "hello")}' num.txt
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
|hello     |
[root@centos-fuwenchao tmp]#

The precision modifier, used for decimal or floating-point values, controls the number of digits that
appear to the right of the decimal point. For string values, it controls the maximum number of characters
from the string that will be printed. Note that the default precision for the output of numeric values is
"%.6g".
You can specify both the width and precision dynamically, via values in the printf or sprintf
argument list. You do this by specifying asterisks, instead of literal values.

printf("%*.*g\n", 5, 3, myvar);
In this example, the width is 5, the precision is 3, and the value to print will come from myvar.
The default precision used by the print statement when outputting numbers can be changed by setting
the system variable OFMT. For instance, if you are using awk to write reports that contain dollar values,
you might prefer to change OFMT to "%.2f".
Using the full syntax of the format expression can solve the problem with filesum of getting fields and headings properly aligned.

[root@centos-fuwenchao tmp]# awk  '{print high "\t" low}' high=100 low=60 num.txt 
100     60
100     60
100     60
100     60
100     60
100     60
100     60
100     60
100     60
100     60
100     60
100     60
100     60
[root@centos-fuwenchao tmp]#

$ awk -f scriptfile high=100 low=60 datafile
Inside the script, these two variables are available and can be accessed as any awk variable. If you were
to put this script in a shell script wrapper, then you could pass the shell's command-line arguments as
values. (The shell makes available command-line arguments in the positional variables - $1 for the first
parameter, $2 for the second, and so on.) For instance, look at the shell script version of the previous
command:
[13] Careful! Don't confuse the shell's parameters with awk's field variables.
awk -f scriptfile "high=$1" "low=$2" datafile
If this shell script were named awket, it could be invoked as:
$ awket 100 60
"100" would be $1 and passed as the value assigned to the variable high.
In addition, environment variables or the output of a command can be passed as the value of a variable.
Here are two examples:

$ awk '{ print NR, $0 }' OFS='. ' names
1. Tom 656-5789
2. Dale 653-2133
3. Mary 543-1122
4. Joe 543-2211

[root@centos-fuwenchao tmp]# awk 'BEGIN { print n }
{
if (n == 1) print "Reading the first file"
if (n == 2) print "Reading the second file"
}' n=1 num.txt n=2 culcu.awk 

Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the first file
Reading the second file
Reading the second file
Reading the second file
[root@centos-fuwenchao tmp]# more num.txt 
1


2
3
4
5



6
7
8
[root@centos-fuwenchao tmp]# more culcu.awk 
{ total = $2 + $3 + $4 + $5 + $6
avg = total / 5
print $1, avg }
[root@centos-fuwenchao tmp]#

解释（原书示例中的两个文件名是 test 和 test2，分别对应上面命令中的 num.txt 和 culcu.awk）：

There are four command-line parameters: "n=1," "test," "n=2," and "test2". Now, if you remember that a
BEGIN procedure is "what we do before processing input," you'll understand why the reference to n in
the BEGIN procedure returns nothing. So the print statement will print a blank line. If the first
parameter were a file and not a variable assignment, the file would not be opened until the BEGIN
procedure had been executed.

The variable n is given an initial value of 1 from the first parameter. The second parameter supplies the
name of the file. Thus, for each line in test, the conditional "n == 1" will be true. After the input is
exhausted from test, the third parameter is evaluated, and it sets n to 2. Finally, the fourth parameter
supplies the name of a second file. Now the conditional "n == 2" in the main procedure will be true.

One consequence of the way parameters are evaluated is that you cannot use the BEGIN procedure to
test or verify parameters that are supplied on the command line. They are available only after a line of
input has been read. You can get around this limitation by composing the rule "NR == 1" and using its
procedure to verify the assignment. Another way is to test the command-line parameters in the shell
script before invoking awk.

POSIX awk provides a solution to the problem of defining parameters before any input is read. The -v
option[14] specifies variable assignments that you want to take place before executing the BEGIN
procedure (i.e., before the first line of input is read.) The -v option must be specified before a command-line
script. For instance, the following command uses the -v option to set the record separator for
multiline records.
[14] The -v option was not part of the original (1987) version of nawk (still used on
SunOS 4.1.x systems and some System V Release 3.x systems). It was added in 1989 after
Brian Kernighan of Bell Labs, the GNU awk authors, and the authors of MKS awk agreed
on a way to set variables on the command line that would be available inside the BEGIN
block. It is now part of the POSIX specification for awk.
$ awk -F"\n" -v RS="" '{ print }' phones.block
A separate -v option is required for each variable assignment that is passed to the program.

--传参

[root@centos-fuwenchao tmp]# ./acron.sh BASIC
BASIC Beginner's All-Purpose Symbolic Instruction Code
[root@centos-fuwenchao tmp]# more acron.sh 
#! /bin/sh
# assign shell's $1 to awk search variable
awk '$1 == search' search=$1 acronyms
[root@centos-fuwenchao tmp]# more acronyms 
BASIC Beginner's All-Purpose Symbolic Instruction Code
CICS Customer Information Control System
COBOL Common Business Oriented Language
DBMS Data Base Management System
GIGO Garbage In, Garbage Out
GIRL Generalized Information Retrieval Language
[root@centos-fuwenchao tmp]#

Notice that we tested the parameter as a string ($1 == search). We could also have written this as a regular
expression match ($1 ~ search).

== 要求完全一样：如果你输入 BASI，是没有任何打印的

~ 表示包含（正则匹配）：输入 BASI 时，上面的例子也可以打印！

Conditionals, Loops, and Arrays

condition

if ( expression )
action1
[else
action2]

Remember that "==" is a relational operator and "=" is an assignment operator. We can also test whether
x matches a pattern using the pattern-matching operator "~":
if ( x ~ /[yY](es)?/ ) print x

if (avg >= 90) grade = "A"
else if (avg >= 80) grade = "B"
else if (avg >= 70) grade = "C"
else if (avg >= 60) grade = "D"
else grade = "F"

expr ? action1 : action2

grade = (avg >= 65) ? "Pass" : "Fail"

loop

while (condition)
action

i = 1
while ( i <= 4 ) {
print $i
++i
}

do
action
while (condition)

BEGIN {
do {
++x
print x
} while ( x <= 4 )
}

[root@centos-fuwenchao tmp]# awk -f do.awk 
1
2
3
4
5
[root@centos-fuwenchao tmp]# more do.awk 
BEGIN {
do {
++x
print x
} while ( x <= 4 )
}
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f prinum.awk tesenum 
3 : 130
2 : 115
4 : 114
1 : 108
[root@centos-fuwenchao tmp]# more prinum.awk 
{
 total=i= 0
do {
++i
total += $i
} while ( total <= 100 )
print i, ":", total
}
[root@centos-fuwenchao tmp]# more tesenum 
45 25 60 20
10 105 50 40
33 5 9 67
108 3 5 4
[root@centos-fuwenchao tmp]#

for

for ( set_counter ; test_counter ; increment_counter )
action

set_counter
Sets the initial value for a counter variable.
test_counter
States a condition that is tested at the top of the loop.
increment_counter
Increments the counter each time at the bottom of the loop, right before testing the test_counter
again.

total = $2 + $3 + $4 + $5 + $6
avg = total / 5

total = 0
for (i = 2; i <= NF; ++i)
total += $i
avg = total / (NF - 1)

[root@centos-fuwenchao tmp]# ./cheng.sh 
Enter number: 8
The factorial of 8 is 40320
[root@centos-fuwenchao tmp]# ./cheng.sh 
Enter number: 99
The factorial of 99 is 9.33262e+155
[root@centos-fuwenchao tmp]# more cheng.sh 
#!/bin/bash
awk '# factorial: return factorial of user-supplied number
BEGIN {
printf("Enter number: ")
}
# check that user enters a number
$1 ~ /^[0-9]+$/ {
# assign value of $1 to number & fact
number = $1
if (number == 0)
fact = 1
else
fact = number
# loop to multiply fact*x until x = 1
for (x = number - 1; x > 1; x--)
fact *= x
printf("The factorial of %d is %g\n", number, fact)
# exit -- saves user from typing CRTL-D.
exit
}
# if not a number, prompt again.
{ printf("\nInvalid entry. Enter a number: ")
}'  
[root@centos-fuwenchao tmp]#

脚本的最后（}后面）可以显式地使用 - 表示从标准输入中读取数据；不写文件名时，默认也是从标准输入读取！

array

flavor_count = 5
for (x = 1; x <= flavor_count; ++x)
print flavor[x]

END {
for ( x = 1; x <= NR; x++ )
class_avg_total += student_avg[x]
class_average = class_avg_total / NR
for ( x = 1; x <= NR; x++ )
if (student_avg[x] >= class_average)
++above_average
else
++below_average

print "Class Average: ", class_average
print "At or Above Average: ", above_average
print "Below Average: ", below_average
}

for ( item in acro )
print item, acro[item]

[root@centos-fuwenchao tmp]# awk 'BEGIN { data[1.23] = "3.21";  printf "<%s>\n", data[1.23] }'
<3.21>
[root@centos-fuwenchao tmp]#

[root@centos-fuwenchao tmp]# awk -f grade.awk grade
mona    79      C
john    88      B
andrea  90.5    A
jasper  85      B
dunce   64.5    D
ellis   93.5    A

Class Average:  83.4167
At or Above Average:    4
Below Average:  2
A:      2
B:      2
C:      1
D:      1
[root@centos-fuwenchao tmp]# more grade
mona 70 77 85 83 70 89
john 85 92 78 94 88 91
andrea 89 90 85 94 90 95
jasper 84 88 80 92 84 82
dunce 64 80 60 60 61 62
ellis 90 98 89 96 96 92
[root@centos-fuwenchao tmp]# more grade.awk 
# grades.awk -- average student grades and determine
# letter grade as well as class averages.
# $1 = student name; $2 - $NF = test scores.
# set output field separator to tab.
BEGIN { OFS = "\t" }
# action applied to all input lines
{
# add up grades
total = 0
for (i = 2; i <= NF; ++i)
total += $i
# calculate average
avg = total / (NF - 1)
# assign student's average to element of array
student_avg[NR] = avg
# determine letter grade
if (avg >= 90) grade = "A"
else if (avg >= 80) grade = "B"
else if (avg >= 70) grade = "C"
else if (avg >= 60) grade = "D"
else grade = "F"
# increment counter for letter grade array
++class_grade[grade]
# print student name, average and letter grade
print $1, avg, grade
}
# print out class statistics
END {
# calculate class average
for (x = 1; x <= NR; x++)
class_avg_total += student_avg[x]
class_average = class_avg_total / NR
# determine how many above/below average
for (x = 1; x <= NR; x++)
if (student_avg[x] >= class_average)
++above_average
else
++below_average
# print results
print ""
print "Class Average: ", class_average
print "At or Above Average: ", above_average
print "Below Average: ", below_average
# print number of students per letter grade
for (letter_grade in class_grade)
print letter_grade ":", class_grade[letter_grade] | "sort"
}

[root@centos-fuwenchao tmp]#

解释：

However, an array makes this task much easier. We can define an array called class_grade, and
simply use the letter grade (A through F) as the index to the array.
++class_grade[grade]
Thus, if the grade is an "A" then the value of class_grade["A"] is incremented by one. At the end
of the program, we can print out these values in the END rule using the special for loop:
for (letter_grade in class_grade)
print letter_grade ":", class_grade[letter_grade] | "sort"
The variable letter_grade references a single subscript of the array class_grade each time
through the loop. The output is piped to sort, to make sure the grades come out in the proper order.
(Piping output to programs is discussed in Chapter 10, The Bottom Drawer.) Since this is the last
addition we make to the grades.awk script, we can look at the full listing.

经典：输入参数，求出对应值，应用于目录索引

[root@centos-fuwenchao tmp]# more lookup.sh 
awk '# lookup -- reads local glossary file and prompts user for query
#0
BEGIN {  OFS = "\t"
# prompt user
printf("Enter a glossary term: ")
}
#1 read local file named glossary
FILENAME == "glossary" {
# load each glossary entry into an array
entry[$1] = $2
next
}
#2 scan for command to exit program
$0 ~ /^(quit|[qQ]|exit|[Xx])$/ { exit }
#3 process any non-empty line
$0 != "" {
if ( $0 in entry ) {
# it is there, print definition
print entry[$0]
} else
print $0 " not found"
}
#4 prompt user again for another term
{
printf("Enter another glossary term (q to quit): ")
}' glossary -
[root@centos-fuwenchao tmp]# more glossary 
BASIC Beginner's All-Purpose Symbolic Instruction Code
CICS Customer Information Control System
COBOL Common Business Oriented Language
DBMS Data Base Management System
GIGO Garbage In, Garbage Out
GIRL Generalized Information Retrieval Language
[root@centos-fuwenchao tmp]# ./lookup.sh 
Enter a glossary term: GIGO
Garbage
Enter another glossary term (q to quit): BASIC
Beginner's
Enter another glossary term (q to quit): WENCHAO
WENCHAO not found
Enter another glossary term (q to quit): Q
[root@centos-fuwenchao tmp]#

解释：lookup.sh 最后的 - 表示从标准输入中读取用户的输入，每一行输入保存在 $0 中；entry 数组的内容类似于 entry["BASIC"]="Beginner's"，它来自 glossary 文件中的项，而不是 shell 传入的参数！

官方解释：

Once input from glossary is exhausted, awk reads from standard input because "-" is specified on the
command line. Standard input is where the user's response comes from. Rule #3 tests that the input line
($0) is not empty. This rule should match whatever the user types. The action uses in to see if the input
line is an index in the array. If it is, it simply prints out the corresponding value. Otherwise, we tell the
user that no valid entry was found.
After rule #3, rule #4 will be evaluated. This rule simply prompts the user for another entry. Note that
regardless of whether a valid entry was processed in rule #3, rule #4 is executed. The prompt also tells
the user how to quit the program. After this rule, awk looks for the next line of input.

那rule1中的next是什么作用呢？

我把next删掉，运行看下是什么效果！

[root@centos-fuwenchao tmp]# ./lookup.sh 
Enter a glossary term: BASIC Beginner's All-Purpose Symbolic Instruction Code not found
Enter another glossary term (q to quit): CICS Customer Information Control System not found
Enter another glossary term (q to quit): COBOL Common Business Oriented Language not found
Enter another glossary term (q to quit): DBMS Data Base Management System not found
Enter another glossary term (q to quit): GIGO Garbage In, Garbage Out not found
Enter another glossary term (q to quit): GIRL Generalized Information Retrieval Language not found
Enter another glossary term (q to quit): GIGO
Garbage
Enter another glossary term (q to quit): BASIC
Beginner's
Enter another glossary term (q to quit):

官方解释：

where $1 is the term and $2 is the definition. The next statement at the end of rule #1 is used to skip
other rules in the script and causes a new line of input to be read. So, until all the entries in the glossary
file are read, no other rule is evaluated.

现在具体解释一下这个脚本

首先是 BEGIN：它定义了输出分隔符。你也可以像下面这样同时定义输入分隔符：

BEGIN { FS = "\t"; OFS = "\t"
# prompt user
printf("Enter a glossary term: ")
}

接着

在#1这里：检查当前输入的文件是否是 glossary，如果是的话则通过 entry[term] = definition 载入数组，注意{前不用&&

当文件载入完全之后，awk 才开始从标准输入（命令行最后的 -）中读取用户输入的项；next 的作用是在载入 glossary 期间跳过后面的规则，直接读取下一行，详细看我的另外博文！

从标准输入中读入的项目依次和 #2、#3 匹配

if 输入的是q 则退出

if 输入的不为空，则进入代码块进行 if 判断：如果输入的项是 entry 的索引项，则打印该索引对应的值，否则打印该项 not found；接着运行到 #4，打印 Enter another glossary term (q to quit): ，

打印完了之后，接着等待接受标准输入的输入

（sed & awk P315）

具体解释看我的另外博文！

ps：

< Less than
> Greater than
<= Less than or equal to
>= Greater than or equal to
== Equal to
!= Not equal to
~ Matches
!~ Does not match

split

n = split(string, array, separator)

string is the input string to be parsed into elements of the named array. The array's indices start at 1 and
go to n, the number of elements in the array. The elements will be split based on the specified separator
character. If a separator is not specified, then the field separator (FS) is used. The separator can be a full
regular expression, not just a single character. Array splitting behaves identically to field splitting

z = split($1, array, " ")
for (i = 1; i <= z; ++i)
print i, array[i]

This shell script takes the first argument from the command line and echoes it as input to the awk
program.

echo $1 |
awk '# romanum -- convert number 1-10 to roman numeral
# define numerals as list of roman numerals 1-10
BEGIN {
# create array named numerals from list of roman numerals
split("I,II,III,IV,V,VI,VII,VIII,IX,X", numerals,",")
}
# look for number between 1 and 10
$1 > 0 && $1 <= 10 {
# print specified element
print numerals[$1]
exit
}
{ print "invalid number"
exit
}'
--
$ romanum 4
IV

日期转换

awk '
# date-month -- convert mm/dd/yy or mm-dd-yy to month day, year
# build list of months and put in array.
BEGIN {
# the 3-step assignment is done for printing in book
listmonths = "January,February,March,April,May,June,"
listmonths = listmonths "July,August,September,"
listmonths = listmonths "October,November,December"
split(listmonths, month, ",")
}
# check that there is input
$1 != "" {
# split on "/" the first input field into elements of array
sizeOfArray = split($1, date, "/")
# check that only one field is returned
if (sizeOfArray == 1)
# try to split on "-"
sizeOfArray = split($1, date, "-")
# must be invalid
if (sizeOfArray == 1)
exit
# add 0 to number of month to coerce numeric type
date[1] += 0
# print month day, year
print month[date[1]], (date[2] ", 19" date[3])
}'

---

$ echo "5/11/55" | date-month
May 11, 1955

解释倒数第四行 date[1] += 0

However, before using date[1], we coerce the type of date[1] by adding 0 to it. While awk will correctly interpret "11" as
a number, leading zeros may cause a number to be treated as a string. Thus, "06" might not be
recognized properly without type coercion. The element referenced by date[1] is used as the
subscript for month.

简言之：就是string转换为number，要不然使用month[date[1] ]的时候会出错

删除数组元素

delete array[subscript]