awk&sed精要(二)

展开文件中的大写缩写词(acronym)

[root@uyhd000225 tmp]# ./expand.sh sample 
The  U.S. Global Change Research Program (USGCRP) is a comprehensive
research effort that includes applied
as well as basic research.
The National Aeronautic and Space Administration (NASA) program Mission to Planet Earth
represents the principal space-based component
of the  U.S. Global Change Research Program (USGCRP) and includes new initiatives
such as EOS and Earthprobes.
[root@uyhd000225 tmp]# more sample 
The USGCRP is a comprehensive
research effort that includes applied
as well as basic research.
The NASA program Mission to Planet Earth
represents the principal space-based component
of the USGCRP and includes new initiatives
such as EOS and Earthprobes.
[root@uyhd000225 tmp]# more expand.sh 
awk '# awkro - expand acronyms
# Usage: expand.sh file...
# Reads tab-separated "ACRONYM<TAB>description" pairs from the file
# named "acronyms", then prints every line of the remaining input files,
# replacing each field that is a known acronym with "description (ACRONYM)".
# The shell hands the input files over as "$@" so that a file name
# containing blanks or glob characters stays one argument.

# load acronyms file into array "acro"
FILENAME == "acronyms" {
    split($0, entry, "\t")
    acro[entry[1]] = entry[2]
    next
}
# process any input line containing caps
/[A-Z][A-Z]+/ {
    # see if any field is an acronym
    for (i = 1; i <= NF; i++)
        if ($i in acro) {
            # if it matches, add description
            $i = acro[$i] " (" $i ")"
        }
}
{
    # print all lines
    print $0
}' acronyms "$@"
[root@uyhd000225 tmp]#



There are other changes that would be good to make. In running the expand.sh program, we soon
discovered that it failed to match the acronym if it was followed by a punctuation mark. Our initial
solution was not to handle it in awk at all. Instead, we used two sed scripts, one before processing:
sed 's/\([^.,;:!][^.,;:!]*\)\([.,;:!]\)/\1 @@@\2/g'
and one after:
sed 's/ @@@\([.,;:!]\)/\1/g'

A sed script, run prior to invoking awk, could simply insert a space before any punctuation mark,
causing it to be interpreted as a separate field. A string of garbage characters (@@@) was also added so
we'd be able to easily identify and restore the punctuation mark. (The complicated expression used in the
first sed command makes sure that we catch the case of more than one punctuation mark on a line.)
This kind of solution, using another tool in the UNIX toolbox, demonstrates that not everything needs to
be done as an awk procedure. Awk is all the more valuable because it is situated in the UNIX
environment.
However, with POSIX awk, we can implement a different solution, one that uses a regular expression to
match the acronym. Such a solution can be implemented with the match() and sub() functions
described in the next chapter.


多维数组

file_array[NR, i] = $i


[root@uyhd000225 tmp]# awk -f multiarr.awk bitmap.test 
XOOOOOOOOOOX
OXOOOOOOOOXO
OOXOOOOOOXOO
OOOXOOOOXOOO
OOOOXOOXOOOO
OOOOOXXOOOOO
OOOOOXXOOOOO
OOOOXOOXOOOO
OOOXOOOOXOOO
OOXOOOOOOXOO
OXOOOOOOOOXO
XOOOOOOOOOOX
[root@uyhd000225 tmp]# more multiarr.awk 
# multiarr.awk - mark "x,y" pairs read from the input on a 12x12 grid
BEGIN {
    FS = ","        # input fields are comma-separated
    # dimensions of the bitmap
    WIDTH = 12
    HEIGHT = 12
    # start with every cell set to "O"
    for (row = 1; row <= WIDTH; row++) {
        for (col = 1; col <= HEIGHT; col++) {
            bitmap[row, col] = "O"
        }
    }
}

# each input record has the form x,y: put an "X" in that cell
{ bitmap[$1, $2] = "X" }

# after all input has been read, print the grid one row per line
END {
    for (row = 1; row <= WIDTH; row++) {
        line = ""
        for (col = 1; col <= HEIGHT; col++)
            line = line bitmap[row, col]
        printf("%s\n", line)
    }
}
[root@uyhd000225 tmp]# more bitmap.test 
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
10,10
11,11
12,12
1,12
2,11
3,10
4,9
5,8
6,7
7,6
8,5
9,4
10,3
11,2
12,1
[root@uyhd000225 tmp]# 

if ((i, j) in array)


变量

ARGV

An array of command-line arguments, excluding the script itself and any options specified with
the invocation of awk. The number of elements in this array is available in ARGC. The index of
the first element of the array is 0 (unlike all other arrays in awk but consistent with C) and the last
is ARGC - 1.

[root@uyhd000225 tmp]# awk -f arg.awk 433 66 - 00 0-9
awk
433
66
-
00
0-9
6
[root@uyhd000225 tmp]# more arg.awk 
# arg.awk - list every element of ARGV, then the argument count
BEGIN {
    idx = 0
    while (idx < ARGC) {
        print ARGV[idx]
        idx++
    }
    # ARGC is printed once, after the whole list
    print ARGC
}
[root@uyhd000225 tmp]# 

[root@uyhd000225 tmp]# awk -f arg.awk 433 66 - 00 0-9 -
awk
433
66
-
00
0-9
-
7
[root@uyhd000225 tmp]# 

The first element is the name of the command that
invoked the script. The last argument, in this case, is the filename, "-", for standard input. Note that "-f
arg.awk" does not appear in the parameter list.


[root@uyhd000225 tmp]# more arg.sh
awk '
# arg.sh - print command-line parameters
# Prints every element of ARGV, one per line (ARGV[0] is the command
# name), followed by ARGC.
# The shell passes the script arguments on as "$@", so each one reaches
# awk exactly as typed, even when it contains blanks or glob characters.
BEGIN {
    for (x = 0; x < ARGC; ++x)
        print ARGV[x]
    print ARGC
}' "$@"
[root@uyhd000225 tmp]# chmod 777 arg.sh
[root@uyhd000225 tmp]# ./arg.sh rr tt 7-0 - -- -
awk
rr
tt
7-0
-
--
-
7
[root@uyhd000225 tmp]# 

参数验证

--只打印第一个

[root@uyhd000225 tmp]# awk -f numtest.awk 56 yy 88 9-0 -
yy is not an integer.
[root@uyhd000225 tmp]# awk -f numtest.awk 56 yy 88 9-0 - oo pp
yy is not an integer.
[root@uyhd000225 tmp]# awk -f numtest.awk 56 - yy 88 9-0 - oo pp
- is not an integer.
[root@uyhd000225 tmp]# more numtest.awk 
# numtest.awk - check that every command-line argument consists only of
# digits; report the first one that does not and stop with exit status 1
BEGIN {
    for (x = 1; x < ARGC; ++x) {
        if (ARGV[x] ~ /^[0-9]+$/)
            continue
        print ARGV[x], "is not an integer."
        exit 1
    }
}
[root@uyhd000225 tmp]# 
--打印所有不是整数的(注意:下面贴出的脚本和输出与上一节的行为完全相同,仍然在第一个非整数参数处 exit 1;要打印全部非整数参数,应在循环内只打印并记录错误标志,循环结束后再 exit 1)
[root@uyhd000225 tmp]# awk -f numtest.awk 56 yy 88 9-0 -
yy is not an integer.
[root@uyhd000225 tmp]# awk -f numtest.awk 56 yy 88 9-0 - oo pp
yy is not an integer.
[root@uyhd000225 tmp]# awk -f numtest.awk 56 - yy 88 9-0 - oo pp
- is not an integer.
[root@uyhd000225 tmp]# more numtest.awk 
# numtest.awk - argument check: stops at the first argument that is not
# a string of digits, prints it, and exits with status 1
BEGIN {
    n = 1
    while (n < ARGC) {
        arg = ARGV[n++]
        if (arg !~ /^[0-9]+$/) {
            print arg, "is not an integer."
            exit 1
        }
    }
}
[root@uyhd000225 tmp]# 

--查找电话号码

[root@uyhd000225 tmp]# ./phone.sh 
Enter a name? jone
jone 1256787898 jone 1256787898
[root@uyhd000225 tmp]# more phone.sh 
awk '# phone - find phone number for person
# supply name of person on command line or at prompt.
# Usage: phone.sh [name [database...]]
# Prints the first and last comma-separated field of every record whose
# first field matches the name (the name is used as a regular expression).
BEGIN { FS = ","
    # ARGV[0] is the command name and the last argument is always
    # phones.data, so more than two entries means a name was supplied
    if (ARGC > 2) {
        name = ARGV[1]
        # remove the name so awk does not try to open it as an input file
        delete ARGV[1]
    } else {
        # loop until we get a non-empty name
        while (name == "") {
            printf("Enter a name? ")
            # give up on end-of-file or a read error instead of
            # prompting forever
            if ((getline name < "-") <= 0)
                exit 1
        }
    }
}
$1 ~ name {
    print $1, $NF
}' "$@" phones.data
[root@uyhd000225 tmp]# ./phone.sh jone
jone 1256787898 jone 1256787898
[root@uyhd000225 tmp]# 
删除文件phones.data后

[root@uyhd000225 tmp]# ./phone.sh 
Enter a name? jome
awk: cmd. line:11: fatal: cannot open file `phones.data' for reading (没有那个文件或目录)
[root@uyhd000225 tmp]# ./phone.sh jone
awk: cmd. line:6: fatal: cannot open file `phones.data' for reading (没有那个文件或目录)

--这个很巧妙感觉

解释下全文


1:We test the ARGC variable to see if there are more than two parameters. By specifying "$*", we can pass
all the parameters from the shell command line inside to the awk command line

If this parameter has
been supplied, we assume the second parameter, ARGV[1], is the one we want and it is assigned to the
variable name. Then that parameter is deleted from the array. This is very important if the parameter
that is supplied on the command line is not of the form "var=value"; otherwise, it will later be
interpreted as a filename. If additional parameters are supplied, they will be interpreted as filenames of
alternative phone databases. If there are not more than two parameters, then we prompt for the name.
The getline function is discussed in Chapter 10; using this syntax, it reads the next line from standard
input.

As a special case, if the value of an ARGV element is the empty string (""), awk will skip over it and
continue on to the next element.




env

[root@uyhd000225 tmp]# more env.awk 
# env.awk - print the environment as NAME=value lines
# (for-in visits the ENVIRON entries in no particular order)
BEGIN {
    for (name in ENVIRON)
        printf("%s=%s\n", name, ENVIRON[name])
}
[root@uyhd000225 tmp]# awk -f env.awk 
TERM=vt100
G_BROKEN_FILENAMES=1
SHLVL=1
PWD=/tmp
PATH=/usr/lib/jvm/java-7-sun/bin:/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/X11R6/bin:/usr/java/j2sdk1.4.2_15/bin:/root/bin
CATALINA_BASE=/usr/local/jakarta-tomcat-5.0.28
CLASSPATH=.:/usr/lib/jvm/java-7-sun/lib:/usr/lib/jvm/java-7-sun/jre/lib
OLDPWD=/root
CATALINA_HOME=/usr/local/jakarta-tomcat-5.0.28
AWKPATH=.:/usr/share/awk
_=/bin/awk
CATALINA_3_BASE=/usr/local/Tomcat_Trans
LANG=zh_CN.UTF-8
LS_COLORS=no=00:fi=00:di=01
SSH_CONNECTION=123.134.196.20 5837 121.197.8.155 22
MAIL=/var/spool/mail/root
CATALINA_3_HOME=/usr/local/Tomcat_Trans
HOSTNAME=uyhd000225.hichina.com
INPUTRC=/etc/inputrc
SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass
USER=root
HISTSIZE=1000
CVS_RSH=ssh
SSH_TTY=/dev/pts/2
CATALINA_2_BASE=/usr/local/Tomcat_Calculate
CATALINA_2_HOME=/usr/local/Tomcat_Calculate
JAVA_HOME=/usr/lib/jvm/java-7-sun
JRE_HOME=/usr/lib/jvm/java-7-sun/jre
SHELL=/bin/bash
LESSOPEN=|/usr/bin/lesspipe.sh %s
LOGNAME=root
HOME=/root
SSH_CLIENT=123.134.196.20 5837 22
[root@uyhd000225 tmp]# 

You can reference any element, using the variable name as the index of the array:
ENVIRON["LOGNAME"]
You can also change any element of the ENVIRON array.
ENVIRON["LOGNAME"] = "Tom"


function

cos(x) Returns cosine of x (x is in radians).
exp(x) Returns e to the power x.
int(x) Returns truncated value of x.
log(x) Returns natural logarithm (base-e) of x.
sin(x) Returns sine of x (x is in radians).
sqrt(x) Returns square root of x.
atan2(y,x) Returns arctangent of y/x in the range -π to π.
rand() Returns pseudo-random number r, where 0 <= r < 1.
srand(x) Establishes new seed for rand(). If no seed is specified, uses time of day. Returns the
old seed.


int()

print 100/3
print int(100/3)
The output from these statements is shown below:
33.3333
33

The int() function simply truncates; it does not round up or down. (Use the printf format "%.0f" to
perform rounding.)

  rand()

The rand() function generates a pseudo-random floating-point number between 0 and 1. The srand()
function sets the seed or starting point for random number generation. If srand() is called without
an argument, it uses the time of day to generate the seed. With an argument x, srand() uses x as the
seed.

If you don't call srand() at all, awk acts as if srand() had been called with a constant argument
before your program started, causing you to get the same starting point every time you run your
program. This is useful if you want reproducible behavior for testing, but inappropriate if you really do
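want your program to behave differently every time. For example, a script that prints rand() twice,
calls srand() with no argument, and then prints rand() twice more shows the same first two numbers on
every run, while the last two change from run to run.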


menu

we look at a general use of the system() and getline functions to implement a
menu-based command generator.
$ cat uucp_commands
UUCP Status Menu
Look at files in PUBDIR:find /var/spool/uucppublic -print
Look at recent status in LOGFILE:tail /var/spool/uucp/LOGFILE
Look for lock files:ls /var/spool/uucp/*.LCK

--
awk -v CMDFILE="uucp_commands" '# invoke -- menu-based
# command generator
# first line in CMDFILE is the title of the menu
# subsequent lines contain: $1 - Description;
# $2 Command to execute
# The user answers the prompt on standard input; any answer that is not
# the number of a menu item (including the Exit item) ends the program.
BEGIN { FS = ":"
    # process CMDFILE, reading items into menu array
    if ((getline < CMDFILE) > 0)
        title = $1
    else {
        # missing, unreadable or empty command file: say so before leaving
        printf("cannot read menu from %s\n", CMDFILE) > "/dev/stderr"
        exit 1
    }
    while ((getline < CMDFILE) > 0) {
        # load array
        ++sizeOfArray
        # array of menu items
        menu[sizeOfArray] = $1
        # array of commands associated with items
        command[sizeOfArray] = $2
    }
    close(CMDFILE)
    # call function to display menu items and prompt
    display_menu()
}
# Applies the user response to prompt
{
    # Accept only a string of digits that names an existing menu item.
    # A plain numeric range test would compare input such as "2x" as a
    # string, let it through, and then run an empty command.
    choice = $1 + 0
    if ($1 ~ /^[0-9]+$/ && choice in command) {
        # print command that is executed
        printf("Executing ... %s\n", command[choice])
        # then execute it.
        system(command[choice])
        printf("<Press RETURN to continue>")
        # wait for input before displaying menu again
        getline
    }
    else
        exit
    # re-display menu
    display_menu()
}
# Clear the screen and print the title, the numbered items, the Exit
# entry and the prompt. The extra parameter i is a local loop index.
function display_menu(    i) {
    # clear screen -- if clear does not work, try "cls"
    system("clear")
    # print title, list of items, exit item, and prompt
    print "\t" title
    for (i = 1; i <= sizeOfArray; ++i)
        printf "\t%d. %s\n", i, menu[i]
    # after the loop i is one past the last item: the number of Exit
    printf "\t%d. Exit\n", i
    printf("Choose one: ")
}' -


--运行:
UUCP Status Menu
1. Look at files in PUBDIR
2. Look at recent status in LOGFILE
3. Look for lock files
4. Exit
Choose one:



重定向

--

print "a =", a, "b =", b, "max =", (a > b ? a : b) > "data.out"

--

print | command

--

# words.awk - strip macros then get word count
{
    # remove a leading dot plus the two characters after it
    sub(/^\.../, "", $0)
    # every record goes to the same "wc -w" pipe
    print $0 | "wc -w"
}

--

# words -- count the words in the given files (or standard input) after
# removing a leading dot-plus-two-characters macro from each line.
# "$@" keeps every file name a single argument, even with blanks in it.
awk '{ # words -- strip macros
    sub(/^\.../, "")
    print
}' "$@" |
# get word count
wc -w

























评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值