awk模式匹配学习笔记（180813）

最新推荐文章于 2024-03-03 13:41:58 发布

知无牙

最新推荐文章于 2024-03-03 13:41:58 发布

阅读量1.4k

点赞数

分类专栏： Linux_Learning 文章标签： Linux awk

Linux_Learning 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

学习参考资料：https://coolshell.cn/articles/9070.html
1. 首先通过netstat命令获取测试文件netstat.txt（netstat - Print network connections, routing tables, interface statistics, masquerade connections, and multicast memberships）
2. #格式化输出
$ awk '{printf "%-8s %-8s %-8s %-8s %-22s %-15s\n", $1,$2,$3,$4,$5,$6}' netstat.txt
3. #因为我的列表中LISTEN并不存在，所以我的和作者不一样
awk '$3 == 0 && $6 == "LISTEN"' netstat.txt
4. #条件过滤
awk '$3>0 {print $0}' netstat.txt
5. #需要表头，引入内建变量，如果是excel表格的话，就是第一行的内容，每列名称
awk '$3 == 0 && $6 == "LISTEN" || NR == 1' netstat.txt

去掉NR == 1的话就没有表头了

awk '$3 == 0 && $6 == "LISTEN"' netstat.txt
6. #再复杂点，格式化输出
awk '$3 == 0 && $6 == "ESTABLISHED" || NR == 1 {printf "%-20s %-20s %s\n", $4, $5, $6}' netstat.txt

#输出行号
awk '$3 == 0 && $6 == "ESTABLISHED" || NR == 1 {printf "%-20s %-20s %-20s %-20s %s\n", NR, FNR, $4, $5, $6}' netstat.txt
#指定分隔符
awk 'BEGIN{FS = ":"} {print $1, $3, $6}' /etc/passwd

等同于-F

awk -F: '{print $1, $3, $6}' /etc/passwd

可以指定多个分隔符

awk -F '[;:,]'

#\t作为分隔符输出
awk -F: '{print $1, $3, $6}' OFS="\t" /etc/passwd
#字符串匹配，正则表达式匹配，~模式开始，//中是模式
awk '$6 ~ /FIN/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

再看一例

awk '$6 ~ /WAIT/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

再看一例

awk '/tcp/' netstat.txt
11. #用'/FIN|TIME/' 来匹配FIN 或TIME（竖线两侧不能有空格，否则空格也会成为匹配内容的一部分）
awk '$6 ~ /FIN|TIME/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt
12. #模式取反
awk '$6 !~ /WAIT/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

再举一例

awk '$1 !~ /tcp/ || NR == 1 {print NR,$4,$5,$6}' OFS="\t" netstat.txt

也可以这样

awk '!/WAIT/' netstat.txt
13. #拆分文件
awk 'NR!=1{print > $6}' netstat.txt
14. #指定的列输出到文件
awk 'NR!=1 {print $4,$5 > $6}' netstat.txt
15. #复杂一点，awk结合if else语句
awk 'NR!=1{if($6 ~ /TIME|ESTABLISHED/) print > "1.txt"; else if ($6 ~ /CONNECTED/) print > "2.txt"; else print > "3.txt"}' netstat.txt
16. #计算所有某种类型的文件的大小总和
$ ls
G_24hCFvsG_24hCM.DE_down.xls
G_24hCFvsG_24hCM.DElist_down.txt
G_24hCFvsG_24hCM.DElist.txt
G_24hCFvsG_24hCM.DElist_up.txt
G_24hCFvsG_24hCM.DE_up.xls
G_24hCFvsG_24hCM.DE.xls
G_24hCFvsG_24hCM.Differential_analysis_results.xls
$ ls -l *xls *txt | awk '{sum += $5} END {print sum}'
21361
17. # awk结合for循环语句
$ awk 'NR!=1{a[$6]++;} END {for (i in a) print i "," a[i];}' netstat.txt
18. #统计每个用户的进程占了多少内存
$ ps aux | awk 'NR!=1{a[$1]+=$6;} END {for (i in a) print i "," a[i] "KB";}'
19. #BEGIN{这里面是执行前的语句}、END{这里面是处理完所有的行后要执行的语句}、{这里是处理每一行时要执行的语句}，下面是一个awk命令文件，运行该脚本的方式有 awk -f cal.awk students_score.txt，还有（先用 chmod +x cal.awk 赋予可执行权限后）./cal.awk students_score.txt 等等
#!/bin/awk -f
# cal.awk - print a per-student score report with column totals and averages.
# Input: whitespace-separated records; $1 and $2 are printed under NAME and
# NO., and $3/$4/$5 are accumulated as the math, english and computer scores.
# Usage: awk -f cal.awk students_score.txt

# before run: reset the accumulators and print the report header.
BEGIN {
    math = 0
    english = 0
    computer = 0

    # printf (not print) is required for format strings and "\n" escapes;
    # the header columns line up with the row format used below.
    printf "NAME    NO.   MATH  ENGLISH  COMPUTER   TOTAL\n"
    printf "---------------------------------------------\n"
}

# running: executed once per input record.
{
    math += $3
    english += $4
    computer += $5
    # Six values are passed, so the format carries six conversions; the final
    # %8d is the per-student total.
    printf "%-6s %-6s %4d %8d %8d %8d\n", $1, $2, $3, $4, $5, $3 + $4 + $5
}

# running later: executed once after the last record.
END {
    printf "---------------------------------------------\n"
    printf "  TOTAL:%10d %8d %8d \n", math, english, computer
    # NR is 0 when the input is empty; skip the averages instead of dividing
    # by zero.
    if (NR > 0)
        printf "AVERAGE:%10.2f %8.2f %8.2f\n", math / NR, english / NR, computer / NR
}
20. #使用-v参数和ENVIRON，使用ENVIRON的环境变量需要export
$ x=5
$ y=10
$ export y
$ echo $x $y
5 10
$ awk -v val=$x '{print $1, $2, $3, $4+val, $5+ENVIRON["y"]}' OFS="\t" students_score.txt
21. #找出文件中长度大于50 的行
awk 'length>50' students_score.txt
22. #按连接数查看客户端IP
$ netstat -ntu | awk '{print $5}' | cut -d: -f1 | sort | uniq -c | sort -nr
23. #打印99乘法表
$ seq 9 | sed 'H;g' | awk -v RS='' '{for(i=1;i<=NF;i++) printf("%dx%d=%d%s", i, NR, i*NR, i==NR?"\n":"\t")}'

知无牙

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
awk模式匹配学习笔记（180813）

学习参考资料：https://coolshell.cn/articles/9070.html 1. 首先通过netstat命令获取测试文件netstat.txt（netstat - Print network connections, routing tables, interface statistics, masquerade connections, and multicast memb...
复制链接

扫一扫

专栏目录