日志分析可以获得很多有用的信息,现在来试试最基本的,获取最多访问的前10个IP地址及访问次数。
既然是统计,那么awk是必不可少的,好用而高效。
命令如下:
# Count requests per client IP ($1) and print the 10 busiest.
# Bug fix: 'sort' must be 'sort -n' — without it the counts are compared
# lexicographically, so "9" sorts after "10" and the top entries are wrong.
awk '{nums[$1]+=1;} END{for(i in nums){print nums[i],i}}' access_log | sort -n | tail
首先用awk统计出来一个列表,然后用sort进行排序,最后用tail获取最后的10个。
以上参数可以略作修改显示更多的数据,比如将tail加上-n参数等,另外日志格式不同命令也可能需要稍作修改。
1. access_log日志文件在/etc/httpd/logs目录下
# Apache's default log directory on RHEL/CentOS-style installs
# (NOTE(review): path varies by distro — Debian uses /var/log/apache2; confirm)
cd /etc/httpd/logs
2.查看access_log日志文件
# Dump the raw log to inspect its format; '--' guards against a
# filename that starts with '-'
cat -- access_log
# 内容如下:
192.168.178.1 - - [04/Feb/2023:10:29:09 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:10 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:10 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:11 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:11 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:11 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:11 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:11 +0800] "GET / HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
192.168.178.1 - - [04/Feb/2023:10:29:56 +0800] "-" 408 - "-" "-"
192.168.178.151 - - [04/Feb/2023:10:43:40 +0800] "GET / HTTP/1.1" 200 20 "-" "curl/7.61.1"
192.168.178.151 - - [04/Feb/2023:10:43:43 +0800] "GET / HTTP/1.1" 200 20 "-" "curl/7.61.1"
192.168.178.151 - - [04/Feb/2023:10:43:46 +0800] "GET / HTTP/1.1" 200 20 "-" "curl/7.61.1"
3.awk获取访问前10位的ip地址
# Count requests per client IP ($1), sort by count, keep the 10 largest.
# Bug fix: 'sort -n' is required — plain 'sort' compares the counts as
# strings, so a count of "10" would sort below "9".
awk '{nums[$1]+=1;} END{for(i in nums){print nums[i],i}}' access_log | sort -n | tail
# {nums[$1]+=1} 将第一列的值作为数组的下标,数组的内容存储IP地址出现的次数
# END{for(i in nums){print nums[i],i}} END在主代码块和数据读取之后执行,循环数组的下标,输出数组的值和下标
# sort 按每行开头的访问次数排序(严格按数值排序应使用 sort -n,否则按字典序比较)
# tail 默认输出排序结果的最后10行,即访问次数最多的10个IP
4.其他功能命令:
查看日志中访问次数最多的前10个IP
# Top 10 client IPs by request count.
# Fixed: dropped the useless 'cat', the no-op awk '{print $0}' stage,
# and the pointless 'less' on 10 lines of output.
#cut -d ' ' -f 1 access_log | sort | uniq -c | sort -nr | head -n 10
查看日志中出现100次以上的IP
# Client IPs that appear more than 100 times.
# Fixed: dropped the useless 'cat'; awk's pattern form '$1 > 100'
# replaces the verbose '{if ($1 > 100) print $0}'.
#cut -d ' ' -f 1 access_log | sort | uniq -c | awk '$1 > 100' | sort -nr | less
查看最近访问量最高的文件
# Most requested URLs ($7) among the last 10000 log entries.
# Fixed: dropped the useless 'cat'; 'tail -n 10000' is the standard
# option form (bare '-10000' is a legacy spelling).
#tail -n 10000 access_log | awk '{print $7}' | sort | uniq -c | sort -nr | less
查看日志中访问超过100次的页面
# URLs ($7) requested more than 100 times.
# Fixed: dropped the useless 'cat'; awk's pattern form '$1 > 100'
# replaces the verbose '{if ($1 > 100) print $0}'.
#cut -d ' ' -f 7 access_log | sort | uniq -c | awk '$1 > 100' | less
统计某url,一天的访问次数
# Number of hits for one URL on one day.
# Fixed: dropped the useless 'cat'; 'grep -c' counts matching lines
# directly, replacing the 'wc | awk '{print $1}'' detour.
#grep '12/Aug/2009' access_log | grep -c '/images/index/e1.gif'
访问次数最多的前20个网页(注意:uniq -c 只合并相邻的重复行,统计前应先 sort)
# Top 20 most requested URLs.
# Bug fix: 'uniq -c' only merges ADJACENT duplicate lines, so the input
# must be sorted first — otherwise each run of identical lines is counted
# separately and the totals are wrong. Also dropped the useless 'cat'.
#awk '{print $7}' access_log | sort | uniq -c | sort -nr | head -20
从日志里查看该ip在干嘛
# What one client IP has been requesting (IP + URL, ranked by frequency).
# Fixed: dropped the useless 'cat'; escaped the dots in the grep pattern —
# an unescaped '.' matches ANY character, so '218.66.36.119' would also
# match unrelated strings like '218x66y36z119'.
#grep '218\.66\.36\.119' access_log | awk '{print $1"\t"$7}' | sort | uniq -c | sort -nr | less
列出传输时间超过 30 秒的文件(要求日志末字段记录了请求耗时,如 LogFormat 中加入 %T)
# Files whose transfer time exceeded 30 seconds.
# NOTE(review): assumes the log's LAST field ($NF) is the request duration
# (e.g. %T added to LogFormat) — confirm for this server's format.
# Fixed: dropped the useless 'cat'; plain 'sort' replaces 'sort -n',
# since URLs are not numeric and -n adds nothing before 'uniq -c'.
#awk '($NF > 30){print $7}' access_log | sort | uniq -c | sort -nr | head -20
列出最耗时的 php 页面(超过60秒的)
# Slowest PHP pages (request duration over 60 seconds).
# NOTE(review): assumes the log's LAST field ($NF) is the request duration
# (e.g. %T in LogFormat) — confirm for this server's format.
# Fixed: dropped the useless 'cat'; plain 'sort' replaces 'sort -n',
# since URLs are not numeric and -n adds nothing before 'uniq -c'.
#awk '($NF > 60 && $7~/\.php/){print $7}' access_log | sort | uniq -c | sort -nr | head -100