(一)
1.统计域名
# 1. Count the requests whose referer host is domain1.com (or a subdomain).
from urllib.parse import urlsplit

# Sample access-log records, one string per request.
lines = [
    '47.29.201.179 - - [28/Feb/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain1.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
    '47.29.201.179 - - [28/Feb/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain1.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
    '47.29.201.179 - - [28/Feb/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 404 5316 "https://domain.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
    '47.29.201.179 - - [2/Mar/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
]

# Domain being counted; a host matches when it is this domain or a subdomain.
target_domain = 'domain1.com'

# Number of records whose referer host matches target_domain.
sum_ = 0
for line in lines:
    _nodes = line.split()
    # For records shaped like the samples above, whitespace-split field 10 is
    # the quoted referer URL; skip records too short to have that field.
    if len(_nodes) <= 10:
        continue
    # Compare the parsed hostname rather than searching the whole URL, so a
    # path, query string or look-alike host such as "domain1.com.evil.org"
    # is not counted.
    host = urlsplit(_nodes[10].strip('"')).hostname or ''
    if host == target_domain or host.endswith('.' + target_domain):
        sum_ += 1
print('domain1.com为域名的个数:', sum_)
domain1.com为域名的个数: 2
2.计算成功比例
# 筛选出成功的比例
import datetime
from datetime import datetime as dt
# Sample access-log records, one string per request.
lines = [
    '47.29.201.179 - - [17/Nov/2022:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain1.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
    '47.29.201.179 - - [17/Nov/2022:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain1.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
    '47.29.201.179 - - [17/Nov/2022:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 404 5316 "https://domain1.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
    '47.29.201.179 - - [2/Mar/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain1.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
]

# Day whose success ratio is reported. It is pinned to the day the sample
# data and the printed label refer to; filtering on the wall clock instead
# leaves no matching requests on any other day. For live logs, use
# dt.now(datetime.timezone.utc).date() here.
target_day = datetime.date(2022, 11, 17)

# Number of requests logged on target_day.
total = 0
# Number of those requests answered with HTTP status 200.
success = 0
for line in lines:
    _nodes = line.split()
    # Skip records too short to carry the timestamp and status fields.
    if len(_nodes) < 9:
        continue
    # Fields 3 and 4 are "[dd/Mon/yyyy:HH:MM:SS" and "+zzzz]". Only the
    # leading "[" and trailing "]" are stripped, so the seconds keep both
    # digits and the UTC offset is parsed instead of being ignored.
    # NOTE: %b is locale-dependent; English month names are assumed.
    stamp = _nodes[3][1:] + ' ' + _nodes[4][:-1]
    request_time = dt.strptime(stamp, '%d/%b/%Y:%H:%M:%S %z')
    # The calendar day is taken in the record's own UTC offset.
    if request_time.date() != target_day:
        continue
    total += 1
    # Field 8 is the HTTP status code; 200 counts as a success.
    if _nodes[8] == '200':
        success += 1

label = '案例中当日(' + target_day.strftime('%Y/%m/%d') + ')成功的比例为:'
if total:
    print(label, success / total)
else:
    # No request on target_day: report that instead of dividing by zero.
    print(label, 'N/A')
案例中当日(2022/11/17)成功的比例为: 0.6666666666666666
(二)
-- Per-user event counts for September 2020, keeping only users whose count
-- is at least 1000 and below 2000. The result has one row per qualifying
-- user, holding that user's event count.
-- NOTE(review): if the goal is the NUMBER of such users, this needs an outer
-- SELECT COUNT(*) over this result - confirm against the task statement.
-- NOTE(review): UNIX_TIMESTAMP() on a date string presumably follows the
-- session time zone (MySQL) - confirm the month boundaries are as intended.
SELECT
    COUNT(*)
FROM
    event_log
WHERE
    event_timestamp >= UNIX_TIMESTAMP('2020-09-01')
    AND event_timestamp < UNIX_TIMESTAMP('2020-10-01')
GROUP BY
    user_id
HAVING
    COUNT(*) >= 1000
    AND COUNT(*) < 2000;