性能测试时，需要按照生产环境中各接口的实际请求占比来分配压测时各接口的请求比例。用于统计 nginx 访问日志的脚本如下：
import re
import pandas as pd
import xlwt
# Compiled pattern for one access-log line.  The pattern is assembled from one
# raw-string piece per log field; the pieces concatenate to a single regex.
obj = re.compile(
    r'(?P<ip>.*?)- - '
    r'\[(?P<time>.*?)\] '
    r'"(?P<request>.*?)" '
    r'(?P<request_time>.*?) '
    r'(?P<status>.*?) '
    r'(?P<bytes>.*?) '
    r'"(?P<referer>.*?)" '
    r'"(?P<ua>.*?)"'
)
def load_log(path):
    """Read a log file and run ``parse`` on every line.

    Args:
        path: Path of the log file to read (decoded as UTF-8).

    Returns:
        A ``(lst, error_lst)`` tuple.  ``lst`` holds the value returned by
        ``parse`` for every line it accepted; ``error_lst`` holds the stripped
        text of every line for which ``parse`` returned a falsy value.
    """
    lst = []
    error_lst = []
    # errors="replace": a single undecodable byte in the log should not abort
    # the whole run with UnicodeDecodeError.
    with open(path, mode="r", encoding="utf-8", errors="replace") as f:
        for raw_line in f:
            line = raw_line.strip()
            dic = parse(line)
            if dic:
                lst.append(dic)
            else:
                error_lst.append(line)
    return lst, error_lst
def NumIn(s):
    """Return True when at least one character of *s* is a digit."""
    return any(ch.isdigit() for ch in s)
def parse(line):
    """Parse one access-log line into the fields used for the statistics.

    Args:
        line: One log line (str), already stripped of the trailing newline.

    Returns:
        ``False`` when the line does not match the module-level pattern
        ``obj`` or its request field has fewer than two whitespace-separated
        tokens.  Otherwise a dict with the keys:

        * ``time``       - the time field with the `` +0800`` suffix removed
        * ``time_min``   - first 17 characters of ``time``
        * ``time_10min`` - first 16 characters of ``time``
        * ``time_hour``  - first 14 characters of ``time``
        * ``request``    - ``"<first token> <normalised path>"``

        The normalised path is the request target without its query string,
        cut to at most four path segments; the fourth segment is dropped as
        well when it contains a digit.
    """
    result = obj.match(line)
    if result is None:
        return False

    time = result.group("time").replace(" +0800", "")

    tokens = result.group("request").split()
    if len(tokens) < 2:
        # No request target present, so there is no URL to count.
        return False
    method = tokens[0]

    # Splitting an absolute path on "/" yields a leading empty string, so the
    # first five items are that empty string plus up to four path segments.
    segments = tokens[1].split("?")[0].split("/")[:5]
    # Shorter paths are kept whole instead of being reported as unparseable.
    if len(segments) == 5 and NumIn(segments[4]):
        segments = segments[:4]

    return {
        "time": time,
        "time_min": time[:17],
        "time_10min": time[:16],
        "time_hour": time[:14],
        "request": method + " " + "/".join(segments),
    }
def _value_counts(series, limit=None):
    """Return ``(value, count)`` pairs of *series*, most frequent first.

    When *limit* is given, at most that many pairs are returned.
    """
    counts = series.value_counts()
    if limit is not None:
        counts = counts.head(limit)
    # Coerce to plain int so the written cell value never depends on the
    # numpy scalar type pandas happens to hand back.
    return [(value, int(count)) for value, count in counts.items()]


def _write_count_sheet(wb, sheet_name, header, pairs, label=None):
    """Add a two-column sheet (*header*, "count") filled from *pairs* to *wb*.

    *label*, when given, converts each value into the text written in the
    first column; otherwise the value is written unchanged.
    """
    sheet = wb.add_sheet(sheet_name)
    sheet.write(0, 0, header)
    sheet.write(0, 1, "count")
    for row, (value, count) in enumerate(pairs, start=1):
        sheet.write(row, 0, label(value) if label is not None else value)
        sheet.write(row, 1, count)


def analyse(lst, project, output="nginx_log.xls"):
    """Write request statistics for one project to an ``.xls`` workbook.

    Args:
        lst: List of dicts with the keys ``time``, ``time_min``,
            ``time_10min``, ``time_hour`` and ``request`` (the shape produced
            by ``parse``).
        project: Text that a record's ``request`` value must contain for the
            record to be counted.  It is matched literally, not as a regular
            expression.
        output: File name of the workbook to write.

    The workbook contains five sheets: per-request counts with percentages
    and the total, the 100 most frequent ``time`` / ``time_min`` /
    ``time_10min`` values, and the 24 most frequent ``time_hour`` values.
    An empty *lst* produces a workbook with headers and a total of 0.
    """
    # Naming the columns keeps the frame usable when lst is empty.
    df = pd.DataFrame(
        lst, columns=["time", "time_min", "time_10min", "time_hour", "request"]
    )
    # regex=False: project is a path fragment, not a pattern.
    df = df[df["request"].str.contains(project, regex=False)]
    total = df.shape[0]

    wb = xlwt.Workbook()

    sheet = wb.add_sheet("url请求次数及占比")
    titles = ("request_url", "request_type", "count", "百分比", "请求总数")
    for col, title in enumerate(titles):
        sheet.write(0, col, title)
    sheet.write(1, 4, total)
    for row, (request, count) in enumerate(_value_counts(df["request"]), start=1):
        # request is "<method> <url>".
        method, url = request.split(" ", 1)
        sheet.write(row, 0, url)
        sheet.write(row, 1, method)
        sheet.write(row, 2, count)
        sheet.write(row, 3, "%.2f%%" % round(count / total * 100, 2))

    _write_count_sheet(
        wb, "秒级请求数top100", "time", _value_counts(df["time"], 100)
    )
    _write_count_sheet(
        wb, "分钟请求数top100", "time_min", _value_counts(df["time_min"], 100),
        label=lambda t: t + ':00' + "-" + t + ':59',
    )
    _write_count_sheet(
        wb, "10分钟请求数top100", "time10", _value_counts(df["time_10min"], 100),
        label=lambda t: t + '0:00' + "-" + t + '9:59',
    )
    _write_count_sheet(
        wb, "小时级请求数", "timehour", _value_counts(df["time_hour"], 24),
        label=lambda t: t + ':00:00' + "-" + t + ':59:59',
    )

    wb.save(output)
if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes, and "\D" is not a
    # valid escape sequence in a normal string literal.
    lst, error_lst = load_log(path=r"D:\Desktop\****imc.log")
    analyse(lst, project='/SVC***/')
统计结果如下: