# -*- coding: utf-8 -*-
# fiddler代理获取的数据,如何解析为流量json格式
# (How to parse data captured by a Fiddler proxy into traffic-JSON format.)
# txt 格式为 (the txt input format is described in HttpParseLog below)
import argparse
import copy
import os
import sys
import traceback
import urlparse
# Make the parent directory importable so that `public.DDlog` resolves even
# when this script is executed directly from its own directory.
path_cur = os.path.dirname(os.path.realpath(__file__))
path_parent = "%s/../" % path_cur
sys.path.append(path_parent)
from public.DDlog import DDlog
# Project logger writing to the 'api' log stream (presumably a thin wrapper
# around the stdlib logging module — defined in public.DDlog).
logger = DDlog(log_type='api')
# logger = log("HttpParseLog")
class HttpParseLog:
    '''
    Parser for HTTP session logs exported as text by a Fiddler proxy.

    Sessions in the input file are separated by a long dashed line.
    Within one session the expected layout is:

        <METHOD> <URL> <HTTP-version>      request line
        <request headers>                  until the first blank line
        <blank line>
        <request body>                     empty for GET, params for POST
        <response status line>
        <response headers>                 until the second blank line
        <blank line>
        <response body>                    everything that remains
    '''

    def __init__(self):
        logger.info("HttpParseLog")
        # Separator line Fiddler writes between captured sessions.
        self.request_end = "------------------------------------------------------------------"

    def work(self, http_data_path):
        '''
        Parse the Fiddler text capture at *http_data_path*.

        :param http_data_path: path to the Fiddler text export
        :return: list of dicts, one per HTTP session, using the keys of
                 the template below
        '''
        # Template for one parsed session; deep-copied per session so the
        # mutable list values are not shared between sessions.
        request_json = {
            "uri": "",
            "method": "",
            "scheme": "",
            "host": "",
            "params": "",
            "request_header": [],
            "referer": "",
            # two lines follow the request headers
            # (GET: blank + blank, POST: blank + post parameters)
            "request_body": "",
            "response_status": "",
            "response_header": [],
            "response_content_type": "",
            "discovery_time": "",
            # one blank line precedes the response body
            "response_body": "",
            "response_body_raw_length": 0
        }
        logger.info("work:%s" % http_data_path)
        requests_data = []
        request_d = []
        request_flag = True
        # --- First pass: split the file into sessions on the separator,
        # dropping a single blank line that may precede a session.
        # BUG FIX: the file handle was previously never closed.
        with open(http_data_path) as data_file:
            for line in data_file:
                if line.startswith(self.request_end):
                    requests_data.append(request_d)
                    request_d = []
                    request_flag = True
                    continue
                if line and line != self.request_end:
                    line_data_tmp = line.replace("\r", '').replace("\n", '')
                    if request_flag and line_data_tmp == '':
                        logger.info(u"请求开始前的空格忽略")
                        request_flag = False
                        continue
                    request_flag = False
                    request_d.append(line)
        if request_d:
            # Robustness: keep a trailing session when the capture does not
            # end with a separator line (previously it was silently dropped).
            requests_data.append(request_d)
        http_models = []
        # --- Second pass: parse each session into a dict.
        for request_data in requests_data:
            request_json_tmp = copy.deepcopy(request_json)
            null_line_num = 0             # blank lines seen so far (0/1/2)
            request_header = []
            response_header = []
            response_body = []
            first_null_line_num = 0       # index of the request-body line
            response_status_line_num = 0  # index of the response status line
            for i in range(len(request_data)):
                line = request_data[i]
                if i == 0:
                    # Request line: "<METHOD> <URL> <HTTP-version>".
                    url_property = line.split(" ")
                    if len(url_property) < 2:
                        # Malformed request line; leave template defaults.
                        continue
                    r = urlparse.urlparse(url_property[1])
                    request_json_tmp['host'] = r.netloc
                    request_json_tmp['method'] = url_property[0]
                    # NOTE(review): fragment is appended without a '#'
                    # separator — preserved from the original behavior.
                    request_json_tmp['uri'] = r.path + r.fragment
                    request_json_tmp['scheme'] = r.scheme
                    request_json_tmp['params'] = r.query
                    continue
                line_data = line.replace("\r", '').replace("\n", '')
                if null_line_num == 1 and i == first_null_line_num:
                    # Line right after the first blank line: request body
                    # (empty for GET, post parameters for POST).
                    request_json_tmp['request_body'] = line_data
                    logger.info("get request_body data:%s" % line_data)
                    continue
                if null_line_num == 1 and i == response_status_line_num:
                    # Response status line: "HTTP/1.x <code> <reason>".
                    logger.info("get response status data:%s" % line_data)
                    response_status_data = line_data.split(" ")
                    if len(response_status_data) > 1:
                        # BUG FIX: the status was stored under the undeclared
                        # key 'status'; the template declares 'response_status'.
                        request_json_tmp['response_status'] = response_status_data[1]
                    continue
                if line_data == '':
                    null_line_num += 1
                    if null_line_num == 1:
                        first_null_line_num = i + 1
                        response_status_line_num = i + 2
                        request_json_tmp['request_header'] = request_header
                    if null_line_num == 2:
                        request_json_tmp['response_header'] = response_header
                else:
                    if null_line_num == 0:
                        # Request headers; Referer is captured separately.
                        if line_data.startswith("Referer"):
                            # BUG FIX: split(":", 1) keeps the whole URL —
                            # "http://..." itself contains ':' and was
                            # previously truncated to " http".
                            request_json_tmp['referer'] = line_data.split(":", 1)[1].strip()
                            continue
                        request_header.append(line_data)
                    elif null_line_num == 1:
                        # Response headers; Content-Type and Date captured
                        # separately.
                        if line_data.startswith("Content-Type"):
                            # split(":", 1) for consistency with Referer/Date.
                            request_json_tmp['response_content_type'] = line_data.split(":", 1)[1].strip()
                            continue
                        if line_data.startswith("Date"):
                            # The Date value contains ':' (time of day), so
                            # cut only at the first colon.
                            request_json_tmp['discovery_time'] = line_data[line_data.index(':') + 1:].strip()
                            continue
                        response_header.append(line_data)
                    elif null_line_num == 2:
                        # Everything after the second blank line is body.
                        response_body.append(line_data)
            request_json_tmp["response_body"] = " ".join(response_body)
            request_json_tmp["response_body_raw_length"] = len(request_json_tmp["response_body"])
            http_models.append(request_json_tmp)
        return http_models
if __name__ == '__main__':
    try:
        # CLI: single positional argument — path to the Fiddler text export.
        parser = argparse.ArgumentParser()
        parser.add_argument("path", type=str, help="path")
        args = parser.parse_args()
        http_parse_cli = HttpParseLog()
        meta = http_parse_cli.work(args.path)
        print(meta)
    # BUG FIX: `except Exception, ex` is a SyntaxError on Python 3;
    # `except Exception as ex` is valid on Python 2.6+ and 3.
    except Exception as ex:
        logger.error("Error: %s" % ex)
        logger.error(traceback.format_exc())