运行环境 : python 3.6.0
最近做数据采集时 , 经常遇到每次请求的 headers 都不一样的情况 , 每次都要手动复制粘贴再改写成字典 , 很麻烦 . 于是写了个方法 , 直接把复制过来的一整串字符串转换为字典 . 在这里记录一下 , 以后用得到的时候就不必再手动处理了 .
代码如下 :
# -*- encoding: utf-8 -*-
"""
Created On 2019-08-01 11:15
Module Environment: python 3.6.0
@Module Function: eval headers string to dict
@Author: 漫天丶飞雪
"""
import json
# Sample raw HTTP request text (a request line followed by header lines).
# It is the demo input passed to headers_eval when this file runs as a script.
headers = """
GET /validate/code.html?c=0.6059167337934623?c=0.18653448649464743 HTTP/1.1
Host: www.ddocr.com
Connection: keep-alive
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36
Accept: image/webp,image/apng,image/*,*/*;q=0.8
Referer: http://www.ddocr.com/user/login.html
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.9
Cookie: JSESSIONID=16FBBD33985B98DEBDB8B27F1FA818BB
"""
def headers_eval(headers):
    """
    Convert raw HTTP header text into a JSON-formatted string.

    Every line of the form ``Name: value`` becomes one entry. Blank lines and
    lines without a colon (such as a ``GET /path HTTP/1.1`` request line) are
    ignored. Only the FIRST colon on a line separates the name from the
    value, so values that contain colons themselves (URLs, ``host:port``)
    are kept intact. If a header name occurs more than once, the last
    occurrence wins.

    :param headers: raw header text, one header per line
    :return: the headers as an indented JSON object string; an empty dict
        if the input could not be processed (e.g. it is not a string)
    """
    try:
        parsed = {}
        for line in headers.splitlines():
            line = line.strip()
            if ":" not in line:
                # Blank line or non-header content such as the request line.
                continue
            # partition() splits on the first colon only; splitting on every
            # colon would cut "http://host/path" down to just "http".
            name, _, value = line.partition(":")
            parsed[name.strip()] = value.strip()
        # Serialized to JSON purely so that the result prints neatly.
        result = json.dumps(parsed, indent=4, ensure_ascii=False)
    except Exception:
        # Best effort: report the failure and fall back to an empty dict.
        print("headers eval get error ...")
        result = dict()
    return result


if __name__ == '__main__':
    # Demo: convert the sample request text defined above and print it.
    headers = headers_eval(headers)
    print(headers)

# Output of the demo run:
# {
#     "Host": "www.ddocr.com",
#     "Connection": "keep-alive",
#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
#     "Accept": "image/webp,image/apng,image/*,*/*;q=0.8",
#     "Referer": "http://www.ddocr.com/user/login.html",
#     "Accept-Encoding": "gzip, deflate",
#     "Accept-Language": "zh-CN,zh;q=0.9",
#     "Cookie": "JSESSIONID=16FBBD33985B98DEBDB8B27F1FA818BB"
# }