Requests 源代码阅读
Requests 是什么
Requests is a simple, yet elegant, HTTP library. 是作者写在README里的一句话,Requests的功能包括以下几点:
- Keep-Alive & Connection Pooling 保持活动和连接池
- International Domains and URLs 国际域名和 URL
- Sessions with Cookie Persistence Cookie 持久性会话
- Browser-style TLS/SSL Verification 浏览器式 SSL 验证
- Basic & Digest Authentication 基本 / 摘要身份验证
- Familiar `dict`–like Cookies 优雅的键 / 值 Cookie
- Automatic Content Decompression and Decoding 自动解压与解码
- Multi-part File Uploads 文件分块上传
- SOCKS Proxy Support SOCKS 代理支持
- Connection Timeouts 连接超时
- Streaming Downloads 流下载
- Automatic honoring of `.netrc` .netrc 支持
- Chunked HTTP Requests 分块请求
api.py
from . import sessions
def request(method, url, **kwargs):
    """Send a single request through a short-lived ``Session``.

    :param method: HTTP method name, forwarded to ``Session.request``.
    :param url: target URL, forwarded to ``Session.request``.
    :param kwargs: any further keyword arguments, forwarded unchanged.
    :return: whatever ``Session.request`` returns.
    """
    # Using the session as a context manager means it is closed as soon as
    # the call returns, even if the request raises.
    with sessions.Session() as session:
        return session.request(method=method, url=url, **kwargs)
def get(url, params=None, **kwargs):
    """Send a GET request.

    :param url: target URL.
    :param params: (optional) query-string data, forwarded as ``params``.
    :param kwargs: extra keyword arguments forwarded to ``request``.
    :return: the result of ``request("get", ...)``.
    """
    return request("get", url, params=params, **kwargs)
def options(url, **kwargs):
    """Send an OPTIONS request.

    :param url: target URL.
    :param kwargs: extra keyword arguments forwarded to ``request``.
    :return: the result of ``request("options", ...)``.
    """
    return request("options", url, **kwargs)
def head(url, **kwargs):
    """Send a HEAD request.

    :param url: target URL.
    :param kwargs: extra keyword arguments forwarded to ``request``.
    :return: the result of ``request("head", ...)``.
    """
    # Unlike the other helpers, HEAD does not follow redirects unless the
    # caller explicitly passes ``allow_redirects``.
    kwargs.setdefault("allow_redirects", False)
    return request("head", url, **kwargs)
def post(url, data=None, json=None, **kwargs):
    """Send a POST request.

    :param url: target URL.
    :param data: (optional) body payload, forwarded as ``data``.
    :param json: (optional) body payload, forwarded as ``json``.
    :param kwargs: extra keyword arguments forwarded to ``request``.
    :return: the result of ``request("post", ...)``.
    """
    return request("post", url, data=data, json=json, **kwargs)
def put(url, data=None, **kwargs):
    """Send a PUT request.

    :param url: target URL.
    :param data: (optional) body payload, forwarded as ``data``.
    :param kwargs: extra keyword arguments forwarded to ``request``.
    :return: the result of ``request("put", ...)``.
    """
    return request("put", url, data=data, **kwargs)
def patch(url, data=None, **kwargs):
    """Send a PATCH request.

    :param url: target URL.
    :param data: (optional) body payload, forwarded as ``data``.
    :param kwargs: extra keyword arguments forwarded to ``request``.
    :return: the result of ``request("patch", ...)``.
    """
    return request("patch", url, data=data, **kwargs)
def delete(url, **kwargs):
    """Send a DELETE request.

    :param url: target URL.
    :param kwargs: extra keyword arguments forwarded to ``request``.
    :return: the result of ``request("delete", ...)``.
    """
    return request("delete", url, **kwargs)
api.py中提供了8种方法,除了request方法,其他7个方法都是HTTP协议的基本方法(不知道,网上看来的),附上7种方法的含义
- GET方法:获取URL指定的资源
- OPTIONS方法:查询URL指定的资源支持的方法
- HEAD方法:获取报文首部,用于确认URL的有效性等
- POST方法:传输实体主体
- PUT方法:传输文件
- PATCH方法:用来对已知资源进行局部更新(对PUT方法的补充)
- DELETE方法:删除URL指定的资源
request作为核心接口,连接的是sessions.py,负责把参数传入,也就是代码第三行的位置。仔细阅读request函数的注释,可以理解其各个参数的含义
- 参数-method 创建Request对象需要的method:GET , OPTIONS , HEAD , POST , PUT , PATCH , DELETE
- 参数-url 创建Request对象需要的url
- 参数-params(可选) Request对象需要发送的查询字符串,可以是字典、元组列表或者字节
- 参数-data(可选) Request对象需要发送的body数据,可以是字典、元组列表、字节或者类文件对象
- 参数-json(可选) Request对象需要发送的body数据,是一个可被JSON序列化的Python对象
- 参数-headers(可选) Request对象需要发送的一个字典对象HTTP请求头
- 参数-cookies(可选) Request对象需要发送的一个字典对象或者CookieJar对象
- 参数-files(可选) 用于分段编码上传的字典({'name': file-like-objects} 或者 {'name': file-tuple} 格式),file-tuple 可以是 ('filename', fileobj) 格式,或者 ('filename', fileobj, 'content_type') 格式,或者 ('filename', fileobj, 'content_type', custom_headers) 格式,其中 'content_type' 是一个字符串,用来定义文件内容的类型。custom_headers 是一个类似字典的对象,包含了为这个文件额外添加的请求头
- 参数-auth(可选) 认证元组,用来启用Basic/Digest/Custom HTTP认证
- 参数-timeout(可选) 等待服务器响应的时间,单位是秒,超过设定的时间则放弃;可以是float型小数,或者 (connect timeout, read timeout) 形式的元组
- 参数-allow_redirects(可选) 布尔类型,开启或者关闭GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD重定向,默认是True
- 参数-proxies(可选) 字典类型,映射协议到代理的URL
- 参数-verify(可选) 可以是布尔值,这时候它控制要不要验证服务器的TLS证书。也可以是字符串,这时候它必须是一个CA证书组的路径。默认是 True
- 参数-stream(可选) 如果是False,则将立即下载响应内容
- 参数-cert(可选) 如果是字符串,则必须是ssl证书(.pem)的路径。如果是元组,则必须是(‘cert’, ‘key’)对
- request方法响应:Response对象
- request方法体:
所以在使用request方法时,是通过with创建一个sessions.Session对象session,然后调用session对象的request方法将参数全部传进去。
sessions.py
该模块提供了一个Session对象,用于跨请求(cookie、auth、代理)管理和持久化设置。
打开sessions文件先看import的库
import os
import sys
import time
from collections import OrderedDict
from datetime import timedelta
from ._internal_utils import to_native_string
from .adapters import HTTPAdapter
from .auth import _basic_auth_str
from .compat import Mapping, cookielib, urljoin, urlparse
from .cookies import (
RequestsCookieJar,
cookiejar_from_dict,
extract_cookies_to_jar,
merge_cookies,
)
from .exceptions import (
ChunkedEncodingError,
ContentDecodingError,
InvalidSchema,
TooManyRedirects,
)
from .hooks import default_hooks, dispatch_hook
# formerly defined here, reexposed here for backward compatibility
from .models import ( # noqa: F401
DEFAULT_REDIRECT_LIMIT,
REDIRECT_STATI,
PreparedRequest,
Request,
)
from .status_codes import codes
from .structures import CaseInsensitiveDict
from .utils import ( # noqa: F401
DEFAULT_PORTS,
default_headers,
get_auth_from_url,
get_environ_proxies,
get_netrc_auth,
requote_uri,
resolve_proxies,
rewind_body,
should_bypass_proxies,
to_key_val_list,
)
一堆import后是一个选择精确时钟的if语句
# Pick the clock used to time requests: ``time.perf_counter`` on Windows,
# ``time.time`` on every other platform.
if sys.platform == "win32":
    preferred_clock = time.perf_counter
else:
    preferred_clock = time.time
接着是两个函数 merge_setting 和 merge_hooks ,用来合并request和session的设置
接下来找到api文件中调用的sessions.Session类
class Session(SessionRedirectMixin):
    """Hold settings that persist across requests and send requests with them.

    A session carries default headers, cookies, auth, proxies, hooks and
    TLS-related settings, plus the transport adapters mounted per URL prefix.
    It can be used as a context manager; leaving the ``with`` block calls
    :meth:`close`.
    """

    # Names of the attributes saved by ``__getstate__``.
    __attrs__ = [
        "headers",
        "cookies",
        "auth",
        "proxies",
        "hooks",
        "params",
        "verify",
        "cert",
        "adapters",
        "stream",
        "trust_env",
        "max_redirects",
    ]

    def __init__(self):
        """Set every per-session default and mount the HTTP and HTTPS adapters."""
        # Session-level defaults; each is merged with the per-request value
        # in ``prepare_request`` / ``merge_environment_settings``.
        self.headers = default_headers()
        self.auth = None
        self.proxies = {}
        self.hooks = default_hooks()
        self.params = {}
        self.stream = False
        self.verify = True
        self.cert = None
        self.max_redirects = DEFAULT_REDIRECT_LIMIT
        # When true, netrc auth and environment proxy / CA-bundle settings
        # are consulted (see ``prepare_request`` and
        # ``merge_environment_settings``).
        self.trust_env = True
        self.cookies = cookiejar_from_dict({})
        # Ordered so that ``get_adapter`` tries prefixes in a defined order.
        self.adapters = OrderedDict()
        self.mount("https://", HTTPAdapter())
        self.mount("http://", HTTPAdapter())

    def __enter__(self):
        """Return the session itself for use in a ``with`` block."""
        return self

    def __exit__(self, *args):
        """Close the session when the ``with`` block ends."""
        self.close()

    def prepare_request(self, request):
        """Build a ``PreparedRequest`` from *request* merged with session settings.

        :param request: the ``Request`` to prepare.
        :return: the resulting ``PreparedRequest``.
        """
        cookies = request.cookies or {}

        # Wrap plain mappings in a CookieJar so they can be merged.
        if not isinstance(cookies, cookielib.CookieJar):
            cookies = cookiejar_from_dict(cookies)

        # Session cookies are merged first, then the request's own cookies.
        merged_cookies = merge_cookies(
            merge_cookies(RequestsCookieJar(), self.cookies), cookies
        )

        # Fall back to netrc credentials only when the environment is trusted
        # and neither the request nor the session supplies auth.
        auth = request.auth
        if self.trust_env and not auth and not self.auth:
            auth = get_netrc_auth(request.url)

        p = PreparedRequest()
        p.prepare(
            method=request.method.upper(),
            url=request.url,
            files=request.files,
            data=request.data,
            json=request.json,
            headers=merge_setting(
                request.headers, self.headers, dict_class=CaseInsensitiveDict
            ),
            params=merge_setting(request.params, self.params),
            auth=merge_setting(auth, self.auth),
            cookies=merged_cookies,
            hooks=merge_hooks(request.hooks, self.hooks),
        )
        return p

    def request(
        self,
        method,
        url,
        params=None,
        data=None,
        headers=None,
        cookies=None,
        files=None,
        auth=None,
        timeout=None,
        allow_redirects=True,
        proxies=None,
        hooks=None,
        stream=None,
        verify=None,
        cert=None,
        json=None,
    ):
        """Construct a ``Request``, prepare it and send it.

        :param method: HTTP method name; upper-cased before use.
        :param url: target URL.
        :param params: (optional) query-string data; ``None`` becomes ``{}``.
        :param data: (optional) body data; ``None`` becomes ``{}``.
        :param headers: (optional) per-request headers.
        :param cookies: (optional) per-request cookies.
        :param files: (optional) files to upload.
        :param auth: (optional) per-request auth.
        :param timeout: (optional) forwarded to ``send``.
        :param allow_redirects: forwarded to ``send``; defaults to ``True``.
        :param proxies: (optional) per-request proxies; ``None`` becomes ``{}``.
        :param hooks: (optional) per-request hooks.
        :param stream: (optional) merged with the session's ``stream``.
        :param verify: (optional) merged with the session's ``verify``.
        :param cert: (optional) merged with the session's ``cert``.
        :param json: (optional) JSON body payload.
        :return: the response returned by ``send``.
        """
        # Create the Request.
        req = Request(
            method=method.upper(),
            url=url,
            headers=headers,
            files=files,
            data=data or {},
            json=json,
            params=params or {},
            auth=auth,
            cookies=cookies,
            hooks=hooks,
        )
        prep = self.prepare_request(req)

        proxies = proxies or {}

        settings = self.merge_environment_settings(
            prep.url, proxies, stream, verify, cert
        )

        # Send the request.
        send_kwargs = {
            "timeout": timeout,
            "allow_redirects": allow_redirects,
        }
        send_kwargs.update(settings)
        resp = self.send(prep, **send_kwargs)

        return resp

    def get(self, url, **kwargs):
        """Send a GET request; redirects are followed unless overridden."""
        kwargs.setdefault("allow_redirects", True)
        return self.request("GET", url, **kwargs)

    def options(self, url, **kwargs):
        """Send an OPTIONS request; redirects are followed unless overridden."""
        kwargs.setdefault("allow_redirects", True)
        return self.request("OPTIONS", url, **kwargs)

    def head(self, url, **kwargs):
        """Send a HEAD request; redirects are NOT followed unless overridden."""
        kwargs.setdefault("allow_redirects", False)
        return self.request("HEAD", url, **kwargs)

    def post(self, url, data=None, json=None, **kwargs):
        """Send a POST request with optional ``data`` / ``json`` body."""
        return self.request("POST", url, data=data, json=json, **kwargs)

    def put(self, url, data=None, **kwargs):
        """Send a PUT request with an optional ``data`` body."""
        return self.request("PUT", url, data=data, **kwargs)

    def patch(self, url, data=None, **kwargs):
        """Send a PATCH request with an optional ``data`` body."""
        return self.request("PATCH", url, data=data, **kwargs)

    def delete(self, url, **kwargs):
        """Send a DELETE request."""
        return self.request("DELETE", url, **kwargs)

    def send(self, request, **kwargs):
        """Send an already-prepared request through the matching adapter.

        :param request: the ``PreparedRequest`` to send.
        :param kwargs: send options (``stream``, ``verify``, ``cert``,
            ``proxies``, ``allow_redirects``, ...); missing ones default to
            the session's values.
        :return: the final response, with ``history`` holding any earlier
            responses in a redirect chain.
        :raises ValueError: if *request* is an unprepared ``Request``.
        """
        # Fill in session defaults for anything the caller did not pass
        # (``send`` may be called directly, bypassing ``request``).
        kwargs.setdefault("stream", self.stream)
        kwargs.setdefault("verify", self.verify)
        kwargs.setdefault("cert", self.cert)
        if "proxies" not in kwargs:
            kwargs["proxies"] = resolve_proxies(request, self.proxies, self.trust_env)

        # Only prepared requests can be sent.
        if isinstance(request, Request):
            raise ValueError("You can only send PreparedRequests.")

        # ``allow_redirects`` is consumed here and not passed to the adapter.
        allow_redirects = kwargs.pop("allow_redirects", True)
        stream = kwargs.get("stream")
        hooks = request.hooks

        # Pick the adapter mounted for this URL's prefix.
        adapter = self.get_adapter(url=request.url)

        # Time the adapter call and record it on the response.
        start = preferred_clock()
        r = adapter.send(request, **kwargs)
        elapsed = preferred_clock() - start
        r.elapsed = timedelta(seconds=elapsed)

        # Response hooks may replace the response object.
        r = dispatch_hook("response", hooks, r, **kwargs)

        # Persist cookies from any responses already in the history
        # (presumably added by a hook), then from this response.
        if r.history:
            for resp in r.history:
                extract_cookies_to_jar(self.cookies, resp.request, resp.raw)

        extract_cookies_to_jar(self.cookies, request, r.raw)

        # Follow redirects if allowed, collecting every response in the chain.
        if allow_redirects:
            gen = self.resolve_redirects(r, request, **kwargs)
            history = [resp for resp in gen]
        else:
            history = []

        # With history, the last response becomes the result and everything
        # before it (starting with the first response) becomes its history.
        if history:
            history.insert(0, r)
            r = history.pop()
            r.history = history

        # When not following redirects, still record the next request in the
        # chain (if there is one) on ``r._next``.
        if not allow_redirects:
            try:
                r._next = next(
                    self.resolve_redirects(r, request, yield_requests=True, **kwargs)
                )
            except StopIteration:
                pass

        # Touch ``content`` unless streaming was requested (presumably this
        # forces the body to be read now).
        if not stream:
            r.content

        return r

    def merge_environment_settings(self, url, proxies, stream, verify, cert):
        """Merge per-request settings with environment and session settings.

        :param url: URL the settings are being resolved for.
        :param proxies: per-request proxies mapping (mutated in place when
            environment proxies are added).
        :param stream: per-request ``stream`` value or ``None``.
        :param verify: per-request ``verify`` value or ``None``.
        :param cert: per-request ``cert`` value or ``None``.
        :return: dict with keys ``proxies``, ``stream``, ``verify``, ``cert``.
        """
        # Environment settings are only consulted when trust_env is on.
        if self.trust_env:
            no_proxy = proxies.get("no_proxy") if proxies is not None else None
            env_proxies = get_environ_proxies(url, no_proxy=no_proxy)
            # Explicit per-request proxies win over environment ones.
            # NOTE(review): the line above tolerates ``proxies is None`` but
            # ``setdefault`` below would fail on it - confirm callers always
            # pass a dict (``request`` does).
            for (k, v) in env_proxies.items():
                proxies.setdefault(k, v)

            # A CA bundle named in the environment replaces a default
            # ``verify`` (True or None); an explicit path or False is kept.
            if verify is True or verify is None:
                verify = (
                    os.environ.get("REQUESTS_CA_BUNDLE")
                    or os.environ.get("CURL_CA_BUNDLE")
                    or verify
                )

        # Finally merge each value with the session-level setting.
        proxies = merge_setting(proxies, self.proxies)
        stream = merge_setting(stream, self.stream)
        verify = merge_setting(verify, self.verify)
        cert = merge_setting(cert, self.cert)

        return {"proxies": proxies, "stream": stream, "verify": verify, "cert": cert}

    def get_adapter(self, url):
        """Return the first mounted adapter whose prefix matches *url*.

        Matching is case-insensitive and follows the order of
        ``self.adapters``.

        :raises InvalidSchema: if no mounted prefix matches.
        """
        for (prefix, adapter) in self.adapters.items():
            if url.lower().startswith(prefix.lower()):
                return adapter

        # Nothing matches :-/
        raise InvalidSchema(f"No connection adapters were found for {url!r}")

    def close(self):
        """Close every mounted adapter."""
        for v in self.adapters.values():
            v.close()

    def mount(self, prefix, adapter):
        """Register *adapter* for URLs starting with *prefix*.

        After inserting, every strictly shorter prefix is moved to the end so
        that ``get_adapter`` sees longer prefixes before shorter ones.
        """
        self.adapters[prefix] = adapter
        keys_to_move = [k for k in self.adapters if len(k) < len(prefix)]

        for key in keys_to_move:
            self.adapters[key] = self.adapters.pop(key)

    def __getstate__(self):
        """Return the attributes listed in ``__attrs__`` as a dict (missing ones as ``None``)."""
        state = {attr: getattr(self, attr, None) for attr in self.__attrs__}
        return state

    def __setstate__(self, state):
        """Restore attributes from a dict produced by ``__getstate__``."""
        for attr, value in state.items():
            setattr(self, attr, value)
- Session 的注释中写出了,Session提供三个功能:cookie的持久化、连接池和配置
- __attrs__ = […] 这里面列出了Session对象所拥有的一组属性
- 看到了在对于session连接池中,默认最大的连接池数量是DEFAULT_POOLSIZE = 10
__init__.py
整体程序是在做初始化。因为不熟悉urllib3这个第三方库,只能大概理解:其中两个函数分别用来验证urllib3的兼容性和加密库的版本,而后的try和except是在处理可能出现的错误,进行一系列的验证。
__version__.py
不是一个很有用的.py文件,记录了Requests的标题,描述,网址,版本号等等信息
_internal_utils.py
提供Requests内部使用的实用函数,这些函数只依赖极少的外部辅助模块(如compat)
to_native_string
给定一个字符串对象,无论其类型是什么,都返回该字符串的本机(native)字符串类型表示,并在必要时进行编码或解码。除非另有说明,否则假定使用ASCII编码。
def to_native_string(string, encoding="ascii"):
    """Return *string* as the native string type.

    :param string: a native string (returned unchanged) or an object with a
        ``decode`` method, such as ``bytes``.
    :param encoding: encoding used when decoding; defaults to ``"ascii"``.
    :return: the native-string representation of *string*.
    """
    if isinstance(string, builtin_str):
        out = string
    else:
        out = string.decode(encoding)

    # The snippet as quoted stopped before this line, so the function
    # computed ``out`` but always returned None.
    return out
unicode_is_ascii
确定unicode字符串是否只包含ASCII字符。u_string:要检查的unicode字符串。必须是unicode而不是Python 2 的字符串。
def unicode_is_ascii(u_string):
    """Determine whether a unicode string contains only ASCII characters.

    :param str u_string: string to check; must be ``str``.
    :return: ``True`` if the string encodes to ASCII, ``False`` otherwise.
    :rtype: bool
    :raises AssertionError: if *u_string* is not a ``str`` (when assertions
        are enabled).
    """
    assert isinstance(u_string, str)
    try:
        # Encoding is used purely as a check; the result is discarded.
        u_string.encode("ascii")
        return True
    except UnicodeEncodeError:
        return False
adapters.py
此模块包含请求用来定义和维护连接的传输适配器。
auth.py
该模块包含请求的身份验证处理程序。
certs.py
该模块返回首选的默认CA证书包。只有一个——来自certifi包的那个。
如果您正在打包Requests,例如针对Linux发行版或托管环境,您可以更改where()的定义,以返回单独打包的CA证书包。
compat.py
此模块以前处理过Python 2和Python 3之间的导入兼容性问题。在下一个主要版本之前,它将保留向后兼容性。(Requests优雅的保证了兼容性问题)
cookies.py
兼容性代码,使Requests能够使用`cookielib.CookieJar`。
requests.utils会从这里导入,所以要小心处理导入。
exceptions.py
这个模块包含了一组Requests的异常。
help.py
包含错误报告助手的模块
hooks.py
这个模块提供了请求钩子系统的功能。
models.py
该模块包含驱动请求的主要对象。
packages.py
# This code exists for backwards compatibility reasons.
# I don't like it either. Just look the other way. :)
# Kinda cool, though, right?
作者是这么说的,感觉上是一个查错性质的程序
status_codes.py
`codes` 对象定义了从HTTP状态的通用名称到其数字代码的映射,可以作为属性或字典项访问。
有些状态码有多个名称,并且名称的大写和小写版本都是允许的。例如,`codes.ok`、`codes.OK` 和 `codes.okay` 都对应于HTTP状态码200。
# Maps each numeric HTTP status code to a tuple of alias names for it.
_codes = {
    # Informational.
    100: ("continue",),
    101: ("switching_protocols",),
    102: ("processing",),
    103: ("checkpoint",),
    122: ("uri_too_long", "request_uri_too_long"),
    # Success.
    200: ("ok", "okay", "all_ok", "all_okay", "all_good", "\\o/", "✓"),
    201: ("created",),
    202: ("accepted",),
    203: ("non_authoritative_info", "non_authoritative_information"),
    204: ("no_content",),
    205: ("reset_content", "reset"),
    206: ("partial_content", "partial"),
    207: ("multi_status", "multiple_status", "multi_stati", "multiple_stati"),
    208: ("already_reported",),
    226: ("im_used",),
    # Redirection.
    300: ("multiple_choices",),
    301: ("moved_permanently", "moved", "\\o-"),
    302: ("found",),
    303: ("see_other", "other"),
    304: ("not_modified",),
    305: ("use_proxy",),
    306: ("switch_proxy",),
    307: ("temporary_redirect", "temporary_moved", "temporary"),
    308: (
        "permanent_redirect",
        "resume_incomplete",
        "resume",
    ),  # "resume" and "resume_incomplete" to be removed in 3.0
    # Client Error.
    400: ("bad_request", "bad"),
    401: ("unauthorized",),
    402: ("payment_required", "payment"),
    403: ("forbidden",),
    404: ("not_found", "-o-"),
    405: ("method_not_allowed", "not_allowed"),
    406: ("not_acceptable",),
    407: ("proxy_authentication_required", "proxy_auth", "proxy_authentication"),
    408: ("request_timeout", "timeout"),
    409: ("conflict",),
    410: ("gone",),
    411: ("length_required",),
    412: ("precondition_failed", "precondition"),
    413: ("request_entity_too_large",),
    414: ("request_uri_too_large",),
    415: ("unsupported_media_type", "unsupported_media", "media_type"),
    416: (
        "requested_range_not_satisfiable",
        "requested_range",
        "range_not_satisfiable",
    ),
    417: ("expectation_failed",),
    418: ("im_a_teapot", "teapot", "i_am_a_teapot"),
    421: ("misdirected_request",),
    422: ("unprocessable_entity", "unprocessable"),
    423: ("locked",),
    424: ("failed_dependency", "dependency"),
    425: ("unordered_collection", "unordered"),
    426: ("upgrade_required", "upgrade"),
    428: ("precondition_required", "precondition"),
    429: ("too_many_requests", "too_many"),
    431: ("header_fields_too_large", "fields_too_large"),
    444: ("no_response", "none"),
    449: ("retry_with", "retry"),
    450: ("blocked_by_windows_parental_controls", "parental_controls"),
    451: ("unavailable_for_legal_reasons", "legal_reasons"),
    499: ("client_closed_request",),
    # Server Error.
    # The "/o\\" alias mirrors "\\o/" on 200; its trailing backslash had been
    # lost in the quoted snippet.
    500: ("internal_server_error", "server_error", "/o\\", "✗"),
    501: ("not_implemented",),
    502: ("bad_gateway",),
    503: ("service_unavailable", "unavailable"),
    504: ("gateway_timeout",),
    505: ("http_version_not_supported", "http_version"),
    506: ("variant_also_negotiates",),
    507: ("insufficient_storage",),
    509: ("bandwidth_limit_exceeded", "bandwidth"),
    510: ("not_extended",),
    511: ("network_authentication_required", "network_auth", "network_authentication"),
}
# Lookup object through which the status-code aliases are exposed.
codes = LookupDict(name="status_codes")
structures.py
驱动请求的数据结构。
utils.py
此模块提供在请求中使用的实用程序函数,这些函数对外部引用也很有用。
小结
此次阅读,阅读了Requests项目中requests文件夹中的所有程序,即使很多代码读不明白,也能感觉到其简洁而直白,收获不少。其一是了解了项目中参数传递的方式;其二是学习到了requests简洁的api接口,全部归结到request方法,request方法连接到sessions程序,再进行处理,使得api文件极其精简,也学会了一层层剖析源代码项目;其三是学会了写README文件,不再像之前那样胡乱地写。