总纲
1、会话对象session
会话对象让你能够跨请求保持某些参数。它也会在同一个 Session 实例发出的所有请求之间保持 cookie(状态保持)
s = requests.Session() s.get('http://httpbin.org/cookies/set/sessioncookie/123456789') r = s.get("http://httpbin.org/cookies")
会话还可以用作前后文管理器:
with requests.Session() as s: s.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
Session类源码(requests/session.py)
class Session(SessionRedirectMixin): """A Requests session. Provides cookie persistence, connection-pooling, and configuration. Basic Usage:: >>> import requests >>> s = requests.Session() >>> s.get('https://httpbin.org/get') <Response [200]> Or as a context manager:: >>> with requests.Session() as s: ... s.get('https://httpbin.org/get') <Response [200]> """ __attrs__ = [ 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'adapters', 'stream', 'trust_env', 'max_redirects', ] def __init__(self): #: A case-insensitive dictionary of headers to be sent on each #: :class:`Request <Request>` sent from this #: :class:`Session <Session>`. self.headers = default_headers() #: Default Authentication tuple or object to attach to #: :class:`Request <Request>`. self.auth = None #: Dictionary mapping protocol or protocol and host to the URL of the proxy #: (e.g. {'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}) to #: be used on each :class:`Request <Request>`. self.proxies = {} #: Event-handling hooks. self.hooks = default_hooks() #: Dictionary of querystring data to attach to each #: :class:`Request <Request>`. The dictionary values may be lists for #: representing multivalued query parameters. self.params = {} #: Stream response content default. self.stream = False #: SSL Verification default. #: Defaults to `True`, requiring requests to verify the TLS certificate at the #: remote end. #: If verify is set to `False`, requests will accept any TLS certificate #: presented by the server, and will ignore hostname mismatches and/or #: expired certificates, which will make your application vulnerable to #: man-in-the-middle (MitM) attacks. #: Only set this to `False` for testing. self.verify = True #: SSL client certificate default, if String, path to ssl client #: cert file (.pem). If Tuple, ('cert', 'key') pair. self.cert = None #: Maximum number of redirects allowed. If the request exceeds this #: limit, a :class:`TooManyRedirects` exception is raised. #: This defaults to requests.models.DEFAULT_REDIRECT_LIMIT, which is #: 30. self.max_redirects = DEFAULT_REDIRECT_LIMIT #: Trust environment settings for proxy configuration, default #: authentication and similar. self.trust_env = True #: A CookieJar containing all currently outstanding cookies set on this #: session. By default it is a #: :class:`RequestsCookieJar <requests.cookies.RequestsCookieJar>`, but #: may be any other ``cookielib.CookieJar`` compatible object. self.cookies = cookiejar_from_dict({}) # Default connection adapters. self.adapters = OrderedDict() self.mount('https://', HTTPAdapter()) self.mount('http://', HTTPAdapter()) def __enter__(self): return self def __exit__(self, *args): self.close() def prepare_request(self, request): """Constructs a :class:`PreparedRequest <PreparedRequest>` for transmission and returns it. The :class:`PreparedRequest` has settings merged from the :class:`Request <Request>` instance and those of the :class:`Session`. :param request: :class:`Request` instance to prepare with this session's settings. :rtype: requests.PreparedRequest """ cookies = request.cookies or {} # Bootstrap CookieJar. if not isinstance(cookies, cookielib.CookieJar): cookies = cookiejar_from_dict(cookies) # Merge with session cookies merged_cookies = merge_cookies( merge_cookies(RequestsCookieJar(), self.cookies), cookies) # Set environment's basic authentication if not explicitly set. auth = request.auth if self.trust_env and not auth and not self.auth: auth = get_netrc_auth(request.url) p = PreparedRequest() p.prepare( method=request.method.upper(), url=request.url, files=request.files, data=request.data, json=request.json, headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict), params=merge_setting(request.params, self.params), auth=merge_setting(auth, self.auth), cookies=merged_cookies, hooks=merge_hooks(request.hooks, self.hooks), ) return p def request(self, method, url, params=None, data=None, headers=None, cookies=None, files=None, auth=None, timeout=None, allow_redirects=True, proxies=None, hooks=None, stream=None, verify=None, cert=None, json=None): """Constructs a :class:`Request <Request>`, prepares it and sends it. Returns :class:`Response <Response>` object. :param method: method for the new :class:`Request` object. :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. :param data: (optional) Dictionary, list of tuples, bytes, or file-like object to send in the body of the :class:`Request`. :param json: (optional) json to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. :param files: (optional) Dictionary of ``'filename': file-like-objects`` for multipart encoding upload. :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a :ref:`(connect timeout, read timeout) <timeouts>` tuple. :type timeout: float or tuple :param allow_redirects: (optional) Set to True by default. :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol or protocol and hostname to the URL of the proxy. :param stream: (optional) whether to immediately download the response content. Defaults to ``False``. :param verify: (optional) Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use. Defaults to ``True``. When set to ``False``, requests will accept any TLS certificate presented by the server, and will ignore hostname mismatches and/or expired certificates, which will make your application vulnerable to man-in-the-middle (MitM) attacks. Setting verify to ``False`` may be useful during local development or testing. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. :rtype: requests.Response """ # Create the Request. req = Request( method=method.upper(), url=url, headers=headers, files=files, data=data or {}, json=json, params=params or {}, auth=auth, cookies=cookies, hooks=hooks, ) prep = self.prepare_request(req) proxies = proxies or {} settings = self.merge_environment_settings( prep.url, proxies, stream, verify, cert ) # Send the request. send_kwargs = { 'timeout': timeout, 'allow_redirects': allow_redirects, } send_kwargs.update(settings) resp = self.send(prep, **send_kwargs) return resp def get(self, url, **kwargs): r"""Sends a GET request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) return self.request('GET', url, **kwargs) def options(self, url, **kwargs): r"""Sends a OPTIONS request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) return self.request('OPTIONS', url, **kwargs) def head(self, url, **kwargs): r"""Sends a HEAD request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ kwargs.setdefault('allow_redirects', False) return self.request('HEAD', url, **kwargs) def post(self, url, data=None, json=None, **kwargs): r"""Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, list of tuples, bytes, or file-like object to send in the body of the :class:`Request`. :param json: (optional) json to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ return self.request('POST', url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): r"""Sends a PUT request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, list of tuples, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ return self.request('PUT', url, data=data, **kwargs) def patch(self, url, data=None, **kwargs): r"""Sends a PATCH request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, list of tuples, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ return self.request('PATCH', url, data=data, **kwargs) def delete(self, url, **kwargs): r"""Sends a DELETE request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. :rtype: requests.Response """ return self.request('DELETE', url, **kwargs) def send(self, request, **kwargs): """Send a given PreparedRequest. :rtype: requests.Response """ # Set defaults that the hooks can utilize to ensure they always have # the correct parameters to reproduce the previous request. kwargs.setdefault('stream', self.stream) kwargs.setdefault('verify', self.verify) kwargs.setdefault('cert', self.cert) if 'proxies' not in kwargs: kwargs['proxies'] = resolve_proxies( request, self.proxies, self.trust_env ) # It's possible that users might accidentally send a Request object. # Guard against that specific failure case. if isinstance(request, Request): raise ValueError('You can only send PreparedRequests.') # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') hooks = request.hooks # Get the appropriate adapter to use adapter = self.get_adapter(url=request.url) # Start time (approximately) of the request start = preferred_clock() # Send the request r = adapter.send(request, **kwargs) # Total elapsed time of the request (approximately) elapsed = preferred_clock() - start r.elapsed = timedelta(seconds=elapsed) # Response manipulation hooks r = dispatch_hook('response', hooks, r, **kwargs) # Persist cookies if r.history: # If the hooks create history then we want those cookies too for resp in r.history: extract_cookies_to_jar(self.cookies, resp.request, resp.raw) extract_cookies_to_jar(self.cookies, request, r.raw) # Resolve redirects if allowed. if allow_redirects: # Redirect resolving generator. gen = self.resolve_redirects(r, request, **kwargs) history = [resp for resp in gen] else: history = [] # Shuffle things around if there's history. if history: # Insert the first (original) request at the start history.insert(0, r) # Get the last request made r = history.pop() r.history = history # If redirects aren't being followed, store the response on the Request for Response.next(). if not allow_redirects: try: r._next = next(self.resolve_redirects(r, request, yield_requests=True, **kwargs)) except StopIteration: pass if not stream: r.content return r def merge_environment_settings(self, url, proxies, stream, verify, cert): """ Check the environment and merge it with some settings. :rtype: dict """ # Gather clues from the surrounding environment. if self.trust_env: # Set environment's proxies. no_proxy = proxies.get('no_proxy') if proxies is not None else None env_proxies = get_environ_proxies(url, no_proxy=no_proxy) for (k, v) in env_proxies.items(): proxies.setdefault(k, v) # Look for requests environment configuration and be compatible # with cURL. if verify is True or verify is None: verify = (os.environ.get('REQUESTS_CA_BUNDLE') or os.environ.get('CURL_CA_BUNDLE')) # Merge all the kwargs. proxies = merge_setting(proxies, self.proxies) stream = merge_setting(stream, self.stream) verify = merge_setting(verify, self.verify) cert = merge_setting(cert, self.cert) return {'verify': verify, 'proxies': proxies, 'stream': stream, 'cert': cert} def get_adapter(self, url): """ Returns the appropriate connection adapter for the given URL. :rtype: requests.adapters.BaseAdapter """ for (prefix, adapter) in self.adapters.items(): if url.lower().startswith(prefix.lower()): return adapter # Nothing matches :-/ raise InvalidSchema("No connection adapters were found for {!r}".format(url)) def close(self): """Closes all adapters and as such the session""" for v in self.adapters.values(): v.close() def mount(self, prefix, adapter): """Registers a connection adapter to a prefix. Adapters are sorted in descending order by prefix length. """ self.adapters[prefix] = adapter keys_to_move = [k for k in self.adapters if len(k) < len(prefix)] for key in keys_to_move: self.adapters[key] = self.adapters.pop(key) def __getstate__(self): state = {attr: getattr(self, attr, None) for attr in self.__attrs__} return state def __setstate__(self, state): for attr, value in state.items(): setattr(self, attr, value)
2、请求和响应对象
请求对象
>>> r.request.headers {'Accept-Encoding': 'identity, deflate, compress, gzip', 'Accept': '*/*', 'User-Agent': 'python-requests/0.13.1'}
响应对象
>>> r.headers {'content-length': '56170', 'x-content-type-options': 'nosniff', 'x-cache': 'HIT from cp1006.eqiad.wmnet, MISS from cp1010.eqiad.wmnet', 'content-encoding': 'gzip', 'age': '3080', 'content-language': 'en', 'vary': 'Accept-Encoding,Cookie', 'server': 'Apache', 'last-modified': 'Wed, 13 Jun 2012 01:33:50 GMT', 'connection': 'close', 'cache-control': 'private, s-maxage=0, max-age=0, must-revalidate', 'date': 'Thu, 14 Jun 2012 12:59:39 GMT', 'content-type': 'text/html; charset=UTF-8', 'x-cache-lookup': 'HIT from cp1006.eqiad.wmnet:3128, MISS from cp1010.eqiad.wmnet:80'}
3、准备的请求(prepare request)
from requests import Request, Session s = Session() req = Request('GET', url, data=data headers=headers ) prepped = s.prepare_request(req) # do something with prepped.body # do something with prepped.headers resp = s.send(prepped, stream=stream, verify=verify, proxies=proxies, cert=cert, timeout=timeout ) print(resp.status_code)
4、请求 -- SSL证书认证
Requests 可以为 HTTPS 请求验证 SSL 证书,就像 web 浏览器一样。SSL 验证默认是开启的,如果证书验证失败,Requests 会抛出 SSLError:
禁用SSL证书
>>> requests.get('http://github.com', verify=False) <Response [200]>
你可以为
verify
传入 CA_BUNDLE 文件的路径,或者包含可信任 CA 证书文件的文件夹路径>>> requests.get('https://github.com', verify='/path/to/certfile')
5、请求 -- 设置客户端证书。
你也可以指定一个本地证书用作客户端证书,可以是单个文件(包含密钥和证书)或一个包含两个文件路径的元组:
requests.get( 'https://kennethreitz.org', cert=('/path/client.cert', '/path/client.key')) < Response[200] >
会话对象中的:
s = requests.Session() s.cert = '/path/client.cert'
注意:
本地证书的私有 key 必须是解密状态。目前,Requests 不支持使用加密的 key。
6、CA证书
Requests 默认附带了一套它信任的根证书,来自于 Mozilla trust store。然而它们在每次 Requests 更新时才会更新。这意味着如果你固定使用某一版本的 Requests,你的证书有可能已经 太旧了。
从 Requests 2.4.0 版之后,如果系统中装了 certifi 包,Requests 会试图使用它里边的 证书。这样用户就可以在不修改代码的情况下更新他们的可信任证书。
为了安全起见,我们建议你经常更新 certifi!
7、响应 -- 响应体内容工作流 stream参数
默认情况下,当你进行网络请求后,响应体会立即被下载。你可以通过
stream
参数覆盖这个行为,推迟下载响应体直到访问 Response.content 属性:tarball_url = 'https://github.com/kennethreitz/requests/tarball/master' r = requests.get(tarball_url, stream=True) if int(r.headers['content-length']) < TOO_LONG: content = r.content
你可以进一步使用 Response.iter_content 和 Response.iter_lines 方法来控制工作流,或者以 Response.raw 从底层 urllib3 的
urllib3.HTTPResponse <urllib3.response.HTTPResponse
读取未解码的响应体。如果你在请求中把
stream
设为True
,Requests 无法将连接释放回连接池,除非你 消耗了所有的数据,或者调用了 Response.close。 这样会带来连接效率低下的问题。如果你发现你在使用stream=True
的同时还在部分读取请求的 body(或者完全没有读取 body),那么你就应该考虑使用 with 语句发送请求,这样可以保证请求一定会被关闭:with requests.get('http://httpbin.org/get', stream=True) as r: # 在此处理响应。
8、响应 -- 保持活动状态
注意:只有所有的响应体数据被读取完毕连接才会被释放为连接池;所以确保将
stream
设置为False
或读取Response
对象的content
属性。
9、请求 -- 流式上传文件
Requests支持流式上传,这允许你发送大的数据流或文件而无需先把它们读入内存。要使用流式上传,仅需为你的请求体提供一个类文件对象即可:
with open('massive-body') as f: requests.post('http://some.url/streamed', data=f)
注意:
我们强烈建议你用二进制模式(binary mode)打开文件。这是因为 requests 可能会为你提供 header 中的
Content-Length
,在这种情况下该值会被设为文件的字节数。如果你用文本模式打开文件,就可能碰到错误。
10、请求 -- 块编码请求
def gen():
yield 'hi'
yield 'there'
requests.post('http://some.url/chunked', data=gen())
11、请求 -- POST 多个分块编码的文件
你可以在一个请求中发送多个文件。例如,假设你要上传多个图像文件到一个 HTML 表单,使用一个多文件 field 叫做 "images":
>>> url = 'http://httpbin.org/post' >>> multiple_files = [ ('images', ('foo.png', open('foo.png', 'rb'), 'image/png')), ('images', ('bar.png', open('bar.png', 'rb'), 'image/png'))] >>> r = requests.post(url, files=multiple_files)
注意:
我们强烈建议你用二进制模式(binary mode)打开文件
12、请求 --- 事件挂钩HOOKS
Requests有一个钩子系统,你可以用来操控部分请求过程,或信号事件处理。
>>> requests.get('http://httpbin.org', hooks=dict(response=print_url)) http://httpbin.org <Response [200]>
13、请求 -- 身份认证
1、基本身份认证
许多要求身份认证的web服务都接受 HTTP Basic Auth。这是最简单的一种身份认证,并且 Requests 对这种认证方式的支持是直接开箱即可用
>>> from requests.auth import HTTPBasicAuth >>> requests.get('https://api.github.com/user', auth=HTTPBasicAuth('user', 'pass')) <Response [200]>
2、摘要式身份认证
HTTP 身份认证形式是摘要式身份认证,Requests 对它的支持也是开箱即可用的:
>>> from requests.auth import HTTPDigestAuth >>> url = 'http://httpbin.org/digest-auth/auth/user/pass' >>> requests.get(url, auth=HTTPDigestAuth('user', 'pass')) <Response [200]>
3、OAuth 1 认证
>>> import requests >>> from requests_oauthlib import OAuth1 >>> url = 'https://api.twitter.com/1.1/account/verify_credentials.json' >>> auth = OAuth1('YOUR_APP_KEY', 'YOUR_APP_SECRET', ... 'USER_OAUTH_TOKEN', 'USER_OAUTH_TOKEN_SECRET') >>> requests.get(url, auth=auth)
4、OAuth 2 与 OpenID 连接认证
5、其他身份认证形式
6、新的身份认证形式
14、请求 -- 流式请求
import json import requests r = requests.get('http://httpbin.org/stream/20', stream=True) for line in r.iter_lines(): # filter out keep-alive new lines if line: decoded_line = line.decode('utf-8') print(json.loads(decoded_line))
r = requests.get('http://httpbin.org/stream/20', stream=True) if r.encoding is None: r.encoding = 'utf-8' for line in r.iter_lines(decode_unicode=True): if line: print(json.loads(line))
15、请求 -- 设置代理
import requests proxies = { "http": "http://10.10.1.10:3128", "https": "http://10.10.1.10:1080", } requests.get("http://example.org", proxies=proxies)