Requests 是一个非常简洁优雅的python HTTP库,老版本基于urllib和urllib2,由大神 Kenneth Reitz 开发,是广为推崇的Pythonic 代码。
本文基于v0.2.0版本进行分析,v0.2.0版比较简单,核心代码400多行。KR只是简单的对urllib和urllib2进行封装。
README
README 简单说明了 KR 造轮子的原因:“Most existing Python modules for dealing HTTP requests are insane.”
用法看看测试代码就会了,我把谷歌的网址替换成了百度。
# -*- coding: utf-8 -*-
import unittest
import requests
class RequestsTestSuite(unittest.TestCase):
    """Requests test cases.

    The ``*_200_OK_*`` tests issue GET/HEAD requests to baidu.com over HTTP
    and HTTPS and expect status code 200; ``test_invalid_url`` expects
    ``requests.get`` to raise ``ValueError`` for a string that is not a URL.
    """

    def setUp(self):
        """No fixtures are required."""
        pass

    def tearDown(self):
        """Teardown."""
        pass

    def test_invalid_url(self):
        self.assertRaises(ValueError, requests.get, 'hiwpefhipowhefopw')

    def test_HTTP_200_OK_GET(self):
        r = requests.get('http://baidu.com')
        self.assertEqual(r.status_code, 200)

    def test_HTTPS_200_OK_GET(self):
        r = requests.get('https://baidu.com')
        self.assertEqual(r.status_code, 200)

    def test_HTTP_200_OK_HEAD(self):
        r = requests.head('http://baidu.com')
        self.assertEqual(r.status_code, 200)

    def test_HTTPS_200_OK_HEAD(self):
        r = requests.head('https://baidu.com')
        self.assertEqual(r.status_code, 200)

    # Disabled test, kept commented out as in the original suite.
    # def test_AUTH_HTTPS_200_OK_GET(self):
    #     auth = requests.AuthObject('requeststest', 'requeststest')
    #     url = 'https://convore.com/api/account/verify.json'
    #     r = requests.get(url, auth=auth)
    #     self.assertEqual(r.status_code, 200)


if __name__ == '__main__':
    unittest.main()
代码结构
代码在 core.py 文件中,_Request 是对 urllib2.Request 的一个简单的包裹,整个代码的核心就是 Request 这个类。
import urllib
import urllib2
# Module metadata: dunder variables of the form __name__
__title__ = 'requests'
__version__ = '0.2.0'
__build__ = 0x000200
__author__ = 'Kenneth Reitz'
__license__ = 'ISC'
__copyright__ = 'Copyright 2011 Kenneth Reitz'
# (url, authobject) pairs appended by add_autoauth() and scanned by
# _get_autoauth().
AUTOAUTHS = []
class _Request(urllib2.Request):
    """Hidden wrapper around the urllib2.Request object. Allows for manual
    setting of HTTP methods.

    :param method: (optional) HTTP verb to report from :meth:`get_method`.
        When falsy, ``urllib2.Request.get_method`` decides.

    All other parameters are forwarded unchanged to
    ``urllib2.Request.__init__``.
    """

    def __init__(self,
                 url,
                 data=None,
                 headers=None,
                 origin_req_host=None,
                 unverifiable=False,
                 method=None):
        # Use a fresh dict per call rather than a shared mutable default.
        if headers is None:
            headers = {}
        urllib2.Request.__init__(self, url, data, headers, origin_req_host,
                                 unverifiable)
        self.method = method

    def get_method(self):
        """Return the explicit method if one was given, else urllib2's."""
        return self.method or urllib2.Request.get_method(self)
class Request(object):
class Response(object):
class AuthObject(object):
def get(url, params={}, headers={}, auth=None):
def head(url, params={}, headers={}, auth=None):
def post(url, data={}, headers={}, auth=None):
def put(url, data='', headers={}, auth=None):
def delete(url, params={}, headers={}, auth=None):
def add_autoauth(url, authobject):
def _detect_auth(url, auth):
def _get_autoauth(url):
# Custom exceptions
class RequestException(Exception):
    """There was an ambiguous exception that occurred while handling your request."""


class AuthenticationError(RequestException):
    """The authentication credentials provided were invalid."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class InvalidMethod(RequestException):
    """An inappropriate method was attempted."""
Request
Python 会调用 __init__ 方法对类实例进行初始化操作,这里感觉可以把 url、headers 这些属性暴露出来:__init__(url=None, headers=dict(), …)。
__setattr__方法会在对一个属性设置值的时候,被调用到,这里重写,对 method 的取值进行了限制。
class Request(object):
    """State for one HTTP request plus the slot that receives its response.

    Assigning ``method`` is validated against :attr:`_METHODS`; falsy values
    such as ``None`` are accepted so the attribute can be left unset.
    """

    _METHODS = ('GET', 'HEAD', 'PUT', 'POST', 'DELETE')

    def __init__(self):
        self.url = None
        self.headers = dict()
        self.method = None
        self.params = {}
        self.data = {}
        self.response = Response()
        self.auth = None
        self.sent = False

    def __repr__(self):
        """Return ``<Request [METHOD]>``, or a generic tag if unset."""
        # Only a missing ``method`` attribute is expected here, so catch
        # exactly that instead of swallowing every exception.
        try:
            return '<Request [%s]>' % (self.method,)
        except AttributeError:
            return '<Request object>'

    def __setattr__(self, name, value):
        """Set an attribute, rejecting unsupported ``method`` values.

        :raises InvalidMethod: if a truthy ``method`` is not in ``_METHODS``.
        """
        if name == 'method' and value and value not in self._METHODS:
            raise InvalidMethod()
        object.__setattr__(self, name, value)

    def _checks(self):
        """Validate the request before sending.

        :raises URLRequired: if ``url`` is empty or None.
        """
        if not self.url:
            raise URLRequired()
urllib2.urlopen() 函数不支持验证、cookie 或者其它 HTTP 高级功能。要支持这些功能,必须使用 build_opener() 函数创建自定义 Opener 对象。
def _get_opener(self):
    """Return the callable used to open this request.

    :returns: ``urllib2.urlopen`` when ``self.auth`` is falsy; otherwise the
        ``open`` method of an opener built with an HTTP Basic Auth handler
        holding ``self.auth``'s credentials for ``self.url``.
    """
    if not self.auth:
        return urllib2.urlopen

    # Password manager; realm None registers the credentials as the default
    # for this URL.
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, self.url,
                              self.auth.username, self.auth.password)
    basic_auth = urllib2.HTTPBasicAuthHandler(password_mgr)
    # Hand back the opener's bound ``open`` so callers use it like urlopen.
    return urllib2.build_opener(basic_auth).open
作者按照方法的不同,分别进行了处理,这里对 response 的赋值操作应该可以抽取出来。
def send(self, anyway=False):
    """Send the request and record the outcome on ``self.response``.

    :param anyway: (optional) Re-send even if this request was already sent.
    :returns: True when the server answered without an HTTP error; False on
        ``urllib2.HTTPError``, when no method is set, or when the request
        was already sent and ``anyway`` is false.
    :raises URLRequired: propagated from ``_checks()`` when no URL is set.
    """
    # URL validation; the exception is deliberately left to propagate.
    self._checks()

    # Already sent and no re-send requested: leave ``self.sent`` untouched so
    # a skipped call cannot flip the flag back to False.
    if self.sent and not anyway:
        return False

    success = False
    req = self._build_request()
    if req is not None:
        opener = self._get_opener()
        try:
            resp = opener(req)
        except urllib2.HTTPError as why:
            self.response.status_code = why.code
        else:
            try:
                self.response.status_code = resp.code
                self.response.headers = resp.info().dict
                # HEAD and DELETE bodies are not read.
                if self.method not in ('HEAD', 'DELETE'):
                    self.response.content = resp.read()
            finally:
                # Always release the underlying connection.
                resp.close()
            success = True

    self.sent = success
    return success

def _build_request(self):
    """Return a ``_Request`` for the current method, or None if unset."""
    if self.method in ('GET', 'HEAD', 'DELETE'):
        # Dict params become a query string, e.g. id=1&name=xx.
        params = self.params
        if isinstance(params, dict):
            params = urllib.urlencode(params)
        url = self.url
        if params:
            # Avoid a dangling '?' and respect an existing query string.
            separator = '&' if '?' in url else '?'
            url = '%s%s%s' % (url, separator, params)
        req = _Request(url, method=self.method)
    elif self.method in ('PUT', 'POST'):
        data = self.data
        # Only POST form-encodes dict data; PUT sends the payload as given.
        if self.method == 'POST' and isinstance(data, dict):
            data = urllib.urlencode(data)
        req = _Request(self.url, data=data, method=self.method)
    else:
        return None

    # Copy headers through add_header() rather than aliasing the caller's
    # dict, so urllib2 cannot mutate it and header names are normalised.
    for name, value in (self.headers or {}).items():
        req.add_header(name, value)
    return req
其他
class Response(object):
    """The :class:`Response` object. All :class:`Request` objects contain a
    :class:`Request.response <response>` attribute, which is an instance of
    this class.
    """

    def __init__(self):
        self.content = None
        self.status_code = None
        self.headers = dict()

    def __repr__(self):
        """Return ``<Response [STATUS]>``, or a generic tag if unset."""
        # Only a missing ``status_code`` attribute is expected here, so catch
        # exactly that instead of swallowing every exception.
        try:
            return '<Response [%s]>' % (self.status_code,)
        except AttributeError:
            return '<Response object>'
class AuthObject(object):
    """The :class:`AuthObject` is a simple HTTP Authentication token. When
    given to a Requests function, it enables Basic HTTP Authentication for that
    Request. You can also enable Authorization for domain realms with AutoAuth.
    See AutoAuth for more details.

    :param username: Username to authenticate with.
    :param password: Password for given username.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password
def get(url, params=None, headers=None, auth=None):
    """Sends a GET request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary of GET Parameters to send with the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param auth: (optional) AuthObject to enable Basic HTTP Auth.
    """
    r = Request()
    r.method = 'GET'
    r.url = url
    # Fresh containers per call instead of shared mutable defaults.
    r.params = {} if params is None else params
    r.headers = {} if headers is None else headers
    r.auth = _detect_auth(url, auth)
    r.send()
    return r.response
def head(url, params=None, headers=None, auth=None):
    """Sends a HEAD request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary of GET Parameters to send with the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param auth: (optional) AuthObject to enable Basic HTTP Auth.
    """
    r = Request()
    r.method = 'HEAD'
    r.url = url
    # Fresh containers per call instead of shared mutable defaults.
    r.params = {} if params is None else params
    r.headers = {} if headers is None else headers
    r.auth = _detect_auth(url, auth)
    r.send()
    return r.response
def post(url, data=None, headers=None, auth=None):
    """Sends a POST request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary of POST Data to send with the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param auth: (optional) AuthObject to enable Basic HTTP Auth.
    """
    r = Request()
    r.url = url
    r.method = 'POST'
    # Fresh containers per call instead of shared mutable defaults.
    r.data = {} if data is None else data
    r.headers = {} if headers is None else headers
    r.auth = _detect_auth(url, auth)
    r.send()
    return r.response
def put(url, data='', headers=None, auth=None):
    """Sends a PUT request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Bytes of PUT Data to send with the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param auth: (optional) AuthObject to enable Basic HTTP Auth.
    """
    r = Request()
    r.url = url
    r.method = 'PUT'
    r.data = data
    # Fresh dict per call instead of a shared mutable default.
    r.headers = {} if headers is None else headers
    r.auth = _detect_auth(url, auth)
    r.send()
    return r.response
def delete(url, params=None, headers=None, auth=None):
    """Sends a DELETE request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary of GET Parameters to send with the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param auth: (optional) AuthObject to enable Basic HTTP Auth.
    """
    r = Request()
    r.url = url
    r.method = 'DELETE'
    # Forward ``params`` to the request; previously the argument was
    # accepted but silently dropped.
    r.params = {} if params is None else params
    # Fresh dict per call instead of a shared mutable default.
    r.headers = {} if headers is None else headers
    r.auth = _detect_auth(url, auth)
    r.send()
    return r.response
def add_autoauth(url, authobject):
    """Registers given AuthObject to given URL domain for auto-activation.

    Once a URL is registered with an AuthObject, the configured HTTP
    Authentication will be used for all requests with URLs containing the given
    URL string.

    Example: ::

        >>> c_auth = requests.AuthObject('kennethreitz', 'xxxxxxx')
        >>> requests.add_autoauth('https://convore.com/api/', c_auth)
        >>> r = requests.get('https://convore.com/api/account/verify.json')
        # Automatically HTTP Authenticated! Wh00t!

    :param url: Base URL for given AuthObject to auto-activate for.
    :param authobject: AuthObject to auto-activate.
    """
    # Appending mutates the module-level list in place, so no ``global``
    # declaration is required.
    AUTOAUTHS.append((url, authobject))
def _detect_auth(url, auth):
    """Returns the given AuthObject if truthy, otherwise the AuthObject
    registered for the given url (or None if there is none)."""
    if auth:
        return auth
    return _get_autoauth(url)
def _get_autoauth(url):
    """Returns registered AuthObject for given url if available.

    The first registered entry whose URL string occurs in ``url`` wins;
    returns None when nothing matches.
    """
    # NOTE(review): this is a substring test, so a registered URL matches
    # anywhere inside ``url`` (including its query string), not only as a
    # prefix. Confirm whether prefix matching was intended.
    for autoauth_url, auth in AUTOAUTHS:
        if autoauth_url in url:
            return auth
    return None
# Custom exceptions
class RequestException(Exception):
    """There was an ambiguous exception that occurred while handling your request."""


class AuthenticationError(RequestException):
    """The authentication credentials provided were invalid."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class InvalidMethod(RequestException):
    """An inappropriate method was attempted."""