本次我们要分析的Scrapy源码是Request类,它可以通过如下方式导入:
from scrapytest.http import Request
首先把Request的源码附上:
class Request(object_ref):
    """An HTTP request description: URL, method, headers, body and metadata.

    ``url`` and ``body`` are exposed as properties whose setters are wrapped
    by ``obsolete_setter``; to obtain a request with a different URL or body,
    build a new one with :meth:`replace`.
    """

    def __init__(self, url, callback=None, method='GET', headers=None, body=None,
                 cookies=None, meta=None, encoding='utf-8', priority=0,
                 dont_filter=False, errback=None):
        """Initialise the request.

        :param url: request URL; must be a string and contain a ``:``.
        :param callback: callable stored as ``self.callback``.
        :param method: HTTP method; converted with ``str()`` and upper-cased.
        :param headers: initial headers, wrapped in ``Headers``.
        :param body: request body; ``None`` becomes ``b''``.
        :param cookies: cookies container; a falsy value becomes ``{}``.
        :param meta: optional mapping; shallow-copied when truthy.
        :param encoding: encoding used for the URL, body and headers.
        :param priority: integer priority of the request.
        :param dont_filter: flag stored as ``self.dont_filter``.
        :param errback: error callback; only allowed together with ``callback``.
        :raises TypeError: if ``url`` is not a string.
        :raises ValueError: if the processed URL contains no ``:``.
        :raises AssertionError: if ``priority`` is not an ``int`` or an
            ``errback`` is given without a ``callback``.
        """
        # Must be set first: _set_url() and _set_body() read self.encoding.
        self._encoding = encoding
        self.method = str(method).upper()  # e.g. GET, POST
        self._set_url(url)
        self._set_body(body)

        # NOTE: assert statements are removed when Python runs with -O, so the
        # two checks below are not enforced in optimised mode. They are kept
        # as asserts so that callers still see AssertionError.
        assert isinstance(priority, int), "Request priority not an integer: %r" % priority
        self.priority = priority

        assert callback or not errback, "Cannot use errback without a callback"
        self.callback = callback
        self.errback = errback

        self.cookies = cookies or {}
        self.headers = Headers(headers or {}, encoding=encoding)
        self.dont_filter = dont_filter

        # Copy the caller's mapping; when no meta is given the dict is
        # created lazily by the ``meta`` property.
        self._meta = dict(meta) if meta else None

    @property
    def meta(self):
        """Return the metadata dict, creating an empty one on first access."""
        if self._meta is None:
            self._meta = {}
        return self._meta

    def _get_url(self):
        """Return the stored (already processed) URL."""
        return self._url

    def _set_url(self, url):
        """Validate, encode and store the request URL.

        The URL is only stored once every check has passed, so a rejected
        value never replaces a previously stored URL.

        :raises TypeError: if ``url`` is not a string type.
        :raises ValueError: if the processed URL contains no ``:``.
        """
        if not isinstance(url, six.string_types):
            raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)

        # safe_url_string() URL-encodes the value; escape_ajax() then handles
        # "#!" style fragments (per the original author's notes).
        escaped = escape_ajax(safe_url_string(url, self.encoding))

        # A scheme such as http:// or https:// is required; the check itself
        # only looks for a colon.
        if ':' not in escaped:
            raise ValueError('Missing scheme in request url: %s' % escaped)
        self._url = escaped

    # The setter is wrapped by obsolete_setter (defined elsewhere); per the
    # original author's note the URL should be changed through replace().
    url = property(_get_url, obsolete_setter(_set_url, 'url'))

    def _get_body(self):
        """Return the stored body bytes."""
        return self._body

    def _set_body(self, body):
        """Store ``body`` as bytes; ``None`` is stored as ``b''``."""
        if body is None:
            # b'' is an ordinary str on Python 2.7 and bytes on Python 3.
            self._body = b''
        else:
            # Encode text to bytes using the request encoding.
            self._body = to_bytes(body, self.encoding)

    # As with ``url``: the setter is wrapped by obsolete_setter, and the body
    # should be changed through replace().
    body = property(_get_body, obsolete_setter(_set_body, 'body'))

    @property
    def encoding(self):
        """Return the encoding given at construction time."""
        return self._encoding

    def __str__(self):
        """Return ``<METHOD url>``."""
        return "<%s %s>" % (self.method, self.url)

    __repr__ = __str__

    def copy(self):
        """Return a copy of this Request"""
        return self.replace()

    def replace(self, *args, **kwargs):
        """Create a new Request with the same attributes except for those
        given new values.
        """
        # Attributes not supplied by the caller are taken from this request.
        for x in ['url', 'method', 'headers', 'body', 'cookies', 'meta',
                  'encoding', 'priority', 'dont_filter', 'callback', 'errback']:
            kwargs.setdefault(x, getattr(self, x))
        # An optional ``cls`` keyword selects the class of the new object.
        cls = kwargs.pop('cls', self.__class__)
        return cls(*args, **kwargs)
下面我们来逐个函数分析:
def __init__(self, url, callback=None, method='GET', headers=None, body=None,
cookie