#看过源码的感觉就是,urllib2重构了很多层代码。。很多我们用不到。。。
#
#
在微博上闲逛,然后看到知道创宇的余弦大神说“知道创宇研发技能表v3.0”马上就要面世,所以去官网找了找,没找到。。
-
Python
-
urllib2
-
打开请求响应调试
-
编辑urllib2的do_open里的h.set_debuglevel
-
改为h.set_debuglevel(1),这时可以清晰看到请求响应数据,包括https
-
-
-
我觉得吧,直接改源码是办法,但直接改函数内部代码也不是办法,所以就看了一下urllib2源码
其实可以考虑把AbstractHTTPHandler.__init__(self, debuglevel=0)的默认值改为1。 原因是防坑。。。
urllib2 模块里定义了全局变量_opener(初始为 None);引入后如果没有使用过,首次调用 urlopen 时才会构建它
global _opener
如果存在_opener,则用_opener.open(url, data, timeout)
_opener 可以由build_opener()构建,返回类
OpenerDirector的实例.
default_classes默认有
[ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler, FTPHandler, FileHandler, HTTPErrorProcessor]几个类
如果
hasattr(httplib, 'HTTPS')则追加
HTTPSHandler类至
default_classes
HTTPHandler和
HTTPSHandler继承自
AbstractHTTPHandler,可以设定调试级别
将
default_classes
所有的Handlers,依次实例,调用
OpenerDirector.add_handler(handler)
添加到opener中。
OpenerDirector.add_handler(handler)
dir()获取
handler所有的属性
,忽略
["redirect_request", "do_open", "proxy_open"]
根据_下划线分割为(
请求类型 /
操作) Ex: http_error, http_request, http_response, http_open
依次添加到
OpenerDirector.handle_open(字典格式,key值有http, https, ftp, file等,value是一个列表,存放多个handler)
之前疑问为什么要用bisect.insort添加至列表——其实是利用 BaseHandler.__lt__(按 handler_order 比较)保序插入,保证多个 handler 按 handler_order 优先级依次被调用,并非仅仅是排序....
urllib2.urlopen就是调用_opener.
open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT)
创建
Request(fullurl, data)对象 (fullurl可以是url,或者是Request实例)
Request.get_type() 是根据url返回http/https等请求类型,赋值给
protocol
# 处理request,获取response
获取
OpenerDirector.handle_open
中http的类列表(handlers),依次执行
handler.http_request(request)获取
request.(其实列表里面只有一个handler)
OpenerDirector._open(req, data=None)
OpenerDirector._call_chain(chain, kind, meth_name, *args) chain就是指
OpenerDirector.handle_open字典,
kind指请求类型:
http/https等,
method就是要执行的操作
尝试获取
OpenerDirector._call_chain(...): OpenerDirector.handle_open.get("default", []),循环执行handles依次执行函数
default_open,
并返回
response
再尝试
OpenerDirector._call_chain(...): request.get_type()本身的请求类型,函数名为:"
http_open"
。
返回
response
最后尝试
OpenerDirector._call_chain(...):
请求类型为
"unknown",函数名为"
unknown_open"。返回
response
# 处理response
获取
OpenerDirector.handle_open中
http的类列表(handlers),依次执行
handler.http_response(request, response)
获取response
.(其实列表里面只有一个handler)
End: open(). return response
"""
>>>
import urllib2 as url
>>> url
>>> dir(url)
['AbstractBasicAuthHandler', 'AbstractDigestAuthHandler', 'AbstractHTTPHandler', 'BaseHandler', 'CacheFTPHandler', 'FTPHandler', 'FileHandler', 'HTTPBasicAuthHandler', 'HTTPCookieProcessor', 'HTTPDefaultErrorHandler', 'HTTPDigestAuthHandler', 'HTTPError', 'HTTPErrorProcessor', 'HTTPHandler', 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', 'HTTPRedirectHandler', 'HTTPSHandler', 'OpenerDirector', 'ProxyBasicAuthHandler', 'ProxyDigestAuthHandler', 'ProxyHandler', 'Request', 'StringIO', 'URLError', 'UnknownHandler', '__builtins__', '__doc__', '__file__', '__name__', '__package__', '__version__', '_cut_port_re', '_opener', '_parse_proxy', '_safe_gethostbyname', 'addinfourl', 'base64', 'bisect', 'build_opener', 'ftpwrapper', 'getproxies', 'hashlib', 'httplib', 'install_opener', 'localhost', 'mimetools', 'os', 'parse_http_list', 'parse_keqv_list', 'posixpath', 'proxy_bypass', 'quote', 'random', 'randombytes', 're', 'request_host', 'socket', 'splitattr', 'splithost', 'splitpasswd', 'splitport', 'splittag', 'splittype', 'splituser', 'splitvalue', 'sys', 'time', 'unquote', 'unwrap', 'url2pathname', 'urlopen', 'urlparse']
>>> url._opener
>>> url.build_opener
>>>
opener = url.build_opener()
>>> opener
>>> dir(opener)
['__doc__', '__init__', '__module__', '_call_chain', '_open', 'add_handler', 'addheaders', 'close', 'error', 'handle_error', 'handle_open', 'handlers', 'open', 'process_request', 'process_response']
>>> opener.handle_open
{'unknown': [], 'http': [], 'https': [], 'file': [], 'ftp': []}
>>> opener.handle_open['http']
[]
>>> dir(opener.handle_open['http'][0])
['__doc__', '__init__', '__lt__', '__module__', '_debuglevel', 'add_parent', 'close', 'do_open', 'do_request_', 'handler_order', 'http_open', 'http_request', 'parent', 'set_http_debuglevel']
>>>
>>> opener.handle_open['http'][0].set_http_debuglevel
>
>>> opener.handle_open['https'][0].set_http_debuglevel
>
>>>
opener.handle_open['https'][0].set_http_debuglevel(1)
>>>
opener.handle_open['http'][0].set_http_debuglevel(1)
>>>
>>>
# >>> HTTPHandler 一定会存在,HTTPSHandler在httplib支持https时才会创建. 都继承自:AbstractHTTPHandler
# >>> 关键代码是 HTTPHandler.do_open
# >>> httplib.HTTPConnection.set_debuglevel(level)
# >>> httplib.HTTPConnection.request(method, url, body=None, headers={})
# >>> httplib.HTTPConnection._send_request(method, url, body, headers)
# >>> httplib.HTTPConnection.endheaders(message_body=None)
# >>> httplib.HTTPConnection._send_output(message_body=None)
# >>> httplib.HTTPConnection.send(data) # 其实就是body
# >>> httplib.HTTPConnection.connect() # 创建socket
# >>> socket.create_connection(address, timeout= , source_address=None)
# >>> self.sock = socket.create_connection((self.host,self.port), self.timeout, self.source_address)
# >>> print send body # print 发送的数据包
# >>> self.sock.sendall(data)
# >>> httplib.HTTPConnection.getresponse()
# >>> HTTPResponse(sock, debuglevel=0, strict=0, method=None, buffering=False)
# >>> response = self.response_class(*args, **kwds) response_class就是: HTTPResponse
# >>> HTTPResponse.begin()
# >>> HTTPResponse._read_status()
# >>> print "reply:", repr(line) # print 接受到的数据 'HTTP/1.1 200 OK\r\n'
# >>> return (version, status, reason) # Ex: (HTTP/1.0, 200, OK)
# >>> 如果status == 100 (httplib.CONTINUE):
# >>> While True:
# >>> print "header:", skip # print 接收到的头信息
# >>> msg = HTTPMessage(self.fp, 0)
# >>> for hdr in HTTPMessage.headers:
# >>> print "header:", hdr,
# >>> HTTPMessage._check_close() #检查头信息:connection是否关闭
# >>> httplib.HTTPConnection.close()
# >>> self.sock.close()
# >>> HTTPResponse.close()
# >>> opener.handle_open['file'][0].set_http_debuglevel
# Traceback (most recent call last):
# File "", line 1, in
# AttributeError: FileHandler instance has no attribute 'set_http_debuglevel'
# >>> opener.handle_open['ftp'][0].set_http_debuglevel
# Traceback (most recent call last):
# File "", line 1, in
# AttributeError: FTPHandler instance has no attribute 'set_http_debuglevel'
# >>> opener.handle_open['ftp'][0].set_http_debuglevel(1)
# Traceback (most recent call last):
# File "", line 1, in
# AttributeError: FTPHandler instance has no attribute 'set_http_debuglevel'
>>>
url.install_opener(opener)
>>>
url.urlopen("http://www.baidu.com")
send: 'GET / HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: www.baidu.com\r\nConnection: close\r\nUser-Agent: Python-urllib/2.7\r\n\r\n'
reply: 'HTTP/1.1 200 OK\r\n'
header: Date: Wed, 12 Aug 2015 05:40:47 GMT
header: Content-Type: text/html; charset=utf-8
header: Transfer-Encoding: chunked
header: Connection: Close
header: Vary: Accept-Encoding
header: Set-Cookie: BAIDUID=8E25E0CC918EE71717BE5AA3D3472F62:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
header: Set-Cookie: BIDUPSID=8E25E0CC918EE71717BE5AA3D3472F62; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
header: Set-Cookie: PSTM=1439358047; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
header: Set-Cookie: BDSVRTM=0; path=/
header: Set-Cookie: BD_HOME=0; path=/
header: Set-Cookie: H_PS_PSSID=16229_16415_1431_13520_12825_14429_12868_16520_16799_16331_16662_16427_16514_15243_11854_13932_16720; path=/; domain=.baidu.com
header: P3P: CP=" OTI DSP COR IVA OUR IND COM "
header: Cache-Control: private
header: Cxy_all: baidu+ac7f221e1b28f124d4a8cbfc52852314
header: Expires: Wed, 12 Aug 2015 05:40:46 GMT
header: X-Powered-By: HPHP
header: Server: BWS/1.1
header: X-UA-Compatible: IE=Edge,chrome=1
header: BDPAGETYPE: 1
header: BDQID: 0xdc7ecbce0008572d
header: BDUSERID: 0
>
>>>