import requests
# Minimal reproduction script: fetch the Baidu home page over plain HTTP while
# presenting a desktop-Chrome User-Agent, then print the response object.
url = "http://www.baidu.com/"

# Request headers; the only entry is a browser-like User-Agent string.
headers = {}
headers['User-Agent'] = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
)

# Issue the GET request and show the resulting Response object.
response = requests.get(url, headers=headers)
print(response)
问题1:
这段代码会报出以下错误:
Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 594, in urlopen
self._prepare_proxy(conn)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 805, in _prepare_proxy
conn.connect()
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connection.py", line 344, in connect
ssl_context=context)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\util\ssl_.py", line 344, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\ssl.py", line 412, in wrap_socket
session=session
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\ssl.py", line 853, in _create
self.do_handshake()
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\ssl.py", line 1117, in do_handshake
self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\util\retry.py", line 398, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='www.baidu.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)')))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Administrator/PycharmProjects/newWork/pachong_shizahn/test.py", line 12, in <module>
response = requests.get(url, headers = headers)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 668, in send
history = [resp for resp in gen] if allow_redirects else []
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 668, in <listcomp>
history = [resp for resp in gen] if allow_redirects else []
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 247, in resolve_redirects
**adapter_kwargs
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\adapters.py", line 514, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: HTTPSConnectionPool(host='www.baidu.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)')))
调试:在控制台运行python。
1.当使用http协议获取请求时
requests.get('http://www.baidu.com/')
输出:<Response [502]>,请求没有抛出异常(注意:502是网关错误状态码,只能说明没有出现SSL报错,并不代表请求成功)
2.当使用https协议获取请求时
requests.get('https://www.baidu.com/')
会报出和上面代码一样的错误。但是代码中明明用的是以http开头的URL,而报错堆栈中出现了resolve_redirects,并且最终连接的是443端口,说明带上伪装的UA头之后,请求被服务器重定向到了https地址,随后在SSL证书校验阶段失败。因此去掉UA伪装或者换用其他的UA伪装可以解决问题。
问题2:
设r为requests返回的响应对象。通过r.encoding(根据响应头得到的编码)和r.apparent_encoding(根据网页内容推测出的编码)获取到的编码是一样的,但还是会报编码错误,这种情况可能就要考虑IDE的字符编码设置问题了:
解决方法:以PyCharm为例,按如下路径进行设置:File→Settings→Editor→File Encodings→Project Encoding。
设置之后,问题成功解决。