python3.5.2,环境是pycharm
去掉 spider_main 中的 try-except 后,发现报错如下:
Traceback (most recent call last):
File "S:/Python Learning/baike_spider/spider_main.py", line 43, in <module>
obj_spider.craw(root_url) # 启动爬虫
File "S:/Python Learning/baike_spider/spider_main.py", line 26, in craw
html_cont = self.downloader.download(new_url)
File "S:\Python Learning\baike_spider\html_downloader.py", line 10, in download
resp = request.urlopen(url)
File "E:\Tools\Python\Python3.5\lib\urllib\request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "E:\Tools\Python\Python3.5\lib\urllib\request.py", line 466, in open
response = self._open(req, data)
File "E:\Tools\Python\Python3.5\lib\urllib\request.py", line 484, in _open
'_open', req)
File "E:\Tools\Python\Python3.5\lib\urllib\request.py", line 444, in _call_chain
result = func(*args)
File "E:\Tools\Python\Python3.5\lib\urllib\request.py", line 1297, in https_open
context=self._context, check_hostname=self._check_hostname)
File "E:\Tools\Python\Python3.5\lib\urllib\request.py", line 1254, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "E:\Tools\Python\Python3.5\lib\http\client.py", line 1107, in request
self._send_request(method, url, body, headers)
File "E:\Tools\Python\Python3.5\lib\http\client.py", line 1142, in _send_request
self.putrequest(method, url, **skips)
File "E:\Tools\Python\Python3.5\lib\http\client.py", line 984, in putrequest
self._output(request.encode('ascii'))
UnicodeEncodeError: 'ascii' codec can't encode characters in position 10-12: ordinal not in range(128)
html_downloader 模块代码:
# coding:utf-8
from urllib import request
class HtmlDownloader(object):
    """Download the raw body of a web page with ``urllib.request``."""

    # Characters that are left untouched when escaping a URL: the RFC 3986
    # reserved set plus "%" (so escapes that are already present are not
    # escaped a second time) and "~".
    _URL_SAFE_CHARS = "!#$%&'()*+,/:;=?@[]~"

    @staticmethod
    def _quote_url(url):
        """Return *url* with non-ASCII characters percent-encoded as UTF-8.

        ``http.client`` encodes the request line as ASCII, so a URL whose
        path or query contains e.g. Chinese characters makes ``urlopen``
        raise ``UnicodeEncodeError``. Escaping those characters first avoids
        that. URLs that are already pure ASCII come back unchanged (apart
        from characters such as spaces that are not legal in a URL).

        Non-ASCII host names (IDN) are not converted here.
        """
        # Local import so the class does not depend on a file-level import
        # that may not exist.
        from urllib.parse import quote

        return quote(url, safe=HtmlDownloader._URL_SAFE_CHARS)

    def download(self, url):
        """Fetch *url* and return the response body as ``bytes``.

        Returns ``None`` when *url* is ``None`` or when the server answers
        with a status code other than 200. Errors raised by ``urlopen``
        propagate to the caller.
        """
        if url is None:
            return None

        # Only plain strings are escaped; anything else urlopen accepts
        # (e.g. a Request object) is passed through as-is.
        if isinstance(url, str):
            url = self._quote_url(url)

        # The with-block closes the connection even if read() raises.
        with request.urlopen(url) as resp:
            if resp.getcode() != 200:
                return None
            return resp.read()
查了很多但越改越乱,求解谢谢!