# 一、简单的爬虫程序 (Part 1: a simple web crawler program)
import urllib.request
from urllib.error import URLError,HTTPError,ContentTooShortError
def download(url, num=2, user_agnet='wswp'):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        num: How many more times to retry when the server answers with a
            5xx status code.
        user_agnet: Value sent in the ``User-agent`` request header (the
            parameter name keeps its original spelling so existing keyword
            callers continue to work).

    Returns:
        The response body as ``bytes``, or ``None`` if the download failed
        and no retry succeeded.
    """
    print('donwloadiing', url)
    # Set the user agent on an explicit Request object.
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agnet)
    try:
        # Open the Request (not the bare URL) so the header is actually sent,
        # and close the response as soon as the body has been read.
        with urllib.request.urlopen(request) as response:
            html = response.read()
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('downloading', e.reason)
        html = None
        # Only 5xx responses are worth retrying: they signal a server-side
        # problem that may be temporary. Errors without a status code
        # (plain URLError) and 4xx responses are not retried.
        if num > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            # Forward the user agent so retries look like the first attempt.
            return download(url, num - 1, user_agnet)
    return html
# Example run: fetch the Douban home page and print the raw bytes
# (prints None if the download fails).
A = 'https://www.douban.com/'
print(download(A))