一、在urllib2中的使用:
# 一:
# 异常处理,及设置请求次数
# 可添加time时间间隔
import urllib2
def download(url, num_retries=2):
    """Fetch *url* and return the page body, or None on failure.

    On a server error (HTTP 5xx) the request is retried, at most
    *num_retries* additional times.
    """
    print("Downloading:", url)
    try:
        return urllib2.urlopen(url).read()
    except urllib2.URLError as err:
        print("Download error:", err.reason)
        # Only 5xx responses are worth retrying; 4xx errors are permanent.
        if num_retries > 0 and hasattr(err, "code") and 500 <= err.code < 600:
            return download(url, num_retries - 1)
        return None
# 二:在一的基础上添加用户代理
import urllib2
def download(url, uesr_agent="wswp", num_retries=2):
    """Fetch *url* with a custom User-Agent header.

    Retries up to *num_retries* extra times on HTTP 5xx responses.
    Returns the page body, or None on failure.

    (NOTE: the parameter name `uesr_agent` is a typo kept for
    backward compatibility with existing keyword callers.)
    """
    print("Downloading:", url)
    # BUG FIX: header key was "User_agent"; the HTTP header is "User-Agent".
    headers = {"User-Agent": uesr_agent}
    request = urllib2.Request(url, headers=headers)
    try:
        html = urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        print("Download error:", e.reason)
        html = None
        # Retry only on server errors (5xx); client errors won't improve.
        if num_retries > 0 and hasattr(e, "code") and 500 <= e.code < 600:
            # BUG FIX: the original called download(url, num_retries-1),
            # which passed num_retries-1 positionally into the user-agent
            # slot, so retries sent a numeric User-Agent and never
            # decremented the counter.
            return download(url, uesr_agent, num_retries - 1)
    return html
# 三、支持代理
import urllib2
def download(url, uesr_agent="wswp", proxy=None, num_retries=2):
    """Fetch *url* with a custom User-Agent, optionally through *proxy*.

    *proxy* is a proxy URL applied to the scheme of *url*.
    Retries up to *num_retries* extra times on HTTP 5xx responses.
    Returns the page body, or None on failure.

    (NOTE: the parameter name `uesr_agent` is a typo kept for
    backward compatibility with existing keyword callers.)
    """
    # BUG FIX: `urlparse` was used below but never imported anywhere
    # in this file; import it locally so the function is self-contained.
    import urlparse

    print("Downloading:", url)
    # BUG FIX: header key was "User_agent"; the HTTP header is "User-Agent".
    headers = {"User-Agent": uesr_agent}
    request = urllib2.Request(url, headers=headers)
    opener = urllib2.build_opener()
    if proxy:
        proxy_params = {urlparse.urlparse(url).scheme: proxy}
        opener.add_handler(urllib2.ProxyHandler(proxy_params))
    try:
        # BUG FIX: the original called urllib2.urlopen(request), which
        # ignores `opener` entirely — the proxy handler was never used.
        html = opener.open(request).read()
    except urllib2.URLError as e:
        print("Download error:", e.reason)
        html = None
        # Retry only on server errors (5xx); client errors won't improve.
        if num_retries > 0 and hasattr(e, "code") and 500 <= e.code < 600:
            # BUG FIX: the original retry dropped the user agent and the
            # proxy by passing num_retries-1 into the uesr_agent slot.
            return download(url, uesr_agent, proxy, num_retries - 1)
    return html
二、在requests中的使用:
import requests
# Example: the same custom User-Agent + proxy setup using `requests`.
# BUG FIX: the original referenced an undefined name `uesr_agent`
# (NameError); define the user-agent string before using it.
user_agent = "wswp"
# BUG FIX: header key was "User_agent"; the HTTP header is "User-Agent".
headers = {"User-Agent": user_agent}
# Map each URL scheme to the proxy that should handle it.
proxies = {
    "http": "http://127.0.0.1:9999",
    "https": "http://127.0.0.1:8888"
}
response = requests.get("https://www.baidu.com", headers=headers, proxies=proxies)
print(response.text)
更多requests的用法,参考:https://www.cnblogs.com/zhaof/p/6915127.html