import urllib3
import json
# Helper: write text content to a file.
def write_to_file(filename, html):
    """Write *html* text to *filename* as UTF-8.

    Uses a context manager so the file handle is closed even if the
    write raises (the original left the handle open on failure).
    """
    with open(filename, "w", encoding="utf8") as f:
        f.write(html)
# Helper: decode a response body, echo it, and save it to a local file.
def load_page(filename, res):
    """Decode *res* (a urllib3 response) as UTF-8, print it, and save it."""
    page_text = res.data.decode("utf-8")
    print(page_text)
    write_to_file(filename, page_text)
# Example 1: basic HTTP GET request.
def send_url():
    """Fetch the Baidu homepage and save it to send_url.html."""
    target = "http://www.baidu.com"
    pool = urllib3.PoolManager()
    response = pool.request("GET", target)
    print("status:%d" % response.status)
    load_page("send_url.html", response)
# Example 2: GET request with custom headers.
def send_url_with_headers():
    """Send a GET request carrying a custom header and save the response.

    httpbin.org/get echoes the request back, so the custom header is
    visible in the saved page. (Removed a dead assignment of the Baidu
    URL that was immediately overwritten in the original.)
    """
    urlstr = "http://httpbin.org/get"
    headers = {
        "x-something": "value"
    }
    httpMgr = urllib3.PoolManager()
    res = httpMgr.request("GET", urlstr, headers=headers)
    print("status:%d" % res.status)
    load_page("send_url_with_headers.html", res)
# Example 3: GET request with query parameters (1).
def send_get_with_param001():
    """Send a GET request whose query string is built from a dict.

    urllib3 URL-encodes ``fields`` into the query string for GET
    requests. Removed a stray trailing semicolon and added the status
    print for consistency with the other examples.
    """
    urlstr = "http://httpbin.org/get"
    param = {
        'arg1': 'value1',
        'arg2': 'value2'
    }
    httpMgr = urllib3.PoolManager()
    res = httpMgr.request("GET", urlstr, fields=param)
    print("status:%d" % res.status)
    load_page("send_get_with_param001.html", res)
# Example 4: GET request with query parameters — Bing search.
def send_get_with_param002():
    """Search Bing for a phrase and save the result page.

    Fixes two bugs from the original: the HTTP verb was misspelled
    "GER" (invalid method), and the header key was 'url-Agent' instead
    of the standard 'User-Agent', so the browser UA was never sent.
    """
    urlstr = "https://cn.bing.com/search"
    word = {"q": "Python网络爬虫"}
    # Browser-like User-Agent so the search engine serves a normal page.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0 Win64 x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'}
    httpMgr = urllib3.PoolManager()
    res = httpMgr.request("GET", urlstr, headers=headers, fields=word)
    load_page("send_get_with_param002.html", res)
# Example 5: POST request with form fields.
def send_post_with_field():
    """POST a dict of form fields and save the echoed response.

    For POST, urllib3 sends ``fields`` as the request body
    (multipart/form-data by default).
    """
    endpoint = "http://httpbin.org/post"
    form_fields = {
        "arg1": "value1",
        "arg2": "value2"
    }
    pool = urllib3.PoolManager()
    response = pool.request("POST", endpoint, fields=form_fields)
    print("status:%d" % response.status)
    load_page("send_post_with_field.html", response)
# Example 6: POST request with a raw JSON body.
def send_post_with_body():
    """POST a JSON-encoded body and save the echoed response.

    Adds the Content-Type header the original omitted: without it the
    server cannot know the raw body is JSON (httpbin would leave the
    echoed ``json`` field null).
    """
    urlstr = "http://httpbin.org/post"
    body = {
        "arg1": "value1",
        "arg2": "value2"
    }
    encode_data = json.dumps(body).encode("utf-8")
    httpMgr = urllib3.PoolManager()
    res = httpMgr.request("POST", urlstr, body=encode_data,
                          headers={"Content-Type": "application/json"})
    load_page("send_post_with_body.html", res)
# Example 7: POST a file's content as a multipart upload.
def send_post_with_file():
    """Read a local text file and upload it via multipart POST.

    The file is read with an explicit UTF-8 encoding (the original
    relied on the platform default, which breaks on non-ASCII content
    under e.g. a GBK locale). Raises FileNotFoundError if e.txt is
    missing.
    """
    with open("e.txt", encoding="utf-8") as fp:
        file_data = fp.read()
    urlstr = "http://httpbin.org/post"
    httpMgr = urllib3.PoolManager()
    # (name, content, mime-type) tuple marks this field as a file upload.
    res = httpMgr.request("POST", urlstr,
                          fields={
                              "filefields": ("e.txt", file_data, "text/plain")
                          })
    load_page("send_post_with_file.html", res)
# Example 8: request through an HTTP proxy.
def send_url_with_proxy():
    """Fetch httpbin.org/ip through a proxy and save the response.

    The original caught ``NewConnectionErroe`` (misspelled), which
    would itself raise AttributeError; fixed to NewConnectionError and
    also catches MaxRetryError, which PoolManager/ProxyManager raises
    once retries are exhausted on a dead proxy.
    """
    try:
        urlstr = "http://httpbin.org/ip"
        proxy_httpMgr = urllib3.ProxyManager("http://50.233.137.33:80",
                                             headers={
                                                 "connection": "keep-alive"
                                             })
        res = proxy_httpMgr.request("GET", urlstr)
        load_page("send_url_with_proxy.html", res)
    except (urllib3.exceptions.NewConnectionError,
            urllib3.exceptions.MaxRetryError) as e:
        print(e)
# Example 9: per-request timeout.
def send_url_with_timeout001():
    """GET a 3-second-delayed endpoint with a 4-second total timeout."""
    delayed_url = "http://httpbin.org/delay/3"
    pool = urllib3.PoolManager()
    # 4.0 s > the server's 3 s delay, so this request should succeed.
    response = pool.request("GET", delayed_url, timeout=4.0)
    print("status:%d" % response.status)
    load_page("send_url_with_timeout001.html", response)
# Example 10: fine-grained timeout (separate connect/read limits).
def send_url_with_timeout002():
    """GET a delayed endpoint using a Timeout with distinct connect/read limits.

    Fixes the output filename: the original overwrote
    send_url_with_timeout001.html instead of writing its own file.
    """
    urlstr = "http://httpbin.org/delay/3"
    httpMgr = urllib3.PoolManager()
    # connect=1.0 s to establish the TCP connection, read=4.0 s to wait
    # for data — enough for the server's 3 s delay.
    res = httpMgr.request("GET", urlstr, timeout=urllib3.Timeout(connect=1.0, read=4.0))
    print("status:%d" % res.status)
    load_page("send_url_with_timeout002.html", res)
# Example 11: pool-wide (global) timeout.
def send_url_with_timeout003():
    """GET a delayed endpoint with a pool-level timeout that is expected to fire.

    The 2 s read timeout is shorter than the server's 3 s delay, so the
    request raises; urllib3 wraps retried timeouts in MaxRetryError.
    Fixed the output filename typo ('..._html' with no extension).
    """
    try:
        urlstr = "http://httpbin.org/delay/3"
        # Timeout set once on the pool applies to every request it makes.
        httpMgr = urllib3.PoolManager(timeout=urllib3.Timeout(connect=1.0, read=2.0))
        res = httpMgr.request("GET", urlstr)
        print("status:%d" % res.status)
        load_page("send_url_with_timeout003.html", res)
    except urllib3.exceptions.ReadTimeoutError as e:
        print(e)
    except urllib3.exceptions.MaxRetryError as e:
        print(e)
# Example 12: exception handling on an unreachable host.
def send_url_with_exception():
    """Request a bogus hostname and demonstrate graceful error handling.

    With ``retries=False`` the DNS/connect failure surfaces directly as
    NewConnectionError. The original caught the misspelled
    ``NewConnectionErroe`` (an AttributeError at runtime) and saved to
    the timeout001 filename; both are fixed here.
    """
    try:
        urlstr = "http://vjsdhjfkh.com"
        httpMgr = urllib3.PoolManager(timeout=urllib3.Timeout(connect=1.0, read=2.0))
        # retries=False: fail fast instead of retrying the dead host.
        res = httpMgr.request("GET", urlstr, retries=False)
        print("status:%d" % res.status)
        load_page("send_url_with_exception.html", res)
    except urllib3.exceptions.NewConnectionError as e:
        print(e)
    print('even though failed,still alive')
if __name__ == "__main__":
    # Run every example in order when executed as a script.
    examples = (
        send_url,
        send_url_with_headers,
        send_get_with_param001,
        send_get_with_param002,
        send_post_with_field,
        send_post_with_body,
        send_post_with_file,
        send_url_with_proxy,
        send_url_with_timeout001,
        send_url_with_timeout002,
        send_url_with_timeout003,
        send_url_with_exception,
    )
    for example in examples:
        example()
# 2021-10-15 web-crawler notes
# (article footer from the original source, published 2021-10-25 14:06:31;
#  commented out so the file remains valid Python)