python爬虫urllib怎么使用_Python爬虫之Urllib库的基本使用

# GET request: fetch a page and print its body decoded as UTF-8.

import urllib.request

# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen("http://www.baidu.com") as response:
    print(response.read().decode('utf-8'))

# POST request

import urllib.parse
import urllib.request

# Supplying `data` makes urlopen issue a POST; the form fields must be
# URL-encoded and passed as bytes.
data = bytes(urllib.parse.urlencode({"word": "hello"}), encoding='utf8')

with urllib.request.urlopen('http://httpbin.org/post', data=data) as response:
    print(response.read())

# Timeout: give up if the server has not answered within one second.

import urllib.request

with urllib.request.urlopen('http://httpbin.org/get', timeout=1) as response:
    print(response.read())

# Timeout handling: use a deliberately tiny timeout and report when the
# resulting URLError was caused by a socket timeout.

import socket
import urllib.request
import urllib.error

try:
    response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except urllib.error.URLError as e:
    # `reason` carries the underlying exception; only report real timeouts.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
else:
    # The request unexpectedly succeeded in time; release the connection.
    response.close()

# Response type

import urllib.request

# Print the class of the object that urlopen returns.
with urllib.request.urlopen('http://www.python.org') as response:
    print(type(response))

# Status code and response headers

import urllib.request

with urllib.request.urlopen('http://www.python.org') as response:
    print(response.status)               # numeric HTTP status code
    print(response.getheaders())         # all headers as (name, value) pairs
    print(response.getheader('server'))  # one header looked up by name

# Request

import urllib.request

# urlopen accepts a Request object as well as a plain URL string.
request = urllib.request.Request('http://python.org')

with urllib.request.urlopen(request) as response:
    print(response.read().decode('utf-8'))

# Request with custom headers passed to the constructor.
# NOTE: this import rebinds the name `request` (used above for a Request
# instance) to the urllib.request module.

from urllib import request, parse

url = 'http://httpbin.org/post'
headers = {
    # The value must be the bare UA string; the header name is not part of it.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
    'Host': 'httpbin.org',
}

# Named `form_data` rather than `dict` so the builtin is not shadowed.
form_data = {'name': 'Germey'}

data = bytes(parse.urlencode(form_data), encoding='utf-8')

req = request.Request(url=url, data=data, headers=headers, method='POST')

with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))

# Same request, but the header is attached afterwards with add_header().

from urllib import request, parse

url = 'http://httpbin.org/post'
form_data = {'name': 'Germey'}

data = bytes(parse.urlencode(form_data), encoding='utf-8')

req = request.Request(url=url, data=data, method='POST')

req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36')

with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))

# Proxy

import urllib.request

# Send http and https traffic through the proxy configured below.
# NOTE(review): presumably needs a proxy actually listening on
# 127.0.0.1:9743, otherwise the request cannot succeed -- confirm locally.
proxy_handler = urllib.request.ProxyHandler({
    'http': 'http://127.0.0.1:9743',
    'https': 'https://127.0.0.1:9743',
})

opener = urllib.request.build_opener(proxy_handler)

# Host corrected from the misspelt "httpbon.org" to httpbin.org, the test
# service used by the other examples.
with opener.open('http://httpbin.org/get') as response:
    print(response.read())

# Cookies

import http.cookiejar
import urllib.request

# An opener built with HTTPCookieProcessor stores the cookies it receives
# in the supplied jar.
cookie = http.cookiejar.CookieJar()

handler = urllib.request.HTTPCookieProcessor(cookie)

opener = urllib.request.build_opener(handler)

with opener.open('http://www.baidu.com') as response:
    for item in cookie:
        # f-string instead of "+" so a cookie whose value is None still prints.
        print(f"{item.name}={item.value}")

# Save cookies to 1.txt

import http.cookiejar
import urllib.request

filename = '1.txt'
# MozillaCookieJar writes the jar in the Mozilla cookies.txt format.
cookie = http.cookiejar.MozillaCookieJar(filename)

handler = urllib.request.HTTPCookieProcessor(cookie)

opener = urllib.request.build_opener(handler)

# Only the cookies are needed here, so close the response straight away.
with opener.open('http://www.baidu.com') as response:
    pass

# Also keep cookies marked to be discarded and cookies that have expired.
cookie.save(ignore_discard=True, ignore_expires=True)

# Another way to save cookies

import http.cookiejar
import urllib.request

filename = '1.txt'
# LWPCookieJar writes the jar in the libwww-perl "Set-Cookie3" format.
cookie = http.cookiejar.LWPCookieJar(filename)

handler = urllib.request.HTTPCookieProcessor(cookie)

opener = urllib.request.build_opener(handler)

# Only the cookies are needed here, so close the response straight away.
with opener.open('http://www.baidu.com') as response:
    pass

# Also keep cookies marked to be discarded and cookies that have expired.
cookie.save(ignore_discard=True, ignore_expires=True)

# Load cookies

import http.cookiejar
import urllib.request

# The jar class used to load must match the format the file was saved in;
# this reads 1.txt as an LWP-format file.
cookie = http.cookiejar.LWPCookieJar()

cookie.load('1.txt', ignore_discard=True, ignore_expires=True)

handler = urllib.request.HTTPCookieProcessor(cookie)

opener = urllib.request.build_opener(handler)

# Requests made through this opener send the cookies loaded above.
with opener.open('http://www.baidu.com') as response:
    print(response.read().decode('utf-8'))

# Exception handling

from urllib import request, error

# URLError is the base class for the errors urlopen raises; `reason`
# describes what went wrong.
try:
    response = request.urlopen('http://lidonghao.com')
except error.URLError as e:
    print(e.reason)

from urllib import request, error

# HTTPError is a subclass of URLError, so it has to be caught first or the
# broader handler would swallow it.
try:
    response = request.urlopen('http://www.baidu.com/101')
except error.HTTPError as e:
    # Separator restored to a newline (the source had lost the backslash).
    print(e.reason, e.code, sep='\n')
except error.URLError as e:
    print(e.reason)
else:
    print('Request Successfully')

import socket
import urllib.request
import urllib.error

# Use a deliberately tiny timeout and inspect the wrapped exception to see
# whether the failure was a socket timeout.
try:
    response = urllib.request.urlopen("https://www.baidu.com", timeout=0.01)
except urllib.error.URLError as e:
    print(type(e.reason))
    if isinstance(e.reason, socket.timeout):
        print("TIME OUT")

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值