python3 爬虫urllib中http get,post,超时,设置中文请求头的使用方法例子

一.http get使用方法:

import urllib.request

# Example 1: fetch a page and print its decoded body.
# Using the response as a context manager closes the connection once the
# body has been read, instead of leaving it open until garbage collection.
with urllib.request.urlopen('https://baidu.com') as response:
    print(response.read().decode('utf-8'))
# The response object can still be inspected after it has been closed.
print(type(response))

# Example 2: inspect the status line and headers before reading the body.
with urllib.request.urlopen('https://jd.com') as response:
    print('response类型:', type(response))
    print('status:', response.status, 'msg:', response.msg, 'version:', response.version)
    print('headers:', response.getheaders())
    print('Content-Type:', response.getheader('Content-Type'))
    print(response.read().decode('utf-8'))

输出:

二 http post请求

http://httpbin.org/post 是一个用于测试http post请求的网址,如果请求成功,服务端会将http post请求的内容原封不动地返回给客户端。

# urllib.parse is imported explicitly rather than relying on urllib.request
# happening to import it as a side effect.
import urllib.parse
import urllib.request

# The form fields are URL-encoded and sent as the request body; supplying
# `data` makes urlopen issue a POST instead of a GET.
form_fields = {'name': 'Mary', 'age': '18'}
data = urllib.parse.urlencode(form_fields).encode('utf-8')
with urllib.request.urlopen('http://httpbin.org/post', data=data) as response:
    print(response.read().decode('utf-8'))

运行结果:

三 请求超时

由于绝大多数网站无法在0.1秒内完成响应,所以以下代码通常会因超时而抛出异常。

import urllib.request

# The 0.1 second limit is deliberately tiny: this call is meant to
# demonstrate the error raised when the server does not answer in time.
target = 'http://httpbin.org/get'
response = urllib.request.urlopen(target, timeout=0.1)

运行结果:

四 超时异常处理

捕获超时异常,并进行异常处理:

import socket
import urllib.error
import urllib.request

try:
    response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except socket.timeout:
    # A timeout that happens while waiting for the response is raised
    # directly, not wrapped in URLError, so it needs its own handler.
    print('超时')
except urllib.error.URLError as e:
    # Failures while connecting or sending are wrapped in URLError; the
    # underlying cause is exposed as `reason`.
    if isinstance(e.reason, socket.timeout):
        print('超时')
    else:
        # Report other failures instead of silently discarding them.
        print('请求失败:', e.reason)
else:
    # The request succeeded: release the connection.
    response.close()
print('继续其他工作')

运行结果:

五.设置http请求头

练习:将自定义请求头who,post到http://httpbin.org/post

from urllib import parse, request

url = 'http://httpbin.org/post'
headers = {
    # Adjacent string literals are joined with nothing in between, so the
    # separating space must be part of one of the pieces.
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    ),
    'Host': 'httpbin.org',
    'Who': 'Python Scrapy',
}
# Named form_fields so the builtin `dict` is not shadowed.
form_fields = {
    'name': 'Mary',
    'age': 30,
}
data = parse.urlencode(form_fields).encode('utf-8')
# Build a Request carrying both the form body and the custom HTTP headers.
req = request.Request(url=url, data=data, headers=headers)
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))

运行结果:

六 中文的处理

http请求头只能包含ASCII字符(英文字母、数字和符号),不能直接包含中文等非ASCII字符,因此需要先对中文进行编码(如URL编码或base64编码),接收后再解码。

from urllib.parse import unquote, urlencode

# Percent-encode a non-ASCII form value, then reverse the encoding.
query = {'name': '果汁儿'}
value = urlencode(query)
for text in (value, unquote(value)):
    print(text)

运行结果:

使用base64对中文进行编码

import base64

# Encode the UTF-8 bytes of the text as base64.
base64Value = base64.b64encode('果汁儿'.encode('utf-8'))
# str() of a bytes object gives its repr, so this prints the b'...' form.
print(str(base64Value))
# Decode the base64 back to bytes, then to the original text.
decoded = base64.b64decode(base64Value).decode('utf-8')
print(decoded)

运行结果:

七 题目:设置中文http请求头,并对其进行解码

# The triple-quoted blocks below are copies of the earlier examples. Each one
# is a bare string literal, so none of the code inside them is executed.
'''
import urllib.request
response = urllib.request.urlopen('https://baidu.com')
print(response.read().decode('utf-8'))
print(type(response))
'''
'''
import urllib.request
reponse = urllib.request.urlopen('https://jd.com')
print('response类型:', type(reponse))
print('status:', reponse.status, 'msg:', reponse.msg, 'version:', reponse.version)
print('headers:', reponse.getheaders())
print('Content-Type:', reponse.getheader('Content-Type'))
print(reponse.read().decode('utf-8'))
'''
'''
import urllib.request
data = bytes(urllib.parse.urlencode({'name:':'Mary','age:':'18'}),encoding='utf=8')
reponse = urllib.request.urlopen('http://httpbin.org/post',data=data)
print(reponse.read().decode('utf-8'))
'''
'''
import urllib.request
response = urllib.request.urlopen('http://httpbin.org/get',timeout=0.1)
'''
'''
import urllib.request
import socket
import urllib.error
try:
    response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except urllib.error.URLError as e:
    #判断异常的类型是否是超时异常
    if isinstance(e.reason,socket.timeout):
        print('超时')
print('继续其他工作')
'''
'''
from urllib import request,parse
url = 'http://httpbin.org/post'
headers = {
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWeKit/537.36'
'(KHTML, like Geko) Chrome/72.0.3626.109 Safari/537.36',
    'Host':'httpbin.org',
    'Who':'Python Scrapy'
}
dict = {
    'name':'Mary',
    'age':30
}
data = bytes(parse.urlencode(dict),encoding='utf-8')
#创建Request对象,通过Request类的构造方法指定了表单数据和HTTP请求头
req = request.Request(url = url,data = data,headers = headers)
response = request.urlopen(req)
print(response.read().decode('utf-8'))
'''
'''
from urllib import request
url = 'http://httpbin.org/post'
headers = {
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWeKit/537.36'
'(KHTML, like Geko) Chrome/72.0.3626.109 Safari/537.36',
    'Host':'httpbin.org',
    'Who':'中文'
}
req = request.Request(url = url,headers = headers,method='POST')
request.urlopen(req)
'''
'''
from urllib.parse import unquote ,urlencode
value = urlencode({'name':'果汁🕑儿'})
print(value)
print(unquote(value))
'''
'''
import base64
base64Value = base64.b64encode(bytes('果汁儿',encoding='utf-8'))
print(str(base64Value))
print(str(base64.b64decode(base64Value),'utf-8'))
'''
import base64
import json
from urllib import request
from urllib.parse import unquote, urlencode

url = 'http://httpbin.org/post'
headers = {
    # Adjacent string literals are joined with nothing in between, so the
    # separating space must be part of one of the pieces.
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    ),
    'Host': 'httpbin.org',
    # Chinese text is percent-encoded or base64-encoded before being used as
    # a header value, and decoded again from the echoed response below.
    'Chinese1': urlencode({'name': 'url编码'}),
    # b64encode returns bytes; decode to str so every header value is text.
    'MyChinese': base64.b64encode('b64编码'.encode('utf-8')).decode('ascii'),
    'who': 'Python Scrapy',
}
# Named form_fields so the builtin `dict` is not shadowed.
form_fields = {
    'name': 'km',
    'age': 18,
}
data = urlencode(form_fields).encode('utf-8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
req.add_header('Chinese2', urlencode({'国籍': '中国'}))
# The context manager closes the connection once the body has been read.
with request.urlopen(req) as response:
    value = response.read().decode('utf-8')
print(value)

# The JSON reply is expected to carry the request headers under 'headers'.
echoed_headers = json.loads(value)['headers']
print(unquote(echoed_headers['Chinese1']))
print(unquote(echoed_headers['Chinese2']))
# The header sent as 'MyChinese' is looked up as 'Mychinese' because the
# name's capitalization is normalized on the way out and back.
print(base64.b64decode(echoed_headers['Mychinese']).decode('utf-8'))

运行结果:

 

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值