python3 爬虫urllib中http get，post，超时，设置中文请求头的使用方法例子

最新推荐文章于 2024-01-04 11:54:30 发布

采蘑菇的老姑娘

最新推荐文章于 2024-01-04 11:54:30 发布

阅读量1k

点赞数 1

分类专栏：爬虫-python

本文链接：https://blog.csdn.net/u011093930/article/details/108296838

版权

爬虫-python 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

一.http get使用方法：

import urllib.request

# Fetch the Baidu home page with an HTTP GET. The `with` block closes the
# underlying connection as soon as the body has been read and printed.
with urllib.request.urlopen('https://baidu.com') as response:
    print(response.read().decode('utf-8'))
    print(type(response))

import urllib.request

# Fetch the JD home page and print the response metadata (status line,
# headers) before the decoded body. The `with` block closes the connection.
with urllib.request.urlopen('https://jd.com') as response:
    print('response类型：', type(response))
    print('status:', response.status, 'msg:', response.msg, 'version:', response.version)
    print('headers:', response.getheaders())
    print('Content-Type:', response.getheader('Content-Type'))
    print(response.read().decode('utf-8'))

输出：

二.http post请求

http://httpbin.org/post 是一个用于测试http post请求的网址，如果请求成功，服务端会将http post请求的内容原封不动地返回给客户端。

import urllib.parse
import urllib.request

# Form fields to submit; urlencode() turns them into "name=Mary&age=18".
form = {'name': 'Mary', 'age': '18'}
# urlopen() requires the request body as bytes; supplying `data` makes the
# request a POST instead of a GET.
data = urllib.parse.urlencode(form).encode('utf-8')
with urllib.request.urlopen('http://httpbin.org/post', data=data) as response:
    print(response.read().decode('utf-8'))

运行结果：

三.请求超时

由于绝大多数网站无法在0.1秒内完成响应，所以以下代码会因超时而抛出异常。

import urllib.request

# Demonstration only: the 0.1 s timeout is short enough that this request is
# expected to fail with a timeout error instead of returning a response.
url = 'http://httpbin.org/get'
response = urllib.request.urlopen(url, timeout=0.1)

运行结果：

四.超时异常处理

捕获超时异常，并进行异常处理：

import socket
import urllib.error
import urllib.request

# Issue a GET with a very short timeout and report a timeout instead of
# crashing, then carry on with the rest of the program.
try:
    response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except socket.timeout:
    # A timeout that happens while waiting for the response (after the
    # request was sent) can be raised as a bare socket.timeout rather than
    # being wrapped in URLError, so it needs its own handler.
    print('超时')
except urllib.error.URLError as e:
    # Connection-phase failures arrive wrapped in URLError; `reason` holds
    # the underlying exception, which tells us whether it was a timeout.
    if isinstance(e.reason, socket.timeout):
        print('超时')
    else:
        # Report other network errors instead of silently ignoring them.
        print('请求失败:', e.reason)
else:
    # The request succeeded; release the connection.
    response.close()
print('继续其他工作')

运行结果：

五.设置http请求头

练习：在请求中加入自定义请求头Who，并将请求post到http://httpbin.org/post

from urllib import request, parse

url = 'http://httpbin.org/post'
# Request headers, including the custom "Who" header used in this exercise.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWeKit/537.36'
                  '(KHTML, like Geko) Chrome/72.0.3626.109 Safari/537.36',
    'Host': 'httpbin.org',
    'Who': 'Python Scrapy',
}
# Named `form` rather than `dict` so the builtin type is not shadowed.
form = {
    'name': 'Mary',
    'age': 30,
}
data = parse.urlencode(form).encode('utf-8')
# Build a Request object; its constructor takes both the form data and the
# HTTP request headers.
req = request.Request(url=url, data=data, headers=headers)
# The `with` block closes the connection once the body has been printed.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))

运行结果：

六.中文的处理

http请求头的值只能包含ASCII字符（英文字母、数字和符号），不能直接包含中文等非ASCII字符，因此需要先对中文进行编码（例如URL编码或base64编码）。

from urllib.parse import unquote, urlencode

# Percent-encode a form field whose value is Chinese text, then reverse the
# encoding to recover the original characters.
fields = {'name': '果汁儿'}
value = urlencode(fields)
restored = unquote(value)
print(value)
print(restored)

运行结果：

使用base64对中文进行编码

import base64

# Encode Chinese text as base64 (UTF-8 bytes in, ASCII-only bytes out).
base64Value = base64.b64encode('果汁儿'.encode('utf-8'))
# b64encode() returns bytes; str() on bytes would print the b'...' repr, so
# decode to get the actual base64 text.
base64Text = base64Value.decode('ascii')
print(base64Text)
# Decode back to the original text.
print(base64.b64decode(base64Value).decode('utf-8'))

运行结果：

七.题目：设置包含中文的http请求头，并对服务端返回的请求头进行解码

'''
import urllib.request
response = urllib.request.urlopen('https://baidu.com')
print(response.read().decode('utf-8'))
print(type(response))
'''
'''
import urllib.request
reponse = urllib.request.urlopen('https://jd.com')
print('response类型：', type(reponse))
print('status:', reponse.status, 'msg:', reponse.msg, 'version:', reponse.version)
print('headers:', reponse.getheaders())
print('Content-Type:', reponse.getheader('Content-Type'))
print(reponse.read().decode('utf-8'))
'''
'''
import urllib.request
data = bytes(urllib.parse.urlencode({'name:':'Mary','age:':'18'}),encoding='utf=8')
reponse = urllib.request.urlopen('http://httpbin.org/post',data=data)
print(reponse.read().decode('utf-8'))
'''
'''
import urllib.request
response = urllib.request.urlopen('http://httpbin.org/get',timeout=0.1)
'''
'''
import urllib.request
import socket
import urllib.error
try:
    response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except urllib.error.URLError as e:
    #判断异常的类型是否是超时异常
    if isinstance(e.reason,socket.timeout):
        print('超时')
print('继续其他工作')
'''
'''
from urllib import request,parse
url = 'http://httpbin.org/post'
headers = {
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWeKit/537.36'
'(KHTML, like Geko) Chrome/72.0.3626.109 Safari/537.36',
    'Host':'httpbin.org',
    'Who':'Python Scrapy'
}
dict = {
    'name':'Mary',
    'age':30
}
data = bytes(parse.urlencode(dict),encoding='utf-8')
#创建Request对象，通过Request类的构造方法指定了表单数据和HTTP请求头
req = request.Request(url = url,data = data,headers = headers)
response = request.urlopen(req)
print(response.read().decode('utf-8'))
'''
'''
from urllib import request
url = 'http://httpbin.org/post'
headers = {
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWeKit/537.36'
'(KHTML, like Geko) Chrome/72.0.3626.109 Safari/537.36',
    'Host':'httpbin.org',
    'Who':'中文'
}
req = request.Request(url = url,headers = headers,method='POST')
request.urlopen(req)
'''
'''
from urllib.parse import unquote ,urlencode
value = urlencode({'name':'果汁🕑儿'})
print(value)
print(unquote(value))
'''
'''
import base64
base64Value = base64.b64encode(bytes('果汁儿',encoding='utf-8'))
print(str(base64Value))
print(str(base64.b64decode(base64Value),'utf-8'))
'''
import base64
import json
from urllib import request
from urllib.parse import unquote, urlencode

# Exercise: send Chinese text in HTTP request headers by encoding it first
# (URL-encoding and base64), then decode the headers echoed back by httpbin.
url = 'http://httpbin.org/post'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWeKit/537.36'
                  '(KHTML, like Geko) Chrome/72.0.3626.109 Safari/537.36',
    'Host': 'httpbin.org',
    # URL-encoded Chinese value.
    'Chinese1': urlencode({'name': 'url编码'}),
    # base64-encoded Chinese value; decoded to str so every header value has
    # the same type (b64encode() returns bytes).
    'MyChinese': base64.b64encode('b64编码'.encode('utf-8')).decode('ascii'),
    'who': 'Python Scrapy',
}
# Named `form` rather than `dict` so the builtin type is not shadowed.
form = {
    'name': 'km',
    'age': 18,
}
data = urlencode(form).encode('utf-8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
# Headers can also be added after the Request object has been created.
req.add_header('Chinese2', urlencode({'国籍': '中国'}))
# The `with` block closes the connection once the body has been read.
with request.urlopen(req) as response:
    value = response.read().decode('utf-8')
print(value)

# httpbin returns the request it received as JSON; pull the echoed headers.
echoed_headers = json.loads(value)['headers']
print(unquote(echoed_headers['Chinese1']))
print(unquote(echoed_headers['Chinese2']))
# urllib stores header names via str.capitalize(), so 'MyChinese' comes back
# as 'Mychinese'.
print(base64.b64decode(echoed_headers['Mychinese']).decode('utf-8'))

运行结果：

采蘑菇的老姑娘

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
python3 爬虫urllib中http get，post，超时，设置中文请求头的使用方法例子

一.http get练习：import urllib.requestresponse = urllib.request.urlopen('https://baidu.com')print(response.read().decode('utf-8'))print(type(response))import urllib.requestreponse = urllib.request.urlopen('https://jd.com')print('response类型：', type(r
复制链接

扫一扫

专栏目录