注意:httpx库的使用方法,跟requests库的使用方法非常相似,学会requests库,再学httpx库非常容易。
- 查看请求所用的协议是HTTP/1.1,还是HTTP/2.0的方法(打开浏览器的开发者模式中查看):
一、快速入门
urllib和requests只支持HTTP/1.1,不支持HTTP/2.0
httpx跟requests很相似,不过httpx既支持HTTP/1.1,也支持HTTP/2.0
httpx默认使用HTTP/1.1;要使用HTTP/2.0,需要先安装httpx[http2]
安装之后,还必须在创建客户端时传入http2=True,即httpx.Client(http2=True),才能启用HTTP/2.0
使用 pip 安装:
$ pip install httpx
或者,要包括可选的 HTTP/2 支持,请使用:
$ pip install httpx[http2]
要包括可选的 brotli 解码器支持,请使用:
$ pip install httpx[brotli]
1、get请求
# Python版本:3.6
# -*- coding:utf-8 -*-
import httpx
r = httpx.get('https://www.example.org/')
print(r.status_code) # 200
print(r.headers['content-type']) # text/html; charset=UTF-8
print(r.text)
2、post请求
import httpx
r = httpx.post('https://httpbin.org/post',data={'key1':'value1'})
print(r.text)
3、put、delete、head、options请求
import httpx
httpx.put('https://httpbin.org/put', data={'key': 'value'})
httpx.delete('https://httpbin.org/delete')
httpx.head('https://httpbin.org/get')
httpx.options('https://httpbin.org/get')
4、在url链接中传递参数
A、使用params关键字传递参数
import httpx
url = 'https://httpbin.org/get'
params = {'key1': 'value1', 'key2': 'value2'}
# Pass the url variable defined above instead of repeating the literal;
# the params dict is encoded into the query string.
r = httpx.get(url, params=params)
print(r.status_code)  # 200
print(r.url)  # https://httpbin.org/get?key1=value1&key2=value2
B、列表数据类型
import httpx
params = {'key1':'value1','key2':['value2','value3']}
r = httpx.get('https://httpbin.org/get',params=params)
print(r.status_code) # 200
print(r.url) # https://httpbin.org/get?key1=value1&key2=value2&key2=value3
5、响应文本内容
import httpx
r = httpx.get('https://www.example.org/')
print(r.text)
6、查看或设置网页的编码
A、查看网页的编码
import httpx
r = httpx.get('https://httpbin.org/get')
# Inspect the encoding associated with the response
print(r.encoding) # ascii
B、设置编码方式,一旦设置就会覆盖原来的编码
import httpx
r = httpx.get('https://httpbin.org/get')
r.encoding = 'utf-8'
print(r.encoding) # utf-8
print(r.text)
7、二进制响应内容
import httpx
r = httpx.get('https://httpbin.org/get')
print(r.content)
8、要从请求返回的二进制数据创建图像
import httpx
from PIL import Image
from io import BytesIO
r = httpx.get('https://pic.ntimg.cn/file/20220402/19727910_161258533101_2.jpg')
print(r.content)
i = Image.open(BytesIO(r.content))
# Display the image
i.show()
# Save the image under a relative path (the current working directory)
i.save('biadu_logo_BytesIO.PNG')
9、JSON响应内容
import httpx
r = httpx.get('https://api.github.com/events')
print(r.json())
10、自定义headers
import httpx
url = 'https://httpbin.org/headers'
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'}
r = httpx.get(url, headers=headers)
print(r.status_code) # 200
print(r.headers)
"""
Headers({'date': 'Thu, 21 Apr 2022 11:40:57 GMT', 'content-type': 'application/json', 'content-length': '319', 'connection': 'keep-alive', 'server': 'gunicorn/19.9.0', 'access-control-allow-origin': '*', 'access-control-allow-credentials': 'true'})
"""
11、发送表单数据
import httpx
data = {
'key1':'value1',
'key2':'value2'
}
r = httpx.post('https://httpbin.org/post',data=data)
print(r.text)
"""
...
"form": {
"key1": "value1",
"key2": "value2"
},
...
"""
- 同一键,包含多个值
import httpx
data = {
'key1':['value1','value2']
}
r = httpx.post('https://httpbin.org/post',data=data)
print(r.text)
"""
...
"form": {
"key1": [
"value1",
"value2"
]
},
...
"""
12、上传文件
import httpx
# Open the file in a context manager so the handle is closed once the
# request has been sent, instead of being leaked.
with open('uploading.txt', 'rb') as upload:
    files = {'upload-file': upload}
    r = httpx.post('https://httpbin.org/post', files=files)
print(r.text)
"""
....
"files": {
"upload-file": "abcdef\r\nghig\r\nkega"
},
...
"""
- 显式设置文件名和内容类型
import httpx
# Set the file name and content type explicitly by passing a
# (filename, file object, content type) tuple as the field value.
# Content-type reference table: https://tool.oschina.net/commons/
with open('uploading.xls', 'rb') as upload:
    files = {'upload-file': ('uploading.xls', upload, 'application/vnd.ms-excel')}
    # For a plain-text file the tuple would instead be:
    # ('uploading.txt', upload, 'text/plain')
    r = httpx.post('https://httpbin.org/post', files=files)
print(r.text)
- 如果需要在表单中包含非文件数据字段,请使用data=…参数
import httpx
data = {'message': 'Hello world!'}
# Open the file in a context manager so the handle is closed once the
# request has been sent; non-file form fields go in data=.
with open('uploading.txt', 'rb') as upload:
    files = {'file': upload}
    r = httpx.post('https://httpbin.org/post', data=data, files=files)
print(r.text)
"""
....
"files": {
"file": "abcdef\r\nghig\r\nkega"
},
"form": {
"message": "Hello world!"
},
....
"""
13、发送JSON编码的数据
import httpx
data = {'iteger':123,'boolean':True,'list':['a','b','c']}
r = httpx.post('https://httpbin.org/post',json=data)
print(r.text)
"""
...
"json": {
"boolean": true,
"iteger": 123,
"list": [
"a",
"b",
"c"
]
},
...
"""
14、发送二进制请求数据
import httpx
content = b'Hello wolrd'
r = httpx.post('https://httpbin.org/post',content=content)
print(r.text) # ...."data": "Hello wolrd", ...
15、状态码
import httpx
r = httpx.get('https://httpbin.org/get')
print(r.status_code) # 200
print(r.status_code == httpx.codes.OK) # True
- 其它状态码:比如404
import httpx
not_found = httpx.get('https://httpbin.org/status/404')
print(not_found.status_code) # 404
# raise_for_status() raises httpx.HTTPStatusError for 4xx/5xx responses;
# uncomment the next line to see the exception.
# print(not_found.raise_for_status())
16、headers文件头
import httpx
r = httpx.get('https://www.baidu.com')
print(r.headers)
"""
Headers([('accept-ranges', 'bytes'), ('cache-control', 'no-cache'), ('connection', 'keep-alive'), ('content-length', '227'), ('content-type', 'text/html'), ('date', 'Thu, 21 Apr 2022 13:01:43 GMT'), ('p3p', 'CP=" OTI DSP COR IVA OUR IND COM "'), ('p3p', 'CP=" OTI DSP COR IVA OUR IND COM "'), ('pragma', 'no-cache'), ('server', 'BWS/1.1'), ('set-cookie', 'BD_NOT_HTTPS=1; path=/; Max-Age=300'), ('set-cookie', 'BIDUPSID=AC1E015F16AEB5710A9851F42E6A5A7E; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com'), ('set-cookie', 'PSTM=1650546103; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com'), ('set-cookie', 'BAIDUID=AC1E015F16AEB57186EF629BB294F2FC:FG=1; max-age=31536000; expires=Fri, 21-Apr-23 13:01:43 GMT; domain=.baidu.com; path=/; version=1; comment=bd'), ('strict-transport-security', 'max-age=0'), ('traceid', '165054610306639106669606882951048680134'), ('x-frame-options', 'sameorigin'), ('x-ua-compatible', 'IE=Edge,chrome=1')])
"""
# Look up individual header values by name
print('connection:',r.headers['connection']) # connection: keep-alive
print('cache-control:',r.headers.get('cache-control')) # cache-control: no-cache
17、response响应流
使用响应流时,不会立即把整个响应体加载到内存中,适合处理体积较大的响应。
- 二进制内容(bytes数据类型)
import httpx
# Stream the response and print the body chunk by chunk as bytes.
with httpx.stream('GET', 'https://httpbin.org/get') as r:
    for data in r.iter_bytes():
        print(data)
- 文本内容
import httpx
# Stream the response and print the body chunk by chunk as decoded text.
with httpx.stream('GET', 'https://httpbin.org/get') as r:
    for text in r.iter_text():
        print(text)
- 逐行传输文本
import httpx
# Stream the response and print the body one text line at a time.
with httpx.stream('GET', 'https://httpbin.org/get') as r:
    for line in r.iter_lines():
        print(line)
- 原始字节
import httpx
# Stream the raw bytes of the response chunk by chunk.
# NOTE(review): iter_raw() is documented as skipping content decoding
# (gzip/deflate/brotli) - confirm against the httpx docs.
with httpx.stream('GET', 'https://httpbin.org/get') as r:
    for chunk in r.iter_raw():
        print(chunk)
18、访问指定的Cookie
import httpx
r = httpx.get('https://httpbin.org/cookies/set?chocolate=chip')
print(r.cookies['chocolate']) # chip
20、Cookies参数
import httpx
cookies = {'key':'value'}
# Pass the cookies argument when sending the request
res = httpx.get('https://httpbin.org/cookies',cookies=cookies)
print(res.json())
21、设置Cookies
import httpx
# Create a Cookies object
cookies = httpx.Cookies()
# Set cookies, each one scoped to a specific domain
cookies.set('cookie_on_domain', 'hello, there!', domain='httpbin.org')
cookies.set('cookie_off_domain', 'nope.', domain='example.org')
# Send the request with the cookies attached
r = httpx.get('http://httpbin.org/cookies', cookies=cookies)
print(r.json())
22、重定向
- GitHub 将所有 HTTP 请求重定向到 HTTPS
import httpx
r = httpx.get('http://github.com/')
print(r.status_code) # 301
print(r.history) # []
print(r.next_request) # <Request('GET', 'https://github.com/')>
- 启用重定向
import httpx
r = httpx.get('http://github.com',follow_redirects=True)
print(r.url) # https://github.com/
print(r.status_code) # 200
print(r.history) # [<Response [301 Moved Permanently]>]
- 禁用重定向
import httpx
r = httpx.get('http://github.com',follow_redirects=False)
print(r.url) # http://github.com
print(r.status_code) # 301
print(r.next_request) # <Request('GET', 'https://github.com/')>
23、超时设置
- timeout参数值太小,会报错
import httpx
r = httpx.get('https://github.com/',timeout=0.001) # raises a timeout error (e.g. httpx.ConnectTimeout)
- 完全禁用超时行为
import httpx
r = httpx.get('http://github.com',timeout=None)
print(r.url) # http://github.com
print(r.status_code) # 301
24、HTTP身份验证
- 没有进行http身份验证时
import httpx
r = httpx.get('https://ssr3.scrape.center/')
print(r.url) # https://ssr3.scrape.center/
print(r.status_code) # 401
- 进行http身份验证
import httpx
r = httpx.get('https://ssr3.scrape.center/',auth=("admin", "admin"))
print(r.url) # https://ssr3.scrape.center/
print(r.status_code) # 200
二、高级用法
1、Client方法
import httpx
# http2=True requires the optional dependency: pip install httpx[http2]
with httpx.Client(http2=True) as client:
    r = client.get('https://www.qq.com')
print(r)  # <Response [200 OK]>
另一种写法:
import httpx
client = httpx.Client()
try:
    response = client.get('https://httpbin.org/get')
finally:
    # Always close the client, even if the request raises.
    client.close()
2、添加headers
- 在get请求中,加入headers
import httpx
with httpx.Client() as client:
    # Headers passed to get() are sent with this request.
    headers = {'X-Custom': 'value'}
    r = client.get('https://www.qq.com', headers=headers)
print(r.request.headers['X-Custom'])  # value
- client方法中,添加headers
import httpx
url = 'https://httpbin.org/headers'
headers = {'user-agent': 'my-app/0.0.1'}
# Headers are configured on the Client rather than passed to get().
with httpx.Client(headers=headers) as client:
    r = client.get(url)
print(r.json())
3、异步请求
# Python版本:3.6
# -*- coding:utf-8 -*-
import httpx
import asyncio


async def fetch(url):
    """Fetch *url* with an HTTP/2-enabled async client and print the body.

    http2=True requires the optional dependency (pip install httpx[http2]).
    """
    async with httpx.AsyncClient(http2=True) as client:
        response = await client.get(url)
        print(response.text)


if __name__ == '__main__':
    # Kept as get_event_loop().run_until_complete() because this tutorial
    # targets Python 3.6; on Python 3.7+ asyncio.run(fetch(...)) is the
    # simpler equivalent.
    asyncio.get_event_loop().run_until_complete(fetch('https://www.httpbin.org/get'))
程序运行结果为:
{
"args": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Host": "www.httpbin.org",
"User-Agent": "python-httpx/0.22.0",
"X-Amzn-Trace-Id": "Root=1-626176f1-3bcfc83347c0102047408b78"
},
"origin": "120.239.165.180",
"url": "https://www.httpbin.org/get"
}
4、配置合并:对headers、params、cookie值组合在一起
# Python版本:3.6
# -*- coding:utf-8 -*-
import httpx
headers = {'X-Auth': 'from-client'}  # client-level header
params = {'client_id': 'client1'}  # client-level query parameter (shown in the URL)
with httpx.Client(headers=headers, params=params) as client:
    headers = {'X-Custom': 'from-request'}  # request-level header
    params = {'request_id': 'request1'}  # request-level query parameter (shown in the URL)
    # Client-level and request-level headers/params are merged for this request.
    r = client.get('https://example.com', headers=headers, params=params)
print(r.request.url)  # https://example.com?client_id=client1&request_id=request1
print(r.request.headers)  # Headers({'host': 'example.com', 'accept': '*/*', 'accept-encoding': 'gzip, deflate', 'connection': 'keep-alive', 'user-agent': 'python-httpx/0.22.0', 'x-auth': 'from-client', 'x-custom': 'from-request'})
print(r.request.headers['X-Auth'])  # from-client
print(r.request.headers['X-Custom'])  # from-request
5、在base_url参数值的基础上添加路径
import httpx
with httpx.Client(base_url='http://httpbin.org') as client:
    # The relative path is joined onto base_url, so this requests
    # http://httpbin.org/headers.
    r = client.get('/headers')
print(r.url)  # http://httpbin.org/headers
print(r.request.url)  # http://httpbin.org/headers
print(r.json())
6、Request请求
import httpx
# Build the Request object explicitly, then dispatch it through a client.
request = httpx.Request('GET', 'https://www.qq.com')
with httpx.Client() as client:
    response = client.send(request)
print(response)  # <Response [200 OK]>
print(response.url)  # https://www.qq.com
print(response.text)
7、监控下载进度
# Used to create the temporary file that receives the download
import tempfile
import httpx
# Progress-bar library
from tqdm import tqdm
with tempfile.NamedTemporaryFile() as download_file:
    url = "https://speed.hetzner.de/100MB.bin"
    with httpx.stream('GET', url) as response:
        # Total size as announced by the server; sizes the progress bar.
        total = int(response.headers['Content-Length'])
        with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit='B') as progress:
            num_bytes_download = response.num_bytes_downloaded
            for chunk in response.iter_bytes():
                download_file.write(chunk)
                # Advance the bar by the growth of num_bytes_downloaded
                # since the previous chunk.
                progress.update(response.num_bytes_downloaded - num_bytes_download)
                num_bytes_download = response.num_bytes_downloaded
程序运行后,在run控制台中显示: