前期准备
requests是第三方库
安装requests库
pip install requests
使用requests库
import requests
简单的get/post
发送一个get请求
# Send a GET request.
import requests
response = requests.get('http://httpbin.org/get')
# Print the response body decoded as text.
print(response.text)
如果返回的是乱码
需要先确认网站使用的是什么编码格式,
例如网站用的是utf-8,
则在读取response.text之前添加下面这行代码
# Tell requests which encoding to use when decoding response.text.
response.encoding = 'utf-8'
获取返回的其它的信息
# Response headers.
print(response.headers)
# HTTP status code of the response.
print(response.status_code)
# Raw response body as bytes (undecoded).
print(response.content)
# URL of the response.
print(response.url)
# Cookies the server sent back.
print(response.cookies)
发送post请求
# Form fields to submit; a dict passed as `data` is sent form-encoded.
data = {
    'name': '你好',
    'password': 123456,
}
# POST the form to the httpbin test endpoint and show the reply body.
response = requests.post(
    'http://httpbin.org/post',
    data=data,
)
print(response.text)
复杂请求
添加请求头
# Custom request headers: present a desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
}
# GET with the custom headers.
response = requests.get('http://httpbin.org/get', headers=headers)
# POST with the same headers; `data` is the form dict assigned earlier in this file.
response = requests.post(
    'http://httpbin.org/post',
    data=data,
    headers=headers,
)
添加参数
# Key/value pairs reused below as query-string parameters and as form fields.
data = {
    'name': '你好',
    'password': 123456,
}
# GET: `params` is encoded into the URL's query string.
response = requests.get(
    'http://httpbin.org/get',
    params=data,
)
# POST: `data` carries the same pairs in the request body.
response = requests.post(
    'http://httpbin.org/post',
    data=data,
)
print(response.text)
添加cookie
将cookie放入headers中
# Send a cookie by hand: add a raw "Cookie" header next to the User-Agent.
# NOTE(review): the cookie value below contains a literal "…", so it looks
# like a truncated sample -- substitute a real cookie string before running.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
    "Cookie": "csrftoken=JfnuMOvgMGZTxtCjvgfZ…6246vb2; username=chengyangkj",
}
response = requests.get('http://httpbin.org/get', headers=headers)
添加认证
# The import must be a single statement; split across two lines it is a
# SyntaxError.
from requests.auth import HTTPBasicAuth

# HTTP Basic authentication with an explicit auth object.
# The URL is a placeholder: replace it with the address that requires auth.
requests.get('需要认证的网址', auth=HTTPBasicAuth('user', 'passwd'))
# Shorthand: a (user, password) tuple is also accepted for Basic auth.
requests.get('需要认证的网址', auth=('user', 'passwd'))
设置代理
# Map each URL scheme to the proxy that should carry requests of that scheme.
proxies = {
    'http': 'http://122.110.31.177:8080',
    'https': 'https://122.110.31.177:8080',
}
# Route this request through the proxies configured above.
response = requests.get(
    'http://httpbin.org/get',
    proxies=proxies,
)
print(response.text)
请求异常处理
requests库中有个专门定义异常的模块requests.exceptions
import requests
from requests.exceptions import (
    ConnectionError,
    ConnectTimeout,
    HTTPError,
    ReadTimeout,
    RequestException,
)

# Catch request failures from the most specific exception to the most general.
try:
    # A timeout exception is raised if the server does not respond within 0.1 s.
    response = requests.get('http://httpbin.org/get', timeout=0.1)
    print(response.text)
except (ConnectTimeout, ReadTimeout):
    # ConnectTimeout also subclasses ConnectionError, so it has to be caught
    # before the ConnectionError handler to be reported as a timeout.
    print('请求超时')
except ConnectionError:
    print('连接失败')
except RequestException:
    # Base class of the requests exceptions: anything not handled above.
    print('请求失败')
处理不安全网站
取消证书验证:设置verify=False(会跳过SSL证书校验,仅建议在测试时使用)
# Disable TLS certificate verification for this request via verify=False.
# The URL below is a placeholder; substitute the real address.
response = requests.get('显示不完全的网址', verify=False)
保持会话
# Use a Session so cookies (e.g. login state) persist across requests.
# requests.Session() replaces requests.session(), which is only a deprecated
# backwards-compatibility helper that returns a Session.
session = requests.Session()
# Hit an httpbin endpoint that sets a cookie, then print the reply body.
response = session.get('http://httpbin.org/cookies/set/number/123456')
print(response.text)
上传文件
# Upload a file under the form field name 'picture'.
# The file is opened in binary mode inside a `with` block so the handle is
# closed once the request has been sent, instead of being leaked.
with open('1.png', 'rb') as picture:
    files = {'picture': picture}
    response = requests.post('http://httpbin.org/post', files=files)
print(response.text)
下载文件
# Fetch binary data from the web (here an image).
# `headers` is the request-header dict assigned earlier in this file.
response = requests.get('https://www.baidu.com/img/bd_logo1.png', headers=headers)
# Save the image to disk; the file is opened with 'wb' because
# response.content is bytes.
with open('baidu.png', 'wb') as f:
    f.write(response.content)
# Report completion only after the file has been written and closed.
print('下载完毕')
处理json数据
# Parse the response body as JSON.
j = response.json()
处理cookie数据
# Fetch a page, then print each cookie on the response as "name = value".
response = requests.get('https://www.baidu.com')
for k, v in response.cookies.items():
    # The loop body must be indented; without it the file does not parse.
    print(k, '=', v)