requests
- 虽然python的标准库中urllib模块已经包含了平常我们使用的大多数功能,但是它的API使用起来让人感觉不太好
- requests 是用python编写,基于urllib,但是比urllib更加方便
requests 关键字参数
- method: 请求方法
- url: 请求网址
- headers:请求头字段
- cookies:用户身份标识
- proxies:ip代理的关键字参数
- params:查询参数
- data:请求体参数(如表单数据),通常用于post请求
- timeout:设置等待响应的超时时间(单位:秒),一旦超时会抛出异常
- allow_redirects:是否允许重定向
- verify:是否验证证书
- json:json提交参数
- files:文件
- auth:权限认证
- stream:是否是数据流传输
响应体常见的方法和属性
- text:获取响应体文本数据
- content:获取响应体二进制数据
- json():获取响应体的json数据,如果不是json数据,会报错
- headers:查看响应体的响应头信息
- encoding:指定响应体编码
- apparent_encoding:自动识别响应体编码
- cookies:获取响应体的cookies字段信息,得到的是RequestsCookieJar对象
- url:获取响应体的url地址
- status_code:获取响应体状态码
import requests
# Demo: fetch the Douban Top 250 page and print the commonly used attributes
# of the resulting Response object.
url = 'https://movie.douban.com/top250'
# Browser-like request headers. The Cookie value was captured from a browser
# session. NOTE(review): it is probably expired by now -- confirm before
# relying on it.
headers = {
    'Cookie': 'bid="xYzW76eUFk8"; __yadk_uid=XU38FA8bxpKrGZUK1KNEfbTp2VcwSYr4; __gads=ID=6db86c8088752a10-225dc67a68c40078:T=1603710897:RT=1603710897:S=ALNI_MaKwh8GKcMOamfPzR24mBE6kuNMoA; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1603803210%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D5ySilQQPbwcNZs9RwHAuyDAobBcp2_xCj2lV3bceXeMRZLwzc7lQUqN2wmVhtSOl%26wd%3D%26eqid%3Dfcc151630008b5d4000000065f981848%22%5D; _pk_id.100001.4cf6=ce6b3b49020921cb.1603710897.3.1603803210.1603718808.; _pk_ses.100001.4cf6=*; __utma=30149280.1097713080.1601989162.1603718808.1603803210.4; __utmb=30149280.0.10.1603803210; __utmc=30149280; __utmz=30149280.1603803210.4.3.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utma=223695111.1702441161.1603710897.1603718808.1603803210.3; __utmb=223695111.0.10.1603803210; __utmc=223695111; __utmz=223695111.1603803210.3.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; ap_v=0,6.0',
    'Host': 'movie.douban.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
}
# Pass an explicit timeout: without one, requests can wait forever on a
# server that never answers.
response = requests.get(url=url, headers=headers, timeout=10)
# Decoded body text. The original also called html_data.encode() and threw
# the result away; str.encode() returns a new bytes object and does not
# modify the string, so that no-op statement has been dropped.
html_data = response.text
print('获取响应体的文本数据', html_data)
print('获取响应体的二进制数据', response.content)
print('查看响应体的响应头信息', response.headers)
print('指定响应体编码', response.encoding)
print('自动识别响应体编码', response.apparent_encoding)
# response.cookies is a RequestsCookieJar; get_dict() flattens it to a dict.
print('获取响应体的 cookies 字段信息', response.cookies)
print(response.cookies.get_dict())
print('获取响应体的url地址', response.url)
print('获取响应体状态码', response.status_code)
cookie
"""
利用cookie模拟登录
"""
import requests
# Template: simulate a logged-in user by replaying a browser cookie in the
# request headers.
# Placeholder -- fill in the URL the request should be sent to. The original
# used post_url without ever defining it, which raised NameError.
post_url = ''
header = {
    # The original was missing the comma after this entry, which made the
    # whole dict literal a SyntaxError.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    # Placeholder -- paste the cookie string copied from a logged-in browser.
    'cookie': '',
}
# Explicit timeout so the script cannot hang forever on an unresponsive server.
res = requests.post(post_url, headers=header, timeout=10)
print(res.text)
session 会话维持
"""
利用session进行cookie共享
代码格式:
"""
import requests
# Template: log in once through a Session so that the cookies set by the
# login response are sent automatically on the follow-up request.
url_login = '登录网址'  # placeholder: replace with the real login URL
header = {}  # placeholder: request headers to send with the login
data = {
    'UserName': '',
    'password': '',
}  # placeholder: login form fields
url = '个人页面'  # placeholder: replace with a page that requires login

# Using the Session as a context manager closes it (and its pooled
# connections) even if one of the requests raises.
with requests.Session() as session:
    # The login response itself is not needed; its cookies stay on the session.
    session.post(url_login, headers=header, data=data, timeout=10)
    response = session.get(url, timeout=10)

# The body was fully read inside the with-block, so it is safe to use here.
print(response.text)
处理不受信任的SSL证书
- 对于SSL证书已被信任的网站,用requests库可以正常访问;对于证书不受信任的网站,requests默认会校验证书并抛出SSLError,此时可以传入verify=False跳过证书验证
import requests
import urllib3
# Demo: request a site whose SSL certificate may not be trusted, first with
# certificate verification on (the default) and then with it turned off.
# Silence only the warning that verify=False triggers, instead of every
# urllib3 warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
url = 'https://data.stats.gov.cn/'

# With verification on, an untrusted certificate raises SSLError. Catch it so
# the script still reaches the verify=False request below instead of crashing.
try:
    res = requests.get(url, timeout=10)
    print(res.text)
except requests.exceptions.SSLError as err:
    print(err)

# verify=False skips certificate validation. Only do this for hosts you
# trust, because it removes protection against man-in-the-middle attacks.
response = requests.get(url=url, verify=False, timeout=10)
print(response.text)