pip3 install requests
验证安装:在 Python 交互式命令行下执行 import requests,无报错信息即安装成功
一般常用的测试网页为 http://httpbin.org/get
# Fetch a page and inspect the basic attributes of the Response object.
import requests

response = requests.get('http://www.baidu.com')
print(type(response))
print(response.status_code)
page_text = response.text  # decoded page content
print(type(page_text))
print(page_text)
print(response.cookies)
get请求
import requests

test_url = 'http://httpbin.org/get'  # test endpoint
response = requests.get(test_url)
print(response.text)
带参数的get请求
# Build the query string of the URL through the `params` keyword argument
# (it follows the URL, separated by a comma).
import requests

data = dict(name='liu', age=22)
response = requests.get('http://httpbin.org/get', params=data)
print(response.text)
打印结果
{
"args": {
"age": "22",
"name": "liu"
},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Connection": "close",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.20.0"
},
"origin": "210.77.180.38",
"url": "http://httpbin.org/get?name=liu&age=22"
}
解析json
import requests
import json

response = requests.get('http://httpbin.org/get')
raw_text = response.text
print(raw_text)
print(response.json())
print(json.loads(raw_text))  # same result as response.json()
print(type(response.json()))
打印结果
{
"args": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Connection": "close",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.20.0"
},
"origin": "210.77.180.38",
"url": "http://httpbin.org/get"
}
{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
<class 'dict'>
获取二进制数据
import requests

response = requests.get('http://github.com/favicon.ico')
as_text, as_bytes = response.text, response.content
print(type(as_text), type(as_bytes))
print(as_text)
print(as_bytes)
打印结果
下载图片
import requests

# Download the favicon and save it in the current working directory.
response = requests.get('http://github.com/favicon.ico')
with open('favicon.ico', 'wb') as f:
    # .content is the binary response body; the with-block closes the file,
    # so no explicit f.close() is needed.
    f.write(response.content)
运行后可在路径下找到下载的图片
# Example: request Zhihu without passing any custom headers.
import requests

explore_url = 'https://www.zhihu.com/explore'
response = requests.get(explore_url)
print(response.text)
打印结果
<html>
<head><title>400 Bad Request</title></head>
<body bgcolor="white">
<center><h1>400 Bad Request</h1></center>
<hr><center>openresty</center>
</body>
</html>
通过加headers来访问
import requests

# Pass custom headers through the `headers` argument.
# The User-Agent value below is only an example; replace it with your own.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/70.0.3538.77 Safari/537.36',
}
response = requests.get('https://www.zhihu.com/explore', headers=headers)
print(response.text)
基本post请求
import requests

# The dict passed as `data` becomes the body of the POST request.
data = dict(name='liu', age=22)
response = requests.post('http://httpbin.org/post', data=data)
print(response.text)
打印结果
{
"args": {},
"data": "",
"files": {},
"form": {
"age": "22",
"name": "liu"
},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Connection": "close",
"Content-Length": "15",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.20.0"
},
"json": null,
"origin": "210.77.180.38",
"url": "http://httpbin.org/post"
}
添加headers 与get方法一样
import requests

data = {
    'name': 'liu',
    'age': 22,
}
# Headers are passed exactly as with requests.get().
# The User-Agent value below is only an example; replace it with your own.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/70.0.3538.77 Safari/537.36',
}
response = requests.post('http://httpbin.org/post', data=data, headers=headers)
print(response.json())
响应
response 属性
import requests

# Print the type and value of the most commonly used Response attributes.
response = requests.get('http://www.baidu.com')
print(type(response.status_code), response.status_code)
print(type(response.headers), response.headers)
print(type(response.cookies), response.cookies)
print(type(response.url), response.url)
print(type(response.history), response.history)
打印结果
<class 'int'> 200
<class 'requests.structures.CaseInsensitiveDict'> {'Content-Type': 'text/html', 'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform', 'Transfer-Encoding': 'chunked', 'Server': 'bfe/1.0.8.18', 'Content-Encoding': 'gzip', 'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/', 'Last-Modified': 'Mon, 23 Jan 2017 13:27:36 GMT', 'Date': 'Thu, 08 Nov 2018 07:18:47 GMT', 'Pragma': 'no-cache', 'Connection': 'Keep-Alive'}
<class 'requests.cookies.RequestsCookieJar'> <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
<class 'str'> http://www.baidu.com/
<class 'list'> []
状态码判断
import requests

response = requests.get('http://www.baidu.com')

# Compare against the named constant requests.codes.ok ...
if response.status_code == requests.codes.ok:
    print('访问成功')
else:
    exit()

# ... or directly against the literal status code 200.
if response.status_code == 200:
    print('访问成功')
else:
    exit()
打印结果
访问成功
访问成功
高级操作
文件上传
import requests

# Upload a local file as multipart form data.  Opening it in a with-block
# guarantees the handle is closed once the request has been sent.
with open('favicon.ico', 'rb') as upload:
    files = {'file': upload}
    response = requests.post('http://httpbin.org/post', files=files)
print(response.text)
获取cookie
import requests

response = requests.get('http://www.baidu.com')
print(response.cookies)
# Iterate over the cookie jar as name/value pairs.
for key, value in response.cookies.items():
    print(key + '=' + value)
返回结果
<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
BDORZ=27315
会话维持
模拟登陆
import requests

# Two independent calls: the first one asks the server to set a cookie,
# the second one reads the cookies back.
set_cookie_url = 'http://httpbin.org/cookies/set/number/123456'
requests.get(set_cookie_url)
response = requests.get('http://httpbin.org/cookies')
print(response.text)
打印结果
{
"cookies": {}
}
#运行结果cookies是个空
因为设置 cookies(set)和获取 cookies(get)是两次相互独立的请求,相当于在两个不同的浏览器中访问,所以获取到的 cookies 为空。要保持会话,需要使用 requests 库中的 Session 对象
通过session对象在同一个浏览器中发起两次get请求来实现
import requests

# Issue both requests through the same Session object.
s = requests.Session()
set_cookie_url = 'http://httpbin.org/cookies/set/number/123456'
s.get(set_cookie_url)
response = s.get('http://httpbin.org/cookies')
print(response.text)
打印结果
{
"cookies": {
"number": "123456"
}
}
证书验证
import requests

# Plain HTTPS request with the default settings.
site = 'https://www.12306.cn'
response = requests.get(site)
print(response.status_code)
import requests

# verify=False turns certificate verification off (the default is True).
site = 'https://www.12306.cn'
response = requests.get(site, verify=False)
print(response.status_code)
打印结果
200
c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
InsecureRequestWarning)
c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
InsecureRequestWarning)
因为会有警告信息,可以调用 urllib3 的 disable_warnings() 来关闭该警告
import requests
from requests.packages import urllib3

# Turn off urllib3's warnings before making the unverified request.
urllib3.disable_warnings()

site = 'https://www.12306.cn'
# verify=False turns certificate verification off (the default is True).
response = requests.get(site, verify=False)
print(response.status_code)
打印结果
200
添加本地证书信息
import requests

# cert=(certificate_path, key_path) supplies a local client certificate.
# Certificate verification stays enabled here (verify is not set to False),
# so no InsecureRequestWarning is emitted.
response = requests.get('https://www.12306.cn', cert=('/path/server.crt', '/path/key'))
代理设置
import requests

# Map each URL scheme to the proxy that should handle it.
proxies = {
    'http': 'http://代理地址',
    'https': 'https://..代理地址',
}
response = requests.get('http://xxxxx.com', proxies=proxies)
print(response.status_code)
# Proxy that requires a user name and password: embed the credentials
# in the proxy URL as user:password@host.
import requests

proxies = {
    'http': 'http://user:password@代理地址',
    'https': 'https://..代理地址',
}
response = requests.get('http://xxxxx.com', proxies=proxies)
print(response.status_code)
# For proxies that are neither HTTP nor HTTPS, configure a SOCKS proxy.
import requests

# SOCKS support is an optional extra: pip3 install 'requests[socks]'
proxies = {
    'http': 'socks5://代理地址',
    'https': 'socks5://代理地址',
}
response = requests.get('http://xxxxx.com', proxies=proxies)
print(response.status_code)
超时设置 #设置time out
import requests

# Wait at most one second for the server to answer.
max_wait = 1
response = requests.get('https://www.taobao.com', timeout=max_wait)
print(response.status_code)
如果网站1秒内未响应,requests 会抛出超时异常(requests.exceptions.Timeout 的子类,如 ConnectTimeout 或 ReadTimeout)。若不捕获该异常,程序会终止;捕获后才能继续运行,见下面的异常处理
异常处理
import requests
from requests.exceptions import ReadTimeout

# Catch the read timeout so the script can continue instead of crashing.
try:
    response = requests.get('https://httpbin.org/get', timeout=0.5)
    print(response.status_code)
except ReadTimeout:
    print('Timeout')
打印结果
Timeout
#通过try 来捕获异常信息
认证设置 遇到需要输入用户名密码的情况
import requests
from requests.auth import HTTPBasicAuth

# Send the user name and password with the request via HTTP basic auth.
credentials = HTTPBasicAuth('user', '123')
r = requests.get('http://.123..23', auth=credentials)
print(r.status_code)
import requests
from requests.exceptions import ReadTimeout, HTTPError, RequestException

# Handlers are ordered from most specific to most general;
# RequestException is the base class of the requests exceptions.
try:
    response = requests.get('http://httpbin.org/get', timeout=0.6)
    print(response.status_code)
    # requests.get() does not raise for 4xx/5xx responses on its own;
    # raise_for_status() turns them into HTTPError so the handler below
    # can actually be reached.
    response.raise_for_status()
except ReadTimeout:
    print('Timeout')
except HTTPError:
    print('HTTPError')
except RequestException:
    print('Error')