requests库
request库比urlllib好用的多
实例
import requests
response=requests.get("http://www.baidu.com/")
print(response.status_code)
print(response.txt)
print(type(response.txt))
print(response.cookies)
request库实现各种请求
import requests
requests.get("http://www.baidu.com/")
requests.post("http://www.baidu.com/")
requests.delete("http://www.baidu.com/")
requests.head("http://www.baidu.com/")
requests.options("http://www.baidu.com/")
基本get请求
import requests
response=requests.get("http://www.baidu.com/")
print(response.txt)#打印百度的源代码
带参数的get请求
import requests
response=requests.get("http://httpbin.org/get?name=germey&age=22")
print(response.txt)
等价于
import requests
data={
''name':'germey'
'age':22
}
response=requests.get("http://httpbin.org/get?",params=data)
print(response.txt)
解析json
import requests
import json
response=requests.get("http://httpbin.org/get")
print(type(response.text))
print(response.json())
print(json.loads(response.text))
print(type(response.json()))
获取二进制数据
对于text以及content的理解
response.text 返回的是一个 unicode 型的文本数据
response.content 返回的是 bytes 型的二进制数据
也就是说如果想取文本数据可以通过response.text 如果想取图片,文件,则可以通过 response.content
import requests
resopnse=requests.get("https://github.com/favicon.ico")
print(type(resopnse.text),type(resopnse.content))
# <class 'str'>
#<class'bytes'>
print(resopnse.text)
print(resopnse.content)
保存图片
import requests
resopnse=requests.get("https://github.com/favicon.ico")
with open("gitub.ico",'wb') as f:
f.write(resopnse.content)
f.close()
没有头部信息会报500错误
import requests
response=requests.get("https://www.zhihu.com/explore")#知乎官网
print(response.text)
返回的代码为
400 Bad Request
加头部信息
import requests
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
}
response=requests.get("https://www.zhihu.com/explore",headers=headers)
print(response.text)
加入头部信息,便可成功打印知乎的源代码
post请求
import requests
data={'name':'germey','age':'22'}
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
}
response=requests.post('https://www.org/post',data=data,headers=headers)
print(response.json())
响应
response属性
import requests
response=requests.get("http://www.jianshu.com")
print(type(response.status_code),response.status_code)
print(type(response.headers),response.headers)
print(type(response.cookies),response.cookies)
print(type(response.url),response.url)
print(type(response.history),response.history)#访问历史记录
状态码判断
import requests
response=requests.get("http://www.jianshu.com")
exit() if not response.status_code==requests.codes.ok else print("request successfully")
状态码常用于判断请求是否成功,而requests还提供了一个内置的状态码查询对象requests.codes,通过比较返回码和内置的成功的返回码,来保证请求得到正常响应,输出成功请求的信息,否则终止程序。
内置码
request高级操作
1.文件上传
import requests
files={‘file’:open('gitub.ico','rb')}
response=requests.post("http://httpbin.org/post",files=files)
print(response.text)
2.获取cookie
import requests
response=requests.get("http://www.jianshu.com")
print(response.cookies)
for key ,value in response.cookies.items():
print(key+'='+value)
3.会话维持(用于模拟登录)
import requests
requests.get('http://httpbin.org/cookies/set/number/123456789')
response=requests.get("http://httpbin.org/cookies")
print(response.text)# 得不到,发起了两次请求,每个请求是独立的,是不可以实现会话维持的
返回结果:
{
“cookies”: { }
}
import requests
s=requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456789')
response=s.get("http://httpbin.org/cookies")
print(response.text)
Session可以做到模拟会话而不用担心cookies的问题,他通常用于模拟登录成功之后在进行的下一步操作
返回结果:
{
“cookies”: {
“number”: “123456789”
}
}
证书验证
import requests
response=requests.get("https://www.12306.cn")
print(response.status_code)
import requests
from requests.packages import urllib3
urllib.disable_warmings()# 可以消除警告,你可以把这行代码消除看一下运行结果
response=requests.get("https://www.12306.cn",verfy=False)
print(response.status_code)
#这样就可以避免证书验证了
超时设置
import requests
response=requests.get("https://www.12306.cn",timeout=0.01)
print(response.status_code)
这里的timeout参数是连接和读取的总和,如果想要分别指定可以在timeout中传入元组,如timeout=(5,11,30)
认证设置
当我们在访问某一个网址时,需要让我们登录之后才可以访问该网址,这时我们模拟登录时,就需要用到requests自带的身份认证功能
import requests
from requests.auth import HTTPBasicAuth
r=requests.get("http://120.27.34.24:9001",auth=HTTPBasicAuth('user','123'))
print(r.status_code)
等价于
import requests
from requests.auth import HTTPBasicAuth
r=requests.get("http://120.27.34.24:9001",auth=('user','123'))
print(r.status_code)
异常处理
import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException
try:
response=requests.get("http://httpbin.org/get",timeout=0.5)
print(response.status_code)
except ReadTimeout:
print('timeout')
except ConnectionError:
print('ConnectionError')
except RequestExceptin:
print("Error")