安装方法:pip3 install requests
实例引入
-
import requests
-
-
response = requests.get(
'https://www.baidu.com/')
#传入一个网址,向它发送GET请求,得到一个Response对象
-
print(type(response))
#打印其类型
-
print(response.status_code)
#打印状态码
-
print(type(response.text))
#打印网页源代码(response.text)的类型
-
print(response.text)
#得到网址的源代码
-
print(response.cookies)
#打印Cookies
-
-
运行结果:
-
①:<class 'requests.models.Response'>
-
②:200
-
③:<class 'str'>
-
④:<!DOCTYPE html>feedback>æè§åé¦</a> 京ICPè¯030173å· <img src=//www.baidu.com/img/gs.gif> </p> </div> </div> </div> </body> </html>
-
⑤:<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
各种请求方式
-
import requests
-
-
requests.post(
'http://httpbin.org/post')
-
requests.put(
'http://httpbin.org/put')
-
requests.delete(
'http://httpbin.org/delete')
-
requests.head(
'http://httpbin.org/get')
-
requests.options(
'http://httpbin.org/get')
请求
基本GET请求
基本写法
-
import requests
-
-
response = requests.get(
'http://httpbin.org/get')
#传入一个网址,向它发送GET请求并得到响应
-
print(response.text)
#将这个网址的源代码打印出来
-
-
运行结果:
-
{
-
"args": {},
-
"headers": {
-
"Accept":
"*/*",
-
"Accept-Encoding":
"gzip, deflate",
-
"Connection":
"close",
-
"Host":
"httpbin.org",
-
"User-Agent":
"python-requests/2.19.1"
-
},
-
"origin":
"222.210.218.46",
-
"url":
"http://httpbin.org/get"
-
}
带参数GET请求
-
import requests
-
-
response = requests.get(
'http://httpbin.org/get?name=germey&age=22')
-
print(response.text)
-
-
运行结果:
-
{
-
"args": {
-
"age":
"22",
-
"name":
"germey"
-
},
-
"headers": {
-
"Accept":
"*/*",
-
"Accept-Encoding":
"gzip, deflate",
-
"Connection":
"close",
-
"Host":
"httpbin.org",
-
"User-Agent":
"python-requests/2.19.1"
-
},
-
"origin":
"222.210.218.46",
-
"url":
"http://httpbin.org/get?name=germey&age=22"
-
}
另一种方式传参
-
import requests
-
-
data = {
-
'name' :
'germey',
-
'age' :
22
-
}
-
response = requests.get(
'http://httpbin.org/get',params=data)
#传入一个字典形式的参数
-
print(response.text)
-
-
运行结果:
-
{
-
"args": {
-
"age":
"22",
-
"name":
"germey"
-
},
-
"headers": {
-
"Accept":
"*/*",
-
"Accept-Encoding":
"gzip, deflate",
-
"Connection":
"close",
-
"Host":
"httpbin.org",
-
"User-Agent":
"python-requests/2.19.1"
-
},
-
"origin":
"222.210.218.46",
-
"url":
"http://httpbin.org/get?name=germey&age=22"
-
}
-
-
PS:运行结果与第一种完全一致
解析json
-
import requests
-
import json
-
-
response = requests.get(
'http://httpbin.org/get')
-
print(type(response.text))
-
print(response.json())
#把响应内容按JSON解析,返回对应的Python对象(这里是dict);如果响应内容不是合法的JSON则会抛出异常
-
print(json.loads(response.text))
#返回结果与上面一致
-
print(type(response.json()))
-
-
运行结果:
-
①:<class 'str'>
-
②:{'args': {},
'headers': {
'Accept':
'*/*',
'Accept-Encoding':
'gzip, deflate',
'Connection':
'close',
'Host':
'httpbin.org',
'User-Agent':
'python-requests/2.19.1'},
'origin':
'222.210.218.46',
'url':
'http://httpbin.org/get'}
-
③:{
'args': {},
'headers': {
'Accept':
'*/*',
'Accept-Encoding':
'gzip, deflate',
'Connection':
'close',
'Host':
'httpbin.org',
'User-Agent':
'python-requests/2.19.1'},
'origin':
'222.210.218.46',
'url':
'http://httpbin.org/get'}
-
④:<class 'dict'>
获取二进制数据
-
import requests
#下载(获取)图片、视频常用的一个方法
-
-
response = requests.get(
'https://github.com/favicon.ico')
#将图片的链接传入
-
print(type(response.text),type(response.content))
-
print(response.text)
-
print(response.content)
#获取图片的二进制内容
-
-
运行结果:
-
①:<class 'str'> <class 'bytes'>
-
②::�������O L������ ���
-
③:
b'\x00\x00\x01\x00\x02\x00\x10\x10\......
-
-
import requests
#将获取的图片保存下来
-
-
response = requests.get(
'https://github.com/favicon.ico')
-
with open(
'favicon.ico',
'wb') as f:
#第一个传入的参数是图片名称,第二个是写入模式
-
f.write(response.content)
-
f.close()
#此行可省略:with语句块结束时会自动关闭文件
添加headers
-
import requests
-
-
response = requests.get(
'https://www.zhihu.com/explore')
-
print(response.text)
-
-
运行结果:
-
<html>
-
<head><title>
400 Bad Request</title></head>
-
<body bgcolor=
"white">
-
<center><h1>
400 Bad Request</h1></center>
-
<hr><center>openresty</center>
-
</body>
-
</html>
-
-
PS:网站判断headers(浏览器头信息)不正确,拒绝访问
-
import requests
-
-
headers = {
-
'User-Agent' :
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)'
-
}
-
response = requests.get(
'https://www.zhihu.com/explore',headers=headers)
#传入了一个浏览器头
-
print(response.text)
-
-
运行结果:
-
<!DOCTYPE html>
-
<html lang=
"zh-CN" dropEffect=
"none"
class="no-js no-auth ">
-
<head>
-
<meta charset="utf-8" />
-
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
-
<meta name="renderer" content="webkit" />
-
<meta http-equiv="X-ZA-Response-Id" content="cecbafbb28655862d786c4985980a895">
-
......
-
-
PS:加了一个浏览器头信息,网址已正常访问
基本POST请求
-
import requests
-
-
data = {
'name':
'germey',
'age':
'22'}
-
response = requests.post(
'http://httpbin.org/post',data=data)
-
print(response.text)
-
-
运行结果:
-
{
-
"args": {},
-
"data":
"",
-
"files": {},
-
"form": {
-
"age":
"22",
-
"name":
"germey"
-
},
-
"headers": {
-
"Accept":
"*/*",
-
"Accept-Encoding":
"gzip, deflate",
-
"Connection":
"close",
-
"Content-Length":
"18",
-
"Content-Type":
"application/x-www-form-urlencoded",
-
"Host":
"httpbin.org",
-
"User-Agent":
"python-requests/2.19.1"
-
},
-
"json": null,
-
"origin":
"222.210.218.46",
-
"url":
"http://httpbin.org/post"
-
}
-
PS:只需把一个字典作为data参数传入,就可以非常方便地以表单形式提交POST数据
-
import requests
-
-
data = {
'name':
'germey',
'age':
'22'}
-
headers={
-
'User-Agent':
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)'
-
}
-
response = requests.post(
'http://httpbin.org/post',data=data,headers=headers)
-
print(response.json())
-
-
运行结果:
-
{
'args': {},
'data':
'',
'files': {},
'form': {
'age':
'22',
'name':
'germey'},
'headers':
-
{
'Accept':
'*/*',
'Accept-Encoding':
'gzip, deflate',
'Connection':
'close',
'Content-Length':
'18',
'Content-Type':
'application/x-www-form-urlencoded',
'Host':
'httpbin.org',
-
'User-Agent':
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)'},
'json':
None,
-
'origin':
'222.210.218.46',
'url':
'http://httpbin.org/post'}
响应
response属性
-
import requests
-
-
response = requests.get(
'http://www.jianshu.com')
-
print(type(response.status_code),response.status_code)
#状态码及其类型
-
print(type(response.headers),response.headers)
-
print(type(response.cookies),response.cookies)
-
print(type(response.url),response.url)
-
print(type(response.history),response.history)
#访问的历史记录
-
-
运行结果:
-
①:<class 'int'> 403
-
②:<class 'requests.structures.CaseInsensitiveDict'> {'Date':
'Wed, 12 Sep 2018 12:11:31 GMT',
'Server':
'Tengine',
'Content-Type':
'text/html',
'Transfer-Encoding':
'chunked',
-
'Strict-Transport-Security':
'max-age=31536000; includeSubDomains; preload',
'Content-Encoding':
'gzip',
'X-Via':
'1.1 shandianxin27:8 (Cdn Cache Server V2.0), 1.1 ndianxin72:8 (Cdn Cache Server V2.0)',
'Connection':
'keep-alive'}
-
③:<class 'requests.cookies.RequestsCookieJar'> <RequestsCookieJar[]>
-
④:<class 'str'> https://www.jianshu.com/
-
⑤:<class 'list'> [<Response [301]>]
高级操作
文件上传
-
import requests
#文件上传用post操作
-
-
files = {
'files':open(
'favicon.ico',
'rb')}
#构造files字典:键是上传表单的字段名(可自由指定),值是用open以二进制方式打开的文件对象
-
response = requests.post(
'http://httpbin.org/post',files=files)
-
print(response.text)
获取cookie
-
import requests
-
-
response = requests.get(
'https://www.baidu.com')
-
print(response.cookies)
-
for key,value in response.cookies.items():
-
print(key +
'=' + value)
-
-
运行结果:
-
<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
-
BDORZ=27315
会话维持(模拟登录)
-
import requests
-
-
requests.get(
'http://httpbin.org/cookies/set/number/123456789')
#访问这个网址,让服务器为本次访问设置一个cookie
-
response = requests.get(
'http://httpbin.org/cookies')
#访问cookies,就可以拿到网站当前的cookies
-
print(response.text)
-
-
运行结果:
-
{
-
"cookies": {}
-
}
-
-
PS:这是错误的写法。两次requests.get是两次互不相关的请求,第一次请求设置的cookie不会带到第二次请求中,所以拿到的cookies为空;正确的做法是像下面这样使用Session来维持会话
-
import requests
-
-
s = requests.Session()
-
s.get(
'http://httpbin.org/cookies/set/number/123456789')
#在同一个会话中访问这个网址,让服务器设置一个cookie
-
response = s.get(
'http://httpbin.org/cookies')
#访问cookies,就可以拿到网站当前的cookies
-
print(response.text)
-
-
运行结果:
-
{
-
"cookies": {
-
"number":
"123456789"
-
}
-
}
证书验证
-
import requests
-
-
response = requests.get(
'https://www.12306.cn')
-
print(response.status_code)
-
-
PS:网站证书错误,程序无法访问网页
-
import requests
-
from requests.packages import urllib3
#导入这个包并执行下面这一句之后,就不再显示红色警告
-
urllib3.disable_warnings()
-
-
response = requests.get(
'https://www.12306.cn',verify=
False)
#添加一个verify,将证书验证的步骤省略即可正常访问页面
-
print(response.status_code)
-
-
PS:加上verify=False后就可以正常访问网页;如果不调用urllib3.disable_warnings(),虽然也能正常访问,但是会有一个红色的警告(InsecureRequestWarning)
-
import requests
-
-
response = requests.get(
'https://www.12306.cn',cert=(
'/path/server.crt',
'/path/key'))
-
print(response.status_code)
-
-
PS:另外可以手动指定一个证书(上面的证书是编的)
代理设置
-
import requests
-
-
proxies = {
-
'http':
'http://127.0.0.1:9743',
-
'https':
'http://127.0.0.1:9743',
-
}
-
-
response = requests.get(
'https://www.taobao.com',proxies=proxies)
-
print(response.status_code)
-
import requests
-
-
proxies = {
-
'http':
'http://user:password@127.0.0.1:19743/',
-
'https':
'http://user:password@127.0.0.1:19743/',
-
}
-
-
response = requests.get(
'https://www.taobao.com',proxies=proxies)
-
print(response.status_code)
-
-
PS:当代理有用户名和密码的时候用此方法
如果你的代理不是http、https,而是socks代理,需要先执行 pip3 install 'requests[socks]' 安装socks支持,然后才能使用该代理,代码如下:
-
import requests
-
-
proxies = {
-
'http':
'socks5://127.0.0.1:9742',
-
'https':
'socks5://127.0.0.1:9742',
-
}
-
-
response = requests.get(
'https://www.taobao.com',proxies=proxies)
-
print(response.status_code)
超时设置
-
import requests
-
-
response = requests.get(
'https://www.taobao.com',timeout =
1)
#在规定的时间内没有应答就抛出一个异常
-
print(response.status_code)
认证设置
-
import requests
-
from requests.auth import HTTPBasicAuth
-
-
r = requests.get(
'http://120.27.34.24:9001',auth=HTTPBasicAuth(
'user',
'123'))
-
print(r.status_code)
-
-
PS:遇到登录验证的网站使用此设置 HTTPBasicAuth里面是账号和密码
-
import requests
-
-
r = requests.get(
'http://120.27.34.24:9001',auth=(
'user',
'123'))
-
print(r.status_code)
-
-
PS:这是另一种方法,结果一样
异常处理
-
import requests
-
from requests.exceptions import ReadTimeout,HTTPError,ConnectionError,RequestException
-
-
try:
-
response = requests.get(
'http://httpbin.org/get',timeout =
0.5)
-
print(response.status_code)
-
except ReadTimeout:
-
print(
'Timeout')
-
except HTTPError:
-
print(
'Http error')
-
except ConnectionError:
#网络不通
-
print(
'Connection error')
-
except RequestException:
-
print(
'Error')