1.1 requests库
1.1.1 包含的所有属性和方法(dir(requests) 的输出)
['ConnectTimeout', 'ConnectionError', 'DependencyWarning', 'FileModeWarning', 'HTTPError', 'NullHandler', 'PreparedRequest', 'ReadTimeout', 'Request', 'RequestException', 'RequestsDependencyWarning', 'Response', 'Session', 'Timeout', 'TooManyRedirects', 'URLRequired', '__author__', '__author_email__', '__build__', '__builtins__', '__cached__', '__cake__', '__copyright__', '__description__', '__doc__', '__file__', '__license__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__title__', '__url__', '__version__', '_internal_utils', 'adapters', 'api', 'auth', 'certs', 'chardet', 'check_compatibility', 'codes', 'compat', 'cookies', 'delete', 'exceptions', 'get', 'head', 'hooks', 'logging', 'models', 'options', 'packages', 'patch', 'post', 'put', 'request', 'session', 'sessions', 'status_codes', 'structures', 'urllib3', 'utils', 'warnings']
模拟不同的浏览器(可通过 headers 参数设置 User-Agent 请求头来实现)
1.1.2 get请求返回的内容
import requests
# Fetch a page and inspect the attributes exposed by the Response object.
# Trailing comments show the sample output recorded when these notes were written.
url = 'https://www.baidu.com'
con = requests.get(url)
print(dir(con))  # every attribute and method available on the Response
print(con.status_code)  # HTTP status code of the response
print(con.text)  # response body decoded to str
print(con.url)  # final URL of the response
print(con.request)  # <PreparedRequest [GET]>
print(con.reason)  # OK
print(con.raw)  # <urllib3.response.HTTPResponse object at 0x10742cac8>
# raise_for_status and json are methods: printing them without calling them
# shows the bound method object, not a result.
print(con.raise_for_status)  # <bound method Response.raise_for_status of <Response [200]>>
print(con.ok)  # True
print(con.next)  # None
print(con.links)  # {} when the response carries no Link header
print(con.json)  # <bound method Response.json of <Response [200]>>
con.headers 响应的头部信息
{'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform', 'Connection': 'Keep-Alive', 'Content-Encoding': 'gzip', 'Content-Type': 'text/html', 'Date': 'Thu, 15 Feb 2018 08:53:58 GMT', 'Last-Modified': 'Mon, 23 Jan 2017 13:23:56 GMT', 'Pragma': 'no-cache', 'Server': 'bfe/1.0.8.18', 'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/', 'Transfer-Encoding': 'chunked'}
1.1.3 带参数的get请求
import requests
# GET request whose query string is built from a dict passed as ``params``.
endpoint = 'http://vselsdo.top/dst/accen'
query = {'name': 'tom', 'addr': 'usa'}
response = requests.get(endpoint, params=query)
# URL recorded in the original notes: http://vselsdo.top/dst/accen/?name=tom&addr=usa
print(response.url)
# Parse the body as JSON once and reuse the result for both prints.
payload = response.json()
print(payload)
print(payload['obj'])  # the "obj" field of the parsed body
# response.text would give the raw body text instead of the parsed JSON.
1.1.4 带参数的post请求
import requests
# POST request that sends a dict as the form-encoded request body via ``data``.
login_url = 'http://127.0.0.1:3000/login'
form_fields = {'name': 'tom', 'addr': 'usa'}
response = requests.post(login_url, data=form_fields)
print(response.url)  # http://127.0.0.1:3000/login
# Parse the JSON body once, then show the whole document and its "data" field.
result = response.json()
print(result)
print(result['data'])
1.1.5 cookie处理
import requests
# Request the page and print the response headers; the sample output recorded
# below includes the Set-Cookie header the server sent.
response = requests.get('https://baidu.com')
print(response.headers)
{
'Server': 'bfe/1.0.8.18',
'Date': 'Thu, 15 Feb 2018 13:03:25 GMT',
'Content-Type': 'text/html',
'Last-Modified': 'Mon, 23 Jan 2017 13:27:36 GMT',
'Transfer-Encoding': 'chunked',
'Connection': 'Keep-Alive',
'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform',
'Pragma': 'no-cache',
'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/',
'Content-Encoding': 'gzip'
}
import requests
# Print the response headers, a separator, then every cookie the server set.
url = 'https://douban.com'
con = requests.get(url)
print(con.headers)
print('-----')
# The loop body must be indented to be valid Python. items() yields
# (name, value) pairs directly, so no second lookup by cookie name is needed.
for name, value in con.cookies.items():
    print(name, value)
{
'Date': 'Thu, 15 Feb 2018 13:29:11 GMT',
'Content-Type': 'text/html; charset=utf-8',
'Transfer-Encoding': 'chunked',
'Connection': 'keep-alive',
'Keep-Alive': 'timeout=30',
'Vary': 'Accept-Encoding',
'X-Xss-Protection': '1; mode=block',
'X-Douban-Mobileapp': '0',
'Expires': 'Sun, 1 Jan 2006 01:00:00 GMT',
'Pragma': 'no-cache',
'Cache-Control': 'must-revalidate, no-cache, private',
'Set-Cookie': 'll="118281"; path=/; domain=.douban.com; expires=Fri, 15-Feb-2019 13:29:11 GMT, bid=niaUp9lxmKI; Expires=Fri, 15-Feb-19 13:29:11 GMT; Domain=.douban.com; Path=/',
'X-DOUBAN-NEWBID': 'niaUp9lxmKI',
'X-DAE-Node': 'daisy4b',
'X-DAE-App': 'sns',
'Server': 'dae',
'Strict-Transport-Security': 'max-age=15552000;',
'Content-Encoding': 'gzip'
}
bid niaUp9lxmKI
ll "118281"
1.1.6 二进制内容处理
from io import BytesIO

import requests
from PIL import Image  # if `pip3 install PIL` fails, install Pillow instead

# Download an image and save a local copy as test.jpg.
image_url = 'https://2-im.guokr.com/H9d7zRsGDQ3runVrTnCPXtJMFUm2YuoAMQqLyvup_S0MAQAAyQAAAEpQ.jpg?imageView2/1/w/135/h/90'
response = requests.get(image_url)
# Fail fast on an HTTP error instead of handing an error page to Image.open.
response.raise_for_status()
# response.content holds the raw body bytes; BytesIO wraps them in a
# file-like object that Image.open can read.
image = Image.open(BytesIO(response.content))
image.save('test.jpg')