requests模块
用于发送 HTTP 请求的模块,常用于网页数据抓取
七个主要方法
请求页面
r = requests.get('https://api.github.com/events')  # 不传参
# POST 请求用 data 作为参数名
r = requests.post('https://httpbin.org/post', data = {'key':'value'})
这两种是最常见的,其它方法不太常用
>>> r = requests.put('https://httpbin.org/put', data = {'key':'value'})
>>> r = requests.delete('https://httpbin.org/delete')
>>> r = requests.head('https://httpbin.org/get')
>>> r = requests.options('https://httpbin.org/get')
传参一个字典
payload = {'key1': 'value1', 'key2': 'value2'}
r = requests.get('https://httpbin.org/get', params=payload)
payload = {'key1': 'value1', 'key2': ['value2', 'value3']}
r = requests.get('https://httpbin.org/get', params=payload)
print(r.url)
结果:key2 作为后两个值共同的键,各参数之间用 & 连接
https://httpbin.org/get?key1=value1&key2=value2&key2=value3
应答 Response
r = requests.get('https://api.github.com/events')
print(r.text)  # 网页对应的内容
print(r.encoding)  # 编码格式
print(r.json())  # 取得返回的json数据
返回原始套接字内容,要在请求的时候加 stream=True
r = requests.get('https://api.github.com/events', stream=True)
r.raw #
r.raw.read(10) #'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03'
保存到文件
with open(filename, 'wb') as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
自定义 Headers
get 请求的时候用 headers 作为参数名
url = 'https://api.github.com/some/endpoint'
headers = {'user-agent': 'my-app/0.0.1'}
r = requests.get(url, headers=headers)
传递json编码的数据
先导入 json 模块,然后用 json.dumps 转换字典数据
import json
url = 'https://api.github.com/some/endpoint'
payload = {'some': 'data'}
r = requests.post(url, data=json.dumps(payload))
上传文件
用 files 作为参数名。
url = 'https://httpbin.org/post'
files = {'file': open('report.xls', 'rb')}
r = requests.post(url, files=files)
r.text
cookie 的应用
url = 'http://example.com/some/cookie/setting/url'
r = requests.get(url)
print(r.cookies['key'])
发送cookie到服务器
url = 'https://httpbin.org/cookies'
cookies = dict(cookies_are='working')
r = requests.get(url, cookies=cookies)
print(r.text)
增加cookie
>>> jar = requests.cookies.RequestsCookieJar()
>>> jar.set('tasty_cookie', 'yum', domain='httpbin.org', path='/cookies')
>>> jar.set('gross_cookie', 'blech', domain='httpbin.org', path='/elsewhere')
>>> url = 'https://httpbin.org/cookies'
>>> r = requests.get(url, cookies=jar)
>>> r.text
#'{"cookies": {"tasty_cookie": "yum"}}'
r.history 返回重定向历史的列表(列表中的每一项是一个 Response 对象)
python中的**
(**)将接收到的关键字参数存入一个字典
def foo(**kwargs):
for key, value in kwargs.items():
print("%s=%s" % (key, value))
if __name__ == '__main__':
foo(a=1, b=2, c=3)
输出结果:
a=1
b=2
c=3
python中的*
(*)将任意数量的位置参数收集到一个元组中
def foo(*kwargs):
print(kwargs)
if __name__ == '__main__':
foo(1,3)
输出结果:(1, 3)
Python中的各种括号
{}表示字典数据类型
if __name__ == '__main__':
dic = {'jon': 'boy', 'lili"': 'girl'}
print(dic)
结果为:
{'jon': 'boy', 'lili"': 'girl'}
[]表示列表,是可变序列;()表示元组,是不可变序列