1. 下载库
pip install requests==2.24.0
2. urllib基本使用
- 构造请求
import urllib.request
request = urllib.request.Request("http://www.baidu.com")
- 发送请求获取响应
response = urllib.request.urlopen(request)
- 传入headers参数
headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"}
request = urllib.request.Request(url, headers=headers)
- data参数【post请求】
import urllib.request
import urllib.parse
url = 'http://www.baidu.com'
headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"}
data = {
'a': '11'
}
data = urllib.parse.urlencode(data).encode('utf-8')
req = urllib.request.Request(url, data=data, headers=headers)
res = urllib.request.urlopen(req)
data = res.read().decode('utf-8')
print(data)
- 获取响应
res = urllib.request.urlopen("http://www.baidu.com")
res.read()
3. requests基本使用
3.1 response 常用属性
属性 | 含义 |
---|---|
response.text | 响应体 str类型 |
response.encoding | 从HTTP header中猜测的响应内容的编码方式 |
response.content | 响应体 bytes类型 |
response.status_code | 响应状态码 |
response.request.headers | 响应对应的请求头 |
response.headers | 响应头 |
response.cookies | 响应的cookie(经过了set-cookie动作) |
response.url | 获取访问的url |
response.json() | 获取json数据 得到内容为字典 (如果接口响应体的格式是json格式时) |
response.ok | 如果status_code小于400,response.ok返回True;如果status_code大于等于400(4xx客户端错误或5xx服务端错误),response.ok返回False。 |
3.2 基本使用
- 发送请求
import requests
url = 'https://www.baidu.com'
response = requests.get(url)
- 获取响应内容
- 图片保存
import requests
url = 'https://www.baidu.com/a.png'
response = requests.get(url)
with open('a.png', 'wb') as f:
    f.write(response.content)
- 获取响应对应的请求头信息
response.request.headers
- 添加header
headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"}
requests.get(url, headers=headers)
- 添加cookies
- 方式一
headers = {
"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
"Cookie":" Pycharm-26c2d973=dbb9b300-2483-478f-9f5a-16ca4580177e; "
}
requests.get(url, headers=headers)
- 方式二
cookies = {
"name1":"value1",
"name2":"value2"
}
headers = {
"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
}
requests.get(url, headers=headers,cookies=cookies)
- 获取响应cookie字典
response = requests.get(url)
cookies = requests.utils.dict_from_cookiejar(response.cookies)
print(cookies)
- 添加请求参数【get请求】
params = {'name':'你'}
requests.get(url,params=params)
- 添加请求参数【post请求】
data= {'name':'你'}
response = requests.post("http://www.baidu.com/", data = data)
data_resp=response.json()
- session请求
session = requests.session()
response = session.get(url, headers=headers)
- 添加代理ip
- 单个代理
proxies = {
"http": "http://12.34.56.79:9517",
"https": "https://12.34.56.79:9517",
}
requests.get("http://www.baidu.com", proxies = proxies)
- 随机获取代理
import random
proxy = [
{'http':'http://221.178.232.130:8080'},
{'http':'http://221.178.232.130:8080'}
]
proxy = random.choice(proxy)
requests.get("http://www.baidu.com", proxies = proxy )
- 设置证书(verify=False 表示跳过SSL证书验证)
response = requests.get(url, verify=False)
- 设置超时参数
response = requests.get(url,timeout=3)
- 关闭请求
response.close()