爬虫 cookies 操作
requests
import requests
from requests.cookies import RequestsCookieJar

session = requests.session()
# Browser-style cookie dicts (e.g. as returned by selenium's get_cookies()).
driver_cookies = [{'domain': 'web.immomo.com', 'httpOnly': False, 'name': '111', 'path': '/', 'secure': False, 'value': '111'}]

# Add cookies: copy each cookie dict into a RequestsCookieJar, then attach
# the jar to the session so every subsequent request sends them.
cookies = RequestsCookieJar()
for item in driver_cookies:
    # NOTE: the loop body must be indented — the original note dropped it.
    cookies.set(name=item["name"], value=item["value"], domain=item["domain"])
session.cookies = cookies

# Read cookies back as a plain {name: value} dict.
session.cookies.get_dict()
urllib
import ssl
import urllib.parse
import urllib.request

# Request headers; fill in real values as needed.
headers = {
    "accept": "",
    "accept-encoding": "",
    "accept-language": "",
    "content-type": "",
    "origin": "",
    "user-agent": "",
}

# Browser-style cookie dicts (e.g. as returned by selenium's get_cookies()).
driver_cookies = [{'domain': 'web.immomo.com', 'httpOnly': False, 'name': '111', 'path': '/', 'secure': False, 'value': '111'}]

# Add cookies: the Cookie header format is "name=value; name=value".
# (The original note concatenated onto an uninitialized `cookie` variable
# and dropped the "=" between name and value.)
cookie = "; ".join(f'{c["name"]}={c["value"]}' for c in driver_cookies)
headers["cookie"] = cookie

param = {"param": {"key": "value"}}
url = "https://web.immomo.com/"  # placeholder target URL

# urlencode takes the mapping as a positional argument; the result is a str
# and must be encoded to bytes before being used as a POST body.
data = urllib.parse.urlencode(param).encode("utf-8")
req = urllib.request.Request(url=url, data=data, headers=headers, method="POST")
# PROTOCOL_TLS replaces the deprecated PROTOCOL_SSLv23 alias; a bare
# SSLContext like this performs no certificate verification — TODO confirm
# that is intentional for this target.
response = urllib.request.urlopen(req, context=ssl.SSLContext(ssl.PROTOCOL_TLS))
selenium
from selenium import webdriver

opt = webdriver.ChromeOptions()
# Selenium 4 resolves the driver binary automatically (Selenium Manager);
# `executable_path` was removed. To point at a custom chromedriver, pass
# service=Service(path) instead. (The original note also used an
# undefined `path` variable.)
driver = webdriver.Chrome(options=opt)

# Read all cookies from the current browser session as a list of dicts.
driver_cookies = driver.get_cookies()

cookies = [{'domain': 'web.immomo.com', 'httpOnly': False, 'name': '111', 'path': '/', 'secure': False, 'value': '111'}]
# Add cookies one by one; the cookie's domain must match the page the
# driver is currently on, or Chrome rejects it.
for cookie in cookies:
    driver.add_cookie(cookie_dict=cookie)

# Delete every cookie in the session.
driver.delete_all_cookies()
scrapy
三种方式
-
settings.py
当COOKIES_ENABLED是注释的时候scrapy默认没有开启cookie
当COOKIES_ENABLED没有注释设置为False的时候scrapy默认使用了settings里面的cookie
当COOKIES_ENABLED设置为True的时候scrapy就会把settings的cookie关掉,使用自定义cookie
-
将 settings.py 中的 COOKIES_ENABLED = False 解注释
在 DEFAULT_REQUEST_HEADERS 中添加 cookie
-
将 settings.py 中的 COOKIES_ENABLED = False 改为 True
需要在 请求中加入 cookie
-
-
在中间键 middlewares.py 中的 DownloaderMiddleware 中添加 cookie
在 DownloaderMiddleware 类的 process_request() 方法中添加 cookie
request.cookies = { "key1": "value", "key2": "value" }
-
重写 start_requests() 方法
class GaokaoSpider(scrapy.Spider): def start_requests(self): cookie = { "key1": "value", "key2": "value" } yield scrapy.Request(url=url, headers=headers, cookies=cookie, callback=self.parse)
将 settings.py 中的 COOKIES_ENABLED 设置为 True