selenium很常见的一个用法:通过cookie跳过登陆和图片/短信/验证码验证等,直接处于登陆状态。
看似简单,但操作起来cookie却总有问题。
首先用豆瓣做测试,整个思路就是:
1. 先使用selenium加载一个webdriver,通过driver加载豆瓣首页,点击登陆进行登陆验证,登陆成功后会保存用户信息到cookie
2. 将浏览器cookie保存到本地,然后删除浏览器cookie,刷新页面,发现用户已经登出
3. 将本地cookie添加进浏览器cookie中,再次刷新页面,用户已经处于登陆状态
踩到的坑有:
1. 豆瓣登陆页面内嵌了一个iframe,因此要获取到这个frame中的元素,要先切换一下框架,用switch_to.frame()就可以
2. 从浏览器中取出的cookie格式为
[{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
......]
类似这种格式,有很多个cookie,不知道哪个才是新生成的,试着取第一个或者最后一个添加,都没有用,所以这种情况下只好全部添加进cookie,相当于原封不动地还原cookie
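3. 原封不动还原cookie又遇到了格式问题,取出cookie后写入文件"C:\Users\Lulu\Desktop\cookie.txt"中,是将cookies列表中的cookie一个个取出单独写入的,一个cookie为一行(每行是一个独立的JSON对象),格式如下:
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""}
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""}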
因此读出时必须一行行读取,然后每个cookie单独做添加处理
4. 此时又出现了问题,cookie的expiry字段总是在添加的时候报错“invalid argument: invalid 'expiry'”,这个错误是由于expiry格式不符合要求引起的,说格式不能为float格式,网上有人将expiry置为空“”,试了一下格式依然不对。所以我又抱着测试的心态把expiry都截成整数,虽然不报格式错误了,但不出所料,这样添加的cookie不能起到认证用户的作用,豆瓣依然是未登陆状态。
5. 到这里已经有点崩溃了,明明网上搜“cookie登陆”代码一大堆,大家的方法大同小异,为什么一样的代码我却不可以?我觉得根源还是出在expiry上,不知道是不是浏览器版本的原因,其他人很少有遇到invalid "expiry"这种错误。最后终于在stackoverflow上找到一个解决办法,stackoverflow的同道中人,最后一个回答,答主表示他通过删掉expiry这个字段解决了问题......这个方法虽然很那个,但尝试了一下果然可以......
完整代码如下,注释掉的是我走过的弯路
import json
from selenium import webdriver
from time import sleep
import pytest
class Test_selenimu1:
    """Douban demo: dump login cookies to a file, then replay them in a new session.

    ``test_cookies`` logs in and writes every browser cookie to a text file,
    one JSON object per line.  ``test_case2`` reads that file back and adds
    each cookie (minus its ``expiry`` field) to a fresh browser session.
    """

    def setup(self):
        """Start Chrome and open the Douban home page before each test."""
        self.driver = webdriver.Chrome(r"D:\Program Files\chromedriver.exe")
        self.driver.get(r"https://douban.com")

    # The skip marker's documented keyword is ``reason``; an unknown keyword
    # such as ``msg`` makes pytest fail while evaluating the marker.
    @pytest.mark.skip(reason="先不用")
    def test_cookies(self):
        """Log in to Douban and save all cookies, one JSON object per line."""
        driver = self.driver
        driver.implicitly_wait(10)
        # The login form lives inside an iframe, so switch into the first
        # iframe before looking up the form elements.
        driver.switch_to.frame(driver.find_elements_by_tag_name("iframe")[0])
        driver.find_element_by_xpath("/html/body/div[1]/div[1]/ul[1]/li[2]").click()
        driver.find_element_by_id("username").send_keys("***********")
        driver.find_element_by_id("password").send_keys("***********")
        # Wait 10 seconds so the login can be completed by hand.
        sleep(10)
        cookies = driver.get_cookies()
        with open(r"C:\Users\Lulu\Desktop\cookie.txt", 'w') as file1:
            for cookie in cookies:
                file1.write(json.dumps(cookie) + "\n")

    def test_case2(self):
        """Restore the saved cookies into a fresh session and reload the page."""
        driver = self.driver
        driver.get(r"https://douban.com")
        driver.implicitly_wait(10)
        with open(r"C:\Users\Lulu\Desktop\cookie.txt", 'r') as file2:
            for line in file2:
                line = line.strip()
                if not line:
                    # A blank line (e.g. trailing newline) is not a cookie;
                    # json.loads("") would raise.
                    continue
                cookie = json.loads(line)
                # Drop the expiry field before adding the cookie.
                cookie.pop("expiry", None)
                driver.add_cookie(cookie)
        # Abandoned earlier attempts: adding each parsed cookie unchanged
        # (expiry still present), and parsing only the first line of the file.
        driver.refresh()
        sleep(3)

    def teardown(self):
        """Close the browser after each test."""
        self.driver.quit()
遗留问题:从fiddler和浏览器控制台读到的cookie跟通过selenium接口取出来的cookie不一样,网上找到一篇文章https://www.jianshu.com/p/32e4a7cc2ddd,说这些格式不同的cookie是可以互相转化的,虽然还不知道要怎么转化|= =,以后再研究吧
顺带,又测试了一下百度和b站,这种删掉expiry的方法都行得通,反正也不是重要字段,只是过期时间就无所谓吧(删掉expiry后,添加进去的cookie会被当作会话cookie,浏览器关闭后即失效)
代码都扔上来备份吧
from selenium import webdriver
from time import sleep
import json
import pytest
class Test_bilibili:
    """Bilibili demo: log in, capture cookies, wipe them, then replay them.

    Everything happens inside one browser session; the captured cookies are
    also written to a JSON file as a backup.
    """

    def setup(self):
        """Launch Chrome and open the Bilibili login page before each test."""
        self.driver = webdriver.Chrome(r"D:\Program Files\chromedriver.exe")
        self.driver.get(r"https://passport.bilibili.com/login")

    def test_1(self):
        """Log in, save cookies, clear them, re-add them without ``expiry``."""
        browser = self.driver
        browser.implicitly_wait(10)

        # Fill in the credentials and submit the login form.
        browser.find_element_by_id("login-username").send_keys("**********")
        browser.find_element_by_id("login-passwd").send_keys("***********")
        browser.find_element_by_xpath('//li[@class="btn-box"]/a').click()
        sleep(10)

        # Capture the cookies and keep a copy on disk as one JSON array.
        saved = browser.get_cookies()
        with open(r"C:\Users\Lulu\Desktop\bilibili_cookies.txt", "w") as out:
            out.write(json.dumps(saved))

        # Abandoned earlier approach: rebuild every cookie as a new dict with
        # domain forced to "bilibili.com" and path "/", with expiry cast to
        # int or left out, and add that dict instead of the captured one.

        # Wipe the session and reload the page.
        browser.delete_all_cookies()
        browser.refresh()
        sleep(10)

        # Replay every captured cookie, minus its expiry field.
        for entry in saved:
            entry.pop("expiry", None)
            browser.add_cookie(entry)
        browser.refresh()
        sleep(5)

        # Click the "历史" link; presumably this confirms the session is
        # logged in again -- verify against the page.
        browser.find_element_by_link_text("历史").click()
        sleep(5)

    def teardown(self):
        """Shut the browser down after each test."""
        self.driver.quit()
from selenium import webdriver
from time import sleep
import json
import pytest
class TestBaidu:
    """Baidu demo: log in, capture cookies, wipe them, then replay them."""

    def setup(self):
        """Launch Chrome and open the Baidu home page before each test."""
        self.driver = webdriver.Chrome(r"D:\Program Files\chromedriver.exe")
        self.driver.get(r"https://www.baidu.com")

    def test_baidu_cookie(self):
        """Log in, clear the cookies, then re-add them without ``expiry``."""
        browser = self.driver
        browser.implicitly_wait(10)

        # Open the login dialog and switch to username/password login.
        browser.find_element_by_xpath("//*[@id='u1']/a[7]").click()
        browser.find_element_by_id("TANGRAM__PSP_10__footerULoginBtn").click()

        # Enter the credentials, pause, then submit.
        browser.find_element_by_id("TANGRAM__PSP_10__userName").send_keys("**********")
        browser.find_element_by_id("TANGRAM__PSP_10__password").send_keys("**********")
        sleep(5)
        browser.find_element_by_id("TANGRAM__PSP_10__submit").click()
        # Long pause after submitting; presumably time to finish any manual
        # verification step -- confirm.
        sleep(40)

        saved = browser.get_cookies()
        print(saved)

        # Wipe the session and reload the page.
        browser.delete_all_cookies()
        browser.refresh()
        sleep(5)

        # Replay every captured cookie, minus its expiry field.
        for entry in saved:
            entry.pop('expiry', None)
            browser.add_cookie(entry)
        browser.refresh()
        sleep(5)

    def teardown(self):
        """Shut the browser down after each test."""
        self.driver.quit()