python中selenium应用实例
一个废物大学生的python笔记,共有三个实例:1.郑州某大学的打卡签到脚本2.某宝双十一的剁手脚本3.某省的答题脚本
#这几个脚本运行需要一点python基础(一点点就可以)
1,郑州某大学的打卡签到脚本
#采用解释器是python3
#采用的开发环境(IDE)是pycharm
(1)导入模块selenium
打开cmd:
快捷键:窗口键(win键)+R,打开“运行”窗口,输入“cmd”命令并且回车打开,
输入:pip install selenium
等待下载完成即可
(2)导入chromedriver
chromedriver的网址:http://npm.taobao.org/mirrors/chromedriver/ 。找到与自己的谷歌浏览器版本相适配的chromedriver,并且放入谷歌浏览器的scripts文件夹中,或者放入python的scripts中。
注:这里做的解释有点少,可以找一下相关的博客。
(3)代码编写
导入模块
#导入selenium模块
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
导入模块之后我们开始分析网页源码:
登录界面如上:可知这个没有验证码和滑块拖动那就十分简单了,输入账号,密码,点击登录即可
def f(x,y): # login routine: x is the account, y is the password
myusername = x # login account
mypassword = y # login password
driver = webdriver.Chrome() # start a Chrome session through chromedriver
driver.get("https://jksb.v.zzu.edu.cn/vls6sss/zzujksb.dll/first0") # open the login page
driver.maximize_window() # maximise the browser window
time.sleep(5) # fixed delay so the page can finish loading on a slow connection
# type the account and the password into their input boxes
driver.find_element_by_xpath("/html/body/form/div/div[2]/div[2]/div[3]/input").send_keys(myusername)
driver.find_element_by_xpath("/html/body/form/div/div[2]/div[3]/div[3]/input").send_keys(mypassword)
driver.find_element_by_xpath("/html/body/form/div/div[2]/div[5]/div/input").click()# simulate a click on the login button
time.sleep(2)
然后开始解析第二个页面中的填报按钮.click()模拟点击
# First attempt: click the report button on the second page directly.
driver.find_element_by_xpath('/html/body/form/div/div[13]/div[5]/div[4]').click()
但是当你运行时,程序提示找不到该按钮。这是因为,在网页制作时,作者使用了一个iframe框架,而这个按钮正在iframe内部,这时,我们需要先切换到这个iframe中
driver.switch_to.frame("zzj_top_6s") # switch into the frame named "zzj_top_6s" before locating the button
driver.find_element_by_xpath('/html/body/form/div/div[13]/div[5]/div[4]').click()
进入最后一个界面,并且模拟点击填报按钮
driver.find_element_by_xpath('/html/body/form/div[1]/div[11]/div[4]').click() # click the button on the last page
time.sleep(2)
print(myusername) # printed so the operator can see which account reached the end of the flow
driver.quit() # shut the driver down
总体模块如下:
def f(x, y):
    """Log in to the health-report site and submit today's report.

    Args:
        x: Login account.
        y: Login password.

    The account is printed after the last click. The browser is always
    closed, even when one of the steps raises.
    """
    # Function-scope imports keep this snippet self-contained.
    import time

    from selenium.webdriver.common.by import By

    myusername = x
    mypassword = y
    driver = webdriver.Chrome()
    try:
        driver.get("https://jksb.v.zzu.edu.cn/vls6sss/zzujksb.dll/first0")
        driver.maximize_window()
        time.sleep(5)  # fixed delay so the login page can finish loading
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.3.
        driver.find_element(
            By.XPATH, "/html/body/form/div/div[2]/div[2]/div[3]/input"
        ).send_keys(myusername)
        driver.find_element(
            By.XPATH, "/html/body/form/div/div[2]/div[3]/div[3]/input"
        ).send_keys(mypassword)
        driver.find_element(
            By.XPATH, "/html/body/form/div/div[2]/div[5]/div/input"
        ).click()  # log in
        time.sleep(2)
        # The following buttons are looked up inside this frame.
        driver.switch_to.frame("zzj_top_6s")
        driver.find_element(
            By.XPATH, '/html/body/form/div/div[13]/div[5]/div[4]'
        ).click()
        time.sleep(2)
        driver.find_element(
            By.XPATH, '/html/body/form/div[1]/div[11]/div[4]'
        ).click()
        time.sleep(2)
        print(myusername)
    finally:
        driver.quit()  # never leave a browser process behind
但是当我们填报完成的以后,需要一段检验是否填报完成的代码。
这部分借用了一位大佬的代码
def email(now=None):
    """Send the "health check-in done" confirmation mail through QQ SMTP.

    Args:
        now: Timestamp text appended to the mail body. When omitted, the
            current local time is used.

    Prints "发送失败" instead of raising when the mail cannot be delivered.
    """
    import datetime
    import smtplib
    from email.mime.text import MIMEText

    if now is None:
        now = str(datetime.datetime.now())

    msg_from = '********@qq.com'  # sender address
    passwd = '***********'  # SMTP authorization code of the sender mailbox
    msg_to = '1508339765@qq.com'  # recipient address
    subject = "健康打卡"  # mail subject
    content = "今日以健康打卡 %s" % now  # mail body
    msg = MIMEText(content)
    msg['Subject'] = subject
    msg['From'] = msg_from
    msg['To'] = msg_to
    try:
        # The context manager closes the connection on every path, so a
        # failed connect can no longer leave `s` unbound during cleanup.
        with smtplib.SMTP_SSL("smtp.qq.com", 465) as s:
            s.login(msg_from, passwd)
            s.sendmail(msg_from, msg_to, msg.as_string())
    except (smtplib.SMTPException, OSError):
        # OSError covers connection-level failures (DNS, refused, timeout).
        print( "发送失败")
最后一步:做一个定时器
import datetime
import time

# Daily window in which the check-in is triggered, compared as text against
# the time part of str(datetime.datetime.now()).
time1 = "00:40:00.0000"
time2 = "00:45:00.0000"

# Parallel lists so that one run can check in several people.
l1 = []  # accounts
l2 = []  # passwords, in the same order as l1

while True:
    now = str(datetime.datetime.now())
    now2 = now[11:]  # drop the "YYYY-MM-DD " prefix, keep the time part
    if time1 < now2 < time2:
        for account, password in zip(l1, l2):
            f(account, password)
        # One confirmation mail once every account has been processed.
        email()
        break
    # Poll every 240 s: shorter than the 5-minute window, so it is not missed.
    time.sleep(240)
总结以上代码:
from email.mime.text import MIMEText
def f(x, y):
    """Log in to the health-report site and submit today's report.

    Args:
        x: Login account.
        y: Login password.

    The account is printed after the last click. The browser is always
    closed, even when one of the steps raises.
    """
    # Function-scope imports: this script's import list only has MIMEText.
    import time

    from selenium import webdriver
    from selenium.webdriver.common.by import By

    myusername = x
    mypassword = y
    driver = webdriver.Chrome()
    try:
        driver.get("https://jksb.v.zzu.edu.cn/vls6sss/zzujksb.dll/first0")
        driver.maximize_window()
        time.sleep(5)  # fixed delay so the login page can finish loading
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.3.
        driver.find_element(
            By.XPATH, "/html/body/form/div/div[2]/div[2]/div[3]/input"
        ).send_keys(myusername)
        driver.find_element(
            By.XPATH, "/html/body/form/div/div[2]/div[3]/div[3]/input"
        ).send_keys(mypassword)
        driver.find_element(
            By.XPATH, "/html/body/form/div/div[2]/div[5]/div/input"
        ).click()  # log in
        time.sleep(2)
        # The following buttons are looked up inside this frame.
        driver.switch_to.frame("zzj_top_6s")
        driver.find_element(
            By.XPATH, '/html/body/form/div/div[13]/div[5]/div[4]'
        ).click()
        time.sleep(2)
        driver.find_element(
            By.XPATH, '/html/body/form/div[1]/div[11]/div[4]'
        ).click()
        time.sleep(2)
        print(myusername)
    finally:
        driver.quit()  # never leave a browser process behind
def email(now=None):
    """Send the "health check-in done" confirmation mail through QQ SMTP.

    Args:
        now: Timestamp text appended to the mail body. When omitted, the
            current local time is used.

    Prints "发送失败" instead of raising when the mail cannot be delivered.
    """
    import datetime
    import smtplib
    from email.mime.text import MIMEText

    if now is None:
        now = str(datetime.datetime.now())

    msg_from = '*********@qq.com'  # sender address
    passwd = '**********'  # SMTP authorization code of the sender mailbox
    msg_to = '1508339765@qq.com'  # recipient address
    subject = "健康打卡"  # mail subject
    content = "今日以健康打卡 %s" % now  # mail body
    msg = MIMEText(content)
    msg['Subject'] = subject
    msg['From'] = msg_from
    msg['To'] = msg_to
    try:
        # The context manager closes the connection on every path, so a
        # failed connect can no longer leave `s` unbound during cleanup.
        with smtplib.SMTP_SSL("smtp.qq.com", 465) as s:
            s.login(msg_from, passwd)
            s.sendmail(msg_from, msg_to, msg.as_string())
    except (smtplib.SMTPException, OSError):
        # OSError covers connection-level failures (DNS, refused, timeout).
        print( "发送失败")
import datetime
import time

# Daily window in which the check-in is triggered, compared as text against
# the time part of str(datetime.datetime.now()).
time1 = "00:40:00.0000"
time2 = "00:45:00.0000"

# Parallel lists: l1[i] is an account and l2[i] its password.
l1 = ["x", "x1", "x2"]
l2 = ["y", "y1", "y1"]

while True:
    now = str(datetime.datetime.now())
    now2 = now[11:]  # drop the "YYYY-MM-DD " prefix, keep the time part
    # Both bounds are checked; with the lower bound alone the loop would fire
    # at any time of day after 00:40.
    if time1 < now2 < time2:
        for account, password in zip(l1, l2):
            f(account, password)
        # One confirmation mail once every account has been processed.
        email()
        break
    # Poll every 240 s: shorter than the 5-minute window, so it is not missed.
    time.sleep(240)
2.某宝双十一的剁手脚本
就因为这个脚本博主冲动消费买了个华为watch2,哎,11月果真是一个吃土的一个月(苦涩)。
编写代码:
导入模块:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import requests
from datetime import datetime
登录代码:
import time
from datetime import datetime

from selenium.webdriver.common.by import By

# `browser` is expected to be an already started webdriver instance.
browser.get("https://www.taobao.com")  # open the home page
time.sleep(3)
# find_elements returns an empty list when the link is absent, whereas
# find_element raises, so this test can actually be false.
login_links = browser.find_elements(By.LINK_TEXT, "亲,请登录")
if login_links:
    login_links[0].click()
    print("15秒内完成扫码登录")
    time.sleep(15)  # time for the user to scan the QR code
browser.get("https://cart.taobao.com/cart.htm")  # go to the shopping cart
time.sleep(3)
now = datetime.now()
print('login success:', now.strftime('%Y-%m-%d %H:%M:%S'))  # formatted timestamp
建立购买函数:
def buy(times):
    """Wait until *times*, then select all cart items, settle and submit.

    Args:
        times: Target time as text in '%Y-%m-%d %H:%M:%S.%f' form. It is
            compared as a string with the current time in that same format.
            (The parameter was called ``time``, which shadowed the module
            while the body read ``times``.)

    Uses the module-level ``browser``; presumably the cart page is already
    open when this is called. Returns after the order has been submitted.
    """
    # Function-scope imports so the names below are the modules themselves.
    import datetime
    import time

    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    stamp = '%Y-%m-%d %H:%M:%S.%f'

    def click_until_done(xpath, retry_message):
        # Retry until the element exists and accepts the click. Only
        # WebDriver errors are retried, so Ctrl-C still stops the script.
        while True:
            try:
                browser.find_element(By.XPATH, xpath).click()
                return
            except WebDriverException:
                print(retry_message)

    # Poll the clock until the target time has passed.
    while datetime.datetime.now().strftime(stamp) <= times:
        time.sleep(0.01)  # can be lowered further on a fast machine

    # "Select all" checkbox of the cart.
    click_until_done(
        '/html/body/div[1]/div[2]/div[2]/div/div[2]/div[1]/div/div[1]/div/div/label',
        "fail",
    )
    # "Settle" button.
    click_until_done(
        '/html/body/div[1]/div[2]/div[2]/div/div[1]/div[1]/a',
        "try again",
    )
    print("success")
    # "Submit order" button on the confirmation page.
    click_until_done('//*[@id="submitOrderPC_1"]/div/a[2]', "try again")
    now1 = datetime.datetime.now().strftime(stamp)
    print("抢购成功:%s" % now1)
注:这里感叹一下某宝真是太好了,一个iframe都没设置
总结代码:
from selenium import webdriver
import datetime
import time
def login():
    """Open Taobao, let the user log in by QR code, then open the cart.

    Uses the module-level ``browser``. Prints a timestamp once the cart page
    has been requested.
    """
    from selenium.webdriver.common.by import By

    browser.get("https://www.taobao.com/?spm=a2e0b.20350158.1581860521.1.61d4468aFOInE5&pid=mm_26632258_3504122_32538762&union_lens=recoveryid%3A201_11.11.157.222_7363867_1605016281980%3Bprepvid%3A201_11.11.157.222_7363867_1605016281980&clk1=7a5b6b5aee71a0bb3fdd0c89f88f9132")
    time.sleep(3)
    # find_elements returns an empty list when the link is absent, whereas
    # find_element raises, so this test can actually be false.
    login_links = browser.find_elements(By.LINK_TEXT, "亲,请登录")
    if login_links:
        login_links[0].click()
        print("15秒内完成扫码")
        time.sleep(15)  # time for the user to scan the QR code
    browser.get("https://cart.taobao.com/cart.htm?spm=a21bo.2017.1997525049.1.600111d9idGUwV&from=mini&pm_id=1501036000a02c5c3739")
    time.sleep(3)
    now = datetime.datetime.now()
    print('login success:', now.strftime('%Y-%m-%d %H:%M:%S'))
def buy(times):
    """Wait until *times*, then select all cart items, settle and submit.

    Args:
        times: Target time as text in '%Y-%m-%d %H:%M:%S.%f' form. It is
            compared as a string with the current time in that same format.

    Uses the module-level ``browser``; presumably the cart page is already
    open when this is called. Returns after the order has been submitted.
    """
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    stamp = '%Y-%m-%d %H:%M:%S.%f'

    def click_until_done(xpath, retry_message):
        # Retry until the element exists and accepts the click. Only
        # WebDriver errors are retried, so Ctrl-C still stops the script.
        while True:
            try:
                browser.find_element(By.XPATH, xpath).click()
                return
            except WebDriverException:
                print(retry_message)

    # Poll the clock until the target time has passed.
    while datetime.datetime.now().strftime(stamp) <= times:
        time.sleep(0.01)

    # "Select all" checkbox of the cart.
    click_until_done(
        '/html/body/div[1]/div[2]/div[2]/div/div[2]/div[1]/div/div[1]/div/div/label',
        "fail",
    )
    # "Settle" button.
    click_until_done(
        '/html/body/div[1]/div[2]/div[2]/div/div[1]/div[1]/a',
        "try again",
    )
    print("success")
    # "Submit order" button on the confirmation page.
    click_until_done('//*[@id="submitOrderPC_1"]/div/a[2]', "try again")
    now1 = datetime.datetime.now().strftime(stamp)
    print("抢购成功:%s" % now1)
if __name__ == "__main__":
    # Entry point: read the target time, start Chrome, log in, then wait and
    # buy. `times` and `browser` are module-level names read by login()/buy().
    times = input("请输入抢购时间,格式如(2020-11-11 00:00:00.000000):")
    browser = webdriver.Chrome()
    browser.maximize_window()
    login()
    buy(times)
3.某省宗教的答题脚本
这个导入了一个新的模块:xlrd,这是一个专门处理excel的模块
导入模块:
from selenium import webdriver
import time
import xlrd
import re
这里我们考虑一个问题:题库的建立与答案的选择
对于题库,有人专门提供了一个,但需要进行处理
1.选项全部删除
2.取消科学计数法等隐藏格式
最后处理结果如图:
代码编写:
登录页面:
wb=xlrd.open_workbook("宗教知识竞赛题283题20181022(1)(1)(1).xlsx") # open the question bank that was tidied up into an Excel workbook
# NOTE(review): xlrd 2.0+ reads only .xls files - confirm the installed xlrd version can open this .xlsx.
ws=wb.sheet_by_name('建议格式') # select the worksheet named '建议格式'
browser = webdriver.Chrome()
browser.get("http://exam.hm86.cn/vip/login/login.php")
browser.maximize_window() # maximise the window
print("请在45秒内完成登录")
time.sleep(45) # the user logs in by hand during this pause
browser.get("http://exam.hm86.cn/web/front/study/examination.php?paperid=1000000001&usercodepaperid=M2T0A2w0M-D1F1f-M1T2AhwmMsDoEfxtMTQ0M18xMDAwMDAwMDAx") # open the exam page
循环遍历提取题目,并且通过比对题库找出答案
# First attempt: walk the table rows and look each question up in the bank.
for path in range(1,500):
xpath = '/html/body/div[2]/div[2]/div/table/tbody/tr[%d]/th/font' % path
try:
if browser.find_element_by_xpath(xpath):
element =browser.find_element_by_xpath(xpath)
question=str(element.text)
i=-1 # row counter for the worksheet column scanned below
for question_find in ws.col_values(2):
#print("hello")
i=i+1
# NOTE(review): question_find1 and question1 are not assigned anywhere in this snippet; if they are undefined, the NameError is swallowed by the bare except below.
if question_find1 == question1:
answer=ws.cell_value(i, 3) # the answer is read from column index 3 of the matching row
#choose(answer,path)
#browser.back()
#browser.refresh()
l.append(answer)
y.append(path)
else:
pass
except:
pass
但当你运行结束打印l列表时,发现什么也不打印,也就是说这个题库与网页源码中提取出的题目不太一样。对比网页源码中的题目与自己的题库时,会发现两者的标点符号有些不一样(有些是英文标点),这时,你可以选择只提取文字进行对比:
定义提取文字的函数:
def switch_word(x):
    """Return only the characters of *x* in the range U+4E00..U+9FA5.

    Everything else (punctuation, digits, Latin letters, whitespace) is
    removed, so two texts can be compared regardless of punctuation style.

    Args:
        x: Text to normalise. Non-string values, such as a float read from
            a spreadsheet cell, are converted with str() first instead of
            raising TypeError.

    Returns:
        The filtered string; empty when *x* has no character in that range.
    """
    strs = re.sub(u"([^\u4e00-\u9fa5])", "", str(x))
    return strs
补充:
pattern | Value |
---|---|
\u4e00-\u9fa5 | 汉字的unicode范围 |
\u0030-\u0039 | 数字的unicode范围 |
\u0041-\u005a | 大写字母unicode范围 |
\u0061-\u007a | 小写字母unicode范围 |
改正代码如下:
from selenium.webdriver.common.by import By

# Index the question bank once: normalised question text -> answer.
# The first row wins when the bank contains the same question twice, so a
# question on the page is answered exactly once.
answers_by_question = {}
for bank_row, bank_question in enumerate(ws.col_values(2)):
    bank_key = switch_word(str(bank_question))
    if bank_key:  # skip cells without any Chinese text
        answers_by_question.setdefault(bank_key, ws.cell_value(bank_row, 3))

l = []  # answers found, in page order
y = []  # table row of the question each answer belongs to
for path in range(1, 500):
    xpath = '/html/body/div[2]/div[2]/div/table/tbody/tr[%d]/th/font' % path
    # find_elements returns [] for rows without a question header, so no
    # exception has to be swallowed and real errors stay visible.
    headers = browser.find_elements(By.XPATH, xpath)
    if not headers:
        continue
    question1 = switch_word(str(headers[0].text))
    if question1 in answers_by_question:
        l.append(answers_by_question[question1])
        y.append(path)
选项的判断:
对于任何一道题,答案只可能是A,B,C,D,正确和错误这六种之一,所以这里加入一个判断函数就可以实现:
def judge(x, a):
    """Look up the input element of option *x* for the question at row *a*.

    The element is only located here; nothing is clicked. Values of *x*
    other than 'A', 'B', 'C', 'D', '正确' and '错误' are ignored.
    """
    # (answer label, number of rows between the question row and the option)
    row_steps = (
        ('A', 1),
        ('B', 2),
        ('C', 3),
        ('D', 4),
        ('正确', 1),
        ('错误', 2),
    )
    for label, step in row_steps:
        if x == label:
            path1 = a + step
            xpath1 = "/html/body/div[2]/div[2]/div/table/tbody/tr[" + str(path1) + "]/td[1]/input"
            c = browser.find_element_by_xpath(xpath1)
            break
但是,当你运行时,发现程序报错:找不到该元素。检查源码时,发现源码没有任何变化,仔细观察发现,每点击一下,页面会小小的刷新一下,这时element发生了变化,可以通过以下JavaScript点击的方式解决
def judge(x, a):
    """Tick the option for answer *x* of the question at table row *a*.

    Args:
        x: 'A', 'B', 'C', 'D', '正确' or '错误'. Any other value is ignored.
        a: XPath row index of the question's header row; the option rows
            are addressed relative to it.

    Uses the module-level ``browser``.
    """
    # Rows between the question row and the row holding each option.
    row_steps = {'A': 1, 'B': 2, 'C': 3, 'D': 4, '正确': 1, '错误': 2}
    step = row_steps.get(x)
    if step is None:
        return  # unknown answers stay a silent no-op, as before

    # Imported only once a click is actually needed. find_element(By.XPATH,
    # ...) replaces find_element_by_xpath, removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    path1 = a + step
    xpath1 = "/html/body/div[2]/div[2]/div/table/tbody/tr[" + str(path1) + "]/td[1]/input"
    c = browser.find_element(By.XPATH, xpath1)
    # Click through JavaScript instead of a native WebElement.click().
    browser.execute_script("arguments[0].click();", c)
最后就剩下区分单选题,多选题和判断题这一步了
代码如下:
# Replay every collected answer against the row of its question.
for idx, answer in enumerate(l):
    row = y[idx]
    size = len(answer)
    if size == 2:
        # A two-character answer may be the whole word '正确' or '错误', so
        # it is tried as a single value before being split into letters.
        judge(answer, row)
    if 1 <= size <= 4:
        # One call per character covers single and multiple choice answers.
        for letter in answer:
            judge(letter, row)
总结代码:
from selenium import webdriver
import time
import xlrd
import re
def switch_word(x):
    """Return only the characters of *x* in the range U+4E00..U+9FA5.

    Everything else (punctuation, digits, Latin letters, whitespace) is
    removed, so two texts can be compared regardless of punctuation style.

    Args:
        x: Text to normalise. Non-string values, such as a float read from
            a spreadsheet cell, are converted with str() first instead of
            raising TypeError.

    Returns:
        The filtered string; empty when *x* has no character in that range.
    """
    strs = re.sub(u"([^\u4e00-\u9fa5])", "", str(x))
    return strs
def judge(x, a):
    """Tick the option for answer *x* of the question at table row *a*.

    Args:
        x: 'A', 'B', 'C', 'D', '正确' or '错误'. Any other value is ignored.
        a: XPath row index of the question's header row; the option rows
            are addressed relative to it.

    Uses the module-level ``browser``.
    """
    # Rows between the question row and the row holding each option.
    row_steps = {'A': 1, 'B': 2, 'C': 3, 'D': 4, '正确': 1, '错误': 2}
    step = row_steps.get(x)
    if step is None:
        return  # unknown answers stay a silent no-op, as before

    # Imported only once a click is actually needed. find_element(By.XPATH,
    # ...) replaces find_element_by_xpath, removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    path1 = a + step
    xpath1 = "/html/body/div[2]/div[2]/div/table/tbody/tr[" + str(path1) + "]/td[1]/input"
    c = browser.find_element(By.XPATH, xpath1)
    # Click through JavaScript instead of a native WebElement.click().
    browser.execute_script("arguments[0].click();", c)
c=0 # reassigned to len(l) further down, before it is read
l=[] # answers collected from the question bank
y=[] # table row index of the question each answer belongs to
# punctuation pattern; not referenced by any of the lines shown in this script
r='[’!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n。!,]+[’!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~]+ '
# NOTE(review): xlrd 2.0+ reads only .xls files - confirm the installed xlrd version can open this .xlsx.
wb=xlrd.open_workbook("宗教知识竞赛题283题20181022(1)(1)(1).xlsx")
ws=wb.sheet_by_name('建议格式') # worksheet that holds the question bank
browser = webdriver.Chrome()
browser.get("http://exam.hm86.cn/vip/login/login.php")
browser.maximize_window()
time.sleep(45) # the user logs in by hand during this pause
browser.get("http://exam.hm86.cn/web/front/study/examination.php?paperid=1000000001&usercodepaperid=M2T0A2w0M-D1F1f-M1T2AhwmMsDoEfxtMTQ0M18xMDAwMDAwMDAx") # open the exam page
from selenium.webdriver.common.by import By

# Index the question bank once: normalised question text -> answer.
# The first row wins when the bank contains the same question twice, so a
# question on the page is answered exactly once.
answers_by_question = {}
for bank_row, bank_question in enumerate(ws.col_values(2)):
    bank_key = switch_word(str(bank_question))
    if bank_key:  # skip cells without any Chinese text
        answers_by_question.setdefault(bank_key, ws.cell_value(bank_row, 3))

for path in range(1, 500):
    xpath = '/html/body/div[2]/div[2]/div/table/tbody/tr[%d]/th/font' % path
    # find_elements returns [] for rows without a question header, so no
    # exception has to be swallowed and real errors stay visible.
    headers = browser.find_elements(By.XPATH, xpath)
    if not headers:
        continue
    question1 = switch_word(str(headers[0].text))
    if question1 in answers_by_question:
        l.append(answers_by_question[question1])
        y.append(path)
# Replay every collected answer against the row of its question.
for idx, answer in enumerate(l):
    row = y[idx]
    size = len(answer)
    if size == 2:
        # A two-character answer may be the whole word '正确' or '错误', so
        # it is tried as a single value before being split into letters.
        judge(answer, row)
    if 1 <= size <= 4:
        # One call per character covers single and multiple choice answers.
        for letter in answer:
            judge(letter, row)