实现分析
1、通过selenium实现账号登录、点击下一个视频、点击弹窗。
2、使用requests+BeautifulSoup爬取每一节视频的长度和弹窗时间,然后通过python中的time库设置延迟(视频长度+10s),达到看课的效果。
3、对应第二次刷课,即课程非第一节,需要从上一次未看完的最后一节课程重新开始刷。(例如,上次刷到第7节课7分30秒,还没看完,然后关闭程序,下次打开程序,需要从第7节课0分开始重新看);
弹窗问题:
1、将爬取到的弹窗出现时间(可能存在多个)由时间格式转换为数字格式(秒),计算相邻弹窗之间的时间差,并设置相应的延迟。
2、第一次点击弹窗任意选项,然后确定,如果错误,会在弹窗左上角出现正确答案,爬取该答案用于第二次点击。
代码
1.引入库
import re
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
2.登录、选择所选课程
def login(web, school="学校名称", student_id="***********",
          password="***********", course_index=2):
    """Log in to zhihuishu.com and open one of the listed courses.

    Args:
        web: Selenium WebDriver that drives the browser.
        school: Text typed into the ``quickSearch`` school box.
        student_id: Value typed into the ``clCode`` field.
        password: Value typed into the ``clPassword`` field.
        course_index: 1-based ``div[...]`` position, under
            ``#sharingClassed``, of the course entry to click once the
            login has gone through.
    """
    web.get("https://passport.zhihuishu.com/login?service=https://onlineservice.zhihuishu.com/login/gologin")
    time.sleep(5)
    print("1")

    # find_element(By..., ...) is used instead of the find_element_by_*
    # shortcuts because the shortcuts no longer exist in Selenium >= 4.3.
    # presumably this switches the form to student-number login -- TODO confirm
    web.find_element(By.ID, "qStudentID").click()
    web.find_element(By.ID, "quickSearch").send_keys(school)
    # presumably the first <font> element is the search suggestion -- TODO confirm
    web.find_element(By.TAG_NAME, "font").click()
    web.find_element(By.ID, "clCode").send_keys(student_id)
    web.find_element(By.ID, "clPassword").send_keys(password)
    web.find_element(By.CLASS_NAME, "wall-sub-btn").click()

    # Long fixed pause before touching the course list.
    # NOTE(review): looks like it leaves time for the post-login page
    # (and any manual verification step) to finish -- confirm.
    time.sleep(30)

    course_xpath = (
        "//div[@id='sharingClassed']/div[%d]/ul/div/dl/dt/div" % course_index
    )
    web.find_element(By.XPATH, course_xpath).click()
3.获取当前观看的节数及该节对应的id
def get_list(web):
    """Return every ``video-<digits>`` id found in the chapter list markup.

    The current page source is parsed, all ``<div id="chapterList">``
    elements are rendered back to text, and that text is scanned for ids
    made of ``video-`` followed by at least two digits. Matches are
    returned in the order they occur.
    """
    soup = BeautifulSoup(web.page_source, "html.parser")
    chapter_markup = str(soup.find_all("div", id="chapterList"))
    id_pattern = re.compile(r'video-\d\d+')
    return id_pattern.findall(chapter_markup)
def _duration_to_seconds(text):
    """Convert an ``H:MM:SS``-style string to a number of seconds.

    The second and third ``:``-separated fields are always read as
    minutes and seconds. The first field is added as hours only when it
    is purely numeric, so any non-numeric leading text is ignored.
    """
    fields = text.split(":")
    total = int(fields[1]) * 60 + int(fields[2])
    hours = fields[0].strip()
    if hours.isdigit():
        total += int(hours) * 3600
    return total


def get_dict(web, video_num_dict, video_num_time, video_num_list):
    """Fill the lookup tables for every video id in ``video_num_list``.

    Args:
        web: Selenium WebDriver whose current page source is parsed.
        video_num_dict: Dict updated in place; maps the text of the
            ``<li>``'s ``span > b`` element to the video id.
        video_num_time: Dict updated in place; maps the video id to its
            duration in seconds, stored as a string.
        video_num_list: Iterable of ``<li>`` element ids to look up.
    """
    html = BeautifulSoup(web.page_source, "html.parser")
    for video_id in video_num_list:
        item = html.find("li", id=video_id)
        video_num_dict[item.span.b.text] = video_id
        # The duration is read from the eighth child node of the item's
        # first <div>.
        duration_text = item.div.contents[7].text
        video_num_time[video_id] = str(_duration_to_seconds(duration_text))
    print(video_num_dict)
def get_video_number(web):
    """Return the lesson number currently shown in ``span#lessonOrder``.

    The element text is split on "、" and the part before it is printed
    and returned.
    """
    # Pause *before* taking the snapshot: sleeping after page_source has
    # been read cannot give the page any extra time to render.
    time.sleep(3)
    html = BeautifulSoup(web.page_source, "html.parser")
    number = html.find("span", id="lessonOrder").text.split("、")[0]
    print(number)
    return number
4.点击观看视频、点击弹窗、看完点击下一个视频。
def _timenote_to_seconds(text):
    """Convert an ``H:MM:SS``-style ``timenote`` value to seconds.

    The second and third ``:``-separated fields are always read as
    minutes and seconds. The first field is added as hours only when it
    is purely numeric.
    """
    fields = text.split(":")
    total = int(fields[1]) * 60 + int(fields[2])
    hours = fields[0].strip()
    if hours.isdigit():
        total += int(hours) * 3600
    return total


def _answer_popup(web):
    """Pick the first option of the open quiz popup, then close it."""
    web.switch_to.frame(web.find_element(By.ID, "tmDialog_iframe"))
    try:
        web.find_elements(By.TAG_NAME, "label")[0].click()
        time.sleep(2)
    finally:
        # Always return to the top-level document; otherwise every later
        # lookup would keep searching inside the iframe.
        web.switch_to.default_content()
    time.sleep(2)
    print("正在关闭弹窗")
    print("AAA!!")
    web.find_element(
        By.XPATH,
        "//div[@class='wrap_popboxes tanti_popchapter']/div/div[2]/a/span",
    ).click()
    print("BBB!!")


def play_now_video(web, number, video_num_dict, video_num_time):
    """Play one video, answer its quiz popups, and wait until it ends.

    Args:
        web: Selenium WebDriver showing the course player page.
        number: Key into ``video_num_dict`` for the video to play.
        video_num_dict: Maps that key to the video's element id.
        video_num_time: Maps the element id to the video length in
            seconds (any value accepted by ``int``).

    Popup handling is best effort: if anything in it fails, the error is
    printed and the function falls back to waiting for the full video
    length.
    """
    video_id = video_num_dict[number]
    web.find_element(By.ID, video_id).click()  # (re)start this video
    time.sleep(3)
    html = BeautifulSoup(web.page_source, "html.parser")
    print("已点击")

    video_time = int(video_num_time[video_id])
    last_window_time = 0
    try:
        # Sorted ascending so the gaps between popups are never negative,
        # whatever order the markers have in the page.
        popup_times = sorted(
            _timenote_to_seconds(dot.attrs["timenote"])
            for dot in html.find_all("span", id="examDot_undefined")
        )
        if popup_times:
            # Delay before each popup, measured from the previous popup
            # (or from the start of the video for the first one).
            window_time = [
                later - earlier
                for earlier, later in zip([0] + popup_times, popup_times)
            ]
            print(window_time)
            print(video_time)
            print("正在等待弹窗!")
            for delay in window_time:
                time.sleep(delay + 5)
                _answer_popup(web)
            last_window_time = popup_times[-1]
        else:
            print("没有弹窗")
    except Exception as exc:
        # Unlike a bare ``except:``, this lets Ctrl-C (KeyboardInterrupt)
        # stop the script. last_window_time stays 0, so the full video
        # length is waited below.
        print("没有弹窗")
        print(repr(exc))

    # Clamp so time.sleep never receives a negative value.
    wait_time = max(video_time - last_window_time, 0) + 5
    print("结束弹窗,等待视频结束")
    time.sleep(wait_time)
    print("该视频观看结束")
def clear_old_video(number, video_num_dict):
    """Drop, in place, every entry that comes before ``number``.

    Entries are visited in the dict's iteration order and removed until
    the key equal to ``number`` is reached; that entry and all later
    ones are kept.

    If ``number`` is not a key at all, the dict is left untouched rather
    than emptied, so the caller still has videos to play.
    """
    if number not in video_num_dict:
        return
    # Iterate over a copy of the keys because the dict is mutated.
    for key in list(video_num_dict):
        if key == number:
            break
        del video_num_dict[key]
5.主方法调用
def main():
    """Log in, find the current lesson, and play it and all later ones.

    The browser session is always shut down on exit, including when a
    step raises, so no browser or driver process is left behind.
    """
    web = webdriver.Chrome()
    try:
        login(web)
        time.sleep(5)
        # NOTE: if a dialog covers the page at this point, it can be
        # dismissed with one of:
        #   web.find_element_by_xpath("//a[@class='popboxes_close tmui_txt_hidd']").click()
        #   web.find_element_by_class_name("popbtn_yes").click()
        number = get_video_number(web)
        video_num_dict = dict()  # lesson label -> video element id
        video_num_time = dict()  # video element id -> length in seconds
        video_num_list = get_list(web)
        time.sleep(3)
        get_dict(web, video_num_dict, video_num_time, video_num_list)
        # Skip the lessons that come before the current one.
        clear_old_video(number, video_num_dict)
        for i in video_num_dict:
            play_now_video(web, i, video_num_dict, video_num_time)
            print(i)
    finally:
        web.quit()