石墨文档网页视频链接批量保存到百度网盘

1.打开网页,批量保存

其中的 XPath 可能会随页面改版而失效,届时请自行调试并更新。

from selenium import webdriver
from time import sleep
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
import time
import traceback
import logging

# Batch-save script: log in to Shimo, open one folder tile on the workbench,
# collect every Baidu Pan share link from the page source and save each
# shared file into the logged-in Baidu Pan account. Links that cannot be
# saved are written to an error log and skipped.


def _report_failure(log_file, url, index, error):
    """Write one failed link to the error log and echo it to stdout.

    Must be called from inside an ``except`` block so that
    ``traceback.format_exc()`` can see the active exception.
    """
    report = "链接为%s的第%d个视频保存失败!,错误信息为%s %s\n" % (
        url, index, error, traceback.format_exc())
    log_file.write(report)
    print(report)


# Absolute XPaths of the folder tiles on the Shimo workbench page, keyed by a
# short folder tag. These break whenever the page layout changes.
xpathDict = {
    "jpgc": '//*[@id="root"]/div/div[2]/div[2]/div/div[1]/div/section/div/div/div/div[1]/div/div/div[1]/div[2]/a/div[2]/div[1]/div/div',
    "jprb": '//*[@id="root"]/div/div[2]/div[2]/div/div[1]/div/section/div/div/div/div[1]/div/div/div[1]/div[3]/a/div[2]/div[1]/div/div',
    "jphg": '//*[@id="root"]/div/div[2]/div[2]/div/div[1]/div/section/div/div/div/div[1]/div/div/div[1]/div[2]/a/div[2]/div[1]/div/div',
}

# XPaths of the folder-tree nodes clicked inside the Baidu Pan "save to"
# dialog, in click order. Built once because it does not change per link.
selectTree = {
    'movie': '//div[2]/div/ul/li/ul/li[6]/div/span/span',
    'jlfq': '//div[2]/div/ul/li/ul/li[6]/ul/li[9]/div/span/span',
    'gc': '//div[2]/div/ul/li/ul/li[6]/ul/li[9]/ul/li/div/span/span',
    'rb': '//div[2]/div/ul/li/ul/li[6]/ul/li[9]/ul/li[2]/div/span/span',
    'hg': '//div[2]/div/ul/li/ul/li[6]/ul/li[9]/ul/li[2]/div',
}

# Explicit-wait timeout and polling interval, both in seconds.
waitTime, frequency = 10, 1

# The context manager guarantees the error log is flushed and closed even if
# the run aborts half-way.
with open("C:\\yuyang2\\桌面\\yyDownloadError.log", 'w+', encoding='utf-8') as f:
    driver = webdriver.Chrome("C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe")
    try:
        driver.get("https://shimo.im/login?from=home")
        driver.implicitly_wait(15)  # implicit wait: up to 15 s per element lookup
        wait = WebDriverWait(driver, waitTime, frequency)
        tree_wait = WebDriverWait(driver, 10)  # default polling interval

        # Log in by scanning the WeChat QR code, then wait for the redirect.
        driver.find_element(By.CLASS_NAME, "wechat").click()
        wait.until(EC.url_to_be("https://shimo.im/desktop"))
        driver.find_element(By.LINK_TEXT, "工作台").click()

        # Open the folder tile selected by the 'jphg' key; change the key to
        # process a different folder.
        jpgc_ele = wait.until(EC.presence_of_element_located((By.XPATH, xpathDict['jphg'])))
        jpgc_ele.click()

        # Pull every Baidu Pan share link out of the page's HTML.
        page_url = driver.page_source
        url_list = re.findall('href="(https://pan.baidu.com.*?)"', page_url)

        # Track the Baidu login explicitly instead of keying it on i == 0:
        # if the first link fails before the login step, a later link must
        # still trigger the login.
        baidu_logged_in = False

        for i, url in enumerate(url_list):
            print("正在保存第%d个视频!" % (i))
            try:
                driver.get(url)
                time.sleep(0.5)
                # Enter the extraction code and open the share.
                wait.until(EC.presence_of_element_located((By.ID, "accessCode"))).send_keys("WJSY")
                driver.find_element(By.LINK_TEXT, "提取文件").click()
                time.sleep(0.1)
                # Tick the shared item, then request saving it to the net disk.
                wait.until(EC.presence_of_element_located((By.CLASS_NAME, "EOGexf"))).click()
                driver.find_element(By.LINK_TEXT, "保存到网盘").click()

                if not baidu_logged_in:
                    # The first save attempt pops up the Baidu login: switch
                    # to QR-code login and wait until the user name shows up.
                    wait.until(EC.presence_of_element_located((By.ID, "TANGRAM__PSP_11__footerQrcodeBtn"))).click()
                    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "user-name")))
                    baidu_logged_in = True
                    # After logging in, the item has to be selected and the
                    # save requested again.
                    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "EOGexf"))).click()
                    driver.find_element(By.LINK_TEXT, "保存到网盘").click()

                time.sleep(1)
                # Move the mouse onto the folder-picker dialog.
                user_info_ele = driver.find_element(By.ID, "fileTreeDialog")
                ActionChains(driver).move_to_element(user_info_ele).perform()

                # Walk down the folder tree to the destination folder.
                for k, xpath in selectTree.items():
                    # These two entries are skipped; per the original author's
                    # note their folders had already been processed.
                    if k in ('gc', 'rb'):
                        continue
                    tree_wait.until(EC.presence_of_element_located((By.XPATH, xpath))).click()

                # Confirm the dialog and wait for the success tip.
                driver.find_element(
                    By.CSS_SELECTOR,
                    "#fileTreeDialog > div.dialog-footer.g-clearfix > a.g-button.g-button-blue-large > span > span",
                ).click()
                wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'trans-tip-view')))
            except Exception as e:
                # One broken link must not abort the batch: log it, move on.
                _report_failure(f, url, i, e)
                continue
    finally:
        # Always shut the browser down, including after an unexpected error.
        driver.quit()

2. 后处理

对于被禁链接的 URL 列表,从石墨文档网页中找出每个 URL 对应的名字,并按“名字:  URL”的格式重新保存到文件。

from selenium import webdriver
from time import sleep
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
import time
import traceback

# Post-processing script: re-open the Shimo folder page, map every Baidu Pan
# share link to the title printed next to it, then annotate the list of
# banned URLs with those titles.

# Absolute XPath of the folder tile to open on the Shimo workbench page.
xpathDict = {}
xpathDict['jphg'] = '//*[@id="root"]/div/div[2]/div[2]/div/div[1]/div/section/div/div/div/div[1]/div/div/div[1]/div[2]/a/div[2]/div[1]/div/div'

# Explicit-wait timeout and polling interval, both in seconds.
waitTime, frequency = 10, 1

driver = webdriver.Chrome("C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe")
try:
    driver.get("https://shimo.im/login?from=home")
    driver.implicitly_wait(15)  # implicit wait: up to 15 s per element lookup

    # Log in by scanning the WeChat QR code, then wait for the redirect.
    driver.find_element(By.CLASS_NAME, "wechat").click()
    WebDriverWait(driver, waitTime, frequency).until(EC.url_to_be("https://shimo.im/desktop"))
    driver.find_element(By.LINK_TEXT, "工作台").click()

    # Open the folder tile and grab the rendered HTML.
    jpgc_ele = WebDriverWait(driver, waitTime, frequency).until(
        EC.presence_of_element_located((By.XPATH, xpathDict['jphg'])))
    jpgc_ele.click()
    page_url = driver.page_source
finally:
    # The browser is only needed for the page source; always close it.
    driver.quit()

url_list = re.findall('href="(https://pan.baidu.com.*?)"', page_url)
# Raw string so the \S / \s escapes reach the regex engine unchanged and do
# not trigger Python's invalid-escape-sequence warning.
name_list = re.findall(r'(【\S+\s*\S*\s*\S*\s*\S*】\s*\S*)链接:*<', page_url)

# Names and URLs are paired by position, so a length mismatch would attach
# titles to the wrong links. Raise explicitly: an assert is stripped under -O.
if len(url_list) != len(name_list):
    raise ValueError(
        "found %d share links but %d titles; the page layout or the title "
        "pattern probably changed" % (len(url_list), len(name_list)))

# Share URL -> title.
selectDict = dict(zip(url_list, name_list))

# Build the annotated list: the section header line is copied as-is, every
# known URL becomes a "title:  url" line, anything else is dropped.
save_lines = []
with open("C:\\yuyang2\\桌面\\橘片被禁列表.txt", 'r', encoding='utf-8') as f:
    line_list = f.read().splitlines()
for line in line_list:
    if line == "欧美:":
        save_lines.append(line + '\n')
    if line in selectDict:
        save_lines.append(selectDict[line] + ":  " + line + '\n')
save_str = ''.join(save_lines)

# Append, so results from earlier runs are kept.
with open("C:\\yuyang2\\桌面\\橘片被禁列表_yy.txt", 'a+', encoding='utf-8') as f:
    f.write(save_str)

注:如果在第 1 步记录失败信息时把名字和 URL 一起输出,就不需要这一步后处理了。

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值