Python 爬取CCTV视频

# !/usr/bin/env python
# -*-coding:utf-8-*-
# date :2021/8/5 15:02
# author:Sabo
# CCTV官网:https://tv.cctv.com/index.shtml

from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from os import system

def getvideoLinks(htmlSource):
    """Collect the link and title of every anchor in the page's result list.

    The function locates the first ``<div>`` carrying the class ``lcon``,
    takes the next ``<ul>`` that follows it, and reads the ``href`` and
    ``title`` attributes of each ``<a>`` inside that list.

    Args:
        htmlSource: HTML markup of the page, as text.

    Returns:
        A ``(links, titles)`` tuple of two index-aligned lists. An entry is
        ``None`` when the anchor lacks the corresponding attribute. Both
        lists are empty when the ``lcon`` container or its following
        ``<ul>`` is not present in the markup.
    """
    videoLinksList = []
    videoTitleList = []
    mainPage = BS(htmlSource, "html.parser")

    # find() and find_next() return None on a miss; guard both so that an
    # unexpected page layout yields empty results instead of AttributeError.
    container = mainPage.find("div", attrs={"class": "lcon"})
    resultList = container.find_next("ul") if container is not None else None
    if resultList is None:
        return videoLinksList, videoTitleList

    # One pass keeps the two lists aligned by construction.
    for aTag in resultList.find_all("a"):
        videoLinksList.append(aTag.get("href"))
        videoTitleList.append(aTag.get("title"))
    return videoLinksList, videoTitleList


def switchToNowWindow(driver):
    """Focus the last window handle reported by ``driver``.

    Args:
        driver: Object exposing ``window_handles`` and ``switch_to.window``.

    Returns:
        The same ``driver`` object, after switching.
    """
    newestHandle = driver.window_handles[-1]
    driver.switch_to.window(newestHandle)
    return driver

def goToMainUrl(dstUrl, videoName):
    """Open ``dstUrl`` in Chrome, search for ``videoName`` and return the HTML.

    A Chrome session is started, ``videoName`` followed by ENTER is typed
    into the element whose id is ``mytxtdafdfasdf``, the most recently
    opened window is focused, its URL is printed and its page source is
    returned. The browser is always closed before the function returns.

    Args:
        dstUrl: URL of the page that hosts the search box.
        videoName: Text to type into the search box.

    Returns:
        The page source of the window that is focused after the search.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(dstUrl)
        driver.implicitly_wait(3)
        driver.maximize_window()
        driver = switchToNowWindow(driver)
        # find_element(by, value) exists in Selenium 3 and 4, whereas the
        # find_element_by_id shortcut was removed in Selenium 4.3.
        # "id" is the locator strategy string that By.ID stands for.
        searchBox = driver.find_element("id", "mytxtdafdfasdf")
        searchBox.send_keys(videoName, Keys.ENTER)
        # NOTE(review): presumably the search opens a new tab, hence the
        # second switch to the newest handle - confirm against the site.
        driver = switchToNowWindow(driver)
        print(driver.current_url)
        return driver.page_source
    finally:
        # Release the browser and its driver process even when a step fails.
        driver.quit()

def download(savePath, videoName, videoUrl):
    """Download one video by running the ``you-get`` command-line tool.

    The command line is printed first, then ``you-get`` is executed with
    ``-o savePath -O videoName videoUrl``. Arguments are passed as a list
    without going through a shell, so spaces or shell metacharacters in a
    title, path or URL cannot split or inject commands.

    Args:
        savePath: Output directory handed to ``you-get -o``.
        videoName: Output file name handed to ``you-get -O``.
        videoUrl: Address of the video page to download.
    """
    # Imported here so this function does not depend on the file's import block.
    import subprocess

    commond = 'you-get -o {0} -O {1} "{2}"'.format(savePath, videoName, videoUrl)
    print(commond)
    # str() mirrors the formatting above for non-string values such as None.
    argv = ["you-get", "-o", str(savePath), "-O", str(videoName), str(videoUrl)]
    try:
        # The exit status is deliberately not checked: one failed download
        # should not abort the caller, matching the previous behaviour.
        subprocess.run(argv)
    except FileNotFoundError:
        print("you-get was not found on PATH; is it installed?")


def downloadAll(savePath, videoLinks, videoTitle):
    """Download every video, pairing each link with the title at the same index.

    Args:
        savePath: Output directory passed to ``download`` for every video.
        videoLinks: Video page URLs.
        videoTitle: Titles, index-aligned with ``videoLinks``.

    If the two sequences differ in length, the surplus entries of the longer
    one are ignored.
    """
    for link, title in zip(videoLinks, videoTitle):
        download(savePath=savePath, videoName=title, videoUrl=link)


def formatvideoTitle(videoTitles):
    """Replace every space in each title with a hyphen, in place.

    Args:
        videoTitles: List of title strings; it is modified in place.

    Returns:
        The same list object that was passed in.
    """
    for position, title in enumerate(videoTitles):
        videoTitles[position] = title.replace(" ", "-")
    return videoTitles


def main(videoName, savePath):
    """Search for ``videoName`` on the CCTV site and download every result.

    Args:
        videoName: Search text; it is also appended to ``savePath`` to form
            the output directory.
        savePath: Prefix of the output directory.
    """
    # Home page of the CCTV site, which hosts the search box.
    homeUrl = "https://tv.cctv.com/index.shtml"
    targetDir = savePath + videoName

    html = goToMainUrl(dstUrl=homeUrl, videoName=videoName)

    # Extract links and titles from the page, then normalise the titles.
    links, titles = getvideoLinks(htmlSource=html)
    titles = formatvideoTitle(videoTitles=titles)

    downloadAll(savePath=targetDir, videoLinks=links, videoTitle=titles)
    
if __name__ == '__main__':
    # Ask for the search text and for the drive to save to (e.g. "D:").
    videoName = input("请输入你想下载的视频名字:")
    savePath = input("请输入你要保存的盘符名字(例如D:或者F:等)") + "/"
    # Echo the resulting path prefix before starting.
    print(savePath)
    main(videoName=videoName, savePath=savePath)

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值