Python 爬取CCTV视频

最新推荐文章于 2023-01-25 19:40:32 发布

垂柳向晚伴残笛

最新推荐文章于 2023-01-25 19:40:32 发布

阅读量2.1k

点赞数

分类专栏： Python学习记录

本文链接：https://blog.csdn.net/csdnsabo/article/details/119539118

版权

Python学习记录专栏收录该内容

26 篇文章 0 订阅

订阅专栏

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Date: 2021/8/5 15:02
# Author: Sabo
# CCTV official site: https://tv.cctv.com/index.shtml

from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from os import system

def getvideoLinks(htmlSource):
    """Extract video links and titles from a page's HTML.

    The page is parsed with BeautifulSoup; the first ``<div class="lcon">``
    is located, and every ``<a>`` inside the first ``<ul>`` that follows it
    is collected.

    Args:
        htmlSource: HTML text of the page to parse.

    Returns:
        A ``(videoLinksList, videoTitleList)`` tuple of two lists of equal
        length; entry ``i`` of each list comes from the same ``<a>`` tag.
        Both lists are empty when the expected container or list is missing.
    """
    mainPage = BS(htmlSource, "html.parser")
    container = mainPage.find("div", attrs={"class": "lcon"})
    # find() / find_next() return None when nothing matches; bail out with
    # empty results instead of raising AttributeError on the None.
    resultList = container.find_next("ul") if container is not None else None
    if resultList is None:
        return [], []

    videoLinksList = []
    videoTitleList = []
    for aTag in resultList.find_all("a"):
        href = aTag.get("href")
        if not href:
            # An anchor without a target cannot be downloaded; skipping it
            # here keeps both lists aligned.
            continue
        # Fall back to the anchor text so the title is never None.
        title = aTag.get("title") or aTag.get_text(strip=True)
        videoLinksList.append(href)
        videoTitleList.append(title)
    return videoLinksList, videoTitleList


def switchToNowWindow(driver):
    """Focus the driver on the last entry of its window handles.

    Args:
        driver: Object exposing ``window_handles`` and ``switch_to.window``.

    Returns:
        The same ``driver`` object, after switching.
    """
    newestHandle = driver.window_handles[-1]
    driver.switch_to.window(newestHandle)
    return driver

def goToMainUrl(dstUrl, videoName):
    """Search for ``videoName`` on the page at ``dstUrl`` and return the result HTML.

    A Chrome browser is started, ``dstUrl`` is loaded, ``videoName`` followed
    by ENTER is typed into the element with id ``mytxtdafdfasdf``, and the
    driver is switched to the last window handle before the page source is
    read. The URL of that window is printed.

    Args:
        dstUrl: URL of the page that contains the search box.
        videoName: Text to type into the search box.

    Returns:
        The page source of the window that is current after the search.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(dstUrl)
        driver.implicitly_wait(3)
        driver.maximize_window()
        driver = switchToNowWindow(driver)
        # find_element("id", ...) exists in both Selenium 3 and 4, whereas
        # find_element_by_id was removed in Selenium 4.3.
        searchBox = driver.find_element("id", "mytxtdafdfasdf")
        searchBox.send_keys(videoName, Keys.ENTER)
        # Presumably the search opens its results in a new window/tab;
        # switch to the last handle before reading the page.
        driver = switchToNowWindow(driver)
        print(driver.current_url)
        return driver.page_source
    finally:
        # Always close the browser, including on errors, so each call does
        # not leave a Chrome process behind.
        driver.quit()

# Download one video by invoking the you-get command-line tool.
def download(savePath, videoName, videoUrl):
    """Run ``you-get`` to download ``videoUrl``.

    The command is passed as an argument list rather than a shell string, so
    spaces or shell metacharacters in the path, title or URL cannot break or
    inject into the command line.

    Args:
        savePath: Output directory, passed to ``you-get -o``.
        videoName: Output file name, passed to ``you-get -O``.
        videoUrl: URL of the video to download.

    Returns:
        None. The command is printed before it is run; a non-zero exit
        status from you-get is not treated as an error.
    """
    # Local import keeps this block self-contained without touching the
    # file's import section.
    import subprocess

    command = ["you-get", "-o", str(savePath), "-O", str(videoName), str(videoUrl)]
    print(" ".join(command))
    try:
        subprocess.run(command, check=False)
    except FileNotFoundError:
        # os.system only reported a missing tool via the shell; keep that
        # best-effort behaviour instead of aborting the whole batch.
        print("you-get executable not found on PATH")


def downloadAll(savePath, videoLinks, videoTitle):
    """Download every link in ``videoLinks``, one after another.

    Args:
        savePath: Output directory handed to ``download`` for every video.
        videoLinks: Sequence of video URLs.
        videoTitle: Sequence of file names; entry ``i`` names link ``i``.
    """
    for position, link in enumerate(videoLinks):
        download(savePath=savePath, videoName=videoTitle[position], videoUrl=link)


def formatvideoTitle(videoTitles):
    """Replace every space in each title with a hyphen, in place.

    Args:
        videoTitles: List of title strings; it is modified in place.

    Returns:
        The same list object that was passed in.
    """
    for position, title in enumerate(videoTitles):
        videoTitles[position] = title.replace(" ", "-")
    return videoTitles


def main(videoName, savePath):
    """Search CCTV for ``videoName`` and download every result found.

    Args:
        videoName: Search keyword; it is also appended to ``savePath`` to
            form the output directory.
        savePath: Prefix of the output directory.
    """
    # CCTV home page, where the search is submitted.
    mainUrl = "https://tv.cctv.com/index.shtml"
    targetDir = savePath + videoName

    resultHtml = goToMainUrl(dstUrl=mainUrl, videoName=videoName)

    # Pull links and titles out of the result page, then make the titles
    # usable as file names.
    links, titles = getvideoLinks(htmlSource=resultHtml)
    titles = formatvideoTitle(videoTitles=titles)

    downloadAll(savePath=targetDir, videoLinks=links, videoTitle=titles)
    
if __name__ == '__main__':
    # Ask for the search keyword and the drive to save to, e.g. "D:" or "F:"
    # (example keyword: "舌尖上的中国").
    videoName = input("请输入你想下载的视频名字:")
    # A trailing slash is appended so the drive becomes a path prefix.
    savePath = input("请输入你要保存的盘符名字（例如D:或者F:等）") + "/"
    print(savePath)
    main(videoName=videoName, savePath=savePath)

垂柳向晚伴残笛

关注

0
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
Python 爬取CCTV视频

#!/usr/bin/env python | # -*- coding: utf-8 -*- | # date: 2021/8/5 15:02 | # author: Sabo | # CCTV官网：https://tv.cctv.com/index.shtml | from bs4 import BeautifulSoup as BS | from selenium import webdriver | from selenium.webdriver.common.keys import Keys | from os import sys…
复制链接

扫一扫