#!/usr/bin/env python
# -*-coding:utf-8-*-
# date :2021/8/5 15:02
# author:Sabo
# CCTV官网:https://tv.cctv.com/index.shtml
import shlex
import subprocess
from os import system

from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
def getvideoLinks(htmlSource):
    """Extract video links and their titles from a result page.

    The anchors are read from the first ``<ul>`` that follows the first
    ``<div class="lcon">`` in the parsed document.

    Args:
        htmlSource: HTML markup of the page to scan.

    Returns:
        A tuple ``(videoLinksList, videoTitleList)``. Both lists have the
        same length and entry ``i`` of each comes from the same ``<a>`` tag.
        Both are empty when the expected container is not present.
    """
    mainPage = BS(htmlSource, "html.parser")
    container = mainPage.find("div", attrs={"class": "lcon"})
    # Guard both lookups: a page without results (or with a changed layout)
    # would otherwise fail with AttributeError on None.
    resultList = container.find_next("ul") if container is not None else None
    if resultList is None:
        return [], []

    videoLinksList = []
    videoTitleList = []
    for aTag in resultList.find_all("a"):
        link = aTag.get("href")
        if not link:
            # An anchor without a target cannot be downloaded; skipping it
            # here keeps the two lists aligned index-by-index.
            continue
        videoLinksList.append(link)
        videoTitleList.append(aTag.get("title"))
    return videoLinksList, videoTitleList
def switchToNowWindow(driver):
    """Focus the driver on the last entry of ``driver.window_handles``.

    Args:
        driver: Object exposing ``window_handles`` and ``switch_to.window``.

    Returns:
        The same ``driver`` object, after the switch.
    """
    newestHandle = driver.window_handles[-1]
    driver.switch_to.window(newestHandle)
    return driver
def goToMainUrl(dstUrl, videoName):
    """Open ``dstUrl`` in Chrome, submit a search and return the resulting HTML.

    The function types ``videoName`` into the element with id
    ``mytxtdafdfasdf``, presses Enter, switches to the last window handle and
    reads that window's page source. The browser is always closed before
    returning, including when an error occurs.

    Args:
        dstUrl: URL of the page that hosts the search box.
        videoName: Text typed into the search box.

    Returns:
        The page source (a string) of the window that is current after the
        search was submitted.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(dstUrl)
        driver.implicitly_wait(3)
        driver.maximize_window()
        switchToNowWindow(driver)
        # find_element("id", ...) works on Selenium 3 and 4 alike, whereas the
        # find_element_by_id shortcut no longer exists in Selenium 4.3+.
        searchBox = driver.find_element("id", "mytxtdafdfasdf")
        searchBox.send_keys(videoName, Keys.ENTER)
        # NOTE(review): presumably the search opens its results in a new
        # window, hence the second switch - confirm against the live site.
        switchToNowWindow(driver)
        print(driver.current_url)
        return driver.page_source
    finally:
        # Without this the Chrome/chromedriver processes are left running.
        driver.quit()
# Download a single video by invoking the you-get command-line tool.
def download(savePath, videoName, videoUrl):
    """Run ``you-get`` to download ``videoUrl`` into ``savePath``.

    The command is passed to the operating system as an argument list, not as
    a shell string, so spaces, quotes or shell metacharacters in the path,
    title or URL are delivered to you-get verbatim instead of being
    interpreted by a shell.

    Args:
        savePath: Output directory, passed to ``you-get -o``.
        videoName: Output file name, passed to ``you-get -O``.
        videoUrl: URL of the video to download.

    Returns:
        None. The exit status of you-get is not inspected; a failure to
        start the program is reported on stdout rather than raised, so that
        one bad item does not abort a batch of downloads.
    """
    # str() mirrors the original str.format() behaviour for non-string values.
    command = ["you-get", "-o", str(savePath), "-O", str(videoName), str(videoUrl)]
    print(" ".join(shlex.quote(part) for part in command))
    try:
        subprocess.run(command, check=False)
    except OSError as err:
        print("Unable to run you-get: {0}".format(err))
def downloadAll(savePath, videoLinks, videoTitle):
    """Download every entry of ``videoLinks`` into ``savePath``.

    Args:
        savePath: Output directory handed to ``download`` for each item.
        videoLinks: Sequence of video URLs.
        videoTitle: Sequence of file names; the item at position ``i`` names
            the download of ``videoLinks[i]``.
    """
    for position, link in enumerate(videoLinks):
        download(savePath=savePath, videoName=videoTitle[position], videoUrl=link)
def formatvideoTitle(videoTitles):
    """Replace every space in each title with a hyphen, in place.

    Args:
        videoTitles: Mutable sequence of title strings; it is modified.

    Returns:
        The same ``videoTitles`` object that was passed in.
    """
    for position, title in enumerate(videoTitles):
        videoTitles[position] = title.replace(" ", "-")
    return videoTitles
def main(videoName, savePath):
    """Search the CCTV site for ``videoName`` and download every result.

    Args:
        videoName: Search text; it is also appended to ``savePath`` to form
            the output directory.
        savePath: Prefix of the output directory. It is concatenated with
            ``videoName`` as-is, so it should already end with a separator.
    """
    targetDir = savePath + videoName
    # CCTV home page, which hosts the search box.
    homePage = "https://tv.cctv.com/index.shtml"
    resultHtml = goToMainUrl(dstUrl=homePage, videoName=videoName)
    # Pull links and titles out of the result page, then tidy the titles.
    links, titles = getvideoLinks(htmlSource=resultHtml)
    titles = formatvideoTitle(videoTitles=titles)
    downloadAll(savePath=targetDir, videoLinks=links, videoTitle=titles)
if __name__ == '__main__':
    # Ask for the programme to search for (for example "舌尖上的中国") and the
    # drive to save to (for example "F:"); a "/" is appended to the drive.
    requestedName = input("请输入你想下载的视频名字:")
    driveRoot = input("请输入你要保存的盘符名字(例如D:或者F:等)") + "/"
    print(driveRoot)
    main(videoName=requestedName, savePath=driveRoot)
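# Article title: "Python 爬取CCTV视频" (scraping CCTV videos with Python)
# Blog page footer: latest recommended article published 2023-01-25 19:40:32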