1.下载视频-精彩音乐汇
from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from os import system
from os import listdir
from os import mkdir
def getvideoLinks(htmlSource):
    """Collect link and title attributes from the list following ``div.timg``.

    Parses ``htmlSource`` with the ``html.parser`` backend, locates the first
    ``<div class="timg">``, takes the next ``<ul>`` after it and reads the
    ``lanmu1`` and ``title`` attributes of every ``<a>`` inside that list.

    Args:
        htmlSource: HTML text of the page to parse.

    Returns:
        A ``(links, titles)`` tuple of two lists in document order. An entry
        is ``None`` when the anchor lacks the corresponding attribute.
    """
    soup = BS(htmlSource, "html.parser")
    anchors = soup.find("div", attrs={"class": "timg"}).find_next("ul").find_all("a")
    links = [anchor.get("lanmu1") for anchor in anchors]
    titles = [anchor.get("title") for anchor in anchors]
    return links, titles
def switchToNowWindow(driver):
    """Focus the driver on the last entry of ``driver.window_handles``.

    Args:
        driver: Object exposing ``window_handles`` and ``switch_to.window``.

    Returns:
        The same ``driver`` object, after switching.
    """
    newest_handle = driver.window_handles[-1]
    driver.switch_to.window(newest_handle)
    return driver
def goToMainUrl(dstUrl, videoName):
    """Search for ``videoName`` on ``dstUrl`` and return the result page HTML.

    Opens ``dstUrl`` in a new Chrome session, types ``videoName`` followed by
    ENTER into the element whose id is ``mytxtdafdfasdf``, switches to the
    last window handle and reads its page source.

    The browser session is always shut down before returning; previously it
    was left running, leaking one Chrome process per call.

    Args:
        dstUrl: URL of the page that hosts the search box.
        videoName: Text typed into the search box.

    Returns:
        The page source of the last window after the search was submitted.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(dstUrl)
        driver.implicitly_wait(3)
        driver.maximize_window()
        switchToNowWindow(driver)
        # find_element(by, value) exists in both old and current Selenium;
        # the find_element_by_* shortcuts were removed in newer releases.
        search_box = driver.find_element("id", "mytxtdafdfasdf")
        search_box.send_keys(videoName, Keys.ENTER)
        # Submitting may open a new window; move to the newest one.
        switchToNowWindow(driver)
        return driver.page_source
    finally:
        driver.quit()
def download(savePath, videoName, videoUrl):
    """Download one video with the ``you-get`` command-line tool.

    Runs ``you-get -o <savePath> -O <videoName> <videoUrl>``. The arguments
    are handed over as a list without going through a shell, so spaces or
    shell metacharacters in a scraped title or in the save path can neither
    split nor alter the command.

    Args:
        savePath: Value passed to ``you-get -o``.
        videoName: Value passed to ``you-get -O``.
        videoUrl: URL handed to ``you-get``.
    """
    import subprocess  # local import: the file header only imports os helpers

    commond = 'you-get -o {0} -O {1} "{2}"'.format(savePath, videoName, videoUrl)
    print(commond)  # echo for the user only; the list below is what runs
    arguments = ["you-get", "-o", str(savePath), "-O", str(videoName), str(videoUrl)]
    try:
        subprocess.run(arguments)
    except OSError as err:
        # A missing or unstartable tool must not abort the remaining downloads.
        print("无法启动 you-get: %s" % err)
def downloadAll(savePath, videoLinks, videoTitle, keywords):
    """Download every video that passes the keyword and already-exists checks.

    For each position in ``videoLinks`` the matching title is skipped when it
    contains any of ``keywords`` or when ``<title>.mp4`` is already listed in
    ``savePath``; otherwise ``download`` is called for it. The directory
    listing is taken once, before the loop.

    Args:
        savePath: Directory that is listed and passed on to ``download``.
        videoLinks: Video URLs.
        videoTitle: Titles, indexed in parallel with ``videoLinks``.
        keywords: Substrings that exclude a title from downloading.
    """
    existing_files = listdir(path=savePath)
    for position, link in enumerate(videoLinks):
        title = videoTitle[position]
        if any(word in title for word in keywords):
            print("%s 不符合下载条件!" % title)
            continue
        if (title + ".mp4") in existing_files:
            print("%s 已经存在!" % title)
            continue
        download(savePath=savePath, videoName=title, videoUrl=link)
def formatvideoTitle(videoTitles):
    """Replace spaces with hyphens in every title, in place.

    Args:
        videoTitles: Mutable list of title strings; it is modified directly.

    Returns:
        The same list object that was passed in.
    """
    for position, title in enumerate(videoTitles):
        videoTitles[position] = title.replace(" ", "-")
    return videoTitles
def get_main_url(page_suorce):
    """Return the ``href`` of every anchor in the lists under ``div.lcon``.

    Parses ``page_suorce`` with ``html.parser``, finds the first
    ``<div class="lcon">`` and walks every ``<ul>`` inside it.

    Args:
        page_suorce: HTML text of the page to parse.

    Returns:
        A list of ``href`` values in document order (``None`` for anchors
        without that attribute).
    """
    soup = BS(page_suorce, 'html.parser')
    hrefs = []
    for ul_tag in soup.find("div", attrs={"class": "lcon"}).find_all("ul"):
        hrefs.extend(anchor.get("href") for anchor in ul_tag.find_all("a"))
    return hrefs
def get_signal_download_link(dstUrl):
    """Open ``dstUrl``, click the ``li_cur`` element and return the page HTML.

    Starts a new Chrome session, loads ``dstUrl``, clicks the first element
    with class ``li_cur``, switches to the last window handle and reads its
    page source.

    The browser session is always shut down before returning. This function
    is called once per link, so leaving the session open (as before) leaked
    one Chrome process per link.

    Args:
        dstUrl: URL of the page to open.

    Returns:
        The page source of the last window after the click.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(dstUrl)
        driver.implicitly_wait(3)
        driver.maximize_window()
        switchToNowWindow(driver)
        # find_element(by, value) exists in both old and current Selenium;
        # the find_element_by_* shortcuts were removed in newer releases.
        driver.find_element("class name", "li_cur").click()
        # The click may open a new window; move to the newest one.
        switchToNowWindow(driver)
        return driver.page_source
    finally:
        driver.quit()
def get_dst_link(page_source, print_flag):
    """Extract video links and titles from the ``text_box_141010`` blocks.

    For every ``<div class="text_box_141010">`` the first ``<a>`` inside its
    first ``<p>`` is taken; its ``href`` and ``title`` attributes are
    collected.

    Args:
        page_source: HTML text of the page to parse.
        print_flag: When truthy, both result lists are printed.

    Returns:
        A ``(links, titles)`` tuple of two lists in document order.
    """
    soup = BS(page_source, "html.parser")
    boxes = soup.find_all("div", attrs={"class": "text_box_141010"})
    anchors = [box.find("p").find("a") for box in boxes]
    links = [anchor.get("href") for anchor in anchors]
    titles = [anchor.get("title") for anchor in anchors]
    if print_flag:
        print("videoLinksList:%s" % links)
        print("videoTitleList:%s" % titles)
    return links, titles
def check_dir_exist(save_path, video_name):
    """Make sure an entry named ``video_name`` exists inside ``save_path``.

    If ``video_name`` is not among the names listed in ``save_path``, the
    directory ``save_path + "/" + video_name`` is created.

    Args:
        save_path: Directory to look in.
        video_name: Name of the sub-directory to look for or create.

    Returns:
        ``True`` if the name was already present, ``False`` if it had to be
        created.
    """
    if video_name in listdir(save_path):
        print("%s已经存在!无需创建~" % (save_path + video_name))
        return True
    print("%s不存在!稍后将创建~" % (save_path + video_name))
    mkdir(save_path + "/" + video_name)
    return False
def main(videoName, savePath):
    """Search for ``videoName`` and download the videos found.

    Ensures the ``videoName`` folder exists under ``savePath``, runs the
    search on the CCTV index page, then visits every link of the result page
    and downloads the videos listed there, skipping titles that contain one
    of the exclusion keywords.

    Args:
        videoName: Search term; also the name of the target folder.
        savePath: Parent directory; ``videoName`` is appended to it directly,
            so it is expected to end with a path separator.
    """
    keywords = ["会", "专辑", "旋律", "《精彩音乐汇》"]
    check_dir_exist(save_path=savePath, video_name=videoName)
    savePath += videoName
    mainUrl = "https://tv.cctv.com/index.shtml"
    search_result_html = goToMainUrl(dstUrl=mainUrl, videoName=videoName)
    for page_link in get_main_url(page_suorce=search_result_html):
        listing_html = get_signal_download_link(dstUrl=str(page_link))
        links, titles = get_dst_link(page_source=listing_html, print_flag=False)
        titles = formatvideoTitle(videoTitles=titles)
        downloadAll(savePath=savePath, videoLinks=links, videoTitle=titles, keywords=keywords)
if __name__ == "__main__":
    # Script entry point: download the videos found for the search term below.
    videoName, savePath = "精彩音乐汇", "F:/"
    main(videoName=videoName, savePath=savePath)
2.生成对应的txt文件(用于存储标题和对应的链接)
from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from os import system
from os import listdir
from os import mkdir
def getvideoLinks(htmlSource):
    """Collect link and title attributes from the list following ``div.timg``.

    Parses ``htmlSource`` with the ``html.parser`` backend, locates the first
    ``<div class="timg">``, takes the next ``<ul>`` after it and reads the
    ``lanmu1`` and ``title`` attributes of every ``<a>`` inside that list.

    Args:
        htmlSource: HTML text of the page to parse.

    Returns:
        A ``(links, titles)`` tuple of two lists in document order. An entry
        is ``None`` when the anchor lacks the corresponding attribute.
    """
    soup = BS(htmlSource, "html.parser")
    anchors = soup.find("div", attrs={"class": "timg"}).find_next("ul").find_all("a")
    links = [anchor.get("lanmu1") for anchor in anchors]
    titles = [anchor.get("title") for anchor in anchors]
    return links, titles
def switchToNowWindow(driver):
    """Focus the driver on the last entry of ``driver.window_handles``.

    Args:
        driver: Object exposing ``window_handles`` and ``switch_to.window``.

    Returns:
        The same ``driver`` object, after switching.
    """
    newest_handle = driver.window_handles[-1]
    driver.switch_to.window(newest_handle)
    return driver
def goToMainUrl(dstUrl, videoName):
    """Search for ``videoName`` on ``dstUrl`` and return the result page HTML.

    Opens ``dstUrl`` in a new Chrome session, types ``videoName`` followed by
    ENTER into the element whose id is ``mytxtdafdfasdf``, switches to the
    last window handle and reads its page source.

    The browser session is always shut down before returning; previously it
    was left running, leaking one Chrome process per call.

    Args:
        dstUrl: URL of the page that hosts the search box.
        videoName: Text typed into the search box.

    Returns:
        The page source of the last window after the search was submitted.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(dstUrl)
        driver.implicitly_wait(3)
        driver.maximize_window()
        switchToNowWindow(driver)
        # find_element(by, value) exists in both old and current Selenium;
        # the find_element_by_* shortcuts were removed in newer releases.
        search_box = driver.find_element("id", "mytxtdafdfasdf")
        search_box.send_keys(videoName, Keys.ENTER)
        # Submitting may open a new window; move to the newest one.
        switchToNowWindow(driver)
        return driver.page_source
    finally:
        driver.quit()
def download(savePath, videoName, videoUrl):
    """Download one video with the ``you-get`` command-line tool.

    Runs ``you-get -o <savePath> -O <videoName> <videoUrl>``. The arguments
    are handed over as a list without going through a shell, so spaces or
    shell metacharacters in a scraped title or in the save path can neither
    split nor alter the command.

    Args:
        savePath: Value passed to ``you-get -o``.
        videoName: Value passed to ``you-get -O``.
        videoUrl: URL handed to ``you-get``.
    """
    import subprocess  # local import: the file header only imports os helpers

    commond = 'you-get -o {0} -O {1} "{2}"'.format(savePath, videoName, videoUrl)
    print(commond)  # echo for the user only; the list below is what runs
    arguments = ["you-get", "-o", str(savePath), "-O", str(videoName), str(videoUrl)]
    try:
        subprocess.run(arguments)
    except OSError as err:
        # A missing or unstartable tool must not abort the remaining downloads.
        print("无法启动 you-get: %s" % err)
def downloadAll(savePath, videoLinks, videoTitle, keywords):
    """Download every video that passes the keyword and already-exists checks.

    For each position in ``videoLinks`` the matching title is skipped when it
    contains any of ``keywords`` or when ``<title>.mp4`` is already listed in
    ``savePath``; otherwise ``download`` is called for it. The directory
    listing is taken once, before the loop.

    Args:
        savePath: Directory that is listed and passed on to ``download``.
        videoLinks: Video URLs.
        videoTitle: Titles, indexed in parallel with ``videoLinks``.
        keywords: Substrings that exclude a title from downloading.
    """
    existing_files = listdir(path=savePath)
    for position, link in enumerate(videoLinks):
        title = videoTitle[position]
        if any(word in title for word in keywords):
            print("%s 不符合下载条件!" % title)
            continue
        if (title + ".mp4") in existing_files:
            print("%s 已经存在!" % title)
            continue
        download(savePath=savePath, videoName=title, videoUrl=link)
def formatvideoTitle(videoTitles):
    """Replace spaces with hyphens in every title, in place.

    Args:
        videoTitles: Mutable list of title strings; it is modified directly.

    Returns:
        The same list object that was passed in.
    """
    for position, title in enumerate(videoTitles):
        videoTitles[position] = title.replace(" ", "-")
    return videoTitles
def get_main_url(page_suorce):
    """Return the ``href`` of every anchor in the lists under ``div.lcon``.

    Parses ``page_suorce`` with ``html.parser``, finds the first
    ``<div class="lcon">`` and walks every ``<ul>`` inside it.

    Args:
        page_suorce: HTML text of the page to parse.

    Returns:
        A list of ``href`` values in document order (``None`` for anchors
        without that attribute).
    """
    soup = BS(page_suorce, 'html.parser')
    hrefs = []
    for ul_tag in soup.find("div", attrs={"class": "lcon"}).find_all("ul"):
        hrefs.extend(anchor.get("href") for anchor in ul_tag.find_all("a"))
    return hrefs
def get_signal_download_link(dstUrl):
    """Open ``dstUrl``, click the ``li_cur`` element and return the page HTML.

    Starts a new Chrome session, loads ``dstUrl``, clicks the first element
    with class ``li_cur``, switches to the last window handle and reads its
    page source.

    The browser session is always shut down before returning. This function
    is called once per link, so leaving the session open (as before) leaked
    one Chrome process per link.

    Args:
        dstUrl: URL of the page to open.

    Returns:
        The page source of the last window after the click.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(dstUrl)
        driver.implicitly_wait(3)
        driver.maximize_window()
        switchToNowWindow(driver)
        # find_element(by, value) exists in both old and current Selenium;
        # the find_element_by_* shortcuts were removed in newer releases.
        driver.find_element("class name", "li_cur").click()
        # The click may open a new window; move to the newest one.
        switchToNowWindow(driver)
        return driver.page_source
    finally:
        driver.quit()
def get_dst_link(page_source, print_flag):
    """Extract video links and titles from the ``text_box_141010`` blocks.

    For every ``<div class="text_box_141010">`` the first ``<a>`` inside its
    first ``<p>`` is taken; its ``href`` and ``title`` attributes are
    collected.

    Args:
        page_source: HTML text of the page to parse.
        print_flag: When truthy, both result lists are printed.

    Returns:
        A ``(links, titles)`` tuple of two lists in document order.
    """
    soup = BS(page_source, "html.parser")
    boxes = soup.find_all("div", attrs={"class": "text_box_141010"})
    anchors = [box.find("p").find("a") for box in boxes]
    links = [anchor.get("href") for anchor in anchors]
    titles = [anchor.get("title") for anchor in anchors]
    if print_flag:
        print("videoLinksList:%s" % links)
        print("videoTitleList:%s" % titles)
    return links, titles
def check_file_exist_in_dst_dir(dst_dir, dst_file):
    """Make sure a file named ``dst_file`` exists inside ``dst_dir``.

    If ``dst_file`` is not among the names listed in ``dst_dir``, an empty
    file is created at ``dst_dir + "/" + dst_file``.

    Args:
        dst_dir: Directory to look in.
        dst_file: File name to look for or create.

    Returns:
        ``True`` if the name was already present, ``False`` if the file had
        to be created.
    """
    already_there = dst_file in listdir(dst_dir)
    target = dst_dir + "/" + dst_file
    if already_there:
        print("《%s》文件已经存在!无需创建~" % target)
        return True
    print("《%s》文件不存在!稍后将创建~" % target)
    # Opening in write mode and closing right away leaves an empty file.
    open(file=target, mode="w").close()
    return False
def check_dir_exist(save_path, video_name):
    """Make sure an entry named ``video_name`` exists inside ``save_path``.

    If ``video_name`` is not among the names listed in ``save_path``, the
    directory ``save_path + "/" + video_name`` is created. The "missing"
    message shows the path with every ``./`` removed.

    Args:
        save_path: Directory to look in.
        video_name: Name of the sub-directory to look for or create.

    Returns:
        ``True`` if the name was already present, ``False`` if it had to be
        created.
    """
    if video_name in listdir(save_path):
        print("《%s》文件夹 已经存在!无需创建~" % (save_path + video_name))
        return True
    shown_path = (save_path + video_name).replace("./", "")
    print("《%s》文件夹 不存在!稍后将创建~" % shown_path)
    mkdir(save_path + "/" + video_name)
    return False
def write_files(videoLinksList, videoTitleList, orignal_save_path):
    """Append new title/link pairs to ``精彩音乐汇/UrlAndTitle.txt``.

    The file lives under ``orignal_save_path`` (which is concatenated
    directly, so it is expected to end with a path separator) and is created
    empty if missing. Each new entry is written as::

        Title : <title>
        Link : <link>
        <blank line>

    A title is skipped when its ``Title : ...`` line is already in the file
    or was already written earlier in this same call. Previously the
    duplicate check used a snapshot taken before the loop, so a title that
    occurred twice in ``videoTitleList`` was written twice.

    Args:
        videoLinksList: Links, indexed in parallel with ``videoTitleList``.
        videoTitleList: Titles to record.
        orignal_save_path: Parent directory of the ``精彩音乐汇`` folder.
    """
    base_dir = orignal_save_path.__str__() + "精彩音乐汇"
    url_file = base_dir + "/UrlAndTitle.txt"
    check_file_exist_in_dst_dir(dst_dir=base_dir, dst_file="UrlAndTitle.txt")

    # NOTE(review): the file is opened with the platform default encoding,
    # as before; the lookup script reads it the same way, so an explicit
    # encoding would have to be introduced in both places together.
    with open(file=url_file, mode="r") as f:
        known_title_lines = set(f.readlines())

    # One append handle for the whole batch instead of reopening per title.
    with open(file=url_file, mode="a") as f:
        for index, title in enumerate(videoTitleList):
            title_line = "Title : " + title + "\n"
            if title_line in known_title_lines:
                print("《" + title.strip().__str__() + "》已经存在!")
                continue
            link = videoLinksList[index]
            f.write(title_line)
            f.write("Link : " + link + "\n\n")
            # Remember what was just written so repeats in this batch are caught.
            known_title_lines.add(title_line)
            print("Title : %s" % title)
            print("Link : %s" % link)
def main(videoName, savePath):
    """Search for ``videoName`` and record the found titles and links.

    Ensures the ``videoName`` folder exists under ``savePath``, runs the
    search on the CCTV index page, then visits every link of the result page
    and passes the titles and links listed there to ``write_files``.

    Args:
        videoName: Search term; also the name of the folder that is ensured.
        savePath: Parent directory handed to ``write_files``.
    """
    # NOTE(review): keywords and the extended savePath are not referenced below.
    keywords = ["会", "专辑", "旋律", "《精彩音乐汇》"]
    check_dir_exist(save_path=savePath, video_name=videoName)
    original_save_path = savePath
    savePath += videoName
    mainUrl = "https://tv.cctv.com/index.shtml"
    search_result_html = goToMainUrl(dstUrl=mainUrl, videoName=videoName)
    for page_link in get_main_url(page_suorce=search_result_html):
        listing_html = get_signal_download_link(dstUrl=str(page_link))
        links, titles = get_dst_link(page_source=listing_html, print_flag=False)
        titles = formatvideoTitle(videoTitles=titles)
        write_files(videoTitleList=titles, videoLinksList=links, orignal_save_path=original_save_path)
if __name__ == "__main__":
    # Script entry point: record titles and links for the search term below.
    videoName, savePath = "精彩音乐汇", "F:/"
    main(videoName=videoName, savePath=savePath)
3.用于在txt文件中通过输入快速找到相关的链接
def get_dirs_from_path_and_name(file_path_and_name):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        file_path_and_name: Path of the file to read.

    Returns:
        A list with one stripped string per line of the file, in file order.
    """
    with open(file=file_path_and_name, mode="r") as handle:
        return [raw_line.strip() for raw_line in handle.readlines()]
def from_title_find_link_in_dirs(dirs, mp4_name):
    """Print the line that follows the ``Title : <mp4_name>`` entry.

    Looks for the first element of ``dirs`` equal to
    ``"Title : " + mp4_name`` and prints the element right after it. If no
    such element exists, a "not in file" message is printed instead.

    Args:
        dirs: List of stripped lines from the title/link file.
        mp4_name: Title to look up.
    """
    wanted = "Title : " + mp4_name
    try:
        position = dirs.index(wanted)
    except ValueError:
        print("%s不在文件中!" % mp4_name)
        return
    print("%s" % dirs[position + 1])
def main():
    """Prompt for titles forever and print the link stored for each one.

    The title/link file is re-read before every prompt, so entries added
    while the loop is running are picked up.
    """
    dir_path = "F:/精彩音乐汇/UrlAndTitle.txt"
    while True:
        stored_lines = get_dirs_from_path_and_name(file_path_and_name=dir_path)
        wanted_title = input("MP4 name:")
        from_title_find_link_in_dirs(dirs=stored_lines, mp4_name=wanted_title)
if __name__ == "__main__":
    # Script entry point: start the interactive title lookup loop.
    main()