import argparse
import subprocess
from os import listdir
from os import mkdir
from os import remove
from os import system
from time import sleep

from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
class BilibiliUp(object):
    """Collect the video links of a Bilibili uploader and optionally download them.

    The workflow (see ``main``) drives Chrome through Selenium to find the
    uploader's video listing, parses each listing page with BeautifulSoup,
    optionally records title/link pairs in a text file, and optionally hands
    each link to the external ``you-get`` command.
    """

    def __init__(self):
        super().__init__()
        self.headless_flag = True  # run Chrome with --headless when True
        self.home_page_suorce = ""
        self.root_vedio_head = "https://www.bilibili.com/video/"
        self.vedio_seq_numbers = []
        self.vedio_links = []
        self.vedio_titles = []
        self.current_home_page = ""
        self.count = 0  # number of listing pages, set by main()
        self.root_vedio_links = []
        self.home_pages = []
        self.download_video_flag = False
        self.write_links_flag = False
        self.debug = False

    # ------------------------------------------------------------------
    # Flag switches (kept as methods because callers use them).
    # ------------------------------------------------------------------
    def set_debug(self):
        """Enable debug mode (verbose parsing output, only the first page is fetched)."""
        self.debug = True

    def clear_debug(self):
        """Disable debug mode."""
        self.debug = False

    def set_download_video_flag(self):
        """Make ``main`` download the collected videos."""
        self.download_video_flag = True

    def clear_download_video_flag(self):
        """Make ``main`` skip downloading."""
        self.download_video_flag = False

    def set_write_links_flag(self):
        """Make ``main`` record title/link pairs in a text file."""
        self.write_links_flag = True

    def clear_write_links_flag(self):
        """Make ``main`` skip writing the links file."""
        self.write_links_flag = False

    def set_headless_flag(self):
        """Run Chrome headless."""
        self.headless_flag = True

    def clear_headless_flag(self):
        """Run Chrome with a visible window."""
        self.headless_flag = False

    @staticmethod
    def check_dir_exist(save_path, video_name):
        """Ensure ``video_name`` exists inside ``save_path``.

        Args:
            save_path: Existing parent directory (``listdir`` raises if it is missing).
            video_name: Name of the entry expected inside ``save_path``.

        Returns:
            ``True`` when an entry with that name was already present,
            ``False`` when the directory had to be created.
        """
        # Build the path once so the message and mkdir always agree, whether
        # or not the caller passed a trailing separator.
        target = save_path.rstrip("/\\") + "/" + video_name
        if video_name not in listdir(save_path):
            print("《%s》不存在!稍后将创建~" % target)
            mkdir(target)
            return False
        print("《%s》已经存在!无需创建~" % target)
        return True

    def _create_driver(self):
        """Start a Chrome WebDriver, headless when ``self.headless_flag`` is set."""
        if not self.headless_flag:
            return webdriver.Chrome()
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        return webdriver.Chrome(options=chrome_options)

    def get_up_vedio_links(self, up_name):
        """Search for ``up_name`` and open the uploader's video listing.

        Args:
            up_name: Text typed into the Bilibili search box.

        Returns:
            A tuple ``(page_source, current_url, count)`` where ``count`` is
            the page count reported by ``get_acount_of_home_page``.

        Raises:
            Selenium exceptions propagate when the search page elements
            cannot be located; the browser is shut down in every case.
        """
        root_url = "https://search.bilibili.com/"
        driver = self._create_driver()
        try:
            driver.get(root_url)
            driver.implicitly_wait(3)
            driver.maximize_window()
            driver.find_element(by=By.CLASS_NAME, value="search-input-el").send_keys(up_name, Keys.ENTER)
            sleep(1)
            # NOTE(review): absolute XPaths are tied to the current page
            # layout; presumably these select the user tab and then the
            # first matching uploader - confirm against the live site.
            driver.find_element(
                by=By.XPATH,
                value='/html/body/div[3]/div/div[2]/div[1]/div[2]/div/nav/ul/li[8]/span').click()
            sleep(1)
            driver.find_element(
                by=By.XPATH,
                value='/html/body/div[3]/div/div[2]/div[2]/div/div/div[2]/div/div/div/div/h2/a').click()
            sleep(1)
            driver = self.switchToNowWindow(driver)
            # Best effort: a missing "more" element is reported and skipped.
            try:
                driver.find_element(by=By.CLASS_NAME, value='more').click()
            except Exception:
                print("有代表作元素点击失败,该up主没有设置代表作视频")
            driver = self.switchToNowWindow(driver)
            sleep(1)
            page_source = driver.page_source
            current_url = driver.current_url
            count = self.get_acount_of_home_page(page_source)
            return page_source, current_url, count
        finally:
            # quit() ends the whole browser session; close() would only
            # close the current window and leave the session running.
            driver.quit()

    def get_all_page_suorce(self, home_vedio_links):
        """Fetch the HTML source of every listing page.

        Args:
            home_vedio_links: URLs of the listing pages.

        Returns:
            List of page sources in the same order; in debug mode only the
            first page is fetched.
        """
        home_pages = []
        if not home_vedio_links:
            return home_pages
        # One browser is reused for all pages instead of one per page.
        driver = self._create_driver()
        try:
            driver.implicitly_wait(3)
            for link in home_vedio_links:
                driver.get(link)
                sleep(1)
                home_pages.append(driver.page_source)
                if self.debug:
                    break
        finally:
            driver.quit()
        return home_pages

    def get_acount_of_home_page(self, home_page):
        """Read the total page count from a listing page.

        Args:
            home_page: HTML source of the listing page.

        Returns:
            The second space-separated token of the ``be-pager-total`` span
            as an int, or ``1`` when the span is missing or unparsable.
        """
        main_page = BS(home_page, "html.parser")
        pager_total = main_page.find("span", attrs={"class": "be-pager-total"})
        try:
            return int(pager_total.get_text().split(" ")[1])
        except (AttributeError, IndexError, ValueError):
            # AttributeError: span not found (find returned None).
            print("作品不足一页")
            return 1

    def get_all_up_vedio_links(self, current_url):
        """Build the URL of every listing page from ``self.count``.

        Args:
            current_url: URL of the first listing page.

        Returns:
            ``self.count`` URLs (at least one): ``current_url`` followed by
            the paged variants for pages 2..count.
        """
        print("该up总共有 %d 页视频" % self.count)
        vedio_links = []
        for page in range(1, max(self.count, 1) + 1):
            if page == 1:
                vedio_links.append(current_url)
            else:
                vedio_links.append(
                    current_url + "?tid=0&page=" + str(page) + "&keyword=&order=pubdate")
            print("第 %d 页信息获取完毕~" % page)
        return vedio_links

    def switchToNowWindow(self, driver):
        """Switch ``driver`` to the last window handle and return it."""
        window_handles = driver.window_handles
        driver.switch_to.window(window_handles[-1])
        return driver

    def get_index_of_vedio(self, page_suorce):
        """Extract video ids and titles from one listing page.

        Args:
            page_suorce: HTML source of a listing page.

        Returns:
            ``(video_index_list, video_title_list)``: the last path segment
            of each cover link's ``href`` and the ``alt`` text of the next
            ``img`` element.
        """
        main_page = BS(page_suorce, "html.parser")
        covers = main_page.find_all("a", class_="cover")
        # Only the first half of the matches is used.
        # NOTE(review): presumably the page lists each cover twice - confirm.
        covers = covers[:len(covers) // 2]
        video_index_list = []
        video_title_list = []
        for index, anchor in enumerate(covers):
            video_id = str(anchor.get("href")).split("/")[-1]
            title = anchor.find_next("img").get("alt")
            if self.debug:
                print(index)
                print(anchor)
                print("index", video_id)
                print("title", title)
                print("\n")
            video_index_list.append(video_id)
            video_title_list.append(title)
        return video_index_list, video_title_list

    def cat_links(self, vedio_seq_numbers):
        """Prefix every video id with ``self.root_vedio_head``."""
        return [self.root_vedio_head + str(seq) for seq in vedio_seq_numbers]

    def write_links_in_file(self, vedio_links, file_name, vedio_titles):
        """Append new title/link pairs to ``<file_name>.txt``.

        Existing content is preserved. A pair is skipped when its
        ``Title : ...`` line was already in the file when the call started.

        Args:
            vedio_links: Video URLs.
            file_name: Target path without the ``.txt`` suffix.
            vedio_titles: Titles, parallel to ``vedio_links``.
        """
        target = file_name + ".txt"
        try:
            with open(target, mode="r", encoding="utf-8") as f:
                known_lines = set(f.readlines())
        except FileNotFoundError:
            known_lines = set()
        # Append mode creates the file when missing and never truncates it.
        with open(target, mode="a", encoding="utf-8") as f:
            for link, title in zip(vedio_links, vedio_titles):
                if ("Title : " + title + "\n") in known_lines:
                    print("《" + str(title.strip()) + "》已经存在!")
                    continue
                f.write("Title : ")
                f.write(title + "\n")
                f.write("Link : ")
                f.write(link + "\n\n")
                print("Title : %s" % title)
                print("Link : %s" % link)

    def download(self, savePath, videoName, videoUrl):
        """Download one video with the external ``you-get`` command.

        Args:
            savePath: Output directory passed to ``you-get -o``.
            videoName: Title, used only for the progress message.
            videoUrl: Video page URL.
        """
        print("Video title is : %s" % videoName)
        print("you-get -o {} \"{}\"".format(savePath, videoUrl))
        # Argument list, no shell: paths with spaces and scraped URLs are
        # passed through verbatim instead of being re-parsed by a shell.
        try:
            subprocess.run(["you-get", "-o", savePath, videoUrl], check=False)
        except OSError as error:
            # Keep the best-effort behaviour when you-get cannot be started.
            print("Failed to run you-get: %s" % error)

    def downloadAll(self, savePath, videoLinks, videoTitle, keywords):
        """Download every video that is neither excluded nor already present.

        Args:
            savePath: Directory that receives the downloads.
            videoLinks: Video URLs.
            videoTitle: Titles, parallel to ``videoLinks``.
            keywords: A title containing any of these strings is skipped.
        """
        existing = set(listdir(path=savePath))
        for link, title in zip(videoLinks, videoTitle):
            if any(keyword in title for keyword in keywords):
                print("%s 不符合下载条件!" % title)
                continue
            # File name the download is expected to produce for this title.
            file_name = title.replace("/", "").replace(" ", "-").replace('★', "-") + ".mp4"
            if file_name in existing:
                print("{0}".format(file_name))
                print("%s 已经存在!无需重复下载" % title)
                continue
            print("downloading~~")
            self.download(savePath=savePath, videoName=title, videoUrl=link)

    def print_line(self):
        """Print a horizontal separator line."""
        print(
            "--------------------------------------------------------------------------------------------------------------------------------")

    def delete_useless_files(self, path):
        """Remove files ending in ``.xml`` or ``.ts`` from ``path``.

        The suffix is matched, not a substring, so a name that merely
        contains ``.ts`` or ``.xml`` in the middle is kept.
        """
        delete_type = (".xml", ".ts")
        for file in listdir(path):
            target = path + "/" + file
            if file.endswith(delete_type):
                print("Delete {0}".format(target))
                remove(target)
            else:
                print("keep {0}".format(target))

    def get_up_names_from_commond(self):
        """Parse ``--name`` from the command line into a list of uploader names.

        Returns:
            The comma-separated names with surrounding whitespace removed
            and empty entries dropped.
        """
        parser = argparse.ArgumentParser(
            description="input up's names (you want to download their or his video\n\n"
                        "use English ',' to split them")
        parser.add_argument('--name', default='音乐私藏馆', help='input up\'s names')
        args = parser.parse_args()
        ans = [name.strip() for name in str(args.name).split(',') if name.strip()]
        print('up\'s names : {}'.format(ans))
        return ans

    def main(self, up_name, save_path):
        """Run the whole workflow for one uploader.

        Args:
            up_name: Uploader name to search for.
            save_path: Directory for the links file and the downloads.
        """
        self.home_page_suorce, self.current_home_page, self.count = self.get_up_vedio_links(up_name)
        self.root_vedio_links = self.get_all_up_vedio_links(current_url=self.current_home_page)
        self.home_pages = self.get_all_page_suorce(self.root_vedio_links)
        for index, home_page in enumerate(self.home_pages):
            self.vedio_seq_numbers, self.vedio_titles = self.get_index_of_vedio(page_suorce=home_page)
            self.vedio_links = self.cat_links(vedio_seq_numbers=self.vedio_seq_numbers)
            self.print_line()
            print("index = {0}".format(index))
            if self.write_links_flag:
                self.write_links_in_file(vedio_links=self.vedio_links,
                                         file_name=save_path + "/" + str(up_name),
                                         vedio_titles=self.vedio_titles)
            if self.download_video_flag:
                self.downloadAll(savePath=save_path, videoLinks=self.vedio_links,
                                 videoTitle=self.vedio_titles, keywords=[])
            self.print_line()
        # Clean up leftover .xml / .ts files once all pages are processed.
        self.delete_useless_files(save_path)
if __name__ == '__main__':
    # Root directory that holds one sub-directory per uploader.
    check_dir = r"F:/b站up主们的视频/"
    up_names = ["gaoming714"]

    bilibili_test = BilibiliUp()
    bilibili_test.clear_headless_flag()      # show the browser window
    bilibili_test.set_download_video_flag()  # download the videos
    bilibili_test.set_write_links_flag()     # also record title/link pairs
    bilibili_test.clear_debug()

    for raw_name in up_names:
        # Strip once so the directory name and the search term always match.
        name = str(raw_name).strip()
        save_path = check_dir + name
        bilibili_test.check_dir_exist(save_path=check_dir, video_name=name)
        bilibili_test.main(name, save_path)