多线程爬取B站视频

最新推荐文章于 2024-05-24 16:30:33 发布

learner_witt

最新推荐文章于 2024-05-24 16:30:33 发布

阅读量501

点赞数 1

本文链接：https://blog.csdn.net/learner_witt/article/details/109063621

版权

#threading.Thread()

import os
import re
import time
import datetime
import requests
import threading
from moviepy.editor import *


# 获取视频及音频的源地址
def get_url(url):
	"""Fetch a Bilibili video page and pull the media source URLs out of it.

	Args:
		url: Address of the video page to request.

	Returns:
		A ``(video_url, audio_url)`` tuple. ``video_url`` is the ``baseUrl``
		value that follows the first ``duration`` text in the page source and
		``audio_url`` is the ``baseUrl`` value that follows the first
		``audio`` text.

	Raises:
		ValueError: If either URL cannot be located in the page source.
		requests.RequestException: If the HTTP request itself fails.
	"""
	# Use the URL the caller passed in (rather than a module-level global) and
	# collapse accidental duplicate slashes in the path, e.g.
	# 'https://host//video/...'; the '://' after the scheme is left alone.
	url = re.sub(r'(?<!:)/{2,}', '/', url)
	headers = {
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
	}
	# A timeout keeps a stalled connection from hanging the script forever.
	response = requests.get(url=url, headers=headers, timeout=30).text
	video_match = re.search('duration.*?baseUrl":"(.*?)"', response)
	audio_match = re.search('audio.*?baseUrl":"(.*?)"', response)
	if video_match is None or audio_match is None:
		# Fail with a clear message instead of AttributeError on None.group().
		raise ValueError('could not find video/audio source URL in page: ' + url)
	return video_match.group(1), audio_match.group(1)


#多线程下载
def download(url_1, video_id):
	"""Download ``url_1`` into ``2.mp4`` using parallel ranged HTTP requests.

	A small ranged probe request is sent first to learn the total size from
	the ``Content-Range`` response header. The output file is then
	pre-allocated to that size and split into byte ranges, each fetched by its
	own ``Handler`` thread. The function returns once every thread finished.

	Args:
		url_1: Source URL of the media stream to download.
		video_id: Bilibili video id; only used to build the ``Referer``
			header sent with every request.

	Raises:
		ValueError: If the probe response carries no usable total size in its
			``Content-Range`` header.
		requests.RequestException: If the probe request fails.
	"""
	all_thread = 1
	url_2 = 'https://www.bilibili.com/video/' + video_id + '?from=search'
	# Probe request: ask for the first bytes only, to read the total size.
	headers = {
		'Referer': url_2,
		'Range': 'bytes=0-10000',
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
	}
	file = requests.get(url=url_1, headers=headers, timeout=30)
	# Content-Range has the form "bytes <first>-<last>/<total>"; take whatever
	# follows the '/' instead of relying on a fixed character offset.
	content_range = file.headers.get('Content-Range', '')
	total = content_range.rpartition('/')[2].strip()
	if not total.isdigit():
		raise ValueError('server did not report a usable Content-Range: %r' % content_range)
	file_size = int(total)
	print('video size:' + str(int(file_size / 1024 / 1024)) + "MB")
	if not file_size:
		# Nothing to fetch; the workers need a pre-allocated file to write to.
		return
	# Pre-allocate the output so every worker can seek to its own offset.
	with open('2.mp4', 'wb') as fp:
		fp.truncate(file_size)
		print('视频大小：' + str(int(file_size / 1024 / 1024)) + "MB")
	# One thread per 5 MiB of data, capped at 10 threads.
	size = 5242880
	if file_size > size:
		all_thread = min(file_size // size, 10)
	part = file_size // all_thread
	threads = []
	for i in range(all_thread):
		start = part * i
		# The last range runs to the end of the file to absorb the remainder.
		end = file_size if i == all_thread - 1 else start + part
		if i > 0:
			# HTTP ranges are inclusive: the previous range already ended on
			# byte ``part * i``, so this one starts on the next byte.
			start += 1
		thread_headers = dict(headers)
		thread_headers['Range'] = 'bytes=%s-%s' % (start, end)
		t = threading.Thread(
			target=Handler,
			name='th-' + str(i),
			kwargs={'start': start, 'end': end, 'url': url_1, 'filename': '2.mp4', 'headers': thread_headers},
			daemon=True,	# worker threads exit together with the process
		)
		threads.append(t)
	for t in threads:
		# Stagger the thread starts slightly.
		time.sleep(0.2)
		t.start()
	for t in threads:
		t.join()

def Handler(start, end, url, filename, headers=None):
	"""Download one byte range of ``url`` and write it into ``filename``.

	Intended to run as a thread target. The file must already exist; it is
	opened in ``r+b`` mode and the data is written starting at offset
	``start``. Progress is printed on a single, continuously rewritten line.

	Args:
		start: File offset at which the received data is written.
		end: Upper bound of the range; only used for the progress display.
		url: Source URL to request.
		filename: Path of the existing file to write into.
		headers: HTTP request headers. The caller is expected to include the
			``Range`` header here; this function does not derive it from
			``start``/``end``. Defaults to no extra headers.

	Raises:
		requests.HTTPError: If the server answers with an error status.
	"""
	if headers is None:
		# Avoid a shared mutable default dict.
		headers = {}
	tt_name = threading.current_thread().name
	print(tt_name + 'is begin\t')
	total_size = end - start
	downsize = 0
	startTime = time.time()
	# Both the HTTP response and the file are released on exit, even on error.
	with requests.get(url, headers=headers, stream=True, timeout=30) as r, open(filename, 'r+b') as fp:
		# Do not write an HTTP error body into the output file.
		r.raise_for_status()
		fp.seek(start)
		for chunk in r.iter_content(204800):   # write to disk while downloading
			if chunk:
				fp.write(chunk)
				downsize += len(chunk)
				# Clock resolution can make the elapsed time 0 for the first
				# chunk; clamp it so the speed calculation cannot divide by 0.
				elapsed = max(time.time() - startTime, 1e-6)
				line = tt_name + '-downloading %d KB/s - %.2f MB， 共 %.2f MB'
				line = line % (downsize / 1024 / elapsed, downsize / 1024 / 1024, total_size / 1024 / 1024)
				print(line, end='\r')
def get_headers():
	"""Placeholder that takes no arguments, does nothing and returns None."""
	return None
if __name__ == '__main__':
	# Script entry point: ask for a BV id, look up the media source URLs on
	# the video page, then download the video stream. The audio URL returned
	# by get_url is not used here.
	# ``video_id`` is deliberately kept as a module-level name.
	video_id = input('please input Bv number:').strip()
	base_url = 'https://www.bilibili.com/'
	# base_url already ends with '/', so the path must not start with one.
	url = base_url + 'video/' + video_id + '?from=search'
	video_url = get_url(url)[0]
	download(video_url, video_id)

以上是用多线程爬取B站视频的代码，后续会尽量让它更加完善、更加美观。

多线程参考：https://blog.csdn.net/s_kangkang_A/article/details/103051184

learner_witt

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
多线程爬取B站视频

#threading.Thread() import os import re import time import datetime import requests import threading from moviepy.editor import * # 获取视频及音频的源地址 def get_url(url): url = 'https://www.bilibili.com/video/' + video_id + '?from=search' headers = { 'User-Ag…
复制链接

扫一扫