前文回顾
https://blog.csdn.net/weixin_44476410/article/details/118958509?spm=1001.2014.3001.5501
因为遇到标题的编码问题做了些修改
from lxml import etree
import requests
import os
import re
from sys import stdout
import time
# Extract the BV id with a regular expression
def bvre(bv):
    """Extract the BV id from a bare id or a bilibili video link.

    The id is taken to be ``BV`` followed by its run of ASCII letters and
    digits, so whatever surrounds it (a trailing ``/``, a ``?query``, a
    ``#fragment``, whitespace) is ignored.

    Args:
        bv: Text typed by the user: a bare BV id or a URL containing one.

    Returns:
        The BV id, e.g. ``'BV1CX4y1P7M8'``. The id is also printed.

    Raises:
        ValueError: if ``bv`` contains no BV id.
    """
    # Raw string: the pattern must reach the regex engine unmodified.
    found = re.search(r'BV[0-9A-Za-z]+', bv)
    if found is None:
        raise ValueError('未找到BV号: %r' % bv)
    r_bv = found.group(0)
    print('\nBV:', r_bv)
    return r_bv
# Fetch the video title
def name(bv, headers):
    """Return the title text found on the video's web page.

    Downloads ``https://www.bilibili.com/video/<bv>``, parses the HTML with
    lxml and returns the first text node under the ``h1/span`` of the element
    whose id is ``viewbox_report``.

    Args:
        bv: BV id of the video.
        headers: Header dict passed to ``requests.get``.

    Returns:
        The first result of the XPath query; indexing fails if the query
        yields nothing.
    """
    response = requests.get('https://www.bilibili.com/video/' + bv, headers=headers)
    document = etree.HTML(response.text)
    titles = document.xpath('//*[@id="viewbox_report"]/h1/span/text()')
    return titles[0]
'''
# =========备用api=========
# 获取(默认第一页的)cid
def cid(bv,headers):
url = 'https://api.bilibili.com/x/player/pagelist'
param = {
'bvid':'%s'%bv,
'jsonp':'jsonp'
}
text = requests.get(url,params = param,headers = headers).json()
cid = text['data'][0]['cid']
return cid
'''
# Fetch the page list of the video together with each page's cid
# https://api.bilibili.com/x/web-interface/view/detail?bvid=BV1CX4y1P7M8&aid=713833694&need_operation_card=1&web_rm_repeat=&need_elec=1&out_referer=https%3A%2F%2Fspace.bilibili.com%2F
def page(bv):
    """Show the video's pages and let the user choose one.

    Requests the view/detail API, prints the cover URL, the AV number and
    every page with its cid, then prompts for a page number or a cid. An
    empty answer selects page 1.

    The request uses the module-level ``headers`` dict.

    Args:
        bv: BV id of the video.

    Returns:
        A two-item list ``[cid, page_number]`` describing the chosen page.

    Raises:
        ValueError: if the answer is not an integer, or if it matches neither
            a page number nor a cid of this video.
    """
    url = 'https://api.bilibili.com/x/web-interface/view/detail'
    param = {
        'bvid': '%s' % bv,
    }
    text = requests.get(url, params=param, headers=headers).json()
    view = text['data']['View']
    print("\n视频封面链接pic:", view['pic'])
    print("AV号:", "av" + str(view['aid']))  # AV number

    pages = view['pages']
    print("\n分页信息以及对应的cid:")
    for entry in pages:
        print('第 %d 页 cid:%s' % (entry['page'], entry['cid']))

    answer = input("输入分页或cid值(默认第一页):").strip()
    if answer == "":
        # Default: the entry whose page number is 1.
        chosen = next((e for e in pages if e['page'] == 1), None)
    else:
        wanted = int(answer)
        # The number may be either a page number or a cid; first hit wins.
        chosen = next(
            (e for e in pages if wanted in (e['page'], e['cid'])),
            None,
        )

    # Without this check an unmatched answer used to surface as an
    # UnboundLocalError on the return line.
    if chosen is None:
        raise ValueError('没有与输入 %r 匹配的分页或cid' % answer)
    return [chosen['cid'], chosen['page']]
# Request the play-URL information of the video
def flv(cid, bv, headers, quality):
    """Query the playurl API for one page of a video.

    Args:
        cid: cid of the page to play.
        bv: BV id of the video.
        headers: Header dict passed to ``requests.get``.
        quality: Value sent as the ``qn`` query parameter.

    Returns:
        The decoded JSON body of the response.
    """
    query = dict(
        cid='%s' % cid,
        bvid='%s' % bv,
        qn='%s' % quality,
    )
    endpoint = 'https://api.bilibili.com/x/player/playurl'
    return requests.get(endpoint, params=query, headers=headers).json()
# Request the video stream and save it to disk
def get_flv(name, flv_url, headers, page_num):
    """Download the video at ``flv_url`` into the current directory.

    The file is saved as ``./<name>-<page_num>.flv``. If a file with the
    title in its name cannot be created, a ``YYYYmmdd-HHMMSS`` timestamp is
    used in place of the title. During the download a progress line (speed,
    downloaded size, total size, estimated seconds left) is refreshed at most
    once every 0.5 s. After the download an entry consisting of the title
    (or timestamp) and the video link is appended to ``./list.txt``.

    The link written to ``list.txt`` is built from the module-level ``bv``.

    Args:
        name: Video title, used as the base of the file name.
        flv_url: URL of the video stream.
        headers: Header dict passed to ``requests.get``.
        page_num: Page number of the video; part of the file name and link.
    """
    def to_mb(byte):
        return byte / 1024 / 1024

    print("\n等待响应数据(需要的时间较长)...")
    # With stream=True the body is not fetched by get(); it is pulled while
    # iterating iter_content(). The with-block releases the connection on
    # every exit path, including the failure branch.
    with requests.get(flv_url, stream=True, headers=headers) as response:
        code = response.status_code
        print('\n响应码:', code)
        # A response without Content-Length is reported as size 0 instead of
        # raising KeyError before the status code is even checked.
        file_size = int(response.headers.get('content-length', 0))  # bytes
        print("文件大小: {:.2f} MB\n".format(to_mb(file_size)))
        if code != 200:
            print("视频获取失败")
            return

        # Drop characters such as / from the title so the path stays valid.
        name = name.replace('/', '').replace('|', '')
        label = name
        path1 = r"./%s-%d.flv" % (label, page_num)
        try:
            # Probe whether the title can be used as a file name at all.
            with open(path1, 'wb'):
                pass
        except (OSError, ValueError):
            # OSError: the file system rejected the name. ValueError covers
            # names that cannot be encoded or that contain a NUL byte.
            label = time.strftime('%Y%m%d-%H%M%S', time.localtime())
            path1 = r"./%s-%d.flv" % (label, page_num)
        path2 = label + "\nhttps://www.bilibili.com/video/" + bv + "?p=%d\n\n" % page_num

        down_size = 0       # bytes downloaded so far
        old_down_size = 0   # bytes downloaded at the previous progress line
        interval = 0.5      # minimum seconds between progress lines
        last_report = time.time()
        with open(path1, 'wb') as fp1:
            for chunk in response.iter_content(chunk_size=512):
                if not chunk:
                    continue
                fp1.write(chunk)
                down_size += len(chunk)
                now = time.time()
                elapsed = now - last_report
                if elapsed > interval:
                    # Divide by the time that really passed; a chunk that
                    # arrives late would otherwise inflate the speed.
                    speed = (down_size - old_down_size) / elapsed
                    old_down_size = down_size
                    last_report = now
                    # Clamp at 0 so an unknown total size never shows a
                    # negative remaining time.
                    remaining = max(file_size - down_size, 0) / speed
                    print_params = [to_mb(speed), to_mb(down_size), to_mb(file_size), remaining]
                    # \r moves the cursor back to the start of the line
                    print('\r{:.1f}MB/s - {:.2f}MB,共 {:.2f}MB,还剩 {:.0f} 秒 '.format(*print_params), end='')
        print('\r下载成功' + " "*50)

    # Explicit UTF-8: the title may hold characters the locale's default
    # encoding cannot represent.
    with open("./list.txt", 'a', encoding='utf-8') as fp2:
        fp2.write(path2)
    print("视频获取成功\n(若视频清晰度不符,请及时更新cookie值)")
# main
# Kept at module level: page() and get_flv() read the globals `headers`
# and `bv` that are assigned here.
bv = bvre(input("输入BV号(网页链接):"))

# Load the cookie stored in the local txt file (first line only)
cookie = ""
if os.path.exists("./cookie.txt"):
    with open('./cookie.txt') as fp:
        cookie = fp.read().split('\n')[0]
    print('\ncookie:', cookie)

headers = {
    'Referer': 'https://www.bilibili.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'cookie': "%s" % cookie,
}

# Let the user choose a page; page() returns [cid, page_number]
cid_get = page(bv)
cid, page_num = cid_get[0], cid_get[1]
# Alternative via the backup API: cid = cid(bv,headers)
print('\ncid:', cid)
print('page:', page_num)
print('\n可获取cookie值的链接(需先在浏览器正常登录)\nhttps://api.bilibili.com/x/player/playurl?cid=%s&bvid=%s' % (cid, bv))

# From here on `name` is the title string, no longer the function
name = name(bv, headers)
print('\n标题:', name)

# First request, with an empty quality, only to list the offered formats
quality = ''
text = flv(cid, bv, headers, quality)
qn = text['data']['support_formats']
print("\n可选择的清晰度(部分清晰度可能获取失败):")
for qu in qn:
    columns = (
        '清晰度:%s' % qu['new_description'],
        '视频质量参数:%d' % qu['quality'],
        '格式参数:%s' % qu['format'],
    )
    print(''.join(column.ljust(15) for column in columns))

# An empty answer falls back to quality '80'
quality = input("输入清晰度对应的视频质量参数(默认1080p):") or '80'

# Second request, with the chosen quality, yields the stream URL
text = flv(cid, bv, headers, quality)
flv_url = text['data']['durl'][0]['url']
print('\nflv_url:', flv_url)
get_flv(name, flv_url, headers, page_num)