网络爬虫:爬取B站番剧

代码如下:

#https://api.bilibili.com/pgc/player/web/v2/playurl
#实例:https://www.bilibili.com/bangumi/play/ep829609
import requests
from moviepy.editor import VideoFileClip, AudioFileClip
import tkinter as tk
from tkinter import filedialog
from os import remove
from sys import stdout
def select_file():
    """Ask the user to pick the output folder through a directory dialog.

    Returns:
        Whatever ``filedialog.askdirectory`` returns for the user's choice
        (presumably an empty value when the dialog is cancelled -- confirm
        against the tkinter documentation).
    """
    root = tk.Tk()
    root.withdraw()  # hide the empty main window; only the dialog is wanted
    try:
        # Pop up the folder-selection dialog.
        file_path = filedialog.askdirectory(title="选择保存的文件夹")
    finally:
        # The hidden root window is not used again after the dialog closes;
        # destroy it so it does not linger for the rest of the run.
        root.destroy()
    print(file_path)  # echo the chosen path
    return file_path
def num(u):
    """Extract the episode id from a Bilibili link or from bare digits.

    The digits that directly follow ``ep`` are preferred, so a link that
    carries a query string (``.../ep833029?spm_id_from=333.337.0.0``) still
    yields ``833029`` rather than the digits at the very end of the string.
    When no ``ep<digits>`` is present, the last run of ASCII digits is used,
    which keeps plain inputs such as ``829609`` working.

    Args:
        u: Text typed by the user (a full URL or just the number).

    Returns:
        The id as a string of ASCII digits, or ``''`` when ``u`` contains
        no digits at all.
    """
    import re  # local import: the file's import header does not provide ``re``

    ep_match = re.search(r'ep([0-9]+)', u, flags=re.IGNORECASE)
    if ep_match:
        return ep_match.group(1)

    # Fallback: tolerate trailing whitespace/punctuation after the number.
    digit_runs = re.findall(r'[0-9]+', u)
    return digit_runs[-1] if digit_runs else ''
def download_file(url, path, headers, str):
    """Stream ``url`` into the file at ``path`` while printing progress.

    Args:
        url: Address to download.
        path: Destination file; opened in binary write mode, so an existing
            file is overwritten.
        headers: HTTP headers sent with the request.
        str: Label written in front of the progress text. The name shadows
            the builtin but is kept so existing callers keep working.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never saved as if it were media.
    """
    chunk_size = 1024
    # Using the response as a context manager releases the connection even
    # when writing the file fails half-way.
    with requests.get(url=url, headers=headers, stream=True) as r:
        r.raise_for_status()
        # The header may be absent; 0 means "total size unknown".
        content_size = int(r.headers.get('content-length', 0))
        print("下载开始")
        received = 0
        with open(path, "wb") as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
                # Count the bytes actually received instead of assuming
                # every chunk is full, so the percentage stays accurate.
                received += len(chunk)
                if content_size > 0:
                    ratio = min(received / content_size, 1.0)
                    progress = '已下载{0:%}'.format(ratio)
                else:
                    progress = '已下载{0}字节'.format(received)
                stdout.write('\r' + str + progress)
                stdout.flush()
# Folder that receives the downloaded files.
file_path = select_file()
if not file_path:
    # No folder was chosen; an empty prefix would turn the later
    # f"{file_path}/..." paths into paths under the filesystem root.
    file_path = '.'
web_site = input("请输入网址链接(输入ep后面6个数字就行了):(示例:https://www.bilibili.com/bangumi/play/ep829609)")
# flag1: always merge audio and video from now on (answer YYES).
# flag2: never merge from now on (answer NNO).
flag1, flag2= False, False
# NOTE(security): the Cookie below embeds a logged-in session (SESSDATA,
# bili_jct). It is a hard-coded credential; replace it with your own and
# prefer loading it from outside the source file.
headers = {
    "Cookie":"buvid3=D60DF002-3CC5-E87D-F7FD-7DB148FEDBCC02558infoc; buvid4=9AAA6C63-4271-9F52-255F-65EA5359695D04204-022102715-9t1udJ1Su9sR2DFyZp2BJA%3D%3D; rpdid=|(k|k)lul~)m0J'uYYmllYJJ|; buvid_fp_plain=undefined; header_theme_version=CLOSE; CURRENT_FNVAL=4048; FEED_LIVE_VERSION=V8; b_nut=100; _uuid=5E93174A-9B101-1065A-DE3F-6E4104110F3FA534525infoc; enable_web_push=DISABLE; CURRENT_QUALITY=0; PVID=1; fingerprint=f3e8823973e1f3b05160d2c0272873f8; home_feed_column=5; browser_resolution=1488-714; buvid_fp=f3e8823973e1f3b05160d2c0272873f8; DedeUserID=3537110827010558; DedeUserID__ckMd5=ccd5fe7d354a325e; bp_t_offset_3537110827010558=978056970621681664; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjgwMjcwMDEsImlhdCI6MTcyNzc2Nzc0MSwicGx0IjotMX0.pFihE-f26xDtSyWD778kI_XEV6i_BppwFvxYbkznw0Y; bili_ticket_expires=1728026941; SESSDATA=e6fac1d0%2C1743320649%2C45a1b%2Aa2CjDxekdNfmOkEasF0vK3U3Rqbb4IiBET-LhSQPHFo4CE3bZqbgw_BM6doxnExdCFixwSVm50RVpXa0c1SlJGdFhfbTNtN0hhYlkyQ28zLXFTbmVuV3lJbVRmeGpiUTFEVVd1YXBWTHZZSm04elV3b05IdTRJMTFmWHMxR1NMNWMyQnpIaDBWUXZRIIEC; bili_jct=527155b56fe9d4f51ee047b750c1dd95; sid=6ytmu2ii; b_lsid=2D410B10EF_1924D729C17",
    "Referer":"https://www.bilibili.com/bangumi/play/ep833029?spm_id_from=333.337.0.0&from_spmid=666.25.episode.0",
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 SLBrowser/9.0.5.8121 SLBChan/103 SLBVPV/64-bit"
}
# Episode id parsed from the user's input.
viedo_id = num(web_site)
print(viedo_id)
if not viedo_id:
    # Without an id the request below cannot succeed; stop with a clear
    # message instead of a KeyError further down.
    raise SystemExit("未能从输入中解析出ep号")
# Season endpoint: queried with one episode id, read below for the season's
# episode list.
url = f"https://api.bilibili.com/pgc/view/web/season?ep_id={viedo_id}"
response = requests.get(url=url, headers=headers)
response.raise_for_status()
json_data = response.json()
if 'result' not in json_data:
    # The payload has no episode data; show what the server said
    # (presumably under 'message' -- falls back to the whole payload).
    raise SystemExit(f"获取剧集列表失败: {json_data.get('message', json_data)}")
lis = json_data['result']['episodes']
# Show a 1-based numbered menu: index, share text, a space, then the badge.
cur = 0  # stays 0 when the episode list is empty
for cur, li in enumerate(lis, start=1):
    print(''.join((str(cur), li['share_copy'], ' ', li['badge'])))
# First and last menu numbers the user wants to download.
start = int(input("从第几集开始"))
end = int(input("到第几集结束"))
# Characters that are not allowed in Windows file names (and '/' anywhere);
# episode titles are mapped through this table before being used in a path.
_UNSAFE_NAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _merge_tracks(video_path, audio_path, output_path):
    """Write video+audio to ``output_path``, then delete the two inputs.

    Both clips are closed before the inputs are removed, so the files are
    no longer held open when ``remove`` runs. If writing fails, the inputs
    are left in place.
    """
    video = VideoFileClip(video_path)
    try:
        audio = AudioFileClip(audio_path)
        try:
            merged = video.set_audio(audio)
            merged.write_videofile(output_path, codec='libx264', audio_codec='aac')
        finally:
            audio.close()
    finally:
        video.close()
    remove(video_path)
    remove(audio_path)


# Download every episode whose 1-based menu number lies in [start, end].
cur = 0
for cur, li in enumerate(lis, start=1):
    if cur < start:
        continue
    if cur > end:
        break
    try:
        aid = li['aid']
        cid = li['cid']
        ep_id = li['ep_id']
        title = li['share_copy']
        # The title is shown as-is but sanitised for use as a file name.
        safe_title = title.translate(_UNSAFE_NAME_CHARS)
        video_path = f"{file_path}/{safe_title}.mp4"
        audio_path = f"{file_path}/{safe_title}audio.mp4"
        merged_path = f"{file_path}/{safe_title}new.mp4"

        link = "https://api.bilibili.com/pgc/player/web/v2/playurl"
        data = {
            "support_multi_audio": "true",
            "avid" : aid,
            "cid" : cid,
            "qn" : "0",
            "fnver" : "0",
            "fnval" : "4048",
            "fourk" : "1",
            "gaia_source" : '',
            "from_client" : "BROWSER",
            "ep_id" : ep_id
        }
        print(title)
        link_data = requests.get(params=data, url=link, headers=headers).json()
        # The first entry of each DASH list is the one that gets downloaded.
        dash = link_data["result"]['video_info']['dash']
        video_url = dash['video'][0]['baseUrl']
        audio_url = dash['audio'][0]['baseUrl']
        print("获得url")
        download_file(video_url, video_path, headers, '视频')
        print("获取视频成功")
        download_file(audio_url, audio_path, headers, '音频')
        print("获取音频成功")

        # Decide whether to merge this episode. flag1/flag2 remember an
        # "always"/"never" answer so the question is not asked again.
        merge_now = flag1
        if (not flag1) and (not flag2):
            question = input("是否合并音频和视频?(YES/NO)(YYES/NNO表示之后都默认合并或不合并)")
            answer = question.title()
            if answer == 'Yes':
                merge_now = True
            elif answer == 'Yyes':
                flag1 = True
                merge_now = True
            elif answer == 'Nno':
                flag2 = True
        if merge_now:
            print(title + "合并中")
            _merge_tracks(video_path, audio_path, merged_path)
        print(title + "爬取成功")
    except Exception as e:
        # Best effort: report which episode failed and why, then continue
        # with the next one.
        print(f"第{cur}集爬取失败: {e!r}")

拿走,勿谢

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值