Python爬取B站视频

老规矩,先上代码:

import tkinter as tk
import requests
import re
import json
import pprint
import subprocess
import os
import random


def trying():
    """Button callback: start the download if the entry looks like a Bilibili URL.

    Reads the text of the module-level entry ``w``. When it does not contain
    the Bilibili site prefix, a hint is written to the status label ``n``
    and nothing else happens; otherwise ``reg()`` is invoked.
    """
    entered = w.get()
    if 'https://www.bilibili.com/' not in entered:
        n['text'] = '请检查输入'
        return
    reg()

def reg():
    """Download the video whose page URL is typed into the entry ``w``.

    Steps: fetch the page, extract the ``window.__playinfo__`` JSON, save
    the first DASH audio and video streams into the ``视频`` folder under
    the current working directory, mux them with the bundled ffmpeg into
    ``output.mp4`` and finally rename that file after the page title.

    The outcome (success path or a short error hint) is written to the
    module-level status label ``n``. Nothing is returned.
    """
    Q = w.get()
    # Random hex token used as the base name of the intermediate files; hex
    # avoids the leading '-' and 'e+NN' a stringified float could contain.
    D = '%032x' % random.getrandbits(128)
    load = os.getcwd()
    path = os.path.join(load, "视频")
    audioFile = os.path.join(path, D + '.mp3')
    videoFile = os.path.join(path, D + '.mp4')
    outfile_name = os.path.join(load, 'output.mp4')

    def askURL(url, stream=False):
        """GET *url* with the referer/user-agent headers and return the response.

        Raises ``requests.RequestException`` on network errors or on a
        non-2xx status.
        """
        head = {
            'referer':
                'https://www.bilibili.com/',
            'user-agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.124 Safari/537.36 Edg/102.0.1245.44'
        }
        # The timeout bounds connect / per-read waits, not the total transfer.
        response = requests.get(url=url, headers=head, stream=stream, timeout=30)
        response.raise_for_status()
        return response

    def getVideoInfo(response):
        """Return the parsed ``window.__playinfo__`` JSON embedded in the page.

        Raises ``ValueError`` when the script tag is missing or is not JSON.
        """
        found = re.findall(r'<script>window.__playinfo__=(.*?)</script>', response.text)
        if not found:
            raise ValueError('window.__playinfo__ not found in page')
        return json.loads(found[0])

    def saveURL(url, target):
        """Stream *url* to the file *target* in chunks instead of buffering it all."""
        response = askURL(url, stream=True)
        try:
            with open(target, mode='wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        finally:
            response.close()

    def download(jsonData):
        """Save the first DASH audio and video stream to the intermediate files."""
        os.makedirs(path, exist_ok=True)
        dash = jsonData['data']['dash']
        audioURL = dash['audio'][0]['baseUrl']
        videoURL = dash['video'][0]['baseUrl']
        print(audioURL)
        print(videoURL)

        saveURL(audioURL, audioFile)
        saveURL(videoURL, videoFile)

    def title_find(response):
        """Return a file-system-safe title taken from the page, or *D* as fallback."""
        found = re.findall('title=(.*?) class', response.text)
        try:
            # The captured text is a quoted attribute value; decode it as a
            # JSON string to strip the quotes and resolve escapes.
            title = json.loads(found[0])
        except (IndexError, ValueError):
            title = None
        if not isinstance(title, str):
            return D
        # Characters that are not allowed in Windows file names.
        title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip(' .')
        return title or D

    def audioAndVideo(audioFile, videoFile):
        """Mux the two streams into ``output.mp4``; return ffmpeg's exit code."""
        cmd = [
            os.path.join(load, 'ffmpeg', 'bin', 'ffmpeg.exe'),
            '-y',  # overwrite a stale output.mp4 instead of waiting on a prompt
            '-i', audioFile,
            '-i', videoFile,
            '-acodec', 'copy',
            '-vcodec', 'copy',
            outfile_name,
        ]
        print(cmd)
        # Argument list, no shell: paths containing spaces stay intact.
        return subprocess.call(cmd)

    def main():
        """Run the whole fetch / download / mux / rename pipeline."""
        html = askURL(url=Q)
        jsonData = getVideoInfo(html)
        pprint.pprint(jsonData)
        download(jsonData)
        if audioAndVideo(audioFile, videoFile) != 0:
            raise RuntimeError('ffmpeg exited with a non-zero status')
        title_name = title_find(html)
        print(title_name)
        # Rename exactly the file ffmpeg produced (no directory walk);
        # os.replace also overwrites an existing target on Windows.
        os.replace(outfile_name, os.path.join(load, title_name + '.mp4'))
        n['text'] = '文件放置在'+load

    # reg() is a GUI callback, so the pipeline runs on every call and any
    # failure is reported in the status label as well as on the console.
    try:
        main()
    except requests.RequestException as err:
        # Must precede OSError: RequestException derives from it.
        print(err)
        n['text'] = '网络请求失败'
    except (KeyError, IndexError, TypeError, ValueError) as err:
        print(repr(err))
        n['text'] = '无法解析视频信息'
    except (OSError, RuntimeError) as err:
        print(err)
        n['text'] = '视频合并或保存失败'

# Build the main window: fixed size 250x100, not resizable in either direction.
f1 = tk.Tk()
f1.geometry("250x100")
f1.resizable(False, False)
f1.title("B站")
# Entry for the video page URL; its text is read by trying() and reg().
w =tk.Entry(f1)
w.place(x=90, y=20)
# Caption shown to the left of the entry.
z = tk.Label(f1, text="路径")
z.place(x=30, y=20)
# Confirm button; clicking it calls trying(). Packed first with
# side="bottom", so it takes the bottom edge of the window.
a = tk.Button(f1, text="确认", command=trying, width=40, height=1)
a.pack(side="bottom")
# Status label whose text is set by trying() and reg(); packed after the
# button, so it sits directly above it.
n = tk.Label(f1, text=" ")
n.pack(side="bottom")
# Enter the Tk event loop.
tk.mainloop()

其中,ffmpeg 需要另行下载,并解压到运行脚本时所在的目录下(代码会调用该目录中的 ffmpeg\bin\ffmpeg.exe),下载地址:

链接:https://pan.baidu.com/s/1dPZsU30JJN8XXbE7WrMEvQ?pwd=leez 
提取码:leez

  • 5
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值