用Python爬虫做一个短视频+评论下载小工具

成品展示:

 

 复制以上视频地址后缀(BV号)用于视频下载

 按照以上步骤获取评论接口的URL并复制

 

 选择要保存到哪个文件夹中

输入复制的视频地址和评论地址

 保存完成 

 

获取音视频部分的主要代码(仅展示核心功能,完整代码见文末):

def get_data(urls, address,
             ffmpeg_path=r"D:\pycharm\ffmpeg-4.3.1-2021-01-01-essentials_build\bin\ffmpeg.exe"):
    """Download a video page's video and audio streams and merge them with ffmpeg.

    The page HTML is fetched with ``get_response``; the stream URLs are read from
    the JSON assigned to ``window.__playinfo__`` and the video name from the
    ``<h1 title=...>`` element.  The name is stored in the module-level ``title``
    so that other code can reuse it as a file name.

    Args:
        urls: URL of the video page.
        address: Directory prefix (including the trailing separator) that the
            file names are appended to.
        ffmpeg_path: Path of the ffmpeg executable.  The default is the
            machine-specific location used by the original author.

    Raises:
        IndexError: If the page does not contain the expected play-info script
            or title element.
    """
    global title
    response = get_response(urls).text
    # Stream description embedded in the page as inline JSON.
    info = re.findall('<script>window.__playinfo__=(.*?)</script>', response)[0]
    raw_title = re.findall('<h1 title="(.*?)" class="video-title tit">', response)[0]
    # The title comes from the remote page and is used as a file name, so
    # replace the characters Windows does not allow in file names.
    title = re.sub(r'[\\/:*?"<>|]', '_', raw_title)

    json_data = json.loads(info)
    dash = json_data['data']['dash']
    video_url = dash['video'][0]['baseUrl']
    audio_url = dash['audio'][0]['baseUrl']
    video_content = get_response(video_url).content
    audio_content = get_response(audio_url).content

    video_path = f"{address}{title}.mp4"
    audio_path = f"{address}{title}.mp3"
    merged_path = f"{address}{title}-.mp4"
    with open(video_path, 'wb') as f:
        f.write(video_content)
    with open(audio_path, 'wb') as f:
        f.write(audio_content)

    # Pass the arguments as a list (no shell) so that a title containing
    # quotes or shell metacharacters cannot alter the command.
    merge_cmd = [
        ffmpeg_path,
        '-i', audio_path,
        '-i', video_path,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        merged_path,
    ]
    try:
        return_code = subprocess.call(merge_cmd)
    except OSError:
        # ffmpeg is missing or cannot be started.
        return_code = None
    if return_code != 0:
        # Merge failed: keep the separate streams instead of deleting the
        # only copies of the downloaded data.
        return
    # Merge succeeded: remove the separate audio and video files.
    os.remove(audio_path)
    os.remove(video_path)

 获取评论主要代码:

def _blank_to_null(value):
    """Return the placeholder "null" for an empty string, otherwise the value unchanged."""
    return "null" if value == '' else value


def get_data0(url0, file_path, file_name):
    """Fetch a comment JSON document and append its comments to a text file.

    The response text of ``get_response0(url0)`` is parsed as JSON; every entry
    of ``data.replies`` is written with the author's name, sex, signature, like
    count and UID, followed by its sub-replies when present.

    Args:
        url0: URL of the comment API request.
        file_path: Directory prefix (including the trailing separator).
        file_name: File name without extension; ``.txt`` is appended.
    """
    response = get_response0(url0)
    dict_data = json.loads(response)
    # NOTE(review): presumably the API reports "no comments" as null, like it
    # does for sub-replies below; treat that as an empty list instead of
    # failing on len(None).
    main_replies = dict_data['data']['replies'] or []
    # Append mode: repeated saves accumulate in the same file.
    with open(file_path + f"{file_name}.txt", 'a', encoding='utf-8') as file:
        for one_main in main_replies:
            member = one_main['member']
            main_content = one_main["content"]["message"]
            uname = _blank_to_null(member['uname'])
            usex = _blank_to_null(member['sex'])
            usign = _blank_to_null(member['sign'])
            like = one_main['like']
            uid = one_main['mid']
            file.write(
                f'\t昵称:  {uname}   性别:  {usex}\t个性标签:  {usign}\n\t获赞:  {like}\tUID:{uid}\n\n\t评论:  {main_content}')
            file.write('\n\n')
            # Sub-replies are written only when the comment advertises them
            # and the list is actually present.
            if ('reply_control' in one_main
                    and 'sub_reply_entry_text' in one_main['reply_control']
                    and one_main['replies'] is not None):
                file.write("\t\t跟评:  ")
                for one_reply in one_main['replies']:
                    file.write(
                        '\n---------------------------------------------------------------------------------')
                    reply_member = one_reply['member']
                    reply_message = one_reply['content']['message']
                    reply_name = _blank_to_null(reply_member['uname'])
                    reply_sex = _blank_to_null(reply_member['sex'])
                    reply_sign = _blank_to_null(reply_member['sign'])
                    reply_uid = reply_member['mid']
                    file.write(
                        f'\n\t\t\t昵称:  {reply_name}   性别:  {reply_sex}\t个性标签:  {reply_sign}\n\t\t\tUID:{reply_uid}\n\t\t\t评论:  {reply_message}')
            # Separator between top-level comments.
            file.write(
                '\n*********************************************************************************************************')
            file.write('\n\n\n')

视频保存按钮功能代码(调用上面所写方法):

    def download(self):
        """Download the video named in the second text box into the folder from the first."""
        target_dir = self.lineEdit.text()
        video_suffix = self.lineEdit_2.text()
        main(code=video_suffix, address=target_dir)
        # Report completion in the text box the suffix was read from.
        self.lineEdit_2.setText("下载完成")

评论保存按钮功能代码:

# 评论保存功能按钮
    def save(self):
        urls = self.lineEdit_3.text()
        filepath = self.lineEdit.text()
        address = (filepath + "/").replace('"', '')
        global title
        filename = title
        get_data0(urls, address, filename)
        # 设置评论保存成功提示
        self.lineEdit_3.setText("评论保存成功")

按钮、文本框属性设置:

    # 设置按钮和文本框属性
    def retranslateUi(self, Form):
        """Set the window title and the initial text of every button, label and text box."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        # (attribute name, displayed text), applied in this order.
        widget_texts = (
            ("pushButton", "视频下载"),
            ("lineEdit", "下载视频保存地址"),
            ("label_2", "屁颠儿屁颠儿"),
            ("lineEdit_2", "预下载视频后缀"),
            ("lineEdit_3", "热门评论URL"),
            ("pushButton_2", "评论保存"),
        )
        for attr_name, text in widget_texts:
            getattr(self, attr_name).setText(_translate("Form", text))

全功能完整代码:

import sys
import random
import requests
import re
import json
import subprocess
import os
import time

from PyQt5 import QtCore, QtGui, QtWidgets
# NOTE: a `global` statement at module level does not create a binding.
# `title` only exists after get_data() has assigned it.
global title

def get_response0(url0, timeout=30):
    """Fetch ``url0`` with browser-like headers and return the response body as text.

    Args:
        url0: URL to request.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded response text.
    """
    # Pool of user-agent strings; one is picked at random per request.
    heads = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 "
        "Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 "
        "Safari/537.75.14",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
    ]
    # NOTE(review): the cookie below is a hard-coded browser session (it
    # contains SESSDATA and bili_jct fields); it should be supplied by the
    # user rather than kept in source.
    headers = {
        'user-agent': random.choice(heads),
        'referer': 'https://www.bilibili.com/video/BV1Tr4y1g7RA?spm_id_from=333.851.b_7265636f6d6d656e64.3&vd_source=d03c9841720e0b9824a0ab0a2945d20e',
        'cookie': "buvid3=2E4E4E5E-27A6-6A1D-42D4-924871F88FA520575infoc; i-wanna-go-back=-1; _uuid=D67110931-8DA5-8410E-94AF-1D7E1110A66D320755infoc; buvid4=F82E83DB-11DE-9169-3ABA-D49868954FFF21655-022060401-tnVc6wLeIYviJecGNRjhVg%3D%3D; nostalgia_conf=-1; CURRENT_BLACKGAP=0; fingerprint=ade468f62bacff496254e1cd6685a12a; buvid_fp_plain=undefined; SESSDATA=94776347%2C1669829549%2C960b6%2A61; bili_jct=e0047cd3733fbecce02142c879bf2d9f; DedeUserID=297704642; DedeUserID__ckMd5=6a2e18b325933872; sid=atmg2c94; buvid_fp=ade468f62bacff496254e1cd6685a12a; rpdid=|(J~R~|Y|)YR0J'uYlRk~~YRm; b_ut=5; LIVE_BUVID=AUTO9616554750049862; is-2022-channel=1; blackside_state=0; CURRENT_QUALITY=80; bp_video_offset_297704642=679665453528776700; b_lsid=31086498E_181E74749D6; innersign=1; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_2E4E4E5E%22%3A%22181E74750FD%22%2C%22333.788.fp.risk_2E4E4E5E%22%3A%22181E74779FC%22%7D%7D; CURRENT_FNVAL=4048; PVID=1"
    }

    # The second positional argument of requests.get is `params`, so the
    # headers must be passed by keyword; otherwise they are appended to the
    # URL as a query string and never sent as HTTP headers.
    response = requests.get(url0, headers=headers, timeout=timeout).text

    return response


def _blank_to_null(value):
    """Return the placeholder "null" for an empty string, otherwise the value unchanged."""
    return "null" if value == '' else value


def get_data0(url0, file_path, file_name):
    """Fetch a comment JSON document and append its comments to a text file.

    The response text of ``get_response0(url0)`` is parsed as JSON; every entry
    of ``data.replies`` is written with the author's name, sex, signature, like
    count and UID, followed by its sub-replies when present.

    Args:
        url0: URL of the comment API request.
        file_path: Directory prefix (including the trailing separator).
        file_name: File name without extension; ``.txt`` is appended.
    """
    response = get_response0(url0)
    dict_data = json.loads(response)
    # NOTE(review): presumably the API reports "no comments" as null, like it
    # does for sub-replies below; treat that as an empty list instead of
    # failing on len(None).
    main_replies = dict_data['data']['replies'] or []
    # Append mode: repeated saves accumulate in the same file.
    with open(file_path + f"{file_name}.txt", 'a', encoding='utf-8') as file:
        for one_main in main_replies:
            member = one_main['member']
            main_content = one_main["content"]["message"]
            uname = _blank_to_null(member['uname'])
            usex = _blank_to_null(member['sex'])
            usign = _blank_to_null(member['sign'])
            like = one_main['like']
            uid = one_main['mid']
            file.write(
                f'\t昵称:  {uname}   性别:  {usex}\t个性标签:  {usign}\n\t获赞:  {like}\tUID:{uid}\n\n\t评论:  {main_content}')
            file.write('\n\n')
            # Sub-replies are written only when the comment advertises them
            # and the list is actually present.
            if ('reply_control' in one_main
                    and 'sub_reply_entry_text' in one_main['reply_control']
                    and one_main['replies'] is not None):
                file.write("\t\t跟评:  ")
                for one_reply in one_main['replies']:
                    file.write(
                        '\n---------------------------------------------------------------------------------')
                    reply_member = one_reply['member']
                    reply_message = one_reply['content']['message']
                    reply_name = _blank_to_null(reply_member['uname'])
                    reply_sex = _blank_to_null(reply_member['sex'])
                    reply_sign = _blank_to_null(reply_member['sign'])
                    reply_uid = reply_member['mid']
                    file.write(
                        f'\n\t\t\t昵称:  {reply_name}   性别:  {reply_sex}\t个性标签:  {reply_sign}\n\t\t\tUID:{reply_uid}\n\t\t\t评论:  {reply_message}')
            # Separator between top-level comments.
            file.write(
                '\n*********************************************************************************************************')
            file.write('\n\n\n')


def get_response(urls, timeout=30):
    """Request ``urls`` with browser-like headers and return the response object.

    Args:
        urls: URL to request (a page or a media stream).
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the caller forever.

    Returns:
        The ``requests`` response; callers read ``.text`` or ``.content``.
    """
    # Pool of user-agent strings; one is picked at random per request.
    headers = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.124 '
        'Safari/537.36 Edg/102.0.1245.41',
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 "
        "Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 "
        "Safari/537.75.14",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    ]
    # NOTE(review): hard-coded browser session cookie (contains SESSDATA and
    # bili_jct fields); it should be supplied by the user rather than kept in
    # source.
    cookie = "buvid3=2E4E4E5E-27A6-6A1D-42D4-924871F88FA520575infoc; i-wanna-go-back=-1; _uuid=D67110931-8DA5-8410E-94AF-1D7E1110A66D320755infoc; buvid4=F82E83DB-11DE-9169-3ABA-D49868954FFF21655-022060401-tnVc6wLeIYviJecGNRjhVg%3D%3D; nostalgia_conf=-1; CURRENT_BLACKGAP=0; fingerprint=ade468f62bacff496254e1cd6685a12a; buvid_fp_plain=undefined; SESSDATA=94776347%2C1669829549%2C960b6%2A61; bili_jct=e0047cd3733fbecce02142c879bf2d9f; DedeUserID=297704642; DedeUserID__ckMd5=6a2e18b325933872; sid=atmg2c94; buvid_fp=ade468f62bacff496254e1cd6685a12a; rpdid=|(J~R~|Y|)YR0J'uYlRk~~YRm; b_ut=5; LIVE_BUVID=AUTO9616554750049862; is-2022-channel=1; CURRENT_QUALITY=0; blackside_state=0; b_lsid=CE1010A2A4_1817726D2D0; innersign=1; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_2E4E4E5E%22%3A%221817726D5FD%22%2C%22333.788.fp.risk_2E4E4E5E%22%3A%221817726EDAE%22%7D%7D; PVID=4; bp_video_offset_297704642=673085623853121700; CURRENT_FNVAL=4048"
    referer = 'https://www.bilibili.com/ '
    header = {
        'user-agent': random.choice(headers),
        'cookie': cookie,
        'referer': referer
    }
    response = requests.get(url=urls, headers=header, timeout=timeout)
    return response


def get_data(urls, address,
             ffmpeg_path=r"D:\pycharm\ffmpeg-4.3.1-2021-01-01-essentials_build\bin\ffmpeg.exe"):
    """Download a video page's video and audio streams and merge them with ffmpeg.

    The page HTML is fetched with ``get_response``; the stream URLs are read from
    the JSON assigned to ``window.__playinfo__`` and the video name from the
    ``<h1 title=...>`` element.  The name is stored in the module-level ``title``
    so that other code can reuse it as a file name.

    Args:
        urls: URL of the video page.
        address: Directory prefix (including the trailing separator) that the
            file names are appended to.
        ffmpeg_path: Path of the ffmpeg executable.  The default is the
            machine-specific location used by the original author.

    Raises:
        IndexError: If the page does not contain the expected play-info script
            or title element.
    """
    global title
    response = get_response(urls).text
    # Stream description embedded in the page as inline JSON.
    info = re.findall('<script>window.__playinfo__=(.*?)</script>', response)[0]
    raw_title = re.findall('<h1 title="(.*?)" class="video-title tit">', response)[0]
    # The title comes from the remote page and is used as a file name, so
    # replace the characters Windows does not allow in file names.
    title = re.sub(r'[\\/:*?"<>|]', '_', raw_title)

    json_data = json.loads(info)
    dash = json_data['data']['dash']
    video_url = dash['video'][0]['baseUrl']
    audio_url = dash['audio'][0]['baseUrl']
    video_content = get_response(video_url).content
    audio_content = get_response(audio_url).content

    video_path = f"{address}{title}.mp4"
    audio_path = f"{address}{title}.mp3"
    merged_path = f"{address}{title}-.mp4"
    with open(video_path, 'wb') as f:
        f.write(video_content)
    with open(audio_path, 'wb') as f:
        f.write(audio_content)

    # Pass the arguments as a list (no shell) so that a title containing
    # quotes or shell metacharacters cannot alter the command.
    merge_cmd = [
        ffmpeg_path,
        '-i', audio_path,
        '-i', video_path,
        '-acodec', 'copy',
        '-vcodec', 'copy',
        merged_path,
    ]
    try:
        return_code = subprocess.call(merge_cmd)
    except OSError:
        # ffmpeg is missing or cannot be started.
        return_code = None
    if return_code != 0:
        # Merge failed: keep the separate streams instead of deleting the
        # only copies of the downloaded data.
        return
    # Merge succeeded: remove the separate audio and video files.
    os.remove(audio_path)
    os.remove(video_path)

# 视频保存main方法
def main(code, address):
    """Build the video page URL from ``code`` and download it into ``address``.

    Args:
        code: Suffix of the video page address (the part after ``/video/``).
        address: Target folder as typed by the user; a backslash is appended
            and any double quotes are stripped before use.
    """
    page_url = ''.join(('https://www.bilibili.com/video/', code))
    save_prefix = ''.join((address, "\\")).replace('"', '')
    get_data(page_url, save_prefix)


class Ui_Form(object):
    """Widget layout and button handlers of the downloader window.

    The form has a folder text box (``lineEdit``), a video-suffix text box
    (``lineEdit_2``) with its download button (``pushButton``), and a
    comment-URL text box (``lineEdit_3``) with its save button
    (``pushButton_2``), drawn over a background image label.
    """

    def setupUi(self, Form):
        """Create the widgets on ``Form``, set their texts and connect the buttons."""
        Form.setObjectName("Form")
        Form.resize(720, 491)
        font = QtGui.QFont()
        font.setFamily("方正姚体")
        font.setPointSize(12)
        Form.setFont(font)
        # "Download video" button.
        self.pushButton = QtWidgets.QPushButton(Form)
        self.pushButton.setGeometry(QtCore.QRect(590, 230, 101, 41))
        font = QtGui.QFont()
        font.setFamily("华文行楷")
        font.setPointSize(15)
        self.pushButton.setFont(font)
        self.pushButton.setObjectName("pushButton")
        # Text box for the folder to save into.
        self.lineEdit = QtWidgets.QLineEdit(Form)
        self.lineEdit.setGeometry(QtCore.QRect(20, 180, 541, 41))
        font = QtGui.QFont()
        font.setFamily("华文行楷")
        font.setPointSize(15)
        font.setBold(False)
        font.setWeight(50)
        font.setKerning(True)
        self.lineEdit.setFont(font)
        self.lineEdit.setObjectName("lineEdit")
        # Full-window label that carries the background image.
        self.label = QtWidgets.QLabel(Form)
        self.label.setGeometry(QtCore.QRect(0, 0, 721, 491))
        self.label.setText("")
        # Background image; the path is specific to the original author's machine.
        self.label.setPixmap(QtGui.QPixmap("D:/Desktop/eclipse-java-2020-06-R-win32-x86_64/resource/bilibili.gif"))
        self.label.setScaledContents(True)
        self.label.setObjectName("label")
        # Heading label.
        self.label_2 = QtWidgets.QLabel(Form)
        self.label_2.setGeometry(QtCore.QRect(70, 40, 581, 91))
        font = QtGui.QFont()
        font.setFamily("华文彩云")
        font.setPointSize(43)
        font.setBold(False)
        font.setItalic(False)
        font.setWeight(50)
        self.label_2.setFont(font)
        self.label_2.setObjectName("label_2")
        # Text box for the video address suffix.
        self.lineEdit_2 = QtWidgets.QLineEdit(Form)
        self.lineEdit_2.setGeometry(QtCore.QRect(20, 230, 541, 41))
        font = QtGui.QFont()
        font.setFamily("华文行楷")
        font.setPointSize(15)
        font.setBold(False)
        font.setWeight(50)
        font.setKerning(True)
        self.lineEdit_2.setFont(font)
        self.lineEdit_2.setObjectName("lineEdit_2")
        # Text box for the comment URL.
        self.lineEdit_3 = QtWidgets.QLineEdit(Form)
        self.lineEdit_3.setGeometry(QtCore.QRect(20, 310, 541, 41))
        font = QtGui.QFont()
        font.setFamily("华文行楷")
        font.setPointSize(15)
        self.lineEdit_3.setFont(font)
        self.lineEdit_3.setObjectName("lineEdit_3")
        # "Save comments" button.
        self.pushButton_2 = QtWidgets.QPushButton(Form)
        self.pushButton_2.setGeometry(QtCore.QRect(590, 310, 101, 41))
        font = QtGui.QFont()
        font.setFamily("华文行楷")
        font.setPointSize(15)
        self.pushButton_2.setFont(font)
        self.pushButton_2.setObjectName("pushButton_2")
        # Stacking order: the background label is raised first so that every
        # other widget ends up above it.
        self.label.raise_()
        self.lineEdit.raise_()
        self.pushButton.raise_()
        self.label_2.raise_()
        self.lineEdit_2.raise_()
        self.lineEdit_3.raise_()
        self.pushButton_2.raise_()

        self.retranslateUi(Form)
        self.pushButton.clicked.connect(lambda: self.download())
        self.pushButton_2.clicked.connect(lambda: self.save())
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and the initial text of every button, label and text box."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        self.pushButton.setText(_translate("Form", "视频下载"))
        self.lineEdit.setText(_translate("Form", "下载视频保存地址"))
        self.label_2.setText(_translate("Form", "屁颠儿屁颠儿"))
        self.lineEdit_2.setText(_translate("Form", "预下载视频后缀"))
        self.lineEdit_3.setText(_translate("Form", "热门评论URL"))
        self.pushButton_2.setText(_translate("Form", "评论保存"))

    def download(self):
        """Download the video named in ``lineEdit_2`` into the folder from ``lineEdit``."""
        address = self.lineEdit.text()
        code_ = self.lineEdit_2.text()
        main(code_, address)
        # Report completion in the text box the suffix was read from.
        self.lineEdit_2.setText("下载完成")

    def save(self):
        """Save the comments of the URL in ``lineEdit_3`` to a text file.

        The output file is named after the module-level ``title``, which is
        only assigned by ``get_data`` when a video is downloaded.  If no video
        has been downloaded yet, a hint is shown instead of failing with a
        NameError.
        """
        urls = self.lineEdit_3.text()
        filepath = self.lineEdit.text()
        address = (filepath + "/").replace('"', '')
        # `title` is unbound until get_data() has run at least once.
        filename = globals().get("title")
        if not filename:
            self.lineEdit_3.setText("请先下载视频")
            return
        get_data0(urls, address, filename)
        # Report success in the text box the URL was read from.
        self.lineEdit_3.setText("评论保存成功")


# Script entry point: build the window from Ui_Form and run the Qt event loop.
if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    widget = QtWidgets.QWidget()
    ui = Ui_Form()  # UI class defined above
    ui.setupUi(widget)
    widget.show()

    # Exit the process with the event loop's return code.
    sys.exit(app.exec_())

最后使用pyinstaller打包成exe文件:

pyinstaller -F -w -i "resources\logo.ico" Pidian.py

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值