python获取b站视频封面及弹幕

利用python获取b站视频封面及弹幕

获取弹幕

从 https://api.bilibili.com/x/v1/dm/list.so?oid=286266029 可以看到弹幕列表(oid 为纯数字)。
在这里插入图片描述
该网页为xml格式,利用etree.parse解析后,再用xpath提取弹幕列表
从https://api.bilibili.com/x/player/pagelist?bvid=BV1eh41127Ma&jsonp=jsonp中找到了oid(/cid)
在这里插入图片描述
利用
data = json.loads(req.text)
对返回解析,得到一个python字典对象。该字典data项为一列表,该列表第一个元素的cid项即为cid
data = data['data'][0]['cid']

def jx(aid, oid):
    """Download the danmaku of one video part and save it as a text file.

    Requests the danmaku XML for ``oid`` from the ``dm/list.so`` endpoint,
    extracts the text of every ``<d>`` element, writes one comment per line
    to ``<path>/bilibili<aid>.txt`` (UTF-8) and prints the extracted list.

    Args:
        aid: Video id; only used to build the output file name.
        oid: The cid/oid of the video part.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
    """
    import io

    url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=' + str(oid)
    head = {
        'Host': 'api.bilibili.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/69.0.3497.92 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        # 'br' is intentionally not advertised: now that the headers are really
        # sent, a brotli response could only be decoded when an optional
        # brotli package is installed.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': ''
    }
    # The second positional argument of requests.get() is ``params``; the
    # headers must be passed by keyword or they end up in the query string.
    rp = requests.get(url, headers=head)
    rp.raise_for_status()

    # Parse straight from memory instead of going through a scratch
    # 'bilibili.xml' file in the current working directory.
    ht = etree.parse(io.BytesIO(rp.content), etree.HTMLParser())
    results = ht.xpath('//d//text()')

    out_file = os.path.join(path.get(), 'bilibili' + str(aid) + '.txt')
    with open(out_file, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in results)

    print(results)


def get_oid(id, kind):
    """Look up the cid of a video's first part and download its danmaku.

    Queries the ``player/pagelist`` endpoint by ``aid`` (when ``kind`` is
    ``'av'``) or by ``bvid`` (any other ``kind``), takes the ``cid`` of the
    first entry in the returned ``data`` list and hands it to ``jx``.

    Args:
        id: Video id as typed by the user. An optional leading ``av``/``BV``
            prefix is accepted, so the same input works for both the danmaku
            and the cover download.
        kind: ``'av'`` for av numbers, anything else is treated as a BV id.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        ValueError: If the response contains no page list.
    """
    creat_path()

    video_id = str(id).strip()
    if kind == 'av':
        # The aid parameter is the bare number, without the "av" prefix.
        if video_id[:2].lower() == 'av':
            video_id = video_id[2:]
        query = {'aid': video_id, 'jsonp': 'jsonp'}
    else:
        # The bvid parameter needs the "BV" prefix; add it when it is missing.
        if video_id[:2].lower() == 'bv':
            video_id = video_id[2:]
        video_id = 'BV' + video_id
        query = {'bvid': video_id, 'jsonp': 'jsonp'}

    h = {
        'Host': 'api.bilibili.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/69.0.3497.92 Safari/537.36',
        'Accept': '*/*',
        # 'br' is intentionally not advertised: decoding brotli requires an
        # optional package that may not be installed.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': ''
    }

    # Headers must be passed by keyword; the second positional argument of
    # requests.get() is ``params``.
    req = requests.get('https://api.bilibili.com/x/player/pagelist',
                       params=query, headers=h)
    req.raise_for_status()

    payload = json.loads(req.text)
    pages = payload.get('data') or []
    if not pages:
        # Fail with a readable message instead of a KeyError/TypeError.
        raise ValueError('no page list returned for ' + video_id + ': ' + req.text)

    jx(video_id, str(pages[0]['cid']))

获取封面

在视频主页HTML的head中,可以从itemprop="image"的meta标签里找到封面图片的链接地址。
(部分视频会出现请求错误,原因可能是请求头有问题,或触发了B站的反爬机制)

def imaged(bv, kind):
    """Download the cover image(s) of a video into the output directory.

    Loads the video page, looks for ``<meta itemprop="image">`` tags and
    saves each referenced image as ``<path>/<id><extension>``.

    Args:
        bv: Video id as typed by the user. An optional leading ``av``/``BV``
            prefix is accepted and removed before the page URL is built.
        kind: ``'av'`` for av numbers, anything else is treated as a BV id.

    Raises:
        requests.HTTPError: If the video page answers with an HTTP error status.
    """
    from urllib.parse import urlparse

    creat_path()

    prefix = 'av' if kind == 'av' else 'BV'
    video_id = str(bv).strip()
    # The URL below adds the prefix itself, so drop one typed by the user.
    if video_id[:2].lower() == prefix.lower():
        video_id = video_id[2:]
    url = 'https://www.bilibili.com/video/' + prefix + video_id + '/'

    h = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"
    }
    # Headers must be passed by keyword; the second positional argument of
    # requests.get() is ``params``.
    resp = requests.get(url, headers=h)
    resp.raise_for_status()

    soup = bs4.BeautifulSoup(resp.text, 'html.parser')
    tags = soup.find_all('meta', itemprop="image")
    if not tags:
        print('no cover image meta tag found at ' + url)

    for tag in tags:
        p_url = tag.get('content')
        if not p_url:
            continue
        # NOTE(review): the page may use protocol-relative links ("//host/...");
        # urlretrieve needs an explicit scheme.
        if p_url.startswith('//'):
            p_url = 'https:' + p_url
        # Take the extension from the URL path only, so a query string or
        # fragment cannot leak into the file name.
        extension = os.path.splitext(urlparse(p_url).path)[1]
        urllib.request.urlretrieve(p_url, os.path.join(path.get(), video_id + extension))

完整代码

import requests
from lxml import etree
import json
import bs4
import urllib.request
import tkinter
from tkinter import filedialog
from tkinter import ttk
import os


def creat_path():
    """Create the directory named by the ``path`` variable if nothing exists there yet."""
    if os.path.exists(path.get()):
        return
    os.makedirs(path.get())


def jx(aid, oid):
    """Download the danmaku of one video part and save it as a text file.

    Requests the danmaku XML for ``oid`` from the ``dm/list.so`` endpoint,
    extracts the text of every ``<d>`` element, writes one comment per line
    to ``<path>/bilibili<aid>.txt`` (UTF-8) and prints the extracted list.

    Args:
        aid: Video id; only used to build the output file name.
        oid: The cid/oid of the video part.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
    """
    import io

    url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=' + str(oid)
    head = {
        'Host': 'api.bilibili.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/69.0.3497.92 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        # 'br' is intentionally not advertised: now that the headers are really
        # sent, a brotli response could only be decoded when an optional
        # brotli package is installed.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': ''
    }
    # The second positional argument of requests.get() is ``params``; the
    # headers must be passed by keyword or they end up in the query string.
    rp = requests.get(url, headers=head)
    rp.raise_for_status()

    # Parse straight from memory instead of going through a scratch
    # 'bilibili.xml' file in the current working directory.
    ht = etree.parse(io.BytesIO(rp.content), etree.HTMLParser())
    results = ht.xpath('//d//text()')

    out_file = os.path.join(path.get(), 'bilibili' + str(aid) + '.txt')
    with open(out_file, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in results)

    print(results)


def get_oid(id, kind):
    """Look up the cid of a video's first part and download its danmaku.

    Queries the ``player/pagelist`` endpoint by ``aid`` (when ``kind`` is
    ``'av'``) or by ``bvid`` (any other ``kind``), takes the ``cid`` of the
    first entry in the returned ``data`` list and hands it to ``jx``.

    Args:
        id: Video id as typed by the user. An optional leading ``av``/``BV``
            prefix is accepted, so the same input works for both the danmaku
            and the cover download.
        kind: ``'av'`` for av numbers, anything else is treated as a BV id.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        ValueError: If the response contains no page list.
    """
    creat_path()

    video_id = str(id).strip()
    if kind == 'av':
        # The aid parameter is the bare number, without the "av" prefix.
        if video_id[:2].lower() == 'av':
            video_id = video_id[2:]
        query = {'aid': video_id, 'jsonp': 'jsonp'}
    else:
        # The bvid parameter needs the "BV" prefix; add it when it is missing.
        if video_id[:2].lower() == 'bv':
            video_id = video_id[2:]
        video_id = 'BV' + video_id
        query = {'bvid': video_id, 'jsonp': 'jsonp'}

    h = {
        'Host': 'api.bilibili.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/69.0.3497.92 Safari/537.36',
        'Accept': '*/*',
        # 'br' is intentionally not advertised: decoding brotli requires an
        # optional package that may not be installed.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': ''
    }

    # Headers must be passed by keyword; the second positional argument of
    # requests.get() is ``params``.
    req = requests.get('https://api.bilibili.com/x/player/pagelist',
                       params=query, headers=h)
    req.raise_for_status()

    payload = json.loads(req.text)
    pages = payload.get('data') or []
    if not pages:
        # Fail with a readable message instead of a KeyError/TypeError.
        raise ValueError('no page list returned for ' + video_id + ': ' + req.text)

    jx(video_id, str(pages[0]['cid']))


def imaged(bv, kind):
    """Download the cover image(s) of a video into the output directory.

    Loads the video page, looks for ``<meta itemprop="image">`` tags and
    saves each referenced image as ``<path>/<id><extension>``.

    Args:
        bv: Video id as typed by the user. An optional leading ``av``/``BV``
            prefix is accepted and removed before the page URL is built.
        kind: ``'av'`` for av numbers, anything else is treated as a BV id.

    Raises:
        requests.HTTPError: If the video page answers with an HTTP error status.
    """
    from urllib.parse import urlparse

    creat_path()

    prefix = 'av' if kind == 'av' else 'BV'
    video_id = str(bv).strip()
    # The URL below adds the prefix itself, so drop one typed by the user.
    if video_id[:2].lower() == prefix.lower():
        video_id = video_id[2:]
    url = 'https://www.bilibili.com/video/' + prefix + video_id + '/'

    h = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"
    }
    # Headers must be passed by keyword; the second positional argument of
    # requests.get() is ``params``.
    resp = requests.get(url, headers=h)
    resp.raise_for_status()

    soup = bs4.BeautifulSoup(resp.text, 'html.parser')
    tags = soup.find_all('meta', itemprop="image")
    if not tags:
        print('no cover image meta tag found at ' + url)

    for tag in tags:
        p_url = tag.get('content')
        if not p_url:
            continue
        # NOTE(review): the page may use protocol-relative links ("//host/...");
        # urlretrieve needs an explicit scheme.
        if p_url.startswith('//'):
            p_url = 'https:' + p_url
        # Take the extension from the URL path only, so a query string or
        # fragment cannot leak into the file name.
        extension = os.path.splitext(urlparse(p_url).path)[1]
        urllib.request.urlretrieve(p_url, os.path.join(path.get(), video_id + extension))


def sel_path():
    """Let the user pick the output directory and store it in ``path``.

    The directory chooser returns an empty string when it is cancelled; in
    that case the current value of ``path`` is kept, so a cancelled dialog
    does not wipe the configured directory.
    """
    chosen = tkinter.filedialog.askdirectory()
    if chosen:
        path.set(chosen)


if __name__ == '__main__':
    # Build the small tkinter front end. ``path`` stays a module-level name
    # because the download helpers read it as a global.
    root = tkinter.Tk()
    root.geometry('250x150')
    path = tkinter.StringVar(root, value="D:/APC")

    # Read-only drop-down that selects how the typed id is interpreted.
    kind_box = tkinter.ttk.Combobox(width=3, state='readonly')
    kind_box['values'] = ('BV', 'av')
    kind_box.grid(row=0, column=2)
    kind_box.current(0)

    def _fetch_danmaku():
        """Run the danmaku download for the id and kind currently entered."""
        get_oid(id_entry.get(), kind_box.get())

    def _fetch_cover():
        """Run the cover download for the id and kind currently entered."""
        imaged(id_entry.get(), kind_box.get())

    # Widgets are created in the same order as before; only the layout calls
    # are grouped below.
    id_label = tkinter.Label(text='视频号')
    danmaku_btn = tkinter.Button(root, text='弹幕', command=_fetch_danmaku)
    id_entry = tkinter.Entry()
    cover_btn = tkinter.Button(root, text='封面', command=_fetch_cover)

    id_label.grid(row=0, column=1)
    id_entry.grid(row=0, column=0)
    danmaku_btn.grid(row=5, column=0)
    cover_btn.grid(row=5, column=1)

    # Output directory: editable text field plus a directory chooser button.
    path_entry = tkinter.Entry(textvariable=path)
    path_entry.grid(row=4, column=0)
    path_button = tkinter.Button(text='路径选择', command=sel_path)
    path_button.grid(row=4, column=1)

    root.mainloop()
  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值