# 一、爬取弹幕 (Part 1: crawl the danmaku / bullet comments)
import requests
import json
import re
def download_page(url):
    """Fetch ``url`` with an HTTP GET that carries a browser User-Agent.

    :param url: address to request.
    :return: the response object returned by ``requests.get``.
    """
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    # The second positional parameter of requests.get() is ``params`` (the
    # query string), so the dict must be passed by keyword to actually be
    # sent as HTTP request headers.
    res = requests.get(url, headers=headers)
    return res
def get_cid(av):
    """Look up the cid of the first pagelist entry for a Bilibili video.

    :param av: av number of the video, with or without the ``av`` prefix
        (``'av95811021'`` or ``'95811021'``). A string that contains it,
        such as ``https://www.bilibili.com/video/av95811021``, or a plain
        int is accepted as well.
    :return: the ``cid`` value of the first entry under ``data`` in the
        pagelist API response.
    """
    aid = str(av).strip()
    # str.strip('av') removes any run of the characters 'a' and 'v' from both
    # ends instead of the literal prefix, so extract the digits explicitly.
    match = re.search(r'av(\d+)', aid)
    if match:
        aid = match.group(1)
    url = f'https://api.bilibili.com/x/player/pagelist?aid={aid}&jsonp=jsonpa'
    res = download_page(url)
    res_dict = json.loads(res.text)
    # Presumably 'data' lists one entry per part of the video (verify against
    # the API); only the first entry's cid is returned.
    return res_dict['data'][0]['cid']
def get_danmu(cid):
"""
:param cid: 获取弹幕所需的id
:return: 弹幕列表
"""
url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'
res = download_page(url)
res_xml = res.content.decode('utf-8')
pattern = re.compile('<d.*?>(.*?)</d>')
danmu_list =