#bilibili弹幕抓取
import requests
import re
from bs4 import BeautifulSoup
import operator#排序
def getHTMLText(url, timeout=10):
    """Fetch ``url`` with a browser-like User-Agent and return the body text.

    Progress and failure messages are printed to stdout.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded response text, or ``None`` when the request failed
        (connection error, timeout, malformed URL, ...).
    """
    # A desktop Chrome User-Agent is sent instead of the default
    # python-requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
    }
    print("获取url中...")
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP-layer failures are handled here; a bare
        # ``except`` would also hide KeyboardInterrupt and real bugs.
        print("获取Url失败")
        return None
    print("获取url完成")
    return r.text
def parsePage(text):
try:
print("解析文本...")
keyStr = re.findall(r'upgcxcode/78/39/[\d]*',text)#B站有两种寻址方式,第二种多一些
key = eval(keyStr[0].split('/')[3])
print(key)
commentUrl = 'https://api.bilibili.com/x/v1/dm/list.so?oid=' + str(key) # 弹幕存储地址
res=requests.get(commentUrl)
res.encoding = 'utf-8'
commentText=res.text
soup = Beautiful
利用av号爬取Bilibili视频弹幕
最新推荐文章于 2024-05-30 22:18:55 发布