利用Python爬取B站视频
主要参考lancely、温欣爸比、纯洁的微笑、码农家园的四篇博文。
1. 爬取的 Python 程序
以下为完整程序,摘录自码农家园;其中涉及队列的部分可以参考纯洁的微笑的博文。
import requests,threading,re,json,os,time
from lxml import etree
from queue import Queue
# Request headers handed to requests.get(): keep-alive connection, a
# bilibili.com Referer and a desktop Chrome User-Agent string.
headers = {
    'Connection': 'keep-alive',
    'Referer': 'https://www.bilibili.com/',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.163 Safari/537.36'
    ),
}

# Bounded FIFO queue: holds at most 100 items before put() blocks.
video_queue = Queue(maxsize=100)
def single_data(url):
resp = requests.get(url,headers=headers)
html = etree.HTML(resp.text)
title = html.xpath('//div[@id="viewbox_report"]/h1/@title')[0]
print('下载:',title)
data = re.search(r'__playinfo__=(.*?)</script><script>',resp.text).group(1)
data = json.loads(data)
try:
time = data['data']['dash']['duration']