学习笔记(4) [Python 爬虫]:爬取 B 站搜索页面中所有视频的封面
(Study note 4 — Python crawler: download the cover image of every video on a Bilibili search results page)

import os

import requests

import re

import json

from bs4 import BeautifulSoup

# HTTP headers sent with the search-page request so it looks like a normal
# Firefox browsing session (bilibili's search page blocks bare requests).
headers = {
'Host': 'search.bilibili.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive',
# SECURITY NOTE(review): this Cookie embeds live session credentials
# (SESSDATA, bili_jct, DedeUserID). Hard-coding them in source leaks the
# account if the file is shared; they also expire, so requests may start
# failing silently — move to an environment variable or config file.
'Cookie': "sid=9929ha37; _uuid=FC54B86B-0908-ABD2-3477-79EDAABB172C51786infoc; buvid3=293ACCA2-296A-4737-9E10-FDB90441803453950infoc; DedeUserID=5200237; DedeUserID__ckMd5=0edb78f23ca63f84; SESSDATA=b1fd4a18%2C1602645457%2C7d0e9*41; bili_jct=e57309d60e6d25f7c0da09fbf3c84007; PVID=2; CURRENT_FNVAL=16; LIVE_BUVID=AUTO8915871711402137; rpdid=|(umu)R~RkRJ0J'ul)~u~umuR; bp_t_offset_5200237=384977950470039482; dy_spec_agreed=1; bsource=seo_baidu; arrange=matrix",
'Upgrade-Insecure-Requests': '1',
'Cache-Control': 'max-age=0',
'TE': 'Trailers'
}

# --- Classic bilibili AV <-> BV id codec -----------------------------------
# A BV id is a base-58 encoding of (avid XOR `xor`) + `add`, with the six
# base-58 digits scattered into fixed positions `s` of a 12-char template.

# Base-58 alphabet used by bilibili's BV encoding.
table = 'fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF'

# Reverse lookup: character -> its index (digit value) in `table`.
tr = {ch: idx for idx, ch in enumerate(table)}

# Positions inside the BV string that hold digit i (least significant first).
s = [11, 10, 3, 8, 4, 6]

xor = 177451812
add = 8728348608


def dec(x):
    """Decode a full BV id string (e.g. 'BV17x411w7KC') to its numeric AV id."""
    raw = sum(tr[x[pos]] * 58 ** i for i, pos in enumerate(s))
    return (raw - add) ^ xor


def enc(x):
    """Encode a numeric AV id into its BV id string."""
    n = (x ^ xor) + add
    # Template with the fixed 'BV1..4.1.7..' characters; blanks get filled in.
    chars = list('BV1  4 1 7  ')
    for i, pos in enumerate(s):
        chars[pos] = table[n // 58 ** i % 58]
    return ''.join(chars)

def downloadCover(bvid):
    """Download the cover image of one video and save it as ./image/<bvid>.jpg.

    bvid: full BV id string, e.g. 'BV17x411w7KC'. The id is first decoded to
    its numeric AV id, the video-info API is queried for the cover URL, then
    the image bytes are fetched and written to disk.
    """
    avid = str(dec(bvid))
    print(avid)
    videoUrl = 'https://api.bilibili.com/x/web-interface/view?aid=' + avid
    # Timeouts so one dead connection cannot hang the whole crawl;
    # raise_for_status surfaces HTTP errors instead of failing later on JSON.
    videoInfo = requests.get(videoUrl, timeout=10)
    videoInfo.raise_for_status()
    videoJson = videoInfo.json()  # same as json.loads(videoInfo.text)
    picSrc = videoJson['data']['pic']
    imgInfo = requests.get(picSrc, timeout=10)
    imgInfo.raise_for_status()
    # exist_ok avoids the check-then-create race of os.path.exists + mkdir.
    os.makedirs('./image', exist_ok=True)
    with open('./image/' + str(bvid) + '.jpg', 'wb') as f:
        f.write(imgInfo.content)

if __name__ == '__main__':
    # Interactive driver: ask for a search keyword and a page range, scrape
    # each search-results page for BV links, and download every cover.
    print("请输入搜索关键词")
    keyword = str(input())
    print("请输入查询页面的范围")
    pageRange = input().split()  # expects two integers, e.g. "1 3"
    pageMin = int(pageRange[0])
    pageMax = int(pageRange[1])
    for index in range(pageMin, pageMax + 1):
        url = 'https://search.bilibili.com/all?keyword=' + keyword + \
              '&page=' + str(index)
        print(url)
        htmlInfo = requests.get(url, headers=headers)
        soup = BeautifulSoup(htmlInfo.text, 'html.parser')
        print(soup)
        # Every anchor whose href mentions a BV id is a candidate video link.
        videoList = soup.find_all(attrs={'href': re.compile('BV')})
        print(videoList)
        bvidList = list()
        for item in videoList:
            href = str(item.get('href'))
            print(href)
            # Extract the id between 'BV' and the '?' starting the query string.
            bvid = re.findall(r'BV(.+?)\?', href)
            if not bvid:
                # href contains 'BV' but no '?query' part — the original code
                # would raise IndexError on bvid[0] here; skip it instead.
                continue
            bvidList.append('BV' + bvid[0])
            downloadCover('BV' + bvid[0])
        print(bvidList)

本文禁止转载或摘编

--

--

--

分享到:

投诉或建议

评论

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值