# coding:utf-8
import requests
import json
import time
import pymysql
import bs4
# HTTP headers passed to every requests.get() call in this file; the
# User-Agent value is a desktop Chrome browser string.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'}

# Module-level list of video ids; get_aid() appends to it.
result = list()
def get_aid(page):
    """Collect the video ids shown on one Bilibili search-result page.

    Downloads page ``page`` of the hard-coded search for the keyword "爬虫",
    reads the ``type avid`` label of every video entry, removes the ``av``
    marker from it, prints each id and appends it to the module-level
    ``result`` list.

    Args:
        page: Page number of the search results; it is converted with
            ``str()`` and appended to the query string.

    Returns:
        The module-level ``result`` list. The list is returned even when the
        page could not be downloaded or parsed, so a caller that rebinds
        ``result`` to the return value never replaces the list with ``None``.
    """
    url = 'https://search.bilibili.com/all?keyword=爬虫&from_source=nav_search&spm_id_from=333.851.b_696e7465726e6174696f6e616c486561646572.11' + '&page=' + str(page)
    try:
        # NOTE(review): verify=False disables TLS certificate checking; it is
        # kept to preserve existing behaviour, but confirm it is really needed.
        # The timeout keeps one stalled connection from hanging the whole run.
        html = requests.get(url, headers=headers, verify=False, timeout=10).text
    except requests.RequestException as exc:
        print('something is wrong', exc)
        return result
    finally:
        # Throttle: wait one second between consecutive requests.
        time.sleep(1)

    try:
        listing = (
            bs4.BeautifulSoup(html, 'lxml')
            .find('div', attrs={'id': 'all-list'})
            .find('div', attrs={'class': 'mixin-list'})
            .find('ul', attrs={'class': 'video-list clearfix'})
        )
        items = listing.find_all('li', attrs={'class': 'video-item matrix'})
    except AttributeError:
        # find() yields None when an element is absent, so the next call in
        # the chain raises AttributeError: the page lacks the expected layout.
        print('something is wrong')
        return result

    for item in items:
        try:
            info = (
                item.find('div', attrs={'class': 'headline clearfix'})
                .find('span', attrs={'class': 'type avid'})
                .get_text()
            )
        except AttributeError:
            # A single entry without an id label must not discard the rest
            # of the page.
            continue
        aid = info.replace('av', '')
        print(aid)
        result.append(aid)
    return result
def get_contents(url):
    """Fetch one video's statistics from a JSON endpoint and print them.

    Args:
        url: Full URL of the endpoint to query. Its JSON response is expected
            to hold the counters under ``data`` -> ``stat``.

    Returns:
        None. The counters are written to stdout, one ``label value`` line
        each. If the request fails, the body is not JSON, or an expected
        field is missing, a ``------------`` line is printed instead and
        nothing else is output.
    """
    # (printed label, key inside the ``stat`` object), in output order:
    # video id, views, coins, favorites, likes, shares, danmaku count.
    fields = (
        ('视频编号', 'aid'),
        ('观看数量', 'view'),
        ('投币数量', 'coin'),
        ('收藏数量', 'favorite'),
        ('点赞数量', 'like'),
        ('分享数量', 'share'),
        ('弹幕数量', 'danmaku'),
    )
    try:
        # NOTE(review): verify=False disables TLS certificate checking; it is
        # kept to preserve existing behaviour, but confirm it is really needed.
        # The timeout keeps one stalled connection from hanging the whole run.
        payload = requests.get(url=url, headers=headers, verify=False, timeout=10).json()
    except (requests.RequestException, ValueError):
        # Network failure, or a body that is not valid JSON.
        print('------------')
        return
    finally:
        # Throttle: wait one second between consecutive requests.
        time.sleep(1)

    try:
        stat = payload['data']['stat']
        # Resolve every field before printing so a partially filled response
        # never produces a half-printed record.
        rows = [(label, stat[key]) for label, key in fields]
    except (KeyError, TypeError):
        # A key is missing, or ``data`` / ``stat`` is not a mapping.
        print('------------')
        return

    for label, value in rows:
        print(label, value)
if __name__ == '__main__':
    # Step 1: walk search-result pages 1..49. get_aid() appends every id it
    # finds to the module-level ``result`` list, so its return value is
    # deliberately not assigned back to ``result``: that would replace the
    # list with None whenever a page fails and break the loop below.
    for page in range(1, 50):
        get_aid(page)

    # Step 2: query and print the statistics of every collected video.
    for aid in result:
        url = 'https://api.bilibili.com/x/web-interface/view?aid=' + str(aid)
        get_contents(url)
# 爬虫实战29:爬取哔哩哔哩网站视频信息
# 最新推荐文章于 2024-06-26 09:51:11 发布