爬虫实战29:爬取哔哩哔哩网站视频信息

# coding:utf-8
import requests
import json
import time
import pymysql
import bs4
# Request headers sent with every HTTP call in this script: a desktop Chrome
# User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/76.0.3809.132 Safari/537.36'
    ),
}

# Shared accumulator of av numbers; get_aid() appends to it page by page.
result = list()


def get_aid(page):
    """Collect the av numbers shown on one page of Bilibili search results.

    Fetches the search page for the keyword '爬虫', extracts the text of each
    video item's ``span.type.avid`` element, removes the ``av`` prefix and
    appends the remaining id string to the module-level ``result`` list.

    Args:
        page: Page number of the search results to fetch.

    Returns:
        The module-level ``result`` list. It is returned on failure too (then
        simply without new entries), so a caller that writes
        ``result = get_aid(page)`` never replaces the shared list with None.
    """
    url = 'https://search.bilibili.com/all'
    query = {
        'keyword': '爬虫',
        'from_source': 'nav_search',
        'spm_id_from': '333.851.b_696e7465726e6174696f6e616c486561646572.11',
        'page': page,
    }
    try:
        # NOTE(review): verify=False disables TLS certificate verification; it
        # is kept from the original script -- confirm it is really required.
        # The timeout keeps a stalled connection from hanging the whole crawl.
        html = requests.get(url, params=query, headers=headers,
                            verify=False, timeout=10).text
    except requests.RequestException as err:
        print('something is wrong', err)
        return result
    finally:
        # Pause after every request attempt so the site is not hammered.
        time.sleep(1)

    # Walk down to the <ul> holding the video items; find() returns None as
    # soon as the page layout does not match, which is reported and skipped.
    node = bs4.BeautifulSoup(html, 'lxml')
    path = (
        ('div', {'id': 'all-list'}),
        ('div', {'class': 'mixin-list'}),
        ('ul', {'class': 'video-list clearfix'}),
    )
    for tag, attrs in path:
        node = node.find(tag, attrs=attrs)
        if node is None:
            print('something is wrong')
            return result

    for item in node.find_all('li', attrs={'class': 'video-item matrix'}):
        headline = item.find('div', attrs={'class': 'headline clearfix'})
        if headline is None:
            continue
        badge = headline.find('span', attrs={'class': 'type avid'})
        if badge is None:
            # One malformed entry should not discard the rest of the page.
            continue
        aid = badge.get_text().replace('av', '').strip()
        print(aid)
        result.append(aid)
    return result


def get_contents(url):
    """Fetch one video's statistics from the given API URL and print them.

    The JSON response is expected to carry the counters under
    ``data -> stat`` with the keys ``aid``, ``view``, ``coin``, ``like``,
    ``favorite``, ``share`` and ``danmaku``.

    Args:
        url: Full URL of the API endpoint to query.

    Returns:
        The ``stat`` dict from the response when every expected counter is
        present, otherwise None (request failure, non-JSON body, or missing
        keys). In the failure cases a '------------' separator is printed.
    """
    try:
        # NOTE(review): verify=False disables TLS certificate verification; it
        # is kept from the original script -- confirm it is really required.
        payload = requests.get(url=url, headers=headers,
                               verify=False, timeout=10).json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        print('------------', err)
        return None
    finally:
        # Pause after every request attempt so the API is not hammered.
        time.sleep(1)

    try:
        stat = payload['data']['stat']
        # Look up every counter before printing anything, so a partial
        # response produces the separator instead of a half-printed record.
        rows = [
            ('视频编号', stat['aid']),
            ('观看数量', stat['view']),
            ('投币数量', stat['coin']),
            ('收藏数量', stat['favorite']),
            ('点赞数量', stat['like']),
            ('分享数量', stat['share']),
            ('弹幕数量', stat['danmaku']),
        ]
    except (KeyError, TypeError):
        # TypeError: 'data' or 'stat' is present but not a mapping (e.g. null).
        print('------------')
        return None

    for label, value in rows:
        print(label, value)
    return stat


if __name__ == '__main__':
    # get_aid() appends the ids it finds to the module-level ``result`` list.
    # Its return value is deliberately not assigned back to ``result``: a
    # failed page could otherwise replace the list and break the loop below.
    for page in range(1, 50):  # search result pages 1..49
        get_aid(page)

    # dict.fromkeys() drops ids collected more than once while keeping the
    # order in which they were found, so each video is queried only once.
    for aid in dict.fromkeys(result):
        url = 'https://api.bilibili.com/x/web-interface/view?aid=' + str(aid)
        get_contents(url)
    

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值