Scraping the Bilibili Manga Fan-Support Ranking with Scrapy + Selenium (Dynamically Loaded Page)

Target data:

[Screenshot of the target data: for each entry on the fan-support ranking, the rank, rank movement, cover image, title, author, fan-support value, and up to three top supporters.]
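The post never shows `items.py`, but every field follows from the spider code below. A minimal matching definition (reconstructed, not the author's original file) could look like this:

```python
# items.py -- reconstructed from the fields the spider fills in below
import scrapy


class BilibiliYyItem(scrapy.Item):
    paiming = scrapy.Field()       # rank, zero-padded ("01", "02", ...)
    pmqingkuang = scrapy.Field()   # rank movement: 保持 (hold) / 上升 (up) / 下降 (down)
    pic_link = scrapy.Field()      # cover image URL
    cartoon_link = scrapy.Field()  # detail-page URL
    name = scrapy.Field()          # manga title
    author = scrapy.Field()        # author line
    fensizhi = scrapy.Field()      # fan-support value (unit: 万, i.e. 10,000)
    zhugong1 = scrapy.Field()      # top supporter
    zhugong2 = scrapy.Field()      # second supporter
    zhugong3 = scrapy.Field()      # third supporter
```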

Spider code:

```python
# -*- coding: utf-8 -*-
import re

import scrapy
from selenium import webdriver

from bilibili_yy.items import BilibiliYyItem


class BiliSpider(scrapy.Spider):
    name = 'bili'
    # allowed_domains = ['manga.bilibili.com']
    start_urls = ['https://manga.bilibili.com/ranking?from=manga_homepage#/ouenn/']

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The ranking is rendered by JavaScript, so a real browser is needed.
        # The driver itself is meant to be driven from a downloader middleware
        # (the post doesn't show that file; a sketch follows after this block).
        self.driver = webdriver.Chrome()

    def parse(self, response):
        for data_s in response.xpath('//div[@class="rank-item dp-i-block border-box p-relative"]'):
            # Create a fresh item per entry; reusing one item across yields
            # risks pipelines seeing already-overwritten values.
            item = BilibiliYyItem()

            # The rank number is drawn as one <span> per digit, and the digit
            # only appears in the class name (e.g. "... digit-3"), so pull it
            # out with a regex and zero-pad to two characters.
            digit_classes = data_s.xpath(
                './/span[starts-with(@class,"digit-item bg-center bg-contain bg-no-repeat dp-i-block digit-")]/@class'
            ).extract()
            item['paiming'] = ''.join(re.findall(r'\d', c)[0] for c in digit_classes).zfill(2)

            # Rank movement (hold / up / down) is likewise encoded in a class name.
            movement = data_s.xpath(
                './/div[starts-with(@class,"rank-movement p-absolute bg-center bg-cover bg-no-repeat")]/@class'
            ).extract()[0]
            if 'hold' in movement:
                item['pmqingkuang'] = '保持'  # held position
            elif 'up' in movement:
                item['pmqingkuang'] = '上升'  # moved up
            else:
                item['pmqingkuang'] = '下降'  # moved down

            item['pic_link'] = data_s.xpath(
                './/div[starts-with(@class,"manga-cover bg-center bg-cover bg-no-repeat")]/@data-src'
            ).extract()[0]
            item['cartoon_link'] = 'https://manga.bilibili.com' + data_s.xpath(
                './/a[starts-with(@class,"dp-block manga-title")]/@href'
            ).extract()[0]
            item['name'] = data_s.xpath('.//a[starts-with(@class,"dp-block manga-title")]/text()').extract()[0]
            item['author'] = data_s.xpath('.//p[@class="fans-author-text t-over-hidden t-no-wrap"]/text()').extract()[0]
            # Fan-support value, e.g. "12.3 万 粉丝值" -> "12.3" (unit: 万)
            item['fensizhi'] = data_s.xpath('.//p[@class="fans-value"]/text()').extract()[0].replace(' 万 粉丝值', '')

            # Top three supporters live in child divs 2-4; any may be missing.
            for i, field in enumerate(('zhugong1', 'zhugong2', 'zhugong3'), start=2):
                title = data_s.xpath(f'.//div[@class="award-user-ctnr p-absolute w-100"]/div[{i}]/@title').extract()
                item[field] = title[0] if title else ''

            yield item

    def closed(self, reason):
        # Scrapy calls closed(reason) when the spider finishes; release the browser.
        print('Closing the browser')
        self.driver.quit()
```
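Note that `parse` never touches `self.driver` directly: in the usual Scrapy + Selenium setup, a downloader middleware loads each request in the browser and hands the JavaScript-rendered HTML back to the spider. The post doesn't include that file, so the following is only a minimal sketch; the class name, module path, and fixed three-second wait are assumptions:

```python
# middlewares.py -- assumed sketch: render each request in the spider's
# Chrome instance and return the fully rendered HTML to parse().
import time

from scrapy.http import HtmlResponse


class SeleniumMiddleware:
    def process_request(self, request, spider):
        spider.driver.get(request.url)
        time.sleep(3)  # crude wait; a WebDriverWait on the rank list is more robust
        # Returning an HtmlResponse short-circuits Scrapy's own downloader.
        return HtmlResponse(
            url=spider.driver.current_url,
            body=spider.driver.page_source,
            encoding='utf-8',
            request=request,
        )
```

Enable it in `settings.py` (the module path is hypothetical) with `DOWNLOADER_MIDDLEWARES = {'bilibili_yy.middlewares.SeleniumMiddleware': 543}`, then run the spider with `scrapy crawl bili`.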

Writing the items out to MongoDB:

[Screenshot of the scraped items stored in MongoDB.]
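The pipeline code isn't shown in the post either; a minimal sketch with pymongo (the database and collection names and the local connection are assumptions):

```python
# pipelines.py -- assumed sketch: store each scraped item as a MongoDB document.
import pymongo


class BilibiliYyPipeline:
    def open_spider(self, spider):
        self.client = pymongo.MongoClient('localhost', 27017)
        self.collection = self.client['bilibili']['ouenn_rank']  # assumed names

    def process_item(self, item, spider):
        self.collection.insert_one(dict(item))
        return item

    def close_spider(self, spider):
        self.client.close()
```

Register it in `settings.py` with `ITEM_PIPELINES = {'bilibili_yy.pipelines.BilibiliYyPipeline': 300}` (again, the module path is assumed).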
