2021-03-24

爬取豆瓣

import time

from pyecharts import options as opts
from pyecharts.charts import Bar
from selenium import webdriver
from selenium.webdriver.common.by import By

class DouBanSpider():
    """Selenium-driven scraper that searches Douban Movie and charts ratings.

    Typical use is ``DouBanSpider().start()``: the spider opens ``self.url``,
    submits a search, walks the result pages printing every entry, counts the
    ratings into integer buckets (``self.dicts``) and finally renders a
    pyecharts bar chart of those buckets.
    """

    def __init__(self):
        """Launch a Chrome session and reset the rating histogram."""
        self.driver = webdriver.Chrome()
        self.url = 'https://movie.douban.com/'
        # Rating histogram: key = lower bound of the bucket, value = count.
        # Bucket 9 collects everything >= 9, bucket 0 everything below 1.
        # Keys are inserted from 9 down to 0; paint_table() plots them in
        # this insertion order.
        self.dicts = {bucket: 0 for bucket in range(9, -1, -1)}

    def open(self, url):
        """Navigate the browser to *url*."""
        self.driver.get(url)

    def get_tag_by_id(self, id):
        """Return the first element whose ``id`` attribute equals *id*."""
        return self.driver.find_element(By.ID, id)

    def get_tag_by_cn(self, cn):
        """Return the first element carrying the CSS class name *cn*."""
        return self.driver.find_element(By.CLASS_NAME, cn)

    def set_keys(self, tag, value):
        """Type *value* into the element *tag*."""
        tag.send_keys(value)

    def parse_content(self):
        """Scrape every result page, then render the chart and end the session.

        Pages are processed in a loop: each result card on the current page
        is printed and counted, then the paginator is clicked to advance.
        The loop ends when advancing is no longer possible.

        The chart is rendered and the browser is shut down even when
        scraping fails part-way; in that case the original exception is
        re-raised afterwards instead of being silently discarded.
        """
        try:
            while True:
                cards = self.driver.find_elements(
                    By.XPATH, '//div[contains(@class,"sc-bZQynM")]')
                for div in cards:
                    self._parse_item(div)
                if not self._go_to_next_page():
                    break
        finally:
            try:
                self.paint_table()  # draw whatever has been collected
            finally:
                # quit() ends the whole WebDriver session, not just the
                # current window, so no driver process is left behind.
                self.driver.quit()

    def _parse_item(self, div):
        """Print one result card and add its rating to the histogram."""
        title = div.find_element(By.XPATH, './/div[@class="title"]/a').text
        try:
            rating_num = div.find_element(By.CLASS_NAME, 'rating_nums').text
        except Exception:
            # Best effort: a card without a rating element counts as 0.
            rating_num = 0
        pl = div.find_element(By.CLASS_NAME, 'pl').text
        info = div.find_element(
            By.XPATH,
            './/div[@class="item-root"]//div[@class="meta abstract"]').text
        action = div.find_element(
            By.XPATH,
            './/div[@class="item-root"]//div[@class="meta abstract_2"]').text
        print(title, rating_num, pl, info, action)
        try:
            grade = float(rating_num)
        except ValueError:
            # The rating element exists but its text is not a number
            # (e.g. empty); treat it like a missing rating.
            grade = 0.0
        self.cal_rating(grade)

    def _go_to_next_page(self):
        """Click the last link in the paginator.

        Returns:
            True when the link was found and clicked, False otherwise.
        """
        # NOTE(review): termination relies on this lookup/click failing on
        # the final page. If the last anchor stays clickable there, the
        # loop would not end - verify against the live page.
        try:
            next_link = self.driver.find_element(
                By.XPATH, '//div[contains(@class,"paginator")]//a[last()]')
            next_link.click()
        except Exception:
            return False
        # Same pause start() uses after submitting the search, so the next
        # page is in place before its cards are read.
        time.sleep(2)
        return True

    def cal_rating(self, grade):
        """Increment the histogram bucket that *grade* falls into.

        Buckets are the integer lower bounds 9..1; anything that is not
        ``>= 1`` (including negative values and NaN) lands in bucket 0,
        and anything ``>= 9`` lands in bucket 9.
        """
        bucket = next(
            (floor for floor in range(9, 0, -1) if grade >= floor), 0)
        self.dicts[bucket] += 1

    def paint_table(self, title="某商场销售情况", series_name="xxx"):
        """Render the rating histogram as a pyecharts bar chart.

        Args:
            title: Chart title.
            series_name: Legend label of the single data series.

        The chart is written by ``Bar.render()`` called without arguments,
        i.e. to the library's default output location.
        """
        # NOTE(review): the default title reads "sales figures of a certain
        # mall" and the default series name is "xxx"; neither describes
        # rating buckets. They are kept as defaults for backward
        # compatibility - confirm and pass real labels where appropriate.
        bar = (
            Bar()
                .add_xaxis([str(key) for key in self.dicts])
                .add_yaxis(series_name, list(self.dicts.values()))
                .set_global_opts(title_opts=opts.TitleOpts(title=title))
        )
        bar.render()

    def start(self, keyword='成龙'):
        """Run the whole crawl: open the site, search, scrape and chart.

        Args:
            keyword: Text typed into the search box.
        """
        self.open(self.url)
        time.sleep(1)  # give the home page time to load
        inp = self.get_tag_by_id('inp-query')
        self.set_keys(inp, keyword)
        btn = self.get_tag_by_cn('inp-btn')
        btn.click()
        time.sleep(2)  # wait for the result list before parsing
        self.parse_content()


# The entry point has to live at module level: inside the class body the
# name DouBanSpider is not bound yet, so instantiating it there fails.
if __name__ == '__main__':
    dbs = DouBanSpider()
    dbs.start()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值