selenium 淘宝实例

# Selenium can drive a real browser in headless mode (no visible window), so a separate "ghost" browser is not needed.

import time
from multiprocessing import Pool

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait


class TaoBao(object):
    """Headless-Firefox crawler that searches Taobao and walks the result pages.

    Two browser sessions are kept: ``list_driver`` loads the search and
    result-list pages, while ``get_detail`` loads item pages in
    ``detail_driver`` only, leaving ``list_driver`` on the result list.
    """

    # Options shared by every Firefox session this class starts.
    options = webdriver.FirefoxOptions()
    # Pass the headless switch as a plain command-line argument; this does
    # not rely on the ``Options.headless`` convenience property.
    options.add_argument('-headless')

    # Seconds each explicit wait polls before raising TimeoutException.
    wait_seconds = 15

    def __init__(self):
        """Start the list and detail browser sessions."""
        self.start_url = "https://www.taobao.com/"
        self.list_driver = webdriver.Firefox(options=self.options)
        try:
            self.detail_driver = webdriver.Firefox(options=self.options)
        except Exception:
            # Do not leave the first browser running if the second one
            # cannot be started.
            self.list_driver.quit()
            raise

    def close(self):
        """Quit both browser sessions."""
        try:
            self.list_driver.quit()
        finally:
            self.detail_driver.quit()

    def _wait_for(self, driver, locate):
        """Poll ``locate(driver)`` until it returns a truthy value.

        Raises:
            TimeoutException: if nothing truthy is returned within
                ``wait_seconds`` seconds.
        """
        return WebDriverWait(driver, self.wait_seconds).until(locate)

    def start(self, url, keyword='笔记本电脑'):
        """Open ``url``, submit a search for ``keyword`` and walk the results.

        Args:
            url: Page that contains the search box (element id ``q``).
            keyword: Text typed into the search box.

        Raises:
            TimeoutException: if the search box does not appear in time.
        """
        self.list_driver.get(url)
        search_input = self._wait_for(
            self.list_driver, lambda driver: driver.find_element(By.ID, 'q'))
        search_input.send_keys(keyword)
        self.list_driver.find_element(By.CLASS_NAME, 'btn-search').click()
        self.parse_list()

    def get_list(self, url):
        """Load a result-list page in the list browser."""
        self.list_driver.get(url)

    def parse_list(self, html=None):
        """Follow the "next page" link until no further page is available.

        The next page is loaded in this process with ``list_driver``: the
        driver belongs to this process, so the navigation is not handed to a
        worker process.

        Args:
            html: Unused; kept so existing callers that pass it still work.
        """
        visited = set()
        while True:
            try:
                next_link = self._wait_for(
                    self.list_driver,
                    lambda driver: driver.find_element(
                        By.CSS_SELECTOR, '.next > a'))
            except TimeoutException:
                # No "next" link showed up in time: treat this as the last page.
                break
            url = next_link.get_attribute('href')

            print('------')

            # TODO: extract the pay counts ('deal-cnt') and detail links of
            # the current page here and feed them to get_detail/save_mongo;
            # that step is not wired in yet.

            # Stop on a link without a target, or one that was already
            # followed, so the loop cannot cycle forever.
            if not url or url in visited:
                break
            visited.add(url)
            self.get_list(url)

    def get_detail(self, detail_url, pay_num):
        """Load an item page and collect the text of its ``tm-count`` elements.

        Args:
            detail_url: URL of the item detail page.
            pay_num: Value carried over from the list page; it is printed and
                returned unchanged.

        Returns:
            dict with ``'pay_num'`` (the argument) and ``'num'`` (list of the
            ``tm-count`` element texts).

        Raises:
            TimeoutException: if no ``tm-count`` element appears in time.
        """
        self.detail_driver.get(detail_url)
        print(pay_num)

        counters = self._wait_for(
            self.detail_driver,
            lambda driver: driver.find_elements(By.CLASS_NAME, 'tm-count'))
        return {
            'pay_num': pay_num,
            'num': [counter.text for counter in counters],
        }

    def save_mongo(self, data):
        """Placeholder sink: only prints ``data``; nothing is stored."""
        print('保存数据', data)

if __name__ == '__main__':
    # Create the crawler (this starts both Firefox sessions) and run the
    # search from the start page.
    taobao = TaoBao()
    try:
        taobao.start(taobao.start_url)
    finally:
        # Always shut the browsers down, even when the crawl raises;
        # otherwise the Firefox processes keep running after the script ends.
        try:
            taobao.list_driver.quit()
        finally:
            taobao.detail_driver.quit()
    print('结束')



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值