京东电饭锅页面爬虫

from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time

# Crawl the JD search results for rice cookers: open the search page, scroll
# each result page, parse every product card with BeautifulSoup, print the
# extracted fields, then move on to the next result page.

# 1. Create the browser
# 1) Create the options object
options = ChromeOptions()
# 2) Add settings
# Disable image loading to speed things up
options.add_argument('blink-settings=imagesEnabled=false')
# Exclude the 'enable-automation' switch (original note: cancel the test environment)
options.add_experimental_option('excludeSwitches', ['enable-automation'])

# 3) Create the browser object from the options
b = Chrome(options=options)

# Number of result pages to parse.
PAGE_COUNT = 10


def _absolute_url(href):
    """Prefix a protocol-relative href ('//host/path') with 'https:'.

    Hrefs that already carry a scheme (or are empty) are returned unchanged,
    so an absolute link is never turned into 'https:https://...'.
    """
    return 'https:' + href if href.startswith('//') else href


def _stripped_text(node):
    """Return the stripped text of a tag, or '' when the tag was not found."""
    return node.text.strip() if node is not None else ''


def _link_fields(anchor):
    """Return (text, absolute URL) of an <a> tag, or ('', '') when it is missing."""
    if anchor is None:
        return '', ''
    return anchor.text, _absolute_url(anchor.attrs.get('href', ''))


try:
    # 2. Open the search page
    b.get('https://search.jd.com/Search?keyword=%E7%94%B5%E9%A5%AD%E9%94%85&enc=utf-8&wq=%E7%94%B5%E9%A5%AD%E9%94%85&pvid=388bf700598a4288b08f0bd375c4f93e')
    time.sleep(1)

    for page in range(PAGE_COUNT):
        # 3. Scroll the page step by step so the whole list gets rendered
        for _ in range(10):
            b.execute_script('window.scrollBy(0, 800)')
            time.sleep(1)

        # 4. Parse the data
        soup = BeautifulSoup(b.page_source, 'lxml')
        for div in soup.select('#J_goodsList .gl-i-wrap'):
            name_a = div.select_one('.p-name a')
            if name_a is None:
                # A card without a name link carries no product data; skip it
                # instead of crashing on None.
                continue
            name, goods_url = _link_fields(name_a)
            # Any of the following may be absent on a card; fall back to ''.
            price = _stripped_text(div.select_one('.p-price i'))
            commit = _stripped_text(div.select_one('.p-commit a'))
            shop_name, shop_url = _link_fields(div.select_one('.p-shop a'))
            print(name, goods_url, price, commit, shop_name, shop_url)
        print('---------------------1页数据解析完成----------------------')

        # 5. Go to the next page -- not needed after the last page we parse
        if page == PAGE_COUNT - 1:
            break
        # find_elements returns an empty list instead of raising when the
        # button does not exist, which lets us stop cleanly.
        next_buttons = b.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_buttons:
            break
        next_buttons[0].click()
        time.sleep(3)

    # Keep the browser open until the user presses Enter.
    input('end:')
finally:
    # Always shut down the browser and its driver process, even on errors.
    b.quit()

京东电饭锅详情页爬虫

from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
import time
from bs4 import BeautifulSoup

# Crawl JD product detail pages: open the rice-cooker search page, click each
# product link (which opens the detail page in a new tab), switch to that tab,
# open the review tab, dump the page source, then close the tab and return to
# the search page.
options = ChromeOptions()
# Do not load images, to speed things up
options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
# Exclude the 'enable-automation' switch (original note: cancel the test environment)
options.add_experimental_option('excludeSwitches', ['enable-automation'])
b = Chrome(options=options)

try:
    b.get('https://search.jd.com/Search?keyword=%E7%94%B5%E9%A5%AD%E9%94%85&enc=utf-8&wq=%E7%94%B5%E9%A5%AD%E9%94%85&pvid=388bf700598a4288b08f0bd375c4f93e')
    # Give the result list a moment to render before collecting the links.
    time.sleep(1)

    # Remember the search-results window so we can always come back to it.
    search_handle = b.current_window_handle

    # Collect the detail-page links of all products
    all_goods_a = b.find_elements(By.CSS_SELECTOR, '#J_goodsList .gl-i-wrap>.p-img>a')

    for a in all_goods_a:
        a.click()
        time.sleep(1)

        # Switch to the detail page. If the click did not open a new tab there
        # is nothing to scrape, and closing the current window would close the
        # search page itself, so skip this product.
        detail_handles = [h for h in b.window_handles if h != search_handle]
        if not detail_handles:
            continue
        b.switch_to.window(detail_handles[-1])

        try:
            time.sleep(2)

            # Scroll the detail page, then click the review tab
            b.execute_script('window.scrollBy(0, 900)')
            time.sleep(1)

            # Per the original author's note the second-to-last tab is the
            # review tab ('商品评价'); only click it when the tab strip
            # actually has that many entries.
            tabs = b.find_elements(By.CSS_SELECTOR, '#detail>.tab-main>ul>li')
            if len(tabs) >= 2:
                tabs[-2].click()
                time.sleep(2)

            print(b.page_source)
            print('================================获取完一个商品===============================')
        finally:
            # Close the detail tab and return to the search page even if
            # scraping this product failed, so the next click targets the
            # right window.
            b.close()
            b.switch_to.window(search_handle)

    # Keep the browser open until the user presses Enter.
    input('end:')
finally:
    # Always shut down the browser and its driver process, even on errors.
    b.quit()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值