day19-selenium作业

最新推荐文章于 2024-07-10 09:15:55 发布

FXGBG

最新推荐文章于 2024-07-10 09:15:55 发布

阅读量105

点赞数

分类专栏：学习 文章标签：python

本文链接：https://blog.csdn.net/FXGBG/article/details/129888566

版权

学习专栏收录该内容

59 篇文章 0 订阅

订阅专栏

使用selenium抓取京东数据

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from csv import writer
import time, os

# Address of the JD search-result page to start scraping from
search_url = 'https://search.jd.com/Search?keyword=%E7%94%B5%E9%A5%AD%E9%94%85&enc=utf-8&wq=%E7%94%B5%E9%A5%AD%E9%94%85&pvid=20d97125d00a409fb95d2735aeb0a7c6'

# Start a Chrome session and open the search page in it
b = Chrome()
b.get(search_url)


def goods(p=1):
    """Collect product rows from consecutive search-result pages.

    Operates on the module-level browser ``b``, which is expected to already
    be showing a search-result page.

    Args:
        p: Number of result pages to read, starting with the current one.

    Returns:
        A list of rows, each of the form
        ``[price, introduce, comments, store, detail_url, store_url]``.
        When an item has no shop element, ``store`` and ``store_url`` are
        filled with placeholder text.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If an item lacks
            its price, name, comment count or detail link, or if the
            next-page button cannot be found between pages.
    """
    # Function-scope import so this block stays self-contained.
    from selenium.common.exceptions import NoSuchElementException

    gl = []
    for i in range(p):
        # Scroll down in steps before reading the list (the original note
        # says this pre-loads the page content).
        for _ in range(7):
            b.execute_script('window.scrollBy(0, 1000)')
            time.sleep(1)

        # Extra wait before querying the product list.
        time.sleep(5)
        goods_list = b.find_elements(By.CSS_SELECTOR, '#J_goodsList>ul>li>div')

        for item in goods_list:
            # Product price
            price = item.find_element(By.CSS_SELECTOR, '.p-price i').text
            # Product description, with line breaks turned into dashes
            name_text = item.find_element(By.CSS_SELECTOR, '.p-name-type-2 em').text
            introduce = '-'.join(name_text.split('\n'))
            # Comment count
            comments = item.find_element(By.CSS_SELECTOR, '.p-commit strong').text
            # Shop name and link; only a missing element triggers the
            # placeholders, any other error propagates.
            try:
                store = item.find_element(By.CSS_SELECTOR, '.p-shop span').text
                store_url = item.find_element(By.CSS_SELECTOR, '.p-shop>span>a').get_attribute('href')
            except NoSuchElementException:
                store = '厂家直销'
                store_url = '暂无厂家链接'
            # Link to the product detail page
            detail_url = item.find_element(By.CSS_SELECTOR, '.p-name-type-2>a').get_attribute('href')
            gl.append([price, introduce, comments, store, detail_url, store_url])

        print(f'第{i + 1}页加载完成')

        # Only paginate when another page is still to be read; clicking after
        # the final page wastes time and can fail when no next page exists.
        if i < p - 1:
            next_page = b.find_element(By.CLASS_NAME, 'pn-next')
            next_page.click()
            time.sleep(5)
    return gl


if __name__ == '__main__':
    # Create the output directory; exist_ok makes a separate existence check
    # unnecessary.
    os.makedirs(r'file', exist_ok=True)

    # Scrape first, so that a failed run does not truncate an existing
    # goods.csv before any data has been collected.
    goods_list = goods(10)

    # The with-block closes the file even if writing raises.
    with open(r'file/goods.csv', 'w', encoding='utf-8', newline='') as f:
        w1 = writer(f)
        w1.writerows(goods_list)