from selenium.webdriver import Chrome
import time
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import csv
def JD():
    """Scrape JD.com search results for rice cookers (电饭煲) across 10 pages.

    Drives a Chrome browser: searches for '电饭煲', then for each result page
    scrolls to trigger lazy loading, extracts each product card's name, price,
    store, comment count and two links, prints the row and appends it to the
    module-level CSV writer ``w1`` (set up in the ``__main__`` block).

    Side effects: opens a browser window, prints progress, writes CSV rows.
    Blocks on ``input()`` at the end so the browser stays open.
    """
    b = Chrome()
    b.get('https://www.jd.com/')
    seach = b.find_element(By.ID, 'key')
    seach.send_keys('电饭煲\n')  # trailing '\n' submits the search form
    time.sleep(1)
    for i in range(10):  # 10 result pages
        # JD lazy-loads product cards; scroll down in steps so all of them
        # render before we query the DOM.
        for _ in range(10):
            b.execute_script('window.scrollBy(0, 800)')
            time.sleep(2)
        result = b.find_elements(By.CSS_SELECTOR, '#J_goodsList>ul>li')
        print(len(result))
        # Parse the full page once; per-card links are picked out by index.
        soup = BeautifulSoup(b.page_source, 'lxml')
        for p, r in enumerate(result, start=1):
            name = r.find_element(By.CSS_SELECTOR, '.p-name>a>em').text.strip()
            price = r.find_element(By.CSS_SELECTOR, '.p-price>strong>i').text
            store = r.find_element(By.CLASS_NAME, 'p-shop').text
            comment = r.find_element(By.CLASS_NAME, 'p-commit').text
            link1 = 'https:' + soup.select('.p-img>a')[p - 1].attrs['href']
            # Some cards have no store link; fall back to a blank placeholder.
            try:
                link2 = 'https:' + soup.select('.J_im_icon>a')[p - 1].attrs['href']
            except (IndexError, KeyError):
                link2 = ' '
            print(p, name, price, store, comment, link1, link2)
            w1.writerow([name, price, store, comment, link1, link2])
        print(f'=================第{i+1}页完成===============')
        b.find_element(By.CLASS_NAME, 'pn-next').click()
        time.sleep(2)
    input('')  # keep the browser window open until the user presses Enter


if __name__ == '__main__':
    # Context manager ensures the CSV file is flushed and closed on exit
    # (the original left the handle open for the process lifetime).
    with open('files/京东电饭煲.csv', 'w', encoding='utf-8', newline='') as f:
        w1 = csv.writer(f)
        w1.writerow(['名称', '价格', '店铺', '评论', '商品详情网址', '店铺详情网址'])
        JD()