使用 Selenium 爬取淘宝网页商品

import json
from urllib.parse import quote
from selenium.common.exceptions import TimeoutException
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup

# Search term whose Taobao result pages are crawled.
KEYWORD = 'iPad'

# A single Chrome session is shared by every function in this script.
browser = webdriver.Chrome()
# Explicit-wait helper bound to that session; each wait gives up after 15 s.
wait = WebDriverWait(browser, 15)

def index_page(page, max_retries=3):
    '''
    Load one page of the search results for KEYWORD and extract its products.

    The first results page is opened, and for ``page > 1`` the pager's
    "jump to page" box is filled in and submitted. Once the pager highlights
    the requested page and at least one item is present, ``get_products()``
    is called to parse and save the listing.

    A timed-out attempt is retried, but only a bounded number of times, so a
    page that never loads cannot recurse or loop forever.

    :param page: 1-based page number to crawl
    :param max_retries: how many extra attempts to make after a timeout
        (negative values are treated as 0)
    :return: None
    '''
    url = 'https://s.taobao.com/search?q=' + quote(KEYWORD)
    attempts_allowed = max(max_retries, 0) + 1

    for attempt in range(1, attempts_allowed + 1):
        print('正在抓取第',page,'页')
        try:
            browser.get(url)
            if page > 1:
                # Jump straight to the requested page through the pager form.
                page_box = wait.until(EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager div.form > input')))
                submit = wait.until(EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#mainsrp-pager div.form > span.btn.J_Submit')))
                page_box.clear()
                page_box.send_keys(str(page))
                submit.click()
            # The highlighted pager entry confirms the jump actually happened.
            wait.until(EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR, '#mainsrp-pager li.item.active > span'), str(page)))
            wait.until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.m-itemlist .items .item')))
        except TimeoutException:
            if attempt == attempts_allowed:
                # Give up on this page but let the caller continue with others.
                print('第',page,'页抓取超时，已重试',attempts_allowed - 1,'次，跳过')
                return
            continue
        # Parsing happens outside the try so only page loading is retried.
        get_products()
        return

def _div_text(node, class_name, strip=True):
    '''Return the text of the first ``div`` with *class_name* under *node*, or '' if absent.'''
    div = node.find('div', {'class': class_name})
    if div is None:
        return ''
    text = div.get_text()
    return text.strip() if strip else text


def _image_src(item):
    '''Return the ``src`` of the image inside the item's ``pic`` div, or '' if any part is missing.'''
    pic = item.find('div', {'class': 'pic'})
    link = pic.a if pic is not None else None
    img = link.img if link is not None else None
    return img.get('src', '') if img is not None else ''


def _load_saved_products(path):
    '''Return the products already stored at *path* as one flat list ([] if the file is absent).'''
    try:
        with open(path, encoding='utf-8') as f:
            text = f.read()
    except FileNotFoundError:
        return []

    decoder = json.JSONDecoder()
    saved = []
    pos = 0
    end = len(text)
    while True:
        # raw_decode does not skip leading whitespace itself.
        while pos < end and text[pos].isspace():
            pos += 1
        if pos >= end:
            return saved
        # Decoding value by value also accepts files made of several arrays
        # written back to back (what plain append-mode dumping produces).
        value, pos = decoder.raw_decode(text, pos)
        if isinstance(value, list):
            saved.extend(value)
        else:
            saved.append(value)


def get_products(output_path='product.json'):
    '''
    Parse the products on the page currently shown by ``browser`` and save them.

    Each product becomes a dict with the keys ``image``, ``price``, ``deal``,
    ``title``, ``shop`` and ``location``. A field whose element is missing
    from the page is stored as an empty string instead of aborting the crawl.

    The products are merged with whatever is already stored in *output_path*
    and the file is rewritten as a single JSON array, so it stays valid JSON
    no matter how many pages are saved.

    :param output_path: JSON file that accumulates the products
    :return: None
    :raises json.JSONDecodeError: if *output_path* exists but cannot be parsed;
        the file is left untouched in that case
    '''
    soup = BeautifulSoup(browser.page_source, 'lxml')
    container = soup.find('div', {'class': 'm-itemlist'})
    if container is None:
        items = []
    else:
        items = container.find_all('div', {'data-category': 'auctions'})

    product_list = [
        {
            'image': _image_src(item),
            'price': _div_text(item, 'price g_price g_price-highlight'),
            # The deal count is stored unstripped, as it always has been.
            'deal': _div_text(item, 'deal-cnt', strip=False),
            'title': _div_text(item, 'row row-2 title'),
            'shop': _div_text(item, 'shop'),
            'location': _div_text(item, 'location'),
        }
        for item in items
    ]

    # Read before opening for writing so a parse failure cannot truncate the file.
    all_products = _load_saved_products(output_path) + product_list
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(all_products, f, indent=2, ensure_ascii=False)
    print('ok')

# Crawl result pages 1 through 100. The finally clause closes the Chrome
# session even when a page fails unexpectedly, so no browser/driver process
# is left running after the script ends.
try:
    for i in range(1, 101):
        index_page(i)
finally:
    browser.quit()

部分结果为:

{
    "image": "//g-search1.alicdn.com/img/bao/uploaded/i4/imgextra/i2/13022581/O1CN01x8TLzf1Uw8ijNtCZa_!!0-saturn_solar.jpg_230x230.jpg",
    "price": "¥2398.00",
    "deal": "1734人付款",
    "title": "12期免息【送电动牙刷+1年延保】Apple/苹果 iPad 2018款 9.7英寸平板电脑 wifi平板电脑 吃鸡玩家",
    "shop": "绿森数码官方旗舰店",
    "location": "浙江 杭州"
  },
  {
    "image": "//g-search1.alicdn.com/img/bao/uploaded/i4/i2/1669409267/O1CN012IKKoeqk1Nwld2s_!!0-item_pic.jpg_230x230.jpg",
    "price": "¥2398.00",
    "deal": "5244人付款",
    "title": "[12期分期][两年保修]Apple/苹果 iPad 2018款 9.7英寸wifi新款平板电脑32G/128G 正品国行新品授权旗舰店",
    "shop": "卓辰数码旗舰店",
    "location": "浙江 杭州"
  },
  {
    "image": "//g-search1.alicdn.com/img/bao/uploaded/i4/i4/268451883/O1CN01fxAXtR1PmSGc2xFVa_!!0-item_pic.jpg_230x230.jpg",
    "price": "¥2328.00",
    "deal": "1825人付款",
    "title": "2018款ipad Apple/苹果 iPad 2018款 9.7英寸32/128Gwifi 平板电脑 3/6/12期分期 正品国行 全国联保",
    "shop": "三际数码官方旗舰店",
    "location": "山东 济南"
  },
  {
    "image": "//g-search3.alicdn.com/img/bao/uploaded/i4/i2/2616970884/O1CN011IOub5IKnvUp9nh_!!0-item_pic.jpg_230x230.jpg",
    "price": "¥2688.00",
    "deal": "8681人付款",
    "title": "【官网价优惠高达971元】Apple/苹果 iPad mini4 7.9英寸平板电脑 128G WiFi版 迷你智能平板电脑 金属轻薄",
    "shop": "苏宁易购官方旗舰店",
    "location": "江苏 南京"
  },
  {
    "image": "//g-search3.alicdn.com/img/bao/uploaded/i4/i1/2616970884/O1CN01Ld0NTf1IOub5txwnH_!!0-item_pic.jpg_230x230.jpg",
    "price": "¥2565.00",
    "deal": "5521人付款",
    "title": "【官网价优惠至高511元】2018新款 Apple/苹果 9.7英寸iPad 智能平板电脑 正品国行32G 128G",
    "shop": "苏宁易购官方旗舰店",
    "location": "江苏 南京"
  },
  {
    "image": "//g-search2.alicdn.com/img/bao/uploaded/i4/i3/1669409267/O1CN019lgc8W2IKKoe8pxMJ_!!0-item_pic.jpg_230x230.jpg",
    "price": "¥2158.00",
    "deal": "534人付款",
    "title": "Apple/苹果 Ipad 2017款 9.7英寸 平板电脑 32G/128G Wifi版",
    "shop": "卓辰数码旗舰店",
    "location": "浙江 杭州"
  },
  {
    "image": "//g-search3.alicdn.com/img/bao/uploaded/i4/i4/1115488308/O1CN012BF6yiFNCxMurOm_!!0-item_pic.jpg_230x230.jpg",
    "price": "¥2328.00",
    "deal": "33人付款",
    "title": "2328起!送钢化膜/现货速发/12期分期 Apple/苹果 iPad 2018款9.7英寸平板电脑WiFi新款平板电脑",
    "shop": "君问数码官方旗舰店",
    "location": "浙江 杭州"
  },
  {
    "image": "//g-search2.alicdn.com/img/bao/uploaded/i4/i3/1917047079/O1CN0122AEDnF6rAZNIFG_!!0-item_pic.jpg_230x230.jpg",
    "price": "¥6499.00",
    "deal": "",
    "title": "Apple/苹果 11 英寸 iPad Pro",
    "shop": "applestore官方旗舰店",
    "location": "上海"
  },
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值