Python 爬虫:selenium自动爬取淘宝商品

这个代码基于上篇的爬虫加以修改,可以灵活地输入想要爬取的商品名称以及爬取页数。

不再详细介绍,上篇的注释基本都比较详细,这里就直接上代码了:

import requests
from lxml import etree
from selenium import webdriver
import time
import csv

# Scrape any product across the site with selenium + xpath.
# A Firefox window is opened as soon as this module is loaded; every function
# below drives this single shared driver instance.
browser = webdriver.Firefox()
# NOTE(review): a `global` statement at module level has no effect. `k` (the
# page limit that get_info compares against) is only assigned later, in the
# `if __name__ == '__main__':` block.
global k
 
def _first(node, path):
    """Return the first XPath match of *path* under *node*, or '' if none."""
    found = node.xpath(path)
    return found[0] if found else ''


def get_info(url,count):
    """Scrape every product card on the results page at *url*, then continue.

    For each card the name, price, shop name, location and sales text are
    written to the CSV via save() and echoed to stdout. A field that is not
    present on a card is recorded as an empty string rather than aborting
    the whole crawl.

    Args:
        url: Address of the search-results page to load and parse.
        count: 1-based number of the page being scraped. It is incremented on
            entry, and while the new value is still <= the module-level page
            limit ``k`` the crawl proceeds to the next page via get_page().
    """
    count += 1
    browser.get(url)
    selector = etree.HTML(browser.page_source)
    # The class string (including its trailing spaces) must match the page
    # markup exactly, because XPath @class="..." is a whole-string comparison.
    for info in selector.xpath('//div[@class="item J_MouserOnverReq  "]'):
        goods_name = _first(info, './/a/img/@alt')
        price = _first(info, './/div[@class="price g_price g_price-highlight"]//strong/text()')
        shop_name = _first(info, './/div[@class="shop"]/a//span[2]/text()')
        location = _first(info, './/div[@class="location"]/text()')
        sell_number = _first(info, './/div[@class="deal-cnt"]/text()')
        goods_info = [goods_name, price, shop_name, location, sell_number]
        save(goods_info)
        print(
            goods_name+'\n',
            price+'\n',
            shop_name+'\n',
            location+'\n',
            sell_number+'\n',
        )

    # `k` is the user-supplied page limit set by the script entry point.
    if count <= k:
        get_page(url, count)

def get_page(url,count):
    """Open *url*, click its "next page" link and scrape the page it leads to.

    Args:
        url: Address of the results page whose successor should be scraped.
        count: Page counter, passed through unchanged to get_info().

    If the page has no "next page" link the function returns without
    scraping anything further, which ends the crawl.
    """
    # Function-scope imports keep this block self-contained; selenium is
    # already a dependency of this file.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    browser.get(url)
    browser.implicitly_wait(5)
    try:
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.3; this form works on older versions too.
        next_link = browser.find_element(By.XPATH, '//a[@trace="srp_bottom_pagedown"]')
    except NoSuchElementException:
        # No pager link found: treat this as the last page and stop.
        return
    next_link.click()
    # Fixed pause so the next results page can load before it is parsed.
    time.sleep(5)
    get_info(browser.current_url,count)

def save(iten):
    """Append *iten* (a sequence of field values) as one row of the CSV output.

    The file is opened in append mode on every call, so rows accumulate
    across calls and across runs.
    """
    with open('ptoducts.csv', mode='a+', encoding='utf-8', newline='') as csv_file:
        csv.writer(csv_file).writerow(iten)

if __name__ == '__main__':
    # Script entry point: write the CSV header, run a Taobao search for the
    # product the user types in, then scrape the requested number of pages.
    # Function-scope style import: selenium is already a dependency of this file.
    from selenium.webdriver.common.by import By

    try:
        save(['名称', '价格/元', '店铺名', '地区', '销量'])
        url = 'https://www.taobao.com/'
        browser.get(url)
        browser.implicitly_wait(5)
        # find_element(By.*, ...) replaces the find_element_by_* helpers that
        # were removed in Selenium 4.3; this form works on older versions too.
        browser.find_element(By.ID, 'q').clear()
        time.sleep(10)
        namekeys = input('请输入要搜索的商品:')
        # Page limit read by get_info(); must stay a module-level name.
        k = int(input('请输入爬取页数:'))
        browser.find_element(By.ID, 'q').send_keys(namekeys)
        browser.find_element(By.CLASS_NAME, 'btn-search').click()
        # NOTE(review): presumably this long pause leaves time for the results
        # page (or a manual login prompt) to finish — confirm.
        time.sleep(20)
        get_info(browser.current_url,1)
    finally:
        # Always shut the driver down so no Firefox/geckodriver process is
        # left behind, even when the crawl fails part-way through.
        browser.quit()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值