python selenium爬取淘宝

我们帅哥都搞IT

已于 2024-03-11 18:00:13 修改

阅读量452

点赞数 8

文章标签： python 爬虫

于 2024-03-11 17:56:56 首次发布

本文链接：https://blog.csdn.net/qq_40183816/article/details/136630940

版权

最新的 selenium 爬取淘宝示例。selenium 需要安装低版本的 Google Chrome 浏览器，请先下载浏览器，并在 Python 环境下安装好对应的驱动包（具体步骤请自行百度）。

```python
from selenium import webdriver
import time
import csv
import re
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Search for a product and work out how many result pages there are
def get_product(key_word):
    """Submit *key_word* through the search box and return the page count.

    Operates on the module-level ``browser`` WebDriver, which is expected to
    be showing the page that contains the search input (id ``q``).

    Args:
        key_word: Search phrase typed into the search input.

    Returns:
        The number of result pages as a string of digits. The page-count
        text is currently hard-coded, so this is always ``"100"``.

    Raises:
        ValueError: If the page-count text contains no digits.
    """
    # Locate the search input and type the keyword.  The generic
    # ``find_element(By.X, ...)`` form is used because the
    # ``find_element_by_*`` helpers were removed in Selenium 4.3.
    browser.find_element(By.ID, "q").send_keys(key_word)
    # Locate the search button and click it
    browser.find_element(By.CLASS_NAME, 'btn-search').click()
    browser.maximize_window()
    # Pause for 20 seconds so the user can log in manually
    time.sleep(20)
    # The live pagination lookup is disabled and its text is hard-coded.
    # The disabled lookup targeted: //div[@class="next-btn-helper"]
    page_info = "共100页这个文本"
    # Take the first run of digits in the page-count text
    match = re.search(r"\d+", page_info)
    if match is None:
        raise ValueError("no page count found in {!r}".format(page_info))
    return match.group()


# Return the card's shop-name link once it is visible (WebDriverWait condition)
def _shop_link_if_visible(card):
    """Return the shop-name link belonging to *card*, or ``False`` if hidden.

    The search starts from the card's parent element so the link is found
    whether it sits inside the card or next to it as a sibling.
    """
    link = card.find_element(
        By.XPATH, './..//a[@class="ShopInfo--shopName--rg6mGmy"]')
    return link if link.is_displayed() else False


# Scrape the current result page into the CSV file
def get_data():
    """Append one CSV row per product card found on the current page.

    Reads the module-level ``browser`` (WebDriver) and ``key_word`` (used as
    the CSV file name).  Each row is
    ``[description, price, buyer count, shop name, ship-from city]``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If a card lacks
            one of the expected fields.
        selenium.common.exceptions.TimeoutException: If a card's shop link
            does not become visible within 10 seconds.
    """
    # Every product card lives under this node
    items = browser.find_elements(
        By.XPATH,
        '//div[@class="Card--doubleCard--wznk5U4"]/div[@class="Card--mainPicAndDesc--wvcDXaK"]')
    print(items)
    if not items:
        # Nothing to write; avoid creating an empty CSV file
        return

    # Open the file once per page instead of once per row
    with open('{}.csv'.format(key_word), mode='a', newline='', encoding='utf-8-sig') as f:
        csv_writer = csv.writer(f, delimiter=',')
        for item in items:
            pro_desc = item.find_element(
                By.XPATH, './/div[@class="Title--title--jCOPvpf"]/span').text

            # Price = integer part + fractional part
            price_int = item.find_element(
                By.XPATH, './/span[@class="Price--priceInt--ZlsSi_M"]').text
            price_float = item.find_element(
                By.XPATH, './/span[@class="Price--priceFloat--h2RR0RK"]').text
            pro_price = price_int + price_float

            # Buyer count: keep only the first run of digits of the sales
            # text; fall back to an empty field when there are none instead
            # of crashing on ``None.group()``.
            # NOTE(review): abbreviated counts (e.g. a "万" suffix) would be
            # truncated to their leading digits — confirm whether that matters.
            payment_count_text = item.find_element(
                By.XPATH, './/span[@class="Price--realSales--FhTZc7U"]').text
            sales_match = re.search(r'\d+', payment_count_text)
            buy_num = sales_match.group() if sales_match else ''

            # Shop name: resolved relative to this card.  A document-wide
            # '//a[...]' lookup always returns the first shop on the page,
            # which would give every row the same shop.
            shop = WebDriverWait(item, 10).until(_shop_link_if_visible).text

            # Ship-from location
            address = item.find_element(
                By.XPATH, './/span[@class="Price--procity--_7Vt3mX"]').text

            csv_writer.writerow([pro_desc, pro_price, buy_num, shop, address])


if __name__ == '__main__':
    # Local import: only this entry point needs URL quoting
    from urllib.parse import quote

    key_word = input("请输入您要搜索的商品：")
    browser = webdriver.Chrome()
    try:
        # Hide the ``navigator.webdriver`` flag on every new document, before
        # any page script runs.
        browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined
        })
      """
        })
        browser.get('https://www.taobao.com/')
        page = get_product(key_word)
        print(page)
        # The first result page is already loaded by the search itself
        get_data()

        # The implicit wait is a session-wide setting, so set it once rather
        # than on every iteration.
        browser.implicitly_wait(15)
        # Percent-encode the keyword so spaces, '&', '#', etc. cannot break
        # the query string.
        encoded_key_word = quote(key_word, safe='')
        # Pages 2..N; ``range`` also terminates when the page count is < 1,
        # where the former ``!=`` loop would never stop.
        for page_num in range(1, int(page)):
            print("=" * 100)
            print("正在爬取第{}页".format(page_num + 1))
            # The ``s`` offset advances by 44 per page
            browser.get('https://s.taobao.com/search?q={}&s={}'.format(
                encoded_key_word, page_num * 44))
            get_data()
        print("爬取结束！")
    finally:
        # Always shut down the browser and driver process, even on errors
        browser.quit()



```

![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/d1dd6afca2114e09bc806a43b203a809.png#pic_center)