# AliExpress scraper (爬虫) — Python / Selenium

import csv
import datetime
from time import sleep

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Timestamp used to tag the output CSV filename (e.g. "2024-01-31").
date = datetime.datetime.now().strftime('%Y-%m-%d')

def login():
    """Fill in and submit the AliExpress login form with hard-coded credentials.

    Assumes the module-level ``driver`` is already on a page containing the
    login form (inputs ``fm-login-id`` / ``fm-login-password``).
    """
    username = driver.find_element(By.ID,'fm-login-id')
    password = driver.find_element(By.ID,'fm-login-password')
    # NOTE(review): credentials are hard-coded placeholders — move them to an
    # environment variable or config file before real use.
    UN = '1234567890@qq.com'
    PW = '00000000'
    sleep(1)
    username.send_keys(UN)
    # Value is set a second time via JS — presumably a workaround for the
    # form's JS framework ignoring synthetic key events; confirm still needed.
    driver.execute_script(f"document.evaluate(`//*[@id='fm-login-id']`, document).iterateNext().setAttribute('value', '{UN}')")
    sleep(1)
    password.send_keys(PW)
    driver.execute_script(f"document.evaluate(`//*[@id='fm-login-password']`, document).iterateNext().setAttribute('value', '{PW}')")
    sleep(1)
    sign_in = driver.find_element(By.CSS_SELECTOR, '.login-submit')
    sign_in.click()

def slide():
    """Solve the slider captcha by dragging the handle across the full track.

    Waits for the captcha iframe to become visible, performs the drag inside
    it, then returns focus to the main document.
    """
    captcha_frame = WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.ID, 'baxia-dialog-content'))
    )
    driver.switch_to.frame(captcha_frame)
    track = driver.find_element(By.ID, 'nc_1_n1t')
    handle = driver.find_element(By.ID, 'nc_1_n1z')
    # Drag the handle by the full track width in one gesture.
    drag = ActionChains(driver)
    drag.click_and_hold(handle)
    drag.move_by_offset(track.size['width'], 0)
    drag.release()
    drag.perform()
    driver.switch_to.default_content()

def suspondWindowHandler():
    """Dismiss the promotional pop-up window if it is present.

    Best-effort: the pop-up does not always appear, so any WebDriver failure
    (element missing, click intercepted, ...) is ignored rather than aborting
    the scrape. Previously a bare ``except:`` which also swallowed
    KeyboardInterrupt/SystemExit — narrowed to WebDriverException.
    """
    try:
        close_button = driver.find_element(By.XPATH, "/html/body/div[4]/div/div[1]/img[2]")
        close_button.click()
    except WebDriverException:
        # Pop-up not shown (or not clickable) on this page load — nothing to do.
        pass

def scrollDown():
    """Scroll the page down in small steps so lazy-loaded listings render."""
    # 15 steps of 300px each, pausing so the lazy loader can keep up.
    for _ in range(15):
        driver.execute_script('window.scrollBy(0, 300)')
        sleep(0.5)
    # Extra settle time after the final scroll before parsing the page.
    sleep(10)

def nextPage():
    """Click through to the next page of search results and wait for it to load."""
    # Renamed the local from 'next' so the builtin next() is not shadowed.
    next_button = driver.find_element(By.CSS_SELECTOR, '.next-next')
    next_button.click()
    sleep(5)

# Product keyword to search for on AliExpress.
searchText = input("Please enter the product name: ")

# Chrome options: keep the browser open after the script ends ('detach'),
# silence notification prompts, and start maximized.
option = webdriver.ChromeOptions()
option.add_experimental_option('detach', True)
option.add_argument('--disable-notifications')
option.add_argument('--start-maximized')
# NOTE(review): a positional chromedriver path only works on Selenium 3.x —
# Selenium 4 removed it in favour of Service(); confirm the pinned version.
driver = webdriver.Chrome('./chromedriver.exe', options=option)
driver.implicitly_wait(30)
driver.get('https://www.aliexpress.com/')

login()
slide()
suspondWindowHandler()

# Submit the search for the requested keyword.
driver.find_element(By.ID, 'search-key').send_keys(searchText)
driver.find_element(By.CSS_SELECTOR, '.search-button').click()

# Running row counter and accumulator for the CSV output.
number = 1
csvRows = []

# Scrape N result pages (currently just 1; raise the range to scrape more).
for i in range(1):
    scrollDown()
    # Parse the fully-rendered page with BeautifulSoup.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # Each product card is an anchor with these (obfuscated, build-specific)
    # CSS classes — they change whenever AliExpress redeploys its frontend.
    products = soup.find_all('a', class_='manhattan--container--1lP57Ag cards--gallery--2o6yJVt')
    for product in products:
        # Required fields: skip the whole card if any of them is missing.
        # bs4's find() returns None when absent, so .text raises AttributeError
        # — catch exactly that instead of a bare except that hid real bugs.
        try:
            title = product.find('h1', class_='manhattan--titleText--WccSjUS').text.strip()
            price = product.find('div', class_='manhattan--price--WvaUgDY').text.strip().replace('US $', '').strip()
            store = product.find('span', class_='cards--store--A2ezoRc').text.strip()
        except AttributeError:
            continue
        # Optional fields: left blank when the card does not show them.
        try:
            sold = product.find('span', class_='manhattan--trade--2PeJIEB').text.strip().replace(' sold', '').strip()
        except AttributeError:
            sold = ''
        try:
            rating = product.find('span', class_='manhattan--evaluation--3cSMntr').text.strip()
        except AttributeError:
            rating = ''
        csvRows.append([number, title, price, sold, rating, store])
        number += 1
    nextPage()

# Column headers for the exported CSV.
csvHeaders = ['NO.', 'PRODUCT NAME', 'PRICE', 'SOLD', 'RATING', 'STORE NAME']

# Write the collected rows to disk, named after the search term and date.
# utf-8-sig adds a BOM so Excel opens the file with the correct encoding.
outputPath = "d:/MyVSCODE/Aliexpress/%s-%s.csv" % (searchText, date)
with open(outputPath, mode='w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    writer.writerow(csvHeaders)
    writer.writerows(csvRows)

print('Data saved to local disk.')

driver.quit()

# (CSDN page boilerplate — comment box / "red packet" payment UI text —
#  was accidentally captured with the article; it is not part of the scraper.)