# AliExpress scraper (爬虫) — Python / Selenium

import csv
import datetime
from time import sleep

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Timestamp used to tag the output CSV filename (e.g. "2024-01-31").
date = datetime.datetime.now().strftime('%Y-%m-%d')

def login():
    """Fill in and submit the AliExpress login form with hard-coded credentials.

    Assumes the module-level ``driver`` is already on a page containing the
    login form (inputs ``fm-login-id`` / ``fm-login-password``).
    """
    username = driver.find_element(By.ID,'fm-login-id')
    password = driver.find_element(By.ID,'fm-login-password')
    # NOTE(review): credentials are hard-coded placeholders — move them to an
    # environment variable or config file before real use.
    UN = '1234567890@qq.com'
    PW = '00000000'
    sleep(1)
    username.send_keys(UN)
    # Value is set a second time via JS — presumably a workaround for the
    # form's JS framework ignoring synthetic key events; confirm still needed.
    driver.execute_script(f"document.evaluate(`//*[@id='fm-login-id']`, document).iterateNext().setAttribute('value', '{UN}')")
    sleep(1)
    password.send_keys(PW)
    driver.execute_script(f"document.evaluate(`//*[@id='fm-login-password']`, document).iterateNext().setAttribute('value', '{PW}')")
    sleep(1)
    sign_in = driver.find_element(By.CSS_SELECTOR, '.login-submit')
    sign_in.click()

def slide():
    """Solve the slider captcha by dragging the handle across the full track.

    Waits for the captcha iframe to become visible, performs the drag inside
    it, then returns focus to the main document.
    """
    captcha_frame = WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.ID, 'baxia-dialog-content'))
    )
    driver.switch_to.frame(captcha_frame)
    track = driver.find_element(By.ID, 'nc_1_n1t')
    handle = driver.find_element(By.ID, 'nc_1_n1z')
    # Drag the handle by the full track width in one gesture.
    drag = ActionChains(driver)
    drag.click_and_hold(handle)
    drag.move_by_offset(track.size['width'], 0)
    drag.release()
    drag.perform()
    driver.switch_to.default_content()

def suspondWindowHandler():
    """Dismiss the promotional pop-up window if it is present.

    Best-effort: the pop-up does not always appear, so any WebDriver failure
    (element missing, click intercepted, ...) is ignored rather than aborting
    the scrape. Previously a bare ``except:`` which also swallowed
    KeyboardInterrupt/SystemExit — narrowed to WebDriverException.
    """
    try:
        close_button = driver.find_element(By.XPATH, "/html/body/div[4]/div/div[1]/img[2]")
        close_button.click()
    except WebDriverException:
        # Pop-up not shown (or not clickable) on this page load — nothing to do.
        pass

def scrollDown():
    """Scroll the page down in small steps so lazy-loaded listings render."""
    # 15 steps of 300px each, pausing so the lazy loader can keep up.
    for _ in range(15):
        driver.execute_script('window.scrollBy(0, 300)')
        sleep(0.5)
    # Extra settle time after the final scroll before parsing the page.
    sleep(10)

def nextPage():
    """Click through to the next page of search results and wait for it to load."""
    # Renamed the local from 'next' so the builtin next() is not shadowed.
    next_button = driver.find_element(By.CSS_SELECTOR, '.next-next')
    next_button.click()
    sleep(5)

# Product keyword to search for on AliExpress.
searchText = input("Please enter the product name: ")

# Chrome options: keep the browser open after the script ends ('detach'),
# silence notification prompts, and start maximized.
option = webdriver.ChromeOptions()
option.add_experimental_option('detach', True)
option.add_argument('--disable-notifications')
option.add_argument('--start-maximized')
# NOTE(review): a positional chromedriver path only works on Selenium 3.x —
# Selenium 4 removed it in favour of Service(); confirm the pinned version.
driver = webdriver.Chrome('./chromedriver.exe', options=option)
driver.implicitly_wait(30)
driver.get('https://www.aliexpress.com/')

login()
slide()
suspondWindowHandler()

# Submit the search for the requested keyword.
driver.find_element(By.ID, 'search-key').send_keys(searchText)
driver.find_element(By.CSS_SELECTOR, '.search-button').click()

# Running row counter and accumulator for the CSV output.
number = 1
csvRows = []

# Scrape N result pages (currently just 1; raise the range to scrape more).
for i in range(1):
    scrollDown()
    # Parse the fully-rendered page with BeautifulSoup.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # Each product card is an anchor with these (obfuscated, build-specific)
    # CSS classes — they change whenever AliExpress redeploys its frontend.
    products = soup.find_all('a', class_='manhattan--container--1lP57Ag cards--gallery--2o6yJVt')
    for product in products:
        # Required fields: skip the whole card if any of them is missing.
        # bs4's find() returns None when absent, so .text raises AttributeError
        # — catch exactly that instead of a bare except that hid real bugs.
        try:
            title = product.find('h1', class_='manhattan--titleText--WccSjUS').text.strip()
            price = product.find('div', class_='manhattan--price--WvaUgDY').text.strip().replace('US $', '').strip()
            store = product.find('span', class_='cards--store--A2ezoRc').text.strip()
        except AttributeError:
            continue
        # Optional fields: left blank when the card does not show them.
        try:
            sold = product.find('span', class_='manhattan--trade--2PeJIEB').text.strip().replace(' sold', '').strip()
        except AttributeError:
            sold = ''
        try:
            rating = product.find('span', class_='manhattan--evaluation--3cSMntr').text.strip()
        except AttributeError:
            rating = ''
        csvRows.append([number, title, price, sold, rating, store])
        number += 1
    nextPage()

# Column headers for the exported CSV.
csvHeaders = ['NO.', 'PRODUCT NAME', 'PRICE', 'SOLD', 'RATING', 'STORE NAME']

# Write the collected rows to disk, named after the search term and date.
# utf-8-sig adds a BOM so Excel opens the file with the correct encoding.
outputPath = "d:/MyVSCODE/Aliexpress/%s-%s.csv" % (searchText, date)
with open(outputPath, mode='w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    writer.writerow(csvHeaders)
    writer.writerows(csvRows)

print('Data saved to local disk.')

driver.quit()

# (CSDN page boilerplate — comment box / "red packet" payment UI text —
#  was accidentally captured with the article; it is not part of the scraper.)