import csv
import datetime
from time import sleep
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
date = datetime.datetime.now().strftime('%Y-%m-%d')
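# Log in with the account below. The credentials are typed with send_keys and also
# written into the value attribute via JavaScript, presumably as a fallback in case
# the login form drops simulated keyboard input.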
def login():
    username = driver.find_element(By.ID, 'fm-login-id')
    password = driver.find_element(By.ID, 'fm-login-password')
    UN = '1234567890@qq.com'  # placeholder credentials; replace with your own account
    PW = '00000000'
    sleep(1)
    username.send_keys(UN)
    driver.execute_script(f"document.evaluate(`//*[@id='fm-login-id']`, document).iterateNext().setAttribute('value', '{UN}')")
    sleep(1)
    password.send_keys(PW)
    driver.execute_script(f"document.evaluate(`//*[@id='fm-login-password']`, document).iterateNext().setAttribute('value', '{PW}')")
    sleep(1)
    sign_in = driver.find_element(By.CSS_SELECTOR, '.login-submit')
    sign_in.click()
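# Handle the "baxia" slider captcha: wait for its iframe, switch into it, and drag
# the slider across the full width of the track in one motion.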
def slide():
    iframe = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, 'baxia-dialog-content')))
    driver.switch_to.frame(iframe)
    sliderBg = driver.find_element(By.ID, 'nc_1_n1t')
    slider = driver.find_element(By.ID, 'nc_1_n1z')
    action_chains = ActionChains(driver)
    action_chains.click_and_hold(slider).move_by_offset(sliderBg.size['width'], 0).release().perform()
    driver.switch_to.default_content()
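# Close the floating promotional window if one appears; the absolute XPath is
# brittle and may need updating when the page layout changes.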
def popupWindowHandler():
    try:
        popupWindow = driver.find_element(By.XPATH, "/html/body/div[4]/div/div[1]/img[2]")
        popupWindow.click()
    except NoSuchElementException:
        pass
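# Scroll the results page in small steps so lazily loaded products have time to
# render, then pause for any remaining content to finish loading.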
def scrollDown():
    for y in range(15):
        js = 'window.scrollBy(0, 300)'
        driver.execute_script(js)
        sleep(0.5)
    sleep(10)
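# Advance to the next page of search results and give it time to load.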
def nextPage():
    nextButton = driver.find_element(By.CSS_SELECTOR, '.next-next')
    nextButton.click()
    sleep(5)
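# Main flow: start Chrome, open AliExpress, log in, clear the slider captcha and
# any pop-up, then search for the product name entered by the user.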
searchText = input("Please enter the product name: ")
option = webdriver.ChromeOptions()
option.add_experimental_option('detach', True)  # keep the browser open after the script finishes
option.add_argument('--disable-notifications')
option.add_argument('--start-maximized')
# Selenium 4 takes the driver path through a Service object rather than a positional argument
driver = webdriver.Chrome(service=Service('./chromedriver.exe'), options=option)
driver.implicitly_wait(30)
driver.get('https://www.aliexpress.com/')
login()
slide()
popupWindowHandler()
driver.find_element(By.ID, 'search-key').send_keys(searchText)
driver.find_element(By.CSS_SELECTOR, '.search-button').click()
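# Collect product rows. The hashed class names below ('manhattan--...', 'cards--...')
# are generated per site build and will need updating when AliExpress redeploys.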
number = 1
csvRows = []
for i in range(1):  # scrapes a single page; increase the range to collect more pages
    scrollDown()
    # Create a BeautifulSoup object to parse the HTML content of the rendered page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # Find all the product listings on the page
    products = soup.find_all('a', class_='manhattan--container--1lP57Ag cards--gallery--2o6yJVt')
    # Loop through each product listing and extract the product details
    for product in products:
        try:
            # Extract the product name, price, units sold, rating, and store
            title = product.find('h1', class_='manhattan--titleText--WccSjUS').text.strip()
            price = product.find('div', class_='manhattan--price--WvaUgDY').text.strip().replace('US $', '').strip()
            store = product.find('span', class_='cards--store--A2ezoRc').text.strip()
            try:
                sold = product.find('span', class_='manhattan--trade--2PeJIEB').text.strip().replace(' sold', '').strip()
            except AttributeError:  # listing has no sales figure
                sold = ''
            try:
                rating = product.find('span', class_='manhattan--evaluation--3cSMntr').text.strip()
            except AttributeError:  # listing has no rating yet
                rating = ''
            csvRows.append([number, title, price, sold, rating, store])
            number = number + 1
        except AttributeError:  # skip cards that are missing a required field
            pass
    nextPage()
csvHeaders = ['NO.', 'PRODUCT NAME', 'PRICE', 'SOLD', 'RATING', 'STORE NAME']
with open("d:/MyVSCODE/Aliexpress/%s-%s.csv" % (searchText, date), mode='w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    writer.writerow(csvHeaders)
    writer.writerows(csvRows)
print('Data saved to local disk.')
driver.quit()