import time
import pymongo
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from random import randint
class WpShop():
    """Selenium scraper that searches vip.com and saves the results to MongoDB.

    All state is held on the class and every method is a classmethod.  The
    MongoDB client and the Chrome browser are created while this class body
    executes, i.e. as soon as the module is imported or run.
    """

    mongoClient = pymongo.MongoClient()
    db = mongoClient['py_spider']['wp_shop']
    # Browser configuration.
    options = webdriver.ChromeOptions()
    # Block images to speed up page loads.
    prefs = {"profile.managed_default_content_settings.images": 2}
    options.add_experimental_option('prefs', prefs)
    # Launch Chrome.
    brower = webdriver.Chrome(options=options)

    @classmethod
    def base(cls, keyword: str = '电脑') -> None:
        """Open the vip.com home page and submit a search.

        Args:
            keyword: Text typed into the search box.  The default is the
                term that used to be hard-coded here.
        """
        cls.brower.get('https://www.vip.com/')
        # Explicit wait (up to 10 s) for the search widgets to appear.
        wait = WebDriverWait(cls.brower, 10)
        # Type the keyword into the search box.
        el_input = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//input[@class="c-search-input J-search-input"]')
        ))
        el_input.send_keys(keyword)
        # Click the search icon to run the search.
        el_button = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//span[@class="c-search-icon"]')
        ))
        time.sleep(2)
        el_button.click()
        time.sleep(randint(1, 3))

    @classmethod
    def dropDown(cls, steps: int = 11, step_px: int = 1000) -> None:
        """Scroll the page down step by step so more items get loaded.

        Args:
            steps: Number of scroll positions to visit.
            step_px: Distance in pixels between two scroll positions.
        """
        for i in range(1, steps + 1):
            js_code = f'document.documentElement.scrollTop = {i * step_px}'
            cls.brower.execute_script(js_code)
            # Random pause between scroll steps.
            time.sleep(randint(1, 3))

    @classmethod
    def _parseCurrentPage(cls) -> None:
        """Scroll the current result page, then extract and save every item."""
        cls.dropDown()  # Scroll first so that all items are loaded.
        div_list = cls.brower.find_elements(
            By.XPATH,
            '//section[@id="J_searchCatList"]'
            '/div[@class="c-goods-item J-goods-item c-goods-item--auto-width"]')
        for div in div_list:
            href = div.find_element(By.XPATH, './a')
            price = div.find_element(
                By.XPATH,
                './/div[@class="c-goods-item__sale-price J-goods-item__sale-price"]')
            title = div.find_element(By.XPATH, './/div[2]/div[2]')
            item = {
                'title': title.text,
                'price': price.text,
                'href': href.get_attribute("href"),
            }
            print(item)
            cls.saveData(item)

    @classmethod
    def _gotoNextPage(cls) -> bool:
        """Click the "next page" button if it is available.

        Returns:
            True when the button was found, visible and clicked; False when
            it is hidden or when locating/clicking it failed.
        """
        # Only the lookup and the click are guarded.  As before, any failure
        # here is treated as having reached the last page; scraping errors
        # are no longer swallowed by this handler.
        try:
            next_button = cls.brower.find_element(
                By.XPATH, './/div//*[@id="J_nextPage_link"]')
            if not next_button.is_displayed():
                return False
            next_button.click()
        except Exception as e:
            print("最后一页", e)
            return False
        return True

    @classmethod
    def parseData(cls) -> None:
        """Scrape the current result page and all following pages.

        Pages are walked in a loop rather than by mutual recursion with
        ``nextPage``, so the call stack does not grow with the page count.
        The browser is always shut down with ``quit()`` when the crawl ends,
        including when scraping a page raises; such an error propagates to
        the caller after the cleanup.
        """
        try:
            while True:
                cls._parseCurrentPage()
                if not cls._gotoNextPage():
                    break
        finally:
            cls.brower.quit()

    @classmethod
    def saveData(cls, item) -> None:
        """Insert one scraped item (a dict) into the MongoDB collection."""
        cls.db.insert_one(item)

    @classmethod
    def nextPage(cls) -> None:
        """Advance to the next result page and continue scraping from there.

        If there is no next page, the browser is shut down with ``quit()``
        (not ``close()``) so the driver session is ended as well.
        """
        if cls._gotoNextPage():
            cls.parseData()
        else:
            cls.brower.quit()

    @classmethod
    def main(cls, keyword: str = '电脑') -> None:
        """Run the whole job: search for *keyword*, then scrape every page."""
        cls.base(keyword)
        cls.parseData()
if __name__ == '__main__':
    # Script entry point: build the scraper object and start the full crawl.
    scraper = WpShop()
    scraper.main()
# Scraping product information with Selenium (使用selenium抓取信息)
# First published 2023-12-15 10:17:45