一、开发工具
PyCharm版本2023.2.1(注:2023.2.1 是 PyCharm 的版本号,Python 解释器本身使用 3.x 版本即可)
相关模块
scrapy模块
selenium模块
python自带模块
二、环境搭配
安装Python并添加到环境变量,然后用pip安装需要的相关模块即可,例如:pip install selenium
三、chrome driver安装
根据chrome版本下载chrome driver
Chrome driver下载链接(以 124.0.6367.91 版 win64 为例,版本号需与本机 Chrome 版本一致):https://storage.googleapis.com/chrome-for-testing-public/124.0.6367.91/win64/chromedriver-win64.zip
将下载并解压得到的 chromedriver.exe 放到 Python 安装目录(与 python.exe 同一目录)或其他已加入 PATH 环境变量的目录里面
四、代码实现
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import csv
# --- Output file -------------------------------------------------------------
# Rows are appended to the CSV; newline='' lets the csv module control line
# endings itself (otherwise blank lines appear between rows on Windows).
f = open('面.csv', mode='a', encoding='utf-8', newline='')
csv_writer = csv.DictWriter(f, fieldnames=[
    '标题',    # title
    '价格',    # price
    '评论',    # comment count text
    '店铺名',  # shop name
    '详情页',  # detail-page URL
])
# The file is opened in append mode, so only emit the header when the file is
# still empty.  Writing it unconditionally would insert a fresh header row in
# the middle of the data on every run.
if f.tell() == 0:
    csv_writer.writeheader()

# --- Browser -----------------------------------------------------------------
# NOTE(review): this profile path is specific to one Windows account; adjust it
# for the machine the script runs on.  Presumably the existing profile is
# reused so the session keeps its cookies/login -- confirm.
user_data_dir = r'C:\Users\熊一\AppData\Local\Google\Chrome\User Data'
user_option = webdriver.ChromeOptions()
user_option.add_argument(f'--user-data-dir={user_data_dir}')
driver = webdriver.Chrome(options=user_option)

# --- Search ------------------------------------------------------------------
# Open the JD home page, type the keyword into the search box (#key) and click
# the icon inside the search button to submit the query.
driver.get('https://www.jd.com/')
driver.find_element(By.CSS_SELECTOR, "#key").send_keys('面膜')
driver.find_element(By.CSS_SELECTOR, '#search > div > div.form > button > i').click()
def drop_down(browser=None, pause=1.0):
    """Scroll the current page towards the bottom in six steps.

    Each step sets ``document.documentElement.scrollTop`` to a growing
    fraction (1/9, 3/9, ... 11/9) of the document's scroll height.  The last
    fraction is deliberately larger than 1 so the final step targets a point
    past the end of the document.

    Args:
        browser: Object exposing ``execute_script(js)``.  When omitted, the
            module-level ``driver`` is used, which keeps the original
            zero-argument call working.
        pause: Seconds to wait after every scroll step.  Without a pause all
            six scripts run back-to-back and the page gets no time to react
            (e.g. to load more items) before the caller queries the DOM.
            Pass ``0`` to disable waiting.
    """
    if browser is None:
        browser = driver  # WebDriver created at module level earlier in the script
    for x in range(1, 12, 2):
        j = x / 9
        js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f' % j
        browser.execute_script(js)
        if pause > 0:
            time.sleep(pause)
def _text_or_blank(parent, selector):
    """Return the text of the first element matching *selector* under *parent*.

    Returns an empty string when nothing matches, so that one result tile
    lacking a field does not raise NoSuchElementException and abort the run.
    """
    matches = parent.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text if matches else ''


try:
    # Scroll through the result page before collecting the item nodes.
    drop_down()
    lis = driver.find_elements(By.CSS_SELECTOR, '#J_goodsList > ul li')
    for li in lis:
        title = _text_or_blank(li, '.p-name em')
        price = _text_or_blank(li, '.p-price i')
        comment = _text_or_blank(li, '.p-commit a ')
        shop_name = _text_or_blank(li, '.p-shop a')
        links = li.find_elements(By.CSS_SELECTOR, '.p-img a')
        href = links[0].get_attribute('href') if links else ''
        print(title, price, comment, shop_name, href)
        dit = {
            '标题': title,
            '价格': price,
            '评论': comment,
            '店铺名': shop_name,
            '详情页': href,
        }
        csv_writer.writerow(dit)
    # NOTE(review): the original indentation was lost; this pause is assumed
    # to run once after the loop (keeping the browser window open briefly),
    # not once per item -- confirm.
    time.sleep(15)
finally:
    # Always flush/close the CSV and shut the browser down, even if scraping
    # fails part-way, so written rows reach disk and no Chrome process is
    # left running.
    f.close()
    driver.quit()