话不多说,直接上代码!
仅供学习!请勿转载,转载必究!
"""Scrape the first page of JD.com search results into a CSV file.

Opens Chrome through Selenium, searches JD for a fixed keyword and appends
the visible text of the first 30 product cards to ``jd_products.csv``.
"""
import csv
import os
import time

import numpy
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Module-level browser session and start page; kept as globals so existing
# references to `driver` / `url` keep working.
driver = webdriver.Chrome()
url = 'https://www.jd.com'

_OUTPUT_CSV = 'jd_products.csv'
_MAX_ITEMS = 30      # number of result slots inspected on the page
_TEXT_FIELDS = 7     # text lines kept per product (line0 .. line6)
_WAIT_SECONDS = 10
_FIELDNAMES = ['item'] + [f'line{i}' for i in range(_TEXT_FIELDS)]


def _build_row(item, text):
    """Map one product card's visible text onto the fixed CSV columns.

    Args:
        item: 1-based position of the card in the result list.
        text: The card's visible text, one field per line.

    Returns:
        A dict with keys ``item`` and ``line0`` .. ``line6``. Missing lines
        are filled with empty strings; lines beyond the seventh are dropped.
    """
    lines = text.split('\n')
    # Pad short cards so indexing never raises IndexError.
    lines += [''] * (_TEXT_FIELDS - len(lines))
    row = {'item': item}
    for i in range(_TEXT_FIELDS):
        row[f'line{i}'] = lines[i]
    return row


def main():
    """Search JD and append the first 30 result cards to ``jd_products.csv``.

    Each card is printed and written exactly once. The header row is written
    only when the output file does not exist yet or is empty, so repeated
    runs append data without repeating the header.
    """
    wait = WebDriverWait(driver, _WAIT_SECONDS)
    driver.get(url)

    # Wait until the search box exists instead of assuming the page is ready.
    search_box = wait.until(EC.presence_of_element_located((By.ID, 'key')))
    search_box.send_keys('人工智能')
    # Submit the search request.
    search_box.send_keys(Keys.ENTER)

    # Make sure the results page is there before scrolling it, otherwise the
    # scroll may run against the page we are navigating away from.
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.gl-item')))
    # Scroll to the bottom so that further content gets loaded.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    needs_header = (
        not os.path.exists(_OUTPUT_CSV) or os.path.getsize(_OUTPUT_CSV) == 0
    )
    # Open the file once for the whole run rather than once per product.
    with open(_OUTPUT_CSV, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=_FIELDNAMES)
        if needs_header:
            writer.writeheader()

        for item in range(1, _MAX_ITEMS + 1):
            selector = f'#J_goodsList > ul > li:nth-child({item})'
            for good in driver.find_elements(By.CSS_SELECTOR, selector):
                text = good.text
                if not text.strip():
                    # Nothing visible in this slot; skip it.
                    continue
                data = _build_row(item, text)
                print(data)
                writer.writerow(data)


if __name__ == '__main__':
    try:
        main()
    finally:
        # Always close the browser, even when scraping fails.
        driver.quit()
前提条件:运行前需要先安装并配置好谷歌浏览器(Chrome)!
运行结果如下: