前言
笔者刚刚入门学习python,之前是搞c++的,本篇就当作学习的分享,如果有错误或者更好的方案请留言,一起学习。
环境
python 3.8.xxx
需要安装的库
selenium、pyautogui、PIL(pip 安装时的包名为 Pillow)、谷歌浏览器驱动
如果下载过程中慢,可以指定国内地址:pip install selenium -i https://pypi.douban.com/simple/
谷歌浏览器驱动下载地址:https://chromedriver.storage.googleapis.com/index.html
需要注意的是,浏览器驱动的版本号要与浏览器的版本保持一致,比如电脑上安装的浏览器版本是:
114.0.5735.91,则下载114.0.5735.xx版本的驱动
库的作用
selenium库主要用来自动化操作html页面元素,可以查找元素、模拟点击事件、发送文本信息等,需要一点html的基础
pyautogui库主要用来模拟鼠标和键盘的事件,在本例当中是模拟鼠标点击“另存为”和确定保存的动作
PIL库主要是用来将下载的xx.webp格式转化为jpg格式,当然也可以转化为其他格式
代码:
以下参数根据实际情况修改。
query_key = '美女' #下载关键字
download_num = 100 #限制下载数量
save_path=r'D:\testspace\20230603\image' #保存路径
download_path = r'C:\Users\QC\Downloads' #默认的下载路径
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver import Keys
import selenium.common.exceptions
import pyautogui
from PIL import Image
import time
# Shared WebDriver handle; stays None until the __main__ section creates the browser.
wd = None

# Search keyword typed into the search box.
query_key = '美女'
# Upper limit on the number of images to download.
download_num = 100

# Browser's default download folder (where the saved files first land).
download_path = r'C:\Users\QC\Downloads'
# Folder that receives the converted .jpg files.
save_path = r'D:\testspace\20230603\image'
# Download a file
def downloadFile(element):
    """Trigger the browser's save dialog for ``element`` and confirm it.

    Right-clicks ``element`` through the module-level driver ``wd``, then
    sends the ``v`` key followed by ``enter`` with pyautogui, pausing one
    second before each key press.

    NOTE(review): according to the author's notes, ``v`` picks the
    "save as" entry of the context menu and ``enter`` confirms the save
    dialog -- this depends on the browser's menu shortcuts.
    """
    ActionChains(wd).context_click(element).perform()
    for key in ('v', 'enter'):
        # Give the context menu / save dialog time to appear before typing.
        time.sleep(1)
        pyautogui.typewrite([key])
# Save files
def saveFile():
    """Convert every downloaded ``.webp`` file to JPEG and remove the original.

    Scans ``download_path`` for files whose name ends in ``.webp``
    (case-insensitive), writes each one to ``save_path`` as ``<n>.jpg``
    (``n`` counts up from 0 in ``os.listdir`` order) and then deletes the
    source file. ``save_path`` is created when it does not exist yet.
    """
    os.makedirs(save_path, exist_ok=True)
    num = 0
    for file_name in os.listdir(download_path):
        if not file_name.lower().endswith('.webp'):
            continue
        src_file = os.path.join(download_path, file_name)
        save_file = os.path.join(save_path, '%d.jpg' % num)
        # Close the source image before deleting it: an open handle makes
        # os.remove fail on Windows.
        with Image.open(src_file) as im:
            # JPEG cannot store alpha or palette modes, so convert first;
            # otherwise RGBA/P WebP images raise OSError on save.
            im.convert('RGB').save(save_file, 'JPEG')
        os.remove(src_file)
        num += 1
# Scroll the page to load more images
def scrollPage(imgPageNums):
    """Scroll down until the number of image pages differs from ``imgPageNums``.

    Makes at most five attempts. Each attempt scrolls by -200 with
    pyautogui, waits one second and counts the ``imgpage`` divs under
    ``#imgid`` through the module-level driver ``wd``. As soon as that count
    is not equal to ``imgPageNums`` the function waits two more seconds and
    returns; otherwise it gives up after the fifth attempt.
    """
    max_attempts = 5
    for _ in range(max_attempts):
        pyautogui.scroll(-200)
        time.sleep(1)
        page_count = len(wd.find_elements(By.XPATH, '//*[@id="imgid"]/div[@class="imgpage"]'))
        if page_count == imgPageNums:
            continue
        # Page count changed; presumably the extra pause lets the new
        # page finish rendering before the caller reads it.
        time.sleep(2)
        return
def _downloadPageImages(main_handle, image_items, limit):
    """Open images of one result page in new tabs and save them.

    Walks ``image_items`` (the ``li`` elements of a result page) and, for
    each item carrying a ``data-objurl`` attribute, opens that URL in a new
    window, triggers ``downloadFile`` on the first ``img`` element found
    there and closes the window again. Stops once ``limit`` saves have been
    triggered.

    Returns the number of images for which a save was triggered.
    """
    saved = 0
    for item in image_items:
        if saved >= limit:
            break
        wd.switch_to.window(main_handle)
        image_url = item.get_attribute('data-objurl')
        if image_url is None:
            continue
        known_handles = set(wd.window_handles)
        # Pass the URL as a script argument so quotes inside it cannot
        # break the JavaScript snippet.
        wd.execute_script('window.open(arguments[0])', image_url)
        new_handles = [h for h in wd.window_handles if h not in known_handles]
        if not new_handles:
            # No window was opened (e.g. blocked popup); nothing to close.
            continue
        wd.switch_to.window(new_handles[0])
        try:
            element_image = wd.find_element(By.TAG_NAME, 'img')
        except selenium.common.exceptions.WebDriverException as reason:
            # The opened page may not be a safe link / may hold no image: skip it.
            print(str(reason.msg))
        else:
            downloadFile(element_image)
            saved += 1
        finally:
            wd.close()  # close the image window
    return saved


if __name__ == '__main__':
    opts = webdriver.ChromeOptions()
    # Per the author's note: with this option the browser does not close right away.
    opts.add_experimental_option('detach', True)
    opts.add_experimental_option('excludeSwitches', ['enable-logging'])
    c_service = Service(r'E:\chromedriver_win32\chromedriver.exe')
    wd = webdriver.Chrome(service=c_service, options=opts)
    wd.implicitly_wait(10)
    wd.maximize_window()

    # Search for the keyword, then switch to the image results tab.
    wd.get('https://www.baidu.com')
    element = wd.find_element(By.ID, 'kw')
    element.send_keys(query_key)
    submit_btn = wd.find_element(By.XPATH, '//div[@id="head_wrapper"]//form[@id="form"]/span[@class="bg s_btn_wr"]/input')
    submit_btn.click()
    element_pic = wd.find_element(By.XPATH, '//div[@id="s_tab"]/div//a[@class="s-tab-item s-tab-item_1CwH- s-tab-pic_p4Uej s-tab-pic"]')
    element_pic.click()

    curr_pageNum = 0
    overNums = download_num  # number of images still to download
    while overNums > 0:
        element_pages = wd.find_elements(By.XPATH, '//*[@id="imgid"]/div[@class="imgpage"]')
        if curr_pageNum >= len(element_pages):
            # Scrolling did not produce another page; stop instead of
            # indexing past the end of the list.
            break
        element_image_lst = element_pages[curr_pageNum].find_element(By.TAG_NAME, 'ul').find_elements(By.TAG_NAME, 'li')
        handle = wd.current_window_handle  # handle of the main window
        # Only count images that were actually handed to the save dialog.
        overNums -= _downloadPageImages(handle, element_image_lst, overNums)
        if overNums <= 0:
            break
        # Scroll the page to load more images. scrollPage waits until the
        # page count differs from its argument, so pass the current count.
        wd.switch_to.window(handle)
        scrollPage(len(element_pages))
        curr_pageNum += 1
    # Convert and move the downloaded files
    saveFile()