利用python自动爬取百度图片并将图片保存为jpg格式

最新推荐文章于 2024-06-17 17:35:06 发布

for(;;cout<<"love")

最新推荐文章于 2024-06-17 17:35:06 发布

阅读量117

点赞数

分类专栏： python 自动化测试 selenium 文章标签： python 开发语言自动化 selenium

本文链接：https://blog.csdn.net/qq_16658271/article/details/131037331

版权

python 同时被 3 个专栏收录

1 篇文章 0 订阅

订阅专栏

自动化测试

1 篇文章 0 订阅

订阅专栏

selenium

1 篇文章 0 订阅

订阅专栏

前言

笔者刚刚入门学习python，之前是搞c++的，本篇就当作学习的分享，如果有错误或者更好的方案请留言，一起学习

环境

python 3.8.xxx

需要安装的库

selenium、pyautogui、PIL、谷歌浏览器驱动

如果下载过程中慢，可以指定国内地址：pip install selenium -i https://pypi.douban.com/simple/

谷歌浏览器驱动下载地址：https://chromedriver.storage.googleapis.com/index.html

需要注意的是，浏览器驱动的版本号要与浏览器的版本保持一致，比如电脑上安装的浏览器版本是：

114.0.5735.91，驱动下载114.0.5735.xx版本的

库的作用

selenium库主要是，用来自动化操作html页面元素的，可以查找元素，模拟点击事件，发送文本信息等，需要一点html的基础

pyautogui库主要是，模拟鼠标和键盘的事件，在本例当中是模拟鼠标点击另存为和确定保存动作

PIL库主要是用来将下载的xx.webp格式转化为jpg格式，当然也可以转化为其他格式

代码：

以下参数根据实际情况修改。

query_key = '美女' # search keyword whose images are downloaded
download_num = 100  # upper limit on the number of images to download
save_path=r'D:\testspace\20230603\image'  # directory the converted JPEG files are written to
download_path = r'C:\Users\QC\Downloads' # the browser's default download directory

import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver import Keys
import selenium.common.exceptions

import pyautogui
from PIL import Image

import time
wd = None  # shared WebDriver instance; assigned in the __main__ block below
query_key = '美女' # search keyword whose images are downloaded
download_num = 100  # upper limit on the number of images to download
save_path=r'D:\testspace\20230603\image'  # directory the converted JPEG files are written to
download_path = r'C:\Users\QC\Downloads' # the browser's default download directory

# Download a file
def downloadFile(element):
    """Save the given page element through the browser's context menu.

    Right-clicks ``element`` with Selenium, then sends the ``v`` key and the
    ``enter`` key through pyautogui, pausing one second before each key.
    Per the accompanying article these keystrokes pick "save as" and confirm
    the save dialog -- this depends on the browser's menu layout.

    Args:
        element: The WebElement to right-click (the caller passes an ``img``).
    """
    ActionChains(wd).context_click(element).perform()
    for key_name in ('v', 'enter'):
        # Give the menu / dialog time to appear before typing.
        time.sleep(1)
        pyautogui.typewrite([key_name])

# Convert and save the downloaded files
def saveFile():
    """Convert every ``.webp`` file in ``download_path`` to a JPEG in ``save_path``.

    Output files are named ``0.jpg``, ``1.jpg``, ... in the order in which
    ``os.listdir`` yields the source files. Each source file is deleted once
    it has been converted. Files without a ``.webp`` extension are ignored.
    """
    # Image.save does not create missing directories.
    os.makedirs(save_path, exist_ok=True)

    num = 0
    for file_name in os.listdir(download_path):
        if not file_name.lower().endswith('.webp'):
            continue

        src_file = os.path.join(download_path, file_name)
        save_file = os.path.join(save_path, '%d.jpg' % num)

        # The context manager closes the source file before it is removed;
        # an open handle makes os.remove fail on Windows.
        with Image.open(src_file) as im:
            # JPEG has no alpha channel, so RGBA WebP images must be
            # converted to RGB first or Pillow raises OSError.
            im.convert('RGB').save(save_file, 'JPEG')

        os.remove(src_file)
        num = num + 1

# Scroll the page so that more images get loaded
def scrollPage(imgPageNums):
    """Scroll until the number of ``imgpage`` containers differs from ``imgPageNums``.

    Makes at most five attempts. Each attempt calls ``pyautogui.scroll(-200)``,
    waits one second and counts the ``div.imgpage`` elements under ``#imgid``.
    As soon as the count is not equal to ``imgPageNums`` the function waits
    two more seconds and returns.

    Args:
        imgPageNums: The page count to compare the current count against.
    """
    page_xpath = '//*[@id="imgid"]/div[@class="imgpage"]'
    max_attempts = 5

    for _ in range(max_attempts):
        pyautogui.scroll(-200)
        time.sleep(1)
        loaded_pages = wd.find_elements(By.XPATH, page_xpath)
        if len(loaded_pages) == imgPageNums:
            # Count unchanged: keep scrolling.
            continue
        # Count changed: pause briefly, then stop scrolling.
        time.sleep(2)
        return

if __name__ == '__main__':
    # Script entry point: search Baidu for ``query_key``, open the image
    # results, download up to ``download_num`` images through the browser UI
    # and finally convert the downloaded files with saveFile().

    # Start Chrome through the local chromedriver executable.
    opts = webdriver.ChromeOptions()
    opts.add_experimental_option('detach', True)  # with this option the browser does not close immediately
    opts.add_experimental_option('excludeSwitches', ['enable-logging'])
    c_service = Service(r'E:\chromedriver_win32\chromedriver.exe')
    wd = webdriver.Chrome(service=c_service, options=opts)
    wd.implicitly_wait(10)
    wd.maximize_window()

    # Run the search and switch to the image results tab.
    wd.get('https://www.baidu.com')
    element = wd.find_element(By.ID, 'kw')
    element.send_keys(query_key)
    submit_btn = wd.find_element(By.XPATH, '//div[@id="head_wrapper"]//form[@id="form"]/span[@class="bg s_btn_wr"]/input')
    submit_btn.click()
    element_pic = wd.find_element(By.XPATH, '//div[@id="s_tab"]/div//a[@class="s-tab-item s-tab-item_1CwH- s-tab-pic_p4Uej s-tab-pic"]')
    element_pic.click()

    page_xpath = '//*[@id="imgid"]/div[@class="imgpage"]'
    handle = wd.current_window_handle  # handle of the main window
    curr_pageNum = 0          # index of the result page being processed
    overNums = download_num   # number of images still to download
    while overNums > 0:
        element_pages = wd.find_elements(By.XPATH, page_xpath)
        if curr_pageNum >= len(element_pages):
            # Scrolling did not load another result page; indexing it would
            # raise IndexError, so stop here.
            break
        element_image_lst = element_pages[curr_pageNum].find_element(By.TAG_NAME, 'ul').find_elements(By.TAG_NAME, 'li')

        for element_li in element_image_lst:
            if overNums <= 0:
                break
            wd.switch_to.window(handle)
            image_url = element_li.get_attribute('data-objurl')
            if image_url is None:
                continue

            # Pass the URL as a script argument instead of splicing it into
            # the JavaScript source, so quotes in the URL cannot break it.
            wd.execute_script('window.open(arguments[0])', image_url)
            wd.switch_to.window(wd.window_handles[1])
            try:
                # The opened link may not be a safe/loadable page; skip it then.
                element_image = wd.find_element(By.TAG_NAME, 'img')
            except selenium.common.exceptions.WebDriverException as reson:
                # Only Selenium exceptions carry ``msg``.
                print(str(reson.msg))
            else:
                downloadFile(element_image)
                # Count only images that were actually handed to the download step.
                overNums = overNums - 1
            finally:
                wd.close()  # close the second window

        wd.switch_to.window(handle)
        if overNums <= 0:
            break

        # Scroll the page to load more images. scrollPage compares against
        # the number of pages currently loaded, not the page index.
        scrollPage(len(element_pages))
        curr_pageNum = curr_pageNum + 1

    # Convert and save the downloaded files
    saveFile()

for(;;cout<<"love")

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
利用python自动爬取百度图片并将图片保存为jpg格式

selenium库主要是，用来自动化操作html页面元素的，可以查找元素，模拟点击事件，发送文本信息等，需要一点html的基础。笔者刚刚入门学习python之前是搞c++的，本篇就当作学习的分享，如果有错误或者更好的方案请留言，一起学习。download_path = r'C:\Users\QC\Downloads' #默认的下载路径。pyautogui库主要是，模拟鼠标和键盘的事件，在本例当中是模拟鼠标点击另存为和确定保存动作。query_key = '美女' #下载关键字。以下参数根据实际情况修改。
复制链接

扫一扫