# Prerequisite: Google Chrome and a matching version of ChromeDriver must be installed.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from urllib import error
from urllib import request
import os
import time
import sys
# by jiaxin
# replace for yours
# url = "https://www.google.com"
url = "https://global.bing.com/?mkt=en-us"  # search-engine start page to open
explorer = "Chrome"  # browser to drive; must match the installed webdriver
# directory
imgs_dir = "./BMP-2_images"  # folder the downloaded images are saved into
downitem = "(印度|india|indian) BMP-2"  # search keywords: space means AND, | means OR
keyname = "BMP-2_"  # filename prefix for every saved image
startnum = 24  # starting index used when numbering downloaded images
def progress_callback(count_of_blocks, block_size, total_size):
    """Report hook for urllib.request.urlretrieve: draw a 50-char progress bar.

    Args:
        count_of_blocks: number of blocks transferred so far.
        block_size: size of each transfer block in bytes.
        total_size: total file size in bytes; urlretrieve passes -1 (or 0)
            when the server does not report Content-Length.
    """
    if total_size <= 0:
        # Size unknown -- a percentage cannot be computed (and dividing by
        # total_size would raise ZeroDivisionError); show bytes received.
        sys.stdout.write("\r%d bytes" % (count_of_blocks * block_size))
        sys.stdout.flush()
        return
    # Clamp to 50 cells: the final callback can overshoot total_size because
    # the last block is usually only partially filled.
    progress = min(50, int(50 * (count_of_blocks * block_size) / total_size))
    # Each cell is 2% (50 cells == 100%), hence progress * 2 for the label.
    sys.stdout.write("\r[%s%s] %d%%" % ('█' * progress, ' ' * (50 - progress), progress * 2))
    sys.stdout.flush()
class CrawlSelenium:
    """Selenium-driven image crawler.

    Opens a search engine, searches for a keyword, switches to the image
    results, scrolls repeatedly to lazy-load more thumbnails, and downloads
    each image -- preferring the full-resolution original when the preview
    overlay exposes it.

    NOTE(review): the selectors used in crawl() ('img.mimg', frame name
    'OverlayIFrame', 'img.nofocus', link text '图片') look Bing-specific
    even though the default url is Google -- verify against the target site.
    """
    def __init__(self, explorer="Chrome", url="https://www.google.com"):
        # explorer: browser name, one of "Chrome"/"Opera"/"Firefox"/"Edge".
        # url: search-engine start page to open first.
        self.url = url
        self.explorer = explorer
    def set_loading_strategy(self, strategy="normal"):
        """Build self.options for the browser; must be called before crawl().

        strategy: Selenium page-load strategy ("normal", "eager" or "none").
        """
        self.options = Options()
        self.options.add_argument("--edge-webview-switches=--webview-distribution=canary")
        self.options.add_argument ('--ignore-certificate-errors')
        self.options.add_argument ('--ignore-ssl-errors')
        self.options.page_load_strategy = strategy
    def crawl(self,downitem):
        """Search for `downitem`, open the image results, download each image.

        Files are written to the module-level `imgs_dir`, named
        `keyname` + running index (starting at `startnum`) + "_img.jpg".
        Requires set_loading_strategy() to have been called first.
        """
        # instantiate driver according to corresponding explorer
        if self.explorer == "Chrome":
            self.driver = webdriver.Chrome(options=self.options)
        if self.explorer == "Opera":
            self.driver = webdriver.Opera(options=self.options)
        if self.explorer == "Firefox":
            self.driver = webdriver.Firefox(options=self.options)
        if self.explorer == "Edge":
            self.driver = webdriver.Edge(options=self.options)
        # navigate to the search-engine start page
        self.driver.get(self.url)
        time.sleep(15)
        # locate the search input field (name="q" on both Google and Bing)
        search_input = self.driver.find_element(By.NAME, 'q')
        time.sleep(3)
        # emulate user input and press Enter to search
        webdriver.ActionChains(self.driver).move_to_element(search_input).send_keys(downitem + Keys.ENTER).perform()
        time.sleep(3)
        # switch to the image-results tab; link text '图片' means "Images",
        # so this assumes a Chinese-locale UI -- confirm for other locales
        # self.driver.find_element(By.LINK_TEXT, '国际版').click()
        self.driver.find_element(By.LINK_TEXT, '图片').click()
        time.sleep(5)
        # scroll to the bottom once to trigger lazy loading of results
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        #
        time.sleep(3)
        try:
            # keep scrolling to load as many thumbnails as possible
            for i in range(10):
                # do according to message
                # message = self.driver.find_element(By.CSS_SELECTOR, 'div.OuJzKb.Bqq24e').get_attribute('textContent')
                # print(message)
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(5)
        except Exception as err:
            print(err)
        # click the "see more images" button if present; its absence is
        # expected (not an error) -- we just want extra results
        # show_more_button = self.driver.find_element(By.CSS_SELECTOR, "input[value='查看更多图片']")
        try:
            self.driver.find_element(By.LINK_TEXT, '查看更多图片').click()
        except:
            print("找不到更多图片按钮,不是什么错误,只是想多翻点图片")
            pass
        time.sleep(5)
        try:
            # scroll some more after expanding the result list
            for i in range(8):
                # do according to message
                # message = self.driver.find_element(By.CSS_SELECTOR, 'div.OuJzKb.Bqq24e').get_attribute('textContent')
                # print(message)
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(5)
        except Exception as err:
            print(err)
        time.sleep(15)
        # collect all thumbnail elements (Bing result thumbnails use class "mimg")
        imgs = self.driver.find_elements(By.CSS_SELECTOR, "img.mimg")
        print("获取到图片数量",len(imgs))
        img_count = startnum
        time.sleep(15)
        for img in imgs:
            try:
                # one image per iteration, throttled with sleeps
                print('\ndownloading image ' + str(img_count) + ': ')
                time.sleep(5)
                # default to the thumbnail src; replaced below if the
                # full-resolution original can be resolved
                img_url = img.get_attribute("src")
                try:
                    # open the preview overlay to reach the original image
                    img.click()
                    print("打开图片成功,兄弟们稍安勿躁,等15秒让图片加载")
                    time.sleep(8)
                    self.driver.switch_to.frame("OverlayIFrame")
                    time.sleep(5)
                    jiaxin = self.driver.find_elements(By.CSS_SELECTOR, "img.nofocus")
                    # url = jiaxin[0].get_attribute("src")
                    for i in jiaxin:
                        try:
                            t= i.get_attribute("src")
                            # srcs hosted on a bing domain are cached previews;
                            # keep looking for the original external source
                            if 'bing' in t:
                                continue
                            else:
                                img_url = t
                                print("老天保佑,原图连接获取成功")
                        except:
                            print("获取清晰src的时候出错了")
                            continue
                    print("连接:",img_url)
                    # time.sleep(5)
                except:
                    # overlay failed to open -- fall back to the thumbnail
                    print("点不开,下载原来的")
                    pass
                if img_url == None:
                    print("他妈的,居然空的")
                    continue
                # download via urllib, reporting progress on stdout
                path = os.path.join(imgs_dir, str(keyname)+str(img_count) + "_img.jpg")
                request.urlretrieve(url = img_url, filename = path, reporthook = progress_callback, data = None)
                img_count = img_count + 1
                time.sleep(5)
                print("\n狗日的,终于下载成功一张")
                # close the overlay and return to the parent frame so the
                # next thumbnail click happens in the results page
                try:
                    self.driver.find_element(By.XPATH, "//div[@class='close nofocus']").click()
                except:
                    pass
                self.driver.switch_to.parent_frame()
            except error.HTTPError as http_err:
                # download failed -- clean up the overlay and move on
                print(http_err)
                try:
                    self.driver.find_element(By.XPATH, "//div[@class='close nofocus']").click()
                except:
                    pass
                self.driver.switch_to.parent_frame()
            except Exception as err:
                # any other per-image failure: clean up and continue the loop
                print(err)
                try:
                    self.driver.find_element(By.XPATH, "//div[@class='close nofocus']").click()
                except:
                    pass
                self.driver.switch_to.parent_frame()
def main():
    """Entry point: configure the crawler, ensure the output directory
    exists, then start crawling for the configured keyword."""
    crawl_s = CrawlSelenium(explorer, url)
    crawl_s.set_loading_strategy("normal")
    # makedirs(exist_ok=True) is race-free and also creates missing parent
    # directories; the old exists()+mkdir pair could fail between the check
    # and the create, or if imgs_dir's parent did not exist.
    os.makedirs(imgs_dir, exist_ok=True)
    # crawling
    crawl_s.crawl(downitem = downitem)
# Standard script guard: run the crawler only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()