# 利用selenium获取图片并下载;基于pixabay,获取图片 (Fetch image links from Pixabay with Selenium, then download them)
# -*- coding:utf-8 -*-
import requests
from urllib.parse import quote
from time import sleep
import threading
from tqdm import tqdm
from selenium import webdriver
import time
def get_type(type):
    """Collect image URLs and titles from a Pixabay photo search.

    Starts Chrome through Selenium, opens the Pixabay search page for the
    keyword, reads the total photo count from the page heading, asks on
    stdin how many result pages to walk, scrolls every page to the bottom
    and records each element with class ``photo-result-image``.

    Args:
        type: Search keyword. The name shadows the builtin but is kept so
            existing callers keep working.

    Returns:
        dict mapping each image ``src`` attribute to its ``alt`` attribute.
        Empty when the requested page count exceeds the number of pages
        computed from the photo count.
    """
    keyword = type  # local alias so the builtin name is not used below

    options = webdriver.ChromeOptions()
    # Ignore HTTPS certificate warnings.
    options.add_argument('--ignore-certificate-errors')
    # Intended to hide the "Chrome is being controlled by automated
    # software" notice.
    options.add_argument("--disable_infobars")
    # options.add_argument('--headless')
    # NOTE(review): `chrome_options=` and the `find_element(s)_by_*` helpers
    # are the Selenium 3 spelling; they were removed in Selenium 4 -- confirm
    # the installed version before upgrading.
    driver = webdriver.Chrome(r"填写自己谷歌的驱动器的地址",
                              chrome_options=options)
    try:
        # quote() percent-encodes the (possibly non-ASCII) keyword.
        url = "https://pixabay.com/zh/photos/search/" + quote(keyword)
        driver.get(url)
        driver.maximize_window()
        sleep(2)

        heading = driver.find_element_by_xpath(
            '//*[@id="content"]/div/div[3]/div/h1').text
        count_text = str(heading).split(" ")[0]
        print("一共检测照片数量", count_text)
        # The code assumes 100 photos per result page. Thousands separators
        # are stripped so a count such as "1,234" still parses.
        pages = int(count_text.replace(",", "")) // 100 + 1

        page = int(input("输入的页码:"))
        if page > pages:
            # Previously this path fell off the end and returned None.
            print("输入的页码超出总页数:", pages)
            return {}

        # Session-wide setting; one call is enough.
        driver.implicitly_wait(2)

        photo_dic = {}
        for page_no in range(1, page + 1):
            print("正在获取页面信息......")
            _scroll_to_bottom(driver)
            images = driver.find_elements_by_class_name('photo-result-image')
            print(len(images))
            for image in images:
                # key: download address, value: image title
                photo_dic[image.get_attribute("src")] = image.get_attribute("alt")
            # Only move on when another page is still wanted; clicking after
            # the last page is unnecessary and raises if no link is present.
            if page_no < page:
                driver.find_element_by_xpath('//*[@id="content"]/div/a').click()
        return photo_dic
    finally:
        # Always release the browser, including on errors.
        driver.quit()


def _scroll_to_bottom(driver):
    """Scroll the current page down in 100px steps until its height stops growing."""
    js = "return action=document.body.scrollHeight"
    height = 0
    new_height = driver.execute_script(js)
    while height < new_height:
        for offset in tqdm(range(height, new_height, 100)):
            driver.execute_script('window.scrollTo(0, {})'.format(offset))
            sleep(0.8)
        height = new_height
        # Give the page a moment before re-measuring; the height may have
        # grown while scrolling.
        sleep(1.5)
        new_height = driver.execute_script(js)
def download_photo(url, name):
    """Download one image and save it as ``<name>.jpg`` in the output folder.

    Args:
        url: Address of the image to fetch.
        name: Base file name. Characters that are path separators or are
            reserved in Windows file names are replaced with ``_``.

    Returns:
        None. On a timeout, a request error or an HTTP error status a
        message is printed and no file is written.
    """
    import re

    try:
        photo = requests.get(url, timeout=10)
        # Do not store an HTTP error page as if it were an image.
        photo.raise_for_status()
    except requests.exceptions.Timeout:
        print("timeout")
        # Without this return the code below used the unbound `photo`.
        return
    except requests.exceptions.RequestException as err:
        print("download failed:", url, err)
        return

    # `name` comes from the page's alt text, so keep it from escaping the
    # target folder or producing an invalid file name.
    safe_name = re.sub(r'[\\/:*?"<>|]', "_", str(name))
    path = r"D:/python_code/python_test/photo_get/" + safe_name + ".jpg"  # output file path
    with open(path, "wb") as f:
        f.write(photo.content)
if __name__ == "__main__":
    # Script entry point: gather the image links, then download each one in
    # its own thread and report the elapsed time.
    # `or {}` keeps the loop safe if get_type returns nothing.
    photo_dict = get_type("夜空") or {}
    print("正在下载.....")
    start = time.time()
    workers = []
    for url, name in tqdm(photo_dict.items()):
        t = threading.Thread(target=download_photo, args=(url, name))
        t.start()
        workers.append(t)
    # Wait for every download; otherwise the timing below only measures how
    # long it took to start the threads.
    for t in workers:
        t.join()
    end = time.time()
    print("下载总耗时: %s 分钟!" % (round((end - start) / 60, 1)))