调用selenium库,所以在使用的时候必须要配备:chrome和对应版本的chromedriver,将chromedriver.exe放到和.py文件同一目录下即可。
1. 文件夹名重复异常处理:在使用os.makedirs()的时候,如果文件夹名字重复会报错。
使用while循环与try...except...。
try...except...语法是先执行try后面的语句,如果该语句报错,则转至except后面的语句。
2.selenium的driver切换至下一页。
使用网页句柄(window_handle)来进行切换,具体为:首先获取第一个网页的handle,点击按钮出现新的页面后,再获取所有网页的handle。这些handle按照网页打开的顺序排列,总共打开几个网页就有几个handle。此时将所有handle与之前获得的第一个handle依次比对,找出不同的那个;也可以直接取第二个handle,再使用driver.switch_to.window()(旧版写法为driver.switch_to_window())切换到第二个网页。
3.保存网页中的图片
使用urllib.request.urlretrieve()直接保存
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'BDTP.ui'
#
# Created by: PyQt5 UI code generator 5.15.2
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
from selenium import webdriver
from urllib import request
import urllib
import time
import os
def deal_same_dirname(dirname):
    """Create a new directory, adding a "(n)" suffix if the name is taken.

    Tries ``dirname`` first, then ``dirname(1)``, ``dirname(2)``, ... until
    ``os.makedirs`` succeeds. Missing parent directories are created as well,
    because ``os.makedirs`` is used.

    Args:
        dirname: Desired directory path.

    Returns:
        The path of the directory that was actually created.

    Raises:
        OSError: If creation fails for any reason other than the name already
            existing (for example, permission denied or an invalid path).
    """
    candidate = dirname
    attempt = 0
    while True:
        try:
            os.makedirs(candidate)
        except FileExistsError:
            # Only a name collision justifies a retry. Any other OSError is
            # propagated, since retrying with a new suffix could never succeed
            # and would loop forever.
            attempt += 1
            candidate = "{}({})".format(dirname, attempt)
        else:
            return candidate
def DownLoad(target, load_times):
    """Download Baidu image-search results for a keyword.

    A fresh directory named after ``target`` is created (via
    ``deal_same_dirname``, so a "(n)" suffix is added when the name is taken).
    Chrome is then driven to the Baidu image search page, the first result is
    opened, and ``load_times`` images are saved as ``<target><i>.jpg`` while
    clicking the "next" button between downloads.

    Args:
        target: Search keyword; also used as directory name and file prefix.
        load_times: Number of images to fetch.
    """
    # Function-scope imports keep this block self-contained.
    from urllib.parse import quote
    from selenium.webdriver.common.by import By

    save_dir = deal_same_dirname(target)

    # Percent-encode the keyword so characters such as "&", "#" or spaces
    # cannot corrupt the query string.
    keyword = quote(target)
    url = "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111110&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=" + keyword + "&oq=" + keyword + "&rsp=-1"

    # NOTE(review): passing the driver path positionally is the Selenium 3
    # calling convention; newer Selenium 4 releases expect a Service object.
    # Confirm the installed Selenium version before changing this call.
    driver = webdriver.Chrome(r"chromedriver.exe")
    try:
        driver.minimize_window()
        driver.get(url)

        # Remember the search-results window, then open the first result.
        search_handle = driver.current_window_handle
        driver.find_element(By.XPATH, "//li[@class='imgitem']").click()

        # Key step: point the driver at the newly opened window, i.e. the
        # first handle that is not the search-results window.
        detail_handle = next(
            (h for h in driver.window_handles if h != search_handle), None)
        if detail_handle is not None:
            driver.switch_to.window(detail_handle)

        for i in range(load_times):
            # Locate the image currently displayed on the detail page.
            picture = driver.find_element(
                By.XPATH, "//div[@class='img-wrapper']/img")
            src = picture.get_attribute("src")
            if src:
                # Saved as <save_dir>/<target><i>.jpg; os.path.join picks the
                # right separator for the platform.
                urllib.request.urlretrieve(
                    src, os.path.join(save_dir, target + str(i) + ".jpg"))
            # Advance to the next image and give the page time to update.
            driver.find_element(
                By.XPATH, "//span[@class='img-next']").click()
            time.sleep(0.5)
    finally:
        # Always shut the browser down so repeated downloads do not leave
        # Chrome/chromedriver processes behind.
        driver.quit()
class Ui_MainWindow(object):
    """Main-window form: a keyword box, a count box and confirm/cancel buttons.

    ``setupUi`` builds the widgets on a ``QMainWindow`` and wires the buttons;
    ``retranslateUi`` sets the visible texts.
    """

    def setupUi(self, MainWindow):
        """Create all widgets on ``MainWindow`` and connect the button signals."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(528, 298)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")

        # Confirm button (bottom left).
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton.setGeometry(QtCore.QRect(30, 180, 111, 51))
        self.pushButton.setObjectName("pushButton")

        # Keyword label and input.
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setGeometry(QtCore.QRect(30, 30, 101, 31))
        self.label.setObjectName("label")
        self.textEdit = QtWidgets.QTextEdit(self.centralwidget)
        self.textEdit.setGeometry(QtCore.QRect(140, 20, 341, 51))
        self.textEdit.setObjectName("textEdit")

        # Download-count input and label.
        self.textEdit_2 = QtWidgets.QTextEdit(self.centralwidget)
        self.textEdit_2.setGeometry(QtCore.QRect(140, 110, 341, 51))
        self.textEdit_2.setObjectName("textEdit_2")
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setGeometry(QtCore.QRect(30, 110, 101, 41))
        self.label_2.setObjectName("label_2")

        # Cancel button (bottom right).
        self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_2.setGeometry(QtCore.QRect(370, 180, 112, 51))
        self.pushButton_2.setObjectName("pushButton_2")

        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 528, 30))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        # Confirm starts a validated download; cancel closes the window.
        self.pushButton.clicked.connect(lambda: self._start_download())
        self.pushButton_2.clicked.connect(lambda: MainWindow.close())

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    @staticmethod
    def _parse_request(keyword_text, count_text):
        """Validate the raw text of the two input boxes.

        Args:
            keyword_text: Raw content of the keyword box.
            count_text: Raw content of the count box.

        Returns:
            A ``(keyword, count)`` tuple with surrounding whitespace removed
            from the keyword and the count converted to a positive ``int``.

        Raises:
            ValueError: If the keyword is empty or the count is not a
                positive integer. The message is suitable for display.
        """
        keyword = keyword_text.strip()
        if not keyword:
            raise ValueError("请输入搜索关键词")
        try:
            count = int(count_text.strip())
        except ValueError:
            raise ValueError("爬取数量必须是正整数") from None
        if count <= 0:
            raise ValueError("爬取数量必须是正整数")
        return keyword, count

    def _start_download(self):
        """Slot for the confirm button: validate the inputs, then download."""
        try:
            keyword, count = self._parse_request(
                self.textEdit.toPlainText(), self.textEdit_2.toPlainText())
        except ValueError as err:
            # Bad input is reported in the status bar instead of raising
            # inside a Qt slot.
            self.statusbar.showMessage(str(err))
            return
        try:
            DownLoad(keyword, count)
        except Exception as err:
            # GUI boundary: an exception escaping a slot can terminate a
            # PyQt5 application, so report the failure and keep the window.
            self.statusbar.showMessage("下载失败: " + str(err))

    def retranslateUi(self, MainWindow):
        """Set the window title and the texts of the buttons and labels."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.pushButton.setText(_translate("MainWindow", "确认"))
        self.label.setText(_translate("MainWindow", "搜索关键词"))
        self.label_2.setText(_translate("MainWindow", "爬取数量"))
        self.pushButton_2.setText(_translate("MainWindow", "取消"))
if __name__ == "__main__":
    import sys

    # Script entry point: create the application, build the form on a main
    # window, show it and run the event loop until the window is closed.
    app = QtWidgets.QApplication(sys.argv)
    main_window = QtWidgets.QMainWindow()
    form = Ui_MainWindow()
    form.setupUi(main_window)
    main_window.show()

    # Hand the event loop's return code back to the shell.
    exit_code = app.exec_()
    sys.exit(exit_code)