import selenium
from selenium import webdriver
import time
from threading import Thread
import requests
import os
from selenium.webdriver.support import ui
from selenium.webdriver.common.by import By
def seek_link(browser, address, page):
    """Scrape the wallpaper links on the current page, then download them.

    Reads every ``p_newhero_item`` element inside the wallpaper container of
    the page currently loaded in *browser*, builds a ``{name: url}`` mapping,
    clicks the "next page" button, and finally hands the mapping to
    ``download_picture``.

    Args:
        browser: Selenium WebDriver already showing the wallpaper page.
        address: Directory the pictures are saved into (passed through).
        page: 1-based page number, passed through for progress messages.
    """
    container = browser.find_element(
        By.XPATH, '/html/body/div[3]/div/div/div[2]/div[2]/div[2]')
    items = container.find_elements(By.CLASS_NAME, 'p_newhero_item')

    number = 1  # page-wide suffix counter used to disambiguate duplicate names
    picture_list = {}
    for item in items:
        try:
            picture_url = item.find_element(
                By.CSS_SELECTOR, 'a[rel]').get_attribute('href')
            base_name = item.find_element(By.CSS_SELECTOR, 'h4').text
        except Exception as e:
            # Best effort: an item without a link or title is reported and skipped.
            print(e)
            continue
        if not picture_url:
            # get_attribute returns None when the anchor has no href.
            continue

        # Keep generating suffixed names until one is really unused, so a
        # renamed entry can never overwrite an earlier picture.
        picture_name = base_name
        while picture_name in picture_list:
            picture_name = base_name + str(number)
            number += 1
        picture_list[picture_name] = picture_url

    # Click "next page" before downloading so the browser is ready for the
    # next scraper while this one is busy with network I/O.
    try:
        button = browser.find_element(
            By.CSS_SELECTOR, 'a[class="downpage"][alt="下一页"]')
        # A JavaScript click is used instead of button.click().
        browser.execute_script("arguments[0].click();", button)
    except Exception as e:
        print(e)

    # Download the pictures collected from this page.
    download_picture(picture_list=picture_list, address=address, page=page)
def download_picture(picture_list, address, page):
    """Download every picture in *picture_list* into the *address* directory.

    Each picture is fetched with ``requests`` and written to
    ``<address>/<picture_name>.jpg``. A failure on one picture (network
    error, bad HTTP status, or file error) is printed and the remaining
    pictures are still processed.

    Args:
        picture_list: Mapping of picture name to picture URL.
        address: Target directory; it must already exist.
        page: Page number, used only in the progress messages.
    """
    print("正在打印第%s页" % page)
    for picture_name, picture_url in picture_list.items():
        print("正在保存" + picture_name)
        try:
            # The timeout keeps a stalled connection from hanging the thread.
            r = requests.get(picture_url, timeout=30)
            # Do not save an HTTP error page as if it were an image.
            r.raise_for_status()
            # The context manager closes the file even if the write fails.
            with open(os.path.join(address, picture_name + ".jpg"), 'wb') as f:
                f.write(r.content)
        except Exception as e:
            print("保存" + picture_name + "失败:")
            print(e)
        else:
            print("保存" + picture_name + "成功")
    print("打印第%s页完成" % page)
def thread_main(browser, address, thread_list, page_count=None):
    """Append one not-yet-started ``seek_link`` thread per page to *thread_list*.

    The threads are only created here; the caller is responsible for
    starting and joining them.

    Args:
        browser: Selenium WebDriver shared by all threads.
        address: Directory the pictures are saved into.
        thread_list: List that receives the created ``Thread`` objects.
        page_count: Number of pages to create threads for. When omitted, the
            module-level ``pages`` value is used.
    """
    if page_count is None:
        # Fall back to the module-level setting defined by the script entry point.
        page_count = pages
    # Page numbers passed to seek_link are 1-based.
    for page_number in range(1, page_count + 1):
        thread_list.append(
            Thread(target=seek_link, args=(browser, address, page_number))
        )
if __name__ == '__main__':
    # 1. Wallpaper page to scrape.
    url = r'https://pvp.qq.com/web201605/wallpaper.shtml'
    # Output directory. makedirs with exist_ok=True works on every platform
    # and is a no-op when the directory is already there; a shell "mkdir"
    # never raises, so its failures could not be caught.
    address = r"王者荣耀"
    os.makedirs(address, exist_ok=True)
    # Number of pages to walk through; read by thread_main as a module global.
    pages = 27
    # 2. Open the browser.
    browser = webdriver.Chrome()
    try:
        browser.get(url)
        # 3. Create one scraper thread per page.
        thread_list = []
        thread_main(browser, address, thread_list)
        for t in thread_list:
            t.start()
            # All threads share one browser, so starts are staggered; each
            # thread scrapes the current page and clicks "next" before the
            # following thread begins.
            time.sleep(8)
        # Wait for every worker thread to finish.
        for t in thread_list:
            t.join()
        print("全部完成")
    finally:
        # Always close the browser, even if scraping failed part-way.
        browser.quit()
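# Article title: scraping the Honor of Kings official-site wallpapers with Python and the Selenium framework
# (Blog page footer: "latest recommended article published 2022-03-02 03:29:45")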