Python实现壁纸批量下载
最近想换一些新壁纸,但在网站上一张一张地查找、下载太慢了,所以写了个爬虫来批量下载壁纸。
先看看实现的效果
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/275963e4c92087ebc8d42362d7686739.png)
需要的包:
requests、bs4(第三方库,可用 `pip install requests beautifulsoup4` 安装)、os、re(Python 标准库,无需安装)
源码:
# @Time : 2021/09/02 15:50
# @Author : LuYonghao
import requests
from bs4 import BeautifulSoup
import os
import re
class wallpaper:
    """Interactive batch downloader for the wallpapers of one site category.

    Typical flow: fetch the home page with ``gethtmlurl``, pass it to
    ``gettype`` (which lists the categories, asks the user for a category
    index and a target directory, then calls ``downpic``), which in turn
    walks the category's pages and saves every image via ``getpic``.
    """

    # Seconds to wait on any single HTTP request before giving up, so a
    # stalled connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Display labels (category name + item count), index-aligned with urllist.
        self.typelist = []
        # Category page URLs, index-aligned with typelist.
        self.urllist = []
        # Directory the images are written to; set from user input in gettype.
        self.root = ''

    def gethtmlurl(self, url):
        """Return the decoded body of ``url``, or ``""`` if the request fails.

        Only request-level failures (connection errors, timeouts, non-2xx
        status) are turned into the empty string; anything else propagates.
        """
        try:
            r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            r.raise_for_status()
            # Let requests guess the charset from the body instead of
            # trusting the response headers.
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            return ""

    def getpic(self, html):
        """Download every image referenced on one listing page into ``self.root``.

        Images are looked up inside ``<div class="contlistw mtw">`` and their
        address is read from the ``lazysrc`` attribute. Files that already
        exist are skipped. A failure on one image is reported and does not
        stop the remaining downloads.
        """
        soup = BeautifulSoup(html, 'html.parser')
        container = soup.find("div", class_="contlistw mtw")
        if container is None:
            # Happens for an empty page (failed fetch) or changed markup.
            print("[-]爬取失败{}".format("未找到图片列表"))
            return

        # An empty path means "current directory" rather than an error.
        root = self.root or os.curdir
        try:
            # Created once up front; makedirs also handles nested paths.
            os.makedirs(root, exist_ok=True)
        except OSError as e:
            print("[-]爬取失败{}".format(e))
            return

        for img in container.find_all("img"):
            src = img.get('lazysrc')
            if not src:
                continue  # <img> without a lazy-load address: nothing to fetch
            # Keep everything before ".278" -- presumably this strips a
            # thumbnail-size suffix to get the full image; verify against the site.
            img_url = src.split(".278")[0]
            alt = img_url.split("/")[-1]
            print(img_url)
            print(alt)
            # os.path.join guarantees the file lands inside root even when
            # the user typed the directory without a trailing separator.
            path = os.path.join(root, alt)
            if os.path.exists(path):
                print("[-]文件已存在")
                continue
            try:
                r = requests.get(img_url, timeout=self.REQUEST_TIMEOUT)
                # Do not save an HTTP error page under an image file name.
                r.raise_for_status()
                print(path)
                with open(path, 'wb') as f:
                    f.write(r.content)
                print("[+]文件保存成功")
            except (requests.RequestException, OSError) as e:
                print("[-]爬取失败{}".format(e))

    def gettype(self, html):
        """List the categories found in ``html`` and download the chosen one.

        Category links are read from ``<div class="cl r">``. Each usable link
        contributes a label to ``self.typelist`` and its ``href`` to
        ``self.urllist``. The user is then asked for a category index and a
        storage directory, and ``downpic`` is started.
        """
        soup = BeautifulSoup(html, 'html.parser')
        container = soup.find("div", class_="cl r")
        if container is None:
            print("[-]爬取失败{}".format("未找到分类列表"))
            return

        for data in container.find_all("a"):
            url = data.get("href")
            markup = str(data)
            names = re.findall(r'"catnametit">(.*?)<', markup)
            counts = re.findall(r'"cl">\((.*?)\)', markup)
            if not (url and names and counts):
                continue  # link without name/count markup: not a category entry
            self.typelist.append(names[0] + counts[0])
            self.urllist.append(url)

        for i, element in enumerate(self.typelist):
            print(i, element)

        try:
            a = input("请输入分类编号:")
            self.root = input("请输入文件存储路径:")
            self.downpic(a)
        except (KeyboardInterrupt, EOFError):
            # The user aborted at a prompt or during the download.
            print("[+]退出程序")
        except Exception as e:
            # Top-level boundary of the interactive run: report the reason
            # (bad index, missing pagination, ...) instead of hiding it.
            print("[-]爬取失败{}".format(e))

    def getpage(self, html):
        """Return the page-count text taken from the pagination block of ``html``.

        The value is the second-to-last link text inside
        ``<div id="pageNum">``, returned as a string.

        Raises:
            ValueError: if the pagination block is missing or has fewer than
                two matching links.
        """
        soup = BeautifulSoup(html, 'html.parser')
        pager = soup.find("div", id="pageNum")
        if pager is None:
            raise ValueError("未找到分页信息")
        # Search the whole block so the result does not depend on how the
        # links are nested inside the div.
        numlist = re.findall(r'html">(.*?)<', str(pager))
        if len(numlist) < 2:
            raise ValueError("未找到分页信息")
        return numlist[-2]

    def downpic(self, typeindex):
        """Download all pages of the category at position ``typeindex``.

        ``typeindex`` is converted with ``int`` and used to index
        ``self.urllist``; an invalid value raises ``ValueError`` or
        ``IndexError``.
        """
        typeurl = self.urllist[int(typeindex)]
        html = self.gethtmlurl(typeurl)
        numpages = int(self.getpage(html))
        # Inclusive upper bound: range(1, numpages) would skip the last page.
        for i in range(1, numpages + 1):
            url = "{}/index_{}.html".format(typeurl, i)
            self.getpic(self.gethtmlurl(url))
def main():
    """Fetch the site's home page and start the interactive category download."""
    url = "https://desk.3gbizhi.com"
    # One instance is enough for both fetching the page and parsing it.
    crawler = wallpaper()
    html = crawler.gethtmlurl(url)
    if not html:
        # gethtmlurl returns "" when the request fails; there is nothing
        # to parse in that case.
        print("[-]爬取失败{}".format(url))
        return
    crawler.gettype(html)
# Start the crawler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
运行方式: