# 这里爬取的是该网站对外开放的一个推广接口，接口返回的图片均可直接访问。
import requests as req
from bs4 import BeautifulSoup
import re
import os
# Switch: True routes every request through the proxy defined below,
# False leaves the proxy mapping unset.
is_proxy = True

# Account used to authenticate against the proxy.
# NOTE(review): these credentials are hard-coded in source control — consider
# loading them from the environment or a local, untracked config file.
user = '23234'
password = 'zasdcx123456'

# A single authenticated proxy endpoint is used for both URL schemes.
http = "http://" + user + ":" + password + "@10.191.131.43:3128"

# Mapping passed as `proxies=` to requests; None when the proxy is switched off.
proxy = {"http": http, "https": http} if is_proxy else None

# Browser-like User-Agent sent with every request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
}

# Promotion endpoint that is POSTed to once per wallpaper series.
domain = "http://fullstar.zhhainiao.com/inst/promotion/kwp/get"
def set_dir(dir_path):
    """Ensure that the directory ``dir_path`` exists.

    Missing parent directories are created as well. Calling this for a
    directory that already exists does nothing.

    Args:
        dir_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory.
        OSError: If the directory cannot be created.
    """
    # exist_ok=True folds the existence check into the creation call, so there
    # is no gap between "check" and "create" in which another process could
    # create the directory and make makedirs fail.
    os.makedirs(dir_path, exist_ok=True)
def get_file_name(file_url):
    """Return the file name found in the last path segment of ``file_url``.

    The name is the trailing run of non-slash characters that contains at
    least one dot, e.g. ``"http://host/a/b.jpg"`` gives ``"b.jpg"``.

    Args:
        file_url: URL (or slash-separated path) to extract the name from.

    Returns:
        The matched file name as a string.

    Raises:
        ValueError: If the last segment of ``file_url`` does not look like a
            file name (for example the URL ends with ``/`` or has no dot).
    """
    reg = r'[^/]+\.[^/]+$'
    match = re.search(reg, file_url)
    if match is None:
        # Without this check the failure surfaces as an AttributeError on
        # None, which says nothing about which URL was at fault.
        raise ValueError("no file name found in URL: {!r}".format(file_url))
    return match.group()
def download_file(file_url, file_save_path, timeout=30):
    """Download ``file_url`` and write the response body to ``file_save_path``.

    The request is sent with the module-level ``proxy`` and ``header``
    settings. A confirmation line is printed once the file has been written.

    Args:
        file_url: URL of the resource to fetch.
        file_save_path: Destination path; an existing file is overwritten.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status (4xx/5xx).
        OSError: If the destination file cannot be written.
    """
    # Without a timeout a stalled connection would block the crawl forever.
    file_res = req.get(file_url, proxies=proxy, headers=header, timeout=timeout)
    # Fail on 4xx/5xx instead of saving an error page under a media file name.
    file_res.raise_for_status()
    with open(file_save_path, mode="wb") as f:
        f.write(file_res.content)
    print("成功写入" + file_save_path)
def err_log(msg):
    """Append ``msg`` as one line to ``err.log`` in the current directory.

    Args:
        msg: Text to record; a trailing newline is added.

    Raises:
        OSError: If the log file cannot be opened or written.
    """
    # The context manager closes (and flushes) the handle on every call; the
    # explicit UTF-8 encoding keeps non-ASCII names writable regardless of
    # the platform's default encoding.
    with open("err.log", "a", encoding="utf-8") as err:
        err.write(msg + '\n')
# Directory that contains this script; every series folder is created beneath
# it. The value does not change between iterations, so it is resolved once.
root = os.path.dirname(os.path.abspath(__file__))

# Query the promotion endpoint for every series id in 4..248 and download the
# image and video of each wallpaper into <root>/<series name>/<item name>/.
for index in range(4, 249):
    try:
        data = {
            "tid1": index,
            "tid2": 1,
            "tod1": 1,
        }
        # The timeout keeps one unresponsive request from stalling the crawl.
        dy_picture_res = req.post(
            domain, proxies=proxy, headers=header, json=data, timeout=30
        ).json()
        wallpaper = dy_picture_res["screen1_wallpapers"]
        series_dir = os.path.join(root, dy_picture_res["name"])
        set_dir(series_dir)
        for item in wallpaper:
            # Reset per item so the error handler never reads an unbound name
            # or reports a path left over from the previous item.
            image_url = video_url = img_path = video_path = None
            try:
                image_url = item["image"]
                video_url = item["video"]
                item_dir = os.path.join(series_dir, item["name"])
                set_dir(item_dir)
                img_path = os.path.join(item_dir, get_file_name(image_url))
                video_path = os.path.join(item_dir, get_file_name(video_url))
                download_file(image_url, img_path)
                download_file(video_url, video_path)
            except Exception as err:
                # Best effort: record the failing item with its cause and
                # carry on with the next wallpaper in the series.
                err_log("{};{}\n{};{}\n{!r}".format(
                    image_url, img_path, video_url, video_path, err))
    except Exception as err:
        # Best effort: a series that cannot be fetched or parsed is recorded
        # rather than silently dropped, then the crawl moves to the next id.
        err_log("series {}: {!r}".format(index, err))
print("end")
# 接口地址：http://fullstar.zhhainiao.com/inst/promotion/kwp/get
# 该网址是元气动态壁纸对外推广的 banner 接口。
# 下载下来的壁纸共有 243 类，1400 张。