闲着也是闲着:
目标网站:https://image.so.com
代码:
# _*_ coding:utf-8 _*_
import os
import requests
from time import sleep
from urllib.parse import urlencode
# HTTP headers sent with every request: impersonate a desktop Chrome browser
# and supply the Referer/Host values that image.so.com checks.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36'
    ),
    # The page we claim to have navigated from; the site restricts on this.
    'Referer': 'https://image.so.com/z?ch=beauty',
    # The host being accessed.
    'Host': 'image.so.com',
}
def parse_cover_page(page):
    """Fetch one page of the beauty-channel cover listing and process each group.

    Requests https://image.so.com/zj with offset ``sn=page``, then, for every
    group in the JSON ``list``, builds the group-detail URL and hands it to
    ``parse_one_group_pic`` together with the group title.

    :param page: listing offset (``sn`` query parameter), e.g. 0, 30, 60, ...
    :return: None (all work happens via side effects in the callee)
    """
    # Let requests build/encode the query string instead of str.format.
    # NOTE(review): verify=False disables TLS certificate checking (original
    # behavior kept); timeout added so a stalled server cannot hang forever.
    resp = requests.get(
        url='https://image.so.com/zj',
        params={'ch': 'beauty', 'sn': page, 'listtype': 'new', 'temp': 1},
        headers=headers,
        verify=False,
        timeout=10,
    )
    sleep(2)  # be polite to the server between listing requests
    json_res = resp.json()
    if 'list' in json_res:
        for cover_image in json_res.get('list'):
            group_title = cover_image.get('group_title')  # title of the photo set
            gro_id = cover_image.get('id')                # id of the photo set
            # Build the group-detail endpoint URL for this set.
            url = 'https://image.so.com/zvj?' + urlencode({'ch': 'beauty', 'id': gro_id})
            print(url)
            parse_one_group_pic(url, group_title)
def parse_one_group_pic(url, group_title):
    """Download every picture of one group into D:/pictures/<group_title>/.

    :param url: group-detail endpoint (https://image.so.com/zvj?...)
    :param group_title: title of the set; used as the directory name —
        NOTE(review): assumed to contain no characters invalid in a Windows
        path — TODO confirm against real API data.
    :return: None (writes .jpg files as a side effect)
    """
    resp = requests.get(url=url, headers=headers, verify=False, timeout=10)
    sleep(0.5)
    data = resp.json()  # fix: parse the JSON body once, not on every access
    if 'list' not in data:
        return
    # fix: create the target directory once, before the loop, race-free.
    folder = 'D:/pictures/{nam}'.format(nam=group_title)
    os.makedirs(folder, exist_ok=True)
    for pic in data['list']:
        pic_url = pic.get('pic_url')
        pic_index = pic.get('index')
        if not pic_url:
            continue  # fix: skip entries without a downloadable URL
        with open('{folder}/{idx}.jpg'.format(folder=folder, idx=pic_index), 'wb') as f:
            f.write(requests.get(url=pic_url, headers=headers, verify=False, timeout=10).content)
        sleep(0.2)  # throttle between individual image downloads
if __name__ == "__main__":
for page in range(0,91,30):
parse_cover_page(page)