网站介绍:这是一个 Cos(Cosplay)图片网站。这类网站很容易从互联网上消失,为了把数据保存下来,我们用爬虫把它抓取下来。
源代码:
import urllib.request
from urllib.parse import urljoin
from lxml import etree
import re
import requests
# Album links on a list page: captures the href path without its ".html"
# suffix. The capture excludes double quotes so a match cannot run past the end
# of one href attribute into later markup, and the dot is escaped so that only
# a literal ".html" suffix is accepted.
x1 = re.compile(r'<li><a href="([^"]*?)\.html">')
# Large image on an album page: captures the single-quoted src of the <img>
# tag that carries id='bigimg'. Excluding quotes from the capture keeps an
# earlier <img src='...'> tag from being swallowed into the match.
c1 = re.compile(r"<img src='([^']*)' id='bigimg'")
# Page title: captures the text between <title> and </title> on one line.
d1 = re.compile(r'<title>(.*?)</title>')
# Crawl the gallery list pages, follow every album link found on each of them,
# and save the large image of each album sub-page into the "cosplay" folder.
#
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed from the statement order; in particular the download/write
# steps are assumed to run once per album sub-page and the two closing prints
# once per list page -- confirm against the intended behaviour.

_SITE_ROOT = "http://www.cosplay8.com"
_LIST_URL_PREFIX = _SITE_ROOT + "/pic/chinacos/list_22_"
# Browser-like User-Agent sent with every HTML page request.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'}
# Path separators and characters that Windows forbids in file names; the page
# title comes from the remote site, so it must not be used as a path verbatim.
_UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _fetch_html(url, timeout):
    """Return the body of *url* decoded as UTF-8, or None if fetching fails.

    Any failure (connection error, HTTP error, timeout, undecodable body) is
    reported on stdout and turned into None so the caller can skip the page
    instead of continuing with a missing or stale response.
    """
    request = urllib.request.Request(url=url, headers=_HEADERS)
    try:
        # The context manager closes the connection even when read() fails.
        with urllib.request.urlopen(request, timeout=timeout) as response:
            return response.read().decode('utf-8')
    except Exception as err:
        print("出现异常" + str(err))
        return None


# List pages 9..98: the same pages the previous range(8, 98) loop requested
# after adding 1 to its index inside the loop body.
for page_no in range(9, 99):
    list_html = _fetch_html(_LIST_URL_PREFIX + str(page_no) + ".html", 1000)
    if list_html is None:
        continue

    # Every album link found on this list page (path without ".html").
    for album_path in x1.findall(list_html):
        # Album sub-pages "_2" .. "_10", as before; the un-suffixed first page
        # of an album is not requested.
        for sub_no in range(2, 11):
            detail_url = _SITE_ROOT + album_path + '_' + str(sub_no) + '.html'
            detail_html = _fetch_html(detail_url, 100)
            if detail_html is None:
                continue

            image_paths = c1.findall(detail_html)  # site-relative image paths
            titles = d1.findall(detail_html)       # page title(s)
            for image_path in image_paths:
                print(_SITE_ROOT + image_path)
            if not image_paths or not titles:
                # Nothing to download or no name to save it under; skipping
                # avoids re-saving the image left over from an earlier page.
                continue

            # When several matches exist the last one wins, as before.
            image_url = _SITE_ROOT + image_paths[-1]
            safe_title = _UNSAFE_NAME_CHARS.sub('_', titles[-1])

            try:
                reply = requests.get(image_url, timeout=5)
                # Do not store an HTTP error page under a .jpg name.
                reply.raise_for_status()
            except Exception as err:
                print("出现异常" + str(err))
                continue

            try:
                # Output location; adjust the folder to taste.
                with open('cosplay\\' + safe_title + '.jpg', mode='wb') as f:
                    f.write(reply.content)
                print('正在保存壁纸')
                print('图片下载' + str(sub_no) + '张')
            except Exception as err:
                print("出现异常" + str(err))

    print('图片下载结束')
    print('打印' + str(page_no) + '页')
下面展示一些成果: