# 导入re正则库,用于正则匹配像主名和像主id
import re
# 导入requests库,用于图片抓取和页面访问
import requests
# lxml库用于解析html页面
from lxml import etree
# 导入os库用于创建文件夹,以及保存图片
import os
# Browser-like request headers passed to the page requests in this file.
headers = {
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # The value used to read "page=67^&channelid": the caret is presumably the
    # cmd.exe escape for "&" left over from a copied cURL command, not part of
    # the URL, so it is dropped here.
    'Referer': 'http://diglweb.zjlib.cn:8081/zjtsg/zgsmcgzx/cygltotal.jsp?page=67&channelid=91740',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}
# Image download helper
def request_download(filename, IMAGE_URL, num):
    """Download one image and store it as ./image/<filename>/<num>.png.

    Args:
        filename: Name of the sub-folder under ./image that receives the file.
        IMAGE_URL: URL of the image to fetch.
        num: Number used as the file name (the .png suffix is appended).

    A response with an HTTP error status is reported and skipped, so the body
    of an error page is never written to disk as if it were an image.
    Network errors raised by requests (including timeouts) propagate to the
    caller.
    """
    # A timeout keeps one stalled connection from blocking the whole crawl.
    r = requests.get(IMAGE_URL, timeout=30)
    if not r.ok:
        print(f'skip {IMAGE_URL}: HTTP {r.status_code}')
        return
    path = f'./image/{filename}'
    # exist_ok creates the folder only when missing, without a separate
    # existence check.
    os.makedirs(path, exist_ok=True)
    with open(f'{path}/{num}.png', 'wb') as f:
        f.write(r.content)
# Collect and download every image that belongs to one portrait subject
def getOnePeople(filename, keyNum):
    """Gather all image links of one subject, then download them in order.

    Listing pages are requested one after another (page=1, 2, ...) until a
    page yields no image links. The links from all pages are collected into a
    single list and then downloaded, numbered from 1.

    Args:
        filename: Folder name under ./image, passed through to
            request_download.
        keyNum: Subject number inserted into the search query of the listing
            URL.
    """
    # Per the original author's note, each listing page carries twenty image
    # links; all of them end up in this one list.
    all_images = []
    # The upper bound only caps the loop; the break below is the normal exit.
    for page in range(1, 10000):
        response = requests.get(
            f'http://diglweb.zjlib.cn:8081/zjtsg/zgsmcgzx/cypicgl.jsp?page={page}&channelid=91743&searchword=%CF%F1%D6%F7%B1%E0%BA%C5={keyNum}',
            headers=headers,
            verify=False,
            # Without a timeout a stalled server would hang the crawl forever.
            timeout=30,
        )
        html = etree.HTML(response.text)
        if html is None:
            # Nothing parseable came back; treat it as the end of the listing.
            break
        page_images = html.xpath("//tr[@style='font-size:12px;']//img/@src")
        if not page_images:
            break
        all_images.extend(page_images)
    # Download the images and number them starting at 1.
    for n, one in enumerate(all_images, start=1):
        request_download(filename, one, n)
# Look up the subject behind one record ID and download all of its images
def getmap(record):
    """Fetch the detail page of one record and download that subject's images.

    The page is searched with two regular expressions for the subject number
    and the subject name. Both are printed and then handed to getOnePeople.

    Args:
        record: Record ID sent as the `record` query parameter.

    Raises:
        IndexError: If the subject number or the name is not found in the
            page text.
    """
    params = {
        'channelid': '91741',
        'record': record,
    }
    # Request the detail page for this record.
    response = requests.get(
        'http://diglweb.zjlib.cn:8081/zjtsg/zgsmcgzx/cyxl.jsp',
        headers=headers,
        params=params,
        verify=False,
        # Without a timeout a stalled server would hang the crawl forever.
        timeout=30,
    )
    text = response.text
    # Raw strings keep "\d" a regex escape instead of an invalid string escape.
    keynums = re.findall(r'td width="285" valign="top">(.*?\d+)</td>', text)
    names = re.findall(r'<td width="285" align="left" valign="bottom">(.*?)</td>', text)
    if not keynums or not names:
        # Same exception type the unguarded [0] used to raise, with context.
        raise IndexError(f'record {record}: subject number or name not found in page')
    keynum = keynums[0]
    name = names[0]
    print(name)
    print(keynum)
    # Download every image of this subject.
    getOnePeople(name, keynum)
# Crawl every record ID (1 through 1330) and download each subject's images
for i in range(1, 1331):
    try:
        getmap(i)
    except Exception as exc:
        # Best effort: one failing record must not stop the crawl, but report
        # which one failed. Catching Exception rather than using a bare except
        # lets Ctrl-C (KeyboardInterrupt) still stop the script.
        print(f'record {i} failed: {exc!r}')
05-21
05-21
05-21
05-21
“相关推荐”对你有帮助么?
-
非常没帮助
-
没帮助
-
一般
-
有帮助
-
非常有帮助
提交