Hippopx 网站高清原图下载
关于Hippopx
官网:https://www.hippopx.com/ 。Hippopx是一个免版权图库网站,收录了20万张以上的免费CC0授权相片,图片质量很高。高清摄影图片涵盖人物、动物、风景、名胜、美食、旅游等类别,都可以免费下载。
准备工作
在开始之前,确保你已经安装了Python和以下库:
- requests
- lxml
- execjs(通过 PyExecJS 安装)
pip install requests lxml PyExecJS
分析目标网站
在首页输入关键词mountain进入搜索结果页,结果页点击图片进入图片详情页,
点击下载原图跳到了一个新的地址:
https://i0.hippopx.com/photos/174/613/190/mountain-landscape-mountains-landscape-steinweg-3c4a787b5f1473f26f31e3234a764984.jpg
在详情页匹配到预览图地址:
https://i0.hippopx.com/photos/174/613/190/mountain-landscape-mountains-landscape-steinweg-preview.jpg
对比两个地址,唯一的区别是:原图地址中的"preview"被替换成了一串32位的十六进制字符串,怀疑这是加密参数。注意到下载按钮的id为"dl",
按F12打开开发者工具,在源代码面板的“global.js”里搜索dl,找到关键加密函数dld:
/**
 * Download-button handler quoted from the site's global.js: computes the
 * token `b` and navigates the browser to the resulting download URL.
 *
 * `g`, `site`, `lang` and `u` are not defined in this excerpt; `g` is
 * presumably an element-lookup shorthand (e.g. getElementById) — confirm
 * against the full global.js.
 *
 * @param {*} f - Value inserted verbatim into the download path; its meaning
 *   is not visible from this excerpt.
 */
function dld(f) {
// Content of element "s", split on "x" into two parts.
var e = g("s").innerHTML.split("x");
// Value of element "t".
var d = g("t").value;
// Source of element "i"; read here but not used again in this function.
var c = g("i").src;
// Token built from the integer value of "t" and the two parts of "s".
var b = u(parseInt(d), parseInt(e[0]), parseInt(e[1]));
// URL: <site>/<lang>/download/<last "-"-separated piece of the current URL>-<f>-<token>
var a = site + "/" + lang + "/download/" + location.href.split("-").pop() + "-" + f + "-" + b;
location.href = a
}
打断点调试,发现b就是我们要找的加密参数。于是把相关的JS代码抠出来并改写dld函数,一步步调试,完成该加密参数的构造。下面是完整的加密JS代码(保存为global.js):
/**
 * Adds two integers with 32-bit wrap-around.
 *
 * The operands are split into 16-bit halves so the intermediate sums never
 * lose precision before being recombined into a signed 32-bit result.
 *
 * @param {number} d - First addend.
 * @param {number} a - Second addend.
 * @returns {number} Signed 32-bit sum.
 */
function k(d, a) {
  var lowSum = (d & 0xffff) + (a & 0xffff);
  // The carry out of the low half is folded into the high half.
  var highSum = (d >> 16) + (a >> 16) + (lowSum >> 16);
  return (highSum << 16) | (lowSum & 0xffff);
}
/**
 * Rotates a 32-bit integer to the left.
 *
 * @param {number} c - Value to rotate.
 * @param {number} a - Number of bit positions.
 * @returns {number} Signed 32-bit rotated value.
 */
function z(c, a) {
  var shiftedLeft = c << a;
  // Bits pushed out on the left re-enter on the right.
  var wrappedBits = c >>> (32 - a);
  return shiftedLeft | wrappedBits;
}
/**
 * Shared step used by the round functions E, j, O and L: sums four operands
 * with k(), rotates the sum with z(), then adds `m` with k().
 *
 * @param {number} r - Mixed value supplied by the calling round function.
 * @param {number} h - Added to `r` first.
 * @param {number} m - Added after the rotation.
 * @param {number} a - Added to `d`.
 * @param {number} p - Rotation amount passed to z().
 * @param {number} d - Added to `a`.
 * @returns {number} Result of the final k() call.
 */
function M(r, h, m, a, p, d) {
  var stateSum = k(h, r);
  var inputSum = k(a, d);
  var rotated = z(k(stateSum, inputSum), p);
  return k(rotated, m);
}
/**
 * Round function: selects bits from `e` where `d` is set and from `m` where
 * `d` is clear, then delegates to M().
 *
 * @returns {number} Whatever M() returns for the mixed value.
 */
function E(s, d, e, m, a, p, h) {
  var mixed = (d & e) | (~d & m);
  return M(mixed, s, d, a, p, h);
}
/**
 * Round function: selects bits from `d` where `m` is set and from `e` where
 * `m` is clear, then delegates to M().
 *
 * @returns {number} Whatever M() returns for the mixed value.
 */
function j(s, d, e, m, a, p, h) {
  var mixed = (d & m) | (e & ~m);
  return M(mixed, s, d, a, p, h);
}
/**
 * Round function: XORs `d`, `e` and `m` together, then delegates to M().
 *
 * @returns {number} Whatever M() returns for the mixed value.
 */
function O(s, d, e, m, a, p, h) {
  var mixed = d ^ e ^ m;
  return M(mixed, s, d, a, p, h);
}
/**
 * Round function: XORs `e` with (`d` OR NOT `m`), then delegates to M().
 *
 * @returns {number} Whatever M() returns for the mixed value.
 */
function L(s, d, e, m, a, p, h) {
  var mixed = e ^ (d | ~m);
  return M(mixed, s, d, a, p, h);
}
/**
 * Packs a string into an array of 32-bit words, four characters per word,
 * lowest byte first. Only the low 8 bits of each character code are used.
 *
 * @param {string} f - Input string (one byte per character expected).
 * @returns {number[]} Packed words.
 */
function N(f) {
  var words = [];
  var i;
  // Pre-size the array and zero the slots that exist so far; later slots are
  // created on demand by the |= below (undefined | x === x).
  words[(f.length >> 2) - 1] = void 0;
  for (i = 0; i < words.length; i += 1) {
    words[i] = 0;
  }
  for (i = 0; i < f.length; i += 1) {
    words[i >> 2] |= (f.charCodeAt(i) & 255) << ((i % 4) * 8);
  }
  return words;
}
/**
 * Core digest routine: pads the word array `f` in place, then processes it in
 * 16-word chunks and returns the four resulting 32-bit state words.
 *
 * The initial state constants and the per-step shift amounts/constants below
 * match the MD5 block transform (RFC 1321).
 *
 * @param {number[]} f - Message packed into 32-bit words (as produced by N());
 *   modified in place by the padding step.
 * @param {number} c - Message length in bits.
 * @returns {number[]} The four state words [p, A, S, o].
 */
function I(f, c) {
// Padding: set the 0x80 marker byte right after the last message bit ...
f[c >> 5] |= 128 << c % 32,
// ... and store the bit length in word 14 of the last 16-word chunk.
f[14 + (c + 64 >>> 9 << 4)] = c;
// p, A, S, o hold the running state; s, R, Q, t hold a per-chunk snapshot.
var C, s, R, Q, t, p = 1732584193, A = -271733879, S = -1732584194, o = 271733878;
for (C = 0; C < f.length; C += 16) {
// Snapshot the state before processing this chunk.
s = p,
R = A,
Q = S,
t = o,
// One nested expression performing all steps for this chunk: the innermost
// assignments run first (E steps), followed by the j, O and L steps.
A = L(A = L(A = L(A = L(A = O(A = O(A = O(A = O(A = j(A = j(A = j(A = j(A = E(A = E(A = E(A = E(A, S = E(S, o = E(o, p = E(p, A, S, o, f[C], 7, -680876936), A, S, f[C + 1], 12, -389564586), p, A, f[C + 2], 17, 606105819), o, p, f[C + 3], 22, -1044525330), S = E(S, o = E(o, p = E(p, A, S, o, f[C + 4], 7, -176418897), A, S, f[C + 5], 12, 1200080426), p, A, f[C + 6], 17, -1473231341), o, p, f[C + 7], 22, -45705983), S = E(S, o = E(o, p = E(p, A, S, o, f[C + 8], 7, 1770035416), A, S, f[C + 9], 12, -1958414417), p, A, f[C + 10], 17, -42063), o, p, f[C + 11], 22, -1990404162), S = E(S, o = E(o, p = E(p, A, S, o, f[C + 12], 7, 1804603682), A, S, f[C + 13], 12, -40341101), p, A, f[C + 14], 17, -1502002290), o, p, f[C + 15], 22, 1236535329), S = j(S, o = j(o, p = j(p, A, S, o, f[C + 1], 5, -165796510), A, S, f[C + 6], 9, -1069501632), p, A, f[C + 11], 14, 643717713), o, p, f[C], 20, -373897302), S = j(S, o = j(o, p = j(p, A, S, o, f[C + 5], 5, -701558691), A, S, f[C + 10], 9, 38016083), p, A, f[C + 15], 14, -660478335), o, p, f[C + 4], 20, -405537848), S = j(S, o = j(o, p = j(p, A, S, o, f[C + 9], 5, 568446438), A, S, f[C + 14], 9, -1019803690), p, A, f[C + 3], 14, -187363961), o, p, f[C + 8], 20, 1163531501), S = j(S, o = j(o, p = j(p, A, S, o, f[C + 13], 5, -1444681467), A, S, f[C + 2], 9, -51403784), p, A, f[C + 7], 14, 1735328473), o, p, f[C + 12], 20, -1926607734), S = O(S, o = O(o, p = O(p, A, S, o, f[C + 5], 4, -378558), A, S, f[C + 8], 11, -2022574463), p, A, f[C + 11], 16, 1839030562), o, p, f[C + 14], 23, -35309556), S = O(S, o = O(o, p = O(p, A, S, o, f[C + 1], 4, -1530992060), A, S, f[C + 4], 11, 1272893353), p, A, f[C + 7], 16, -155497632), o, p, f[C + 10], 23, -1094730640), S = O(S, o = O(o, p = O(p, A, S, o, f[C + 13], 4, 681279174), A, S, f[C], 11, -358537222), p, A, f[C + 3], 16, -722521979), o, p, f[C + 6], 23, 76029189), S = O(S, o = O(o, p = O(p, A, S, o, f[C + 9], 4, -640364487), A, S, f[C + 12], 11, -421815835), p, A, f[C + 15], 16, 530742520), o, p, 
f[C + 2], 23, -995338651), S = L(S, o = L(o, p = L(p, A, S, o, f[C], 6, -198630844), A, S, f[C + 7], 10, 1126891415), p, A, f[C + 14], 15, -1416354905), o, p, f[C + 5], 21, -57434055), S = L(S, o = L(o, p = L(p, A, S, o, f[C + 12], 6, 1700485571), A, S, f[C + 3], 10, -1894986606), p, A, f[C + 10], 15, -1051523), o, p, f[C + 1], 21, -2054922799), S = L(S, o = L(o, p = L(p, A, S, o, f[C + 8], 6, 1873313359), A, S, f[C + 15], 10, -30611744), p, A, f[C + 6], 15, -1560198380), o, p, f[C + 13], 21, 1309151649), S = L(S, o = L(o, p = L(p, A, S, o, f[C + 4], 6, -145523070), A, S, f[C + 11], 10, -1120210379), p, A, f[C + 2], 15, 718787259), o, p, f[C + 9], 21, -343485551),
// Add the snapshot back into the state (32-bit wrapping addition via k()).
p = k(p, s),
A = k(A, R),
S = k(S, Q),
o = k(o, t)
}
return [p, A, S, o]
}
/**
 * Unpacks an array of 32-bit words into a string, four characters per word,
 * lowest byte first (the inverse layout of N()).
 *
 * @param {number[]} f - Words to unpack.
 * @returns {string} String with one character (code 0-255) per byte.
 */
function P(f) {
  var chars = [];
  var wordIndex;
  var shift;
  for (wordIndex = 0; wordIndex < f.length; wordIndex += 1) {
    for (shift = 0; shift < 32; shift += 8) {
      chars.push(String.fromCharCode((f[wordIndex] >>> shift) & 255));
    }
  }
  return chars.join("");
}
/**
 * Converts a string to its UTF-8 byte sequence, represented as a string with
 * one character per byte.
 *
 * @param {*} a - Value to encode; non-strings are stringified by
 *   encodeURIComponent.
 * @returns {string} Byte string.
 */
function b(a) {
  // Percent-encoding yields UTF-8 bytes; unescape() turns each %XX back into
  // a single character with that code.
  var percentEncoded = encodeURIComponent(a);
  return unescape(percentEncoded);
}
/**
 * Hashes a byte string: packs it with N(), digests it with I(), and unpacks
 * the resulting words with P().
 *
 * @param {string} a - Byte string (one character per byte).
 * @returns {string} Raw digest as a byte string.
 */
function J(a) {
  var words = N(a);
  var bitLength = 8 * a.length;
  var digestWords = I(words, bitLength);
  return P(digestWords);
}
/**
 * Hex-encodes a byte string using lowercase digits, two digits per character.
 * Only the low 8 bits of each character code are encoded.
 *
 * @param {string} f - Byte string.
 * @returns {string} Lowercase hexadecimal representation.
 */
function K(f) {
  var HEX_DIGITS = "0123456789abcdef";
  var hex = "";
  var i;
  var code;
  for (i = 0; i < f.length; i += 1) {
    code = f.charCodeAt(i);
    hex += HEX_DIGITS.charAt((code >>> 4) & 15) + HEX_DIGITS.charAt(code & 15);
  }
  return hex;
}
/**
 * Raw digest of a text value: encodes it to bytes with b(), then hashes the
 * bytes with J().
 *
 * @param {*} a - Value to hash.
 * @returns {string} Raw digest as returned by J().
 */
function G(a) {
  var bytes = b(a);
  return J(bytes);
}
/**
 * Hex digest of a text value: the raw digest from G(), hex-encoded by K().
 *
 * @param {*} a - Value to hash.
 * @returns {string} Hex digest as returned by K().
 */
function D(a) {
  var rawDigest = G(a);
  return K(rawDigest);
}
/**
 * Keyed raw digest: byte-encodes both arguments with b() and passes them to
 * H().
 *
 * NOTE(review): H is not defined in this listing, so calling q() would throw
 * unless H is supplied elsewhere. u() only ever calls ec() with a single
 * argument, so this path is not reached by dld().
 *
 * @param {*} c - First value (passed to H first).
 * @param {*} a - Second value.
 * @returns {*} Whatever H() returns.
 */
function q(c, a) {
  var firstBytes = b(c);
  var secondBytes = b(a);
  return H(firstBytes, secondBytes);
}
/**
 * Keyed hex digest: the result of q() hex-encoded by K().
 *
 * @param {*} c - First value forwarded to q().
 * @param {*} a - Second value forwarded to q().
 * @returns {string} Hex string as returned by K().
 */
function w(c, a) {
  var rawDigest = q(c, a);
  return K(rawDigest);
}
/**
 * Digest dispatcher.
 *
 * - key `a` truthy, `c` truthy  -> q(a, d)
 * - key `a` truthy, `c` falsy   -> w(a, d)
 * - key `a` falsy,  `c` truthy  -> G(d)
 * - key `a` falsy,  `c` falsy   -> D(d)
 *
 * @param {*} d - Value to hash.
 * @param {*} [a] - Optional key; selects the keyed variants when truthy.
 * @param {*} [c] - When truthy, selects q()/G() instead of w()/D().
 * @returns {string} Result of the selected function.
 */
function ec(d, a, c) {
  if (a) {
    return c ? q(a, d) : w(a, d);
  }
  if (c) {
    return G(d);
  }
  return D(d);
}
/**
 * Builds the 32-character download token from three positive numbers.
 *
 * Two digests are computed with ec(): one of `a` and one of `f + c`. The
 * token takes even positions from the reversed first digest and odd positions
 * from the second digest.
 *
 * @param {number} a - First number (hashed on its own).
 * @param {number} f - Second number.
 * @param {number} c - Third number (added to `f` before hashing).
 * @returns {string} The token, or "" unless all three inputs are > 0.
 */
function u(a, f, c) {
  var token = "";
  if (!(a > 0 && f > 0 && c > 0)) {
    return token;
  }
  var firstDigest = ec(a);
  var secondDigest = ec(f + c);
  var firstReversed = firstDigest.split("").reverse();
  var secondChars = secondDigest.split("");
  for (var i = 0; i < 32; i++) {
    token += i % 2 === 0 ? firstReversed[i] : secondChars[i];
  }
  return token;
}
/**
 * Entry point called from Python: computes the download token for one image.
 *
 * All three numbers are parsed explicitly as base 10, so the result does not
 * depend on the JavaScript runtime's handling of "0x" prefixes or leading
 * zeros.
 *
 * @param {string} f - Image size as "<width>x<height>", e.g. "7599x3215".
 * @param {string} d - Hidden number found after the upload date on the detail
 *   page (XPath //span//input/@value), e.g. "1510317657".
 * @returns {string} Token returned by u(); "" when u() rejects the inputs.
 */
function dld(f, d) {
  var dims = f.split("x");
  return u(parseInt(d, 10), parseInt(dims[0], 10), parseInt(dims[1], 10));
}
Python调用JS(保存为encrypt.py)
import execjs
# Compiled JS contexts keyed by script path, so the script is read and
# compiled once instead of once per image.
_JS_CONTEXT_CACHE = {}


def _load_js_context(js_path='global.js'):
    """Return the compiled execjs context for ``js_path``, compiling it on first use."""
    context = _JS_CONTEXT_CACHE.get(js_path)
    if context is None:
        with open(js_path, encoding='utf-8') as f:
            context = execjs.compile(f.read())
        _JS_CONTEXT_CACHE[js_path] = context
    return context


def encrypt_download_url(preview_url, reso, hidden_value):
    """Turn a preview-image URL into the original-image download URL.

    The JS function ``dld`` in ``global.js`` computes a token from the image
    resolution and the hidden page value; that token takes the place of the
    ``preview`` placeholder in the preview URL.

    Args:
        preview_url: URL of the preview image (contains ``preview``).
        reso: Image resolution as ``"<width>x<height>"``.
        hidden_value: Hidden numeric value scraped from the detail page.

    Returns:
        The download URL. If ``preview_url`` contains no ``preview``
        placeholder it is returned unchanged.
    """
    token = _load_js_context().call("dld", reso, hidden_value)
    # Replace only the LAST "preview": the image slug (or any other part of
    # the URL) may itself contain that word and must stay intact.
    head, marker, tail = preview_url.rpartition('preview')
    if not marker:
        return preview_url
    return head + token + tail
if __name__ == '__main__':
    # Manual smoke test: print the download URL for one known sample image.
    sample = {
        "preview_url": "https://i0.hippopx.com/photos/88/296/962/clouds-dark-landscape-mountain-range-preview.jpg",
        "reso": "7599x3215",
        "hidden_value": "1510317657",
    }
    print(encrypt_download_url(**sample))
批量下载
import os.path
import traceback
import requests
from lxml import etree
from encrypt import encrypt_download_url
from spider import Spider
def run(query, page, save_dir):
    """Download every original image listed on one search-result page.

    Fetches the search page, visits each image's detail page to collect the
    preview URL, resolution and hidden value, derives the download URL with
    ``encrypt_download_url`` and hands the resulting tasks to ``Spider``.

    Args:
        query: Search keyword.
        page: 1-based search-result page number.
        save_dir: Directory the images are saved into.

    Raises:
        requests.RequestException: If the search page itself cannot be fetched.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }
    url = "https://www.hippopx.com/en/search"
    params = {
        'q': query,
        'page': page
    }
    # Without a timeout a stalled connection would hang the crawl forever.
    timeout = 30

    resp = requests.get(url, params=params, headers=headers, timeout=timeout)
    resp.raise_for_status()
    html = etree.HTML(resp.text)
    src_list = html.xpath('//ul//a[@itemprop="url"]/@href')
    print(src_list)

    task_list = []
    for src in src_list:
        # One broken detail page must not discard the tasks already collected
        # for this search page, so failures are logged and skipped.
        try:
            detail_resp = requests.get(url=src, headers=headers, timeout=timeout)
            detail_resp.raise_for_status()
            detail = etree.HTML(detail_resp.text)
            preview_url = detail.xpath('//*[@id="i"]/@src')[0]
            reso = detail.xpath('//*[@id="s"]/text()')[0]
            size_str = detail.xpath('//ul[@class="info_list"]/li[2]/span[@class="info_detail"]/text()')[0].strip()
            hidden_value = detail.xpath('//*[@id="t"]/@value')[0]
            download_url = encrypt_download_url(preview_url, reso, hidden_value)
        except Exception:
            print(traceback.format_exc())
            continue
        save_name = download_url.split('/')[-1]
        print(download_url, size_str)
        save_path = os.path.join(save_dir, save_name)
        task_list.append((download_url, save_path))

    if not task_list:
        return
    # Make sure the target directory exists before the downloads start
    # (harmless if Spider also creates it).
    os.makedirs(save_dir, exist_ok=True)
    Spider(task_list=task_list, thread_num=3).run()
if __name__ == '__main__':
    # Crawl search-result pages 1..333 for a fixed keyword; a failing page is
    # logged and skipped so the remaining pages are still processed.
    keyword = "mountain"
    for page in range(1, 334):
        print(f'下载第{page}页数据')
        try:
            run(
                query=keyword,
                page=page,
                save_dir=f'./{keyword}'
            )
        except Exception:
            # Catch Exception rather than everything, so Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the crawl.
            print(traceback.format_exc())