使用requests库爬取1688联系方式数据

1688

目的: 爬取供应商界面下的联系方式信息

1

人工步骤

step 1

1

step 2

1

step 3

1

爬取步骤

1. 根据关键字来爬取所有的商品数据

思路

第一步: 我们可以通过按F12打开开发者工具,之后观察数据

发现这个get_premium_offer_list.json符合我们的需求

1

**第二步:** 通过向下滑,我们会发现页面同样会去请求 get_premium_offer_list.json 。比对:

可发现向下滑之后,请求参数的asyncreq会加1

1

操作

我们可以根据api调用工具来测试(记得加上cookie,以及Referer)

1

1

在这里我发现请求参数中只需要beginpage、asyncreq、keywords这几个关键字就可以成功请求

代码
# 请求头部分
payload = ''

# 获取商品数据参数
conn = http.client.HTTPSConnection("data.p4psearch.1688.com")
headers = {
    'Referer': 'https://p4psearch.1688.com/',
    'Cookie': "记得填cookie",
    'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
    'Accept': '*/*',
    'Host': 'data.p4psearch.1688.com',
    'Connection': 'keep-alive'
}

# 页数
pageindex = 1
# 滑动更新页
asyncreq = 1
# 参数data编码
sdata = parse.quote(“关键词”)  # quote()将字符串进行编码

conn.request("GET", "/data/ajax/get_premium_offer_list.json?"
                            "beginpage={}"
                            "&asyncreq={}"
                            "&keywords={}"
                            "&ptid=hr6cfd8516d30252"
                            "&hpageId="
                            "&provinceValue="
                            "&p_rs=true"
                            "&exp="
                            "&spm="
                            "&pageid="
                            "&p4pid="
                            "&salt="
                            "&sign="
                            "&callback="
                            "&_=1696829023464".format(pageindex, asyncreq, sdata), payload, headers)
res = conn.getresponse()
data = res.read()
data = data.decode("utf-8")
return data

2. 根据商品的memberId来获取联系方式

思路

通过观察请求此url,需要sign这个数字签名

1

js逆向

1

观察js如何生成sign

token在Cookie中

1

重要的sign生成部分(js代码)

1


function u(e) {
    function t(e, t) {
        return e << t | e >>> 32 - t
    }

    function n(e, t) {
        var n, o, r, i, s;
        return r = 2147483648 & e,
            i = 2147483648 & t,
            n = 1073741824 & e,
            o = 1073741824 & t,
            s = (1073741823 & e) + (1073741823 & t),
            n & o ? 2147483648 ^ s ^ r ^ i : n | o ? 1073741824 & s ? 3221225472 ^ s ^ r ^ i : 1073741824 ^ s ^ r ^ i : s ^ r ^ i
    }

    function o(e, t, n) {
        return e & t | ~e & n
    }

    function r(e, t, n) {
        return e & n | t & ~n
    }

    function i(e, t, n) {
        return e ^ t ^ n
    }

    function s(e, t, n) {
        return t ^ (e | ~n)
    }

    function a(e, r, i, s, a, p, u) {
        return e = n(e, n(n(o(r, i, s), a), u)),
            n(t(e, p), r)
    }

    function p(e, o, i, s, a, p, u) {
        return e = n(e, n(n(r(o, i, s), a), u)),
            n(t(e, p), o)
    }

    function u(e, o, r, s, a, p, u) {
        return e = n(e, n(n(i(o, r, s), a), u)),
            n(t(e, p), o)
    }

    function c(e, o, r, i, a, p, u) {
        return e = n(e, n(n(s(o, r, i), a), u)),
            n(t(e, p), o)
    }

    function d(e) {
        for (var t, n = e.length, o = n + 8, r = (o - o % 64) / 64, i = 16 * (r + 1), s = new Array(i - 1), a = 0, p = 0; n > p;)
            t = (p - p % 4) / 4,
                a = p % 4 * 8,
                s[t] = s[t] | e.charCodeAt(p) << a,
                p++;
        return t = (p - p % 4) / 4,
            a = p % 4 * 8,
            s[t] = s[t] | 128 << a,
            s[i - 2] = n << 3,
            s[i - 1] = n >>> 29,
            s
    }

    function l(e) {
        var t, n, o = "", r = "";
        for (n = 0; 3 >= n; n++)
            t = e >>> 8 * n & 255,
                r = "0" + t.toString(16),
                o += r.substr(r.length - 2, 2);
        return o
    }

    function f(e) {
        e = e.replace(/\r\n/g, "\n");
        for (var t = "", n = 0; n < e.length; n++) {
            var o = e.charCodeAt(n);
            128 > o ? t += String.fromCharCode(o) : o > 127 && 2048 > o ? (t += String.fromCharCode(o >> 6 | 192),
                t += String.fromCharCode(63 & o | 128)) : (t += String.fromCharCode(o >> 12 | 224),
                t += String.fromCharCode(o >> 6 & 63 | 128),
                t += String.fromCharCode(63 & o | 128))
        }
        return t
    }

    var m, h, g, _, y, v, R, S, w, O = [], E = 7, A = 12, q = 17, b = 22, T = 5, x = 9, N = 14, C = 20, k = 4, J = 11,
        P = 16, L = 23, I = 6, D = 10, j = 15, W = 21;
    for (e = f(e),
             O = d(e),
             v = 1732584193,
             R = 4023233417,
             S = 2562383102,
             w = 271733878,
             m = 0; m < O.length; m += 16)
        h = v,
            g = R,
            _ = S,
            y = w,
            v = a(v, R, S, w, O[m + 0], E, 3614090360),
            w = a(w, v, R, S, O[m + 1], A, 3905402710),
            S = a(S, w, v, R, O[m + 2], q, 606105819),
            R = a(R, S, w, v, O[m + 3], b, 3250441966),
            v = a(v, R, S, w, O[m + 4], E, 4118548399),
            w = a(w, v, R, S, O[m + 5], A, 1200080426),
            S = a(S, w, v, R, O[m + 6], q, 2821735955),
            R = a(R, S, w, v, O[m + 7], b, 4249261313),
            v = a(v, R, S, w, O[m + 8], E, 1770035416),
            w = a(w, v, R, S, O[m + 9], A, 2336552879),
            S = a(S, w, v, R, O[m + 10], q, 4294925233),
            R = a(R, S, w, v, O[m + 11], b, 2304563134),
            v = a(v, R, S, w, O[m + 12], E, 1804603682),
            w = a(w, v, R, S, O[m + 13], A, 4254626195),
            S = a(S, w, v, R, O[m + 14], q, 2792965006),
            R = a(R, S, w, v, O[m + 15], b, 1236535329),
            v = p(v, R, S, w, O[m + 1], T, 4129170786),
            w = p(w, v, R, S, O[m + 6], x, 3225465664),
            S = p(S, w, v, R, O[m + 11], N, 643717713),
            R = p(R, S, w, v, O[m + 0], C, 3921069994),
            v = p(v, R, S, w, O[m + 5], T, 3593408605),
            w = p(w, v, R, S, O[m + 10], x, 38016083),
            S = p(S, w, v, R, O[m + 15], N, 3634488961),
            R = p(R, S, w, v, O[m + 4], C, 3889429448),
            v = p(v, R, S, w, O[m + 9], T, 568446438),
            w = p(w, v, R, S, O[m + 14], x, 3275163606),
            S = p(S, w, v, R, O[m + 3], N, 4107603335),
            R = p(R, S, w, v, O[m + 8], C, 1163531501),
            v = p(v, R, S, w, O[m + 13], T, 2850285829),
            w = p(w, v, R, S, O[m + 2], x, 4243563512),
            S = p(S, w, v, R, O[m + 7], N, 1735328473),
            R = p(R, S, w, v, O[m + 12], C, 2368359562),
            v = u(v, R, S, w, O[m + 5], k, 4294588738),
            w = u(w, v, R, S, O[m + 8], J, 2272392833),
            S = u(S, w, v, R, O[m + 11], P, 1839030562),
            R = u(R, S, w, v, O[m + 14], L, 4259657740),
            v = u(v, R, S, w, O[m + 1], k, 2763975236),
            w = u(w, v, R, S, O[m + 4], J, 1272893353),
            S = u(S, w, v, R, O[m + 7], P, 4139469664),
            R = u(R, S, w, v, O[m + 10], L, 3200236656),
            v = u(v, R, S, w, O[m + 13], k, 681279174),
            w = u(w, v, R, S, O[m + 0], J, 3936430074),
            S = u(S, w, v, R, O[m + 3], P, 3572445317),
            R = u(R, S, w, v, O[m + 6], L, 76029189),
            v = u(v, R, S, w, O[m + 9], k, 3654602809),
            w = u(w, v, R, S, O[m + 12], J, 3873151461),
            S = u(S, w, v, R, O[m + 15], P, 530742520),
            R = u(R, S, w, v, O[m + 2], L, 3299628645),
            v = c(v, R, S, w, O[m + 0], I, 4096336452),
            w = c(w, v, R, S, O[m + 7], D, 1126891415),
            S = c(S, w, v, R, O[m + 14], j, 2878612391),
            R = c(R, S, w, v, O[m + 5], W, 4237533241),
            v = c(v, R, S, w, O[m + 12], I, 1700485571),
            w = c(w, v, R, S, O[m + 3], D, 2399980690),
            S = c(S, w, v, R, O[m + 10], j, 4293915773),
            R = c(R, S, w, v, O[m + 1], W, 2240044497),
            v = c(v, R, S, w, O[m + 8], I, 1873313359),
            w = c(w, v, R, S, O[m + 15], D, 4264355552),
            S = c(S, w, v, R, O[m + 6], j, 2734768916),
            R = c(R, S, w, v, O[m + 13], W, 1309151649),
            v = c(v, R, S, w, O[m + 4], I, 4149444226),
            w = c(w, v, R, S, O[m + 11], D, 3174756917),
            S = c(S, w, v, R, O[m + 2], j, 718787259),
            R = c(R, S, w, v, O[m + 9], W, 3951481745),
            v = n(v, h),
            R = n(R, g),
            S = n(S, _),
            w = n(w, y);
    var H = l(v) + l(R) + l(S) + l(w);
    return H.toLowerCase()
}

代码

生成sign数字签名

# Token: the part of the `_m_h5_tk` cookie value before its first underscore.
token = re.findall('_m_h5_tk=(.*?)_', headers['Cookie'], re.S)[0]
# Fixed parameter (the same value is sent as appKey in the request URL).
s = '12574478'
# Current timestamp in milliseconds, as a string.
a = str(int(time.time() * 1000))
# Text to sign: token & timestamp & fixed parameter & request data.
# (Fixed: this used `self.a`, but the timestamp is the local `a` defined above.)
p = (token + "&" + a + "&" + s + "&" + data)
# Load the JavaScript file that contains the sign function `u`.
with open('./venv/1688.js', 'r', encoding='utf-8') as f:
    ctx = execjs.compile(f.read())
# Call the JavaScript function `u` to obtain the sign value.
sign = ctx.call('u', p)

获取联系方式

# Request setup: the GET request carries an empty body.
payload = ''

# Connection and headers used to fetch the contact information.
conn_phone = http.client.HTTPSConnection("h5api.m.1688.com")
headers_phone = {
    'Referer': 'https://shop1ss72686n0610.1688.com/',
    'Cookie': "",
    'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
    'Accept': '*/*',
    'Host': 'h5api.m.1688.com',
    'Connection': 'keep-alive'
}
# Request data (the page's o.data); it is also one of the inputs to the sign.
data = '{"componentKey":"wp_pc_contactsmall","params":"{\\"memberId\\":\\"%s\\"}"}' % \
                   self.name_memberId_dict[key]
    
# Token: the part of the `_m_h5_tk` cookie value before its first underscore.
# NOTE(review): this reads `headers`, not `headers_phone` — confirm which
# cookie is intended; the token presumably must match the cookie that is sent.
token = re.findall('_m_h5_tk=(.*?)_', headers['Cookie'], re.S)[0]
# Fixed parameter (the same value is sent as appKey below).
s = '12574478'
# Current timestamp in milliseconds, as a string; also sent as `t` below.
a = str(int(time.time() * 1000))
# Text to sign: token & timestamp & fixed parameter & request data.
# (Fixed: this used `self.a`; the signed timestamp must be the same `a`
# that is sent as the `t` query parameter.)
p = (token + "&" + a + "&" + s + "&" + data)
# Load the JavaScript file that contains the sign function `u`.
with open('./venv/1688.js', 'r', encoding='utf-8') as f:
    ctx = execjs.compile(f.read())
# Call the JavaScript function `u` to obtain the sign value.
sign = ctx.call('u', p)
    
parms = '/h5/mtop.alibaba.alisite.cbu.server.pc.moduleasyncservice/1.0/?jsv=2.7.0&appKey=12574478&t={}&sign={}&api=mtop.alibaba.alisite.cbu.server.pc.ModuleAsyncService&v=1.0&type=jsonp&valueType=string&dataType=jsonp&timeout=10000&callback=mtopjsonp1&'.format(a, sign)

# Percent-encode the request data so it can be appended to the query string.
sdata = parse.quote(str(data))
url = parms + 'data=' + sdata

conn_phone.request("GET", url,payload, headers_phone)
res = conn_phone.getresponse()
data = res.read()
# (Fixed: the trailing note used `//`, which is not a Python comment marker.)
data = data.decode("utf-8")  # fetched successfully
关于# sign生成参数之一 o.data

通过在这里打断点获取模板o.data

1

  • 4
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值