爬取企名片 数据保存

企名片的接口返回的是加密后的文本数据,需要先用下面的 JS 代码解密。

JS 代码(文件名:qm.js)

/**
 * Decrypt an encrypted payload and parse it as JSON.
 *
 * The input is base64-decoded with `decode`, decrypted by `o` with a
 * hard-coded key, chaining disabled (mode 0, so the IV argument is never
 * read) and padding scheme 1, and the resulting text is parsed as JSON.
 *
 * @param {string} e - Base64 text of the encrypted payload.
 * @returns {*} The value produced by `JSON.parse` on the decrypted text.
 * @throws {Error} If `e` is not valid base64.
 * @throws {SyntaxError} If the decrypted text is not valid JSON.
 */
function s(e) {
    return JSON.parse(o("5e5062e82f15fe4ca9d24bc5", decode(e), 0, 0, "012345677890123", 1));
}

/**
 * Decode a base64 string into a "binary string" (one character per byte).
 *
 * ASCII whitespace is ignored and one or two trailing `=` padding characters
 * are accepted.
 *
 * @param {string} t - Base64 text (coerced with `String`).
 * @returns {string} Decoded bytes, each as a character with code 0-255.
 * @throws {Error} If the input has an impossible length or contains a
 *     character outside the base64 alphabet.
 */
function decode(t) {
    var l = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
    var f = /[\t\n\f\r ]/g;
    var e = (t = String(t).replace(f, "")).length;
    // Padding is only legal when the length is a multiple of four.
    if (e % 4 == 0) {
        e = (t = t.replace(/==?$/, "")).length;
    }
    if (e % 4 == 1 || /[^+a-zA-Z0-9/]/.test(t)) {
        // The original called an undefined helper here; throw explicitly.
        throw new Error("Invalid character: the string to be decoded is not correctly encoded.");
    }
    // Accumulate 6 bits per character; emit a byte for every character
    // except the first of each group of four.
    for (var n, r, i = 0, o = "", a = -1; ++a < e; )
        r = l.indexOf(t.charAt(a)), n = i % 4 ? 64 * n + r : r, i++ % 4 && (o += String.fromCharCode(255 & n >> (-2 * i & 6)));
    return o;
}

/**
 * Block cipher over "binary strings", processing 8 bytes per block.
 *
 * NOTE(review): the lookup tables and control flow look like the widely
 * circulated JavaScript DES / Triple-DES routine (single pass for keys of up
 * to 8 characters, three passes for longer keys) - confirm before relying on
 * that for interoperability.
 *
 * @param {string} e - Key; more than 8 characters selects the three-pass
 *     key schedule.
 * @param {string} t - Message, one character per byte.
 * @param {*} i - Truthy to encrypt, falsy to decrypt.
 * @param {number} n - 1 chains each block with the previous one starting
 *     from `a`; any other value processes blocks independently.
 * @param {string} a - Initial chaining block; only read when `n` is 1.
 * @param {number} o - Padding scheme: 0/falsy appends eight NUL characters,
 *     1 appends the pad count as character codes (encrypt only) and strips
 *     it again after decrypting, 2 appends eight spaces.
 * @returns {string} Result with trailing NUL characters removed (in both
 *     directions). When decrypting, the bytes are additionally interpreted
 *     as UTF-8 via `decodeURIComponent(escape(...))`.
 */
function o(e, t, i, n, a, o) {
    var s, c, r, l, d, u, h, p, f, m, v, g, y, b,
        // Eight 64-entry substitution tables used by the round function.
        C = new Array(16843776,0,65536,16843780,16842756,66564,4,65536,1024,16843776,16843780,1024,16778244,16842756,16777216,4,1028,16778240,16778240,66560,66560,16842752,16842752,16778244,65540,16777220,16777220,65540,0,1028,66564,16777216,65536,16843780,4,16842752,16843776,16777216,16777216,1024,16842756,65536,66560,16777220,1024,4,16778244,66564,16843780,65540,16842752,16778244,16777220,1028,66564,16843776,1028,16778240,16778240,0,65540,66560,0,16842756),
        _ = new Array(-2146402272,-2147450880,32768,1081376,1048576,32,-2146435040,-2147450848,-2147483616,-2146402272,-2146402304,-2147483648,-2147450880,1048576,32,-2146435040,1081344,1048608,-2147450848,0,-2147483648,32768,1081376,-2146435072,1048608,-2147483616,0,1081344,32800,-2146402304,-2146435072,32800,0,1081376,-2146435040,1048576,-2147450848,-2146435072,-2146402304,32768,-2146435072,-2147450880,32,-2146402272,1081376,32,32768,-2147483648,32800,-2146402304,1048576,-2147483616,1048608,-2147450848,-2147483616,1048608,1081344,0,-2147450880,32800,-2147483648,-2146435040,-2146402272,1081344),
        w = new Array(520,134349312,0,134348808,134218240,0,131592,134218240,131080,134217736,134217736,131072,134349320,131080,134348800,520,134217728,8,134349312,512,131584,134348800,134348808,131592,134218248,131584,131072,134218248,8,134349320,512,134217728,134349312,134217728,131080,520,131072,134349312,134218240,0,512,131080,134349320,134218240,134217736,512,0,134348808,134218248,131072,134217728,134349320,8,131592,131584,134217736,134348800,134218248,520,134348800,131592,8,134348808,131584),
        k = new Array(8396801,8321,8321,128,8396928,8388737,8388609,8193,0,8396800,8396800,8396929,129,0,8388736,8388609,1,8192,8388608,8396801,128,8388608,8193,8320,8388737,1,8320,8388736,8192,8396928,8396929,129,8388736,8388609,8396800,8396929,129,0,0,8396800,8320,8388736,8388737,1,8396801,8321,8321,128,8396929,129,1,8192,8388609,8193,8396928,8388737,8193,8320,8388608,8396801,128,8388608,8192,8396928),
        x = new Array(256,34078976,34078720,1107296512,524288,256,1073741824,34078720,1074266368,524288,33554688,1074266368,1107296512,1107820544,524544,1073741824,33554432,1074266112,1074266112,0,1073742080,1107820800,1107820800,33554688,1107820544,1073742080,0,1107296256,34078976,33554432,1107296256,524544,524288,1107296512,256,33554432,1073741824,34078720,1107296512,1074266368,33554688,1073741824,1107820544,34078976,1074266368,256,33554432,1107820544,1107820800,524544,1107296256,1107820800,34078720,0,1074266112,1107296256,524544,33554688,1073742080,524288,0,1074266112,34078976,1073742080),
        T = new Array(536870928,541065216,16384,541081616,541065216,16,541081616,4194304,536887296,4210704,4194304,536870928,4194320,536887296,536870912,16400,0,4194320,536887312,16384,4210688,536887312,16,541065232,541065232,0,4210704,541081600,16400,4210688,541081600,536870912,536887296,16,541065232,4210688,541081616,4194304,16400,536870928,4194304,536887296,536870912,16400,536870928,541081616,4210688,541065216,4210704,541081600,0,541065232,16,16384,541065216,4210704,16384,4194320,536887312,0,541081600,536870912,4194320,536887312),
        A = new Array(2097152,69206018,67110914,0,2048,67110914,2099202,69208064,69208066,2097152,0,67108866,2,67108864,69206018,2050,67110912,2099202,2097154,67110912,67108866,69206016,69208064,2097154,69206016,2048,2050,69208066,2099200,2,67108864,2099200,67108864,2099200,2097152,67110914,67110914,69206018,69206018,2,2097154,67108864,67110912,2097152,69208064,2050,2099202,69208064,2050,67108866,69208066,69206016,2099200,0,2,69208066,0,2099202,69206016,2048,67108866,67110912,2048,2097154),
        N = new Array(268439616,4096,262144,268701760,268435456,268439616,64,268435456,262208,268697600,268701760,266240,268701696,266304,4096,64,268697600,268435520,268439552,4160,266240,262208,268697664,268701696,4160,0,0,268697664,268435520,268439552,266304,262144,266304,262144,268701696,4096,64,268697664,4096,266304,268439552,64,268435520,268697600,268697664,268435456,262144,268439616,0,268701760,262208,268435520,268697600,268439552,268439616,0,268701760,266240,266240,4160,4160,262208,268435456,268701696),
        // Key schedule: expands the key string into 32 round-key words per
        // 8-byte key chunk (one chunk for short keys, three otherwise).
        $ = function(e) {
            for (var t, i, n,
                a = new Array(0,4,536870912,536870916,65536,65540,536936448,536936452,512,516,536871424,536871428,66048,66052,536936960,536936964),
                o = new Array(0,1,1048576,1048577,67108864,67108865,68157440,68157441,256,257,1048832,1048833,67109120,67109121,68157696,68157697),
                s = new Array(0,8,2048,2056,16777216,16777224,16779264,16779272,0,8,2048,2056,16777216,16777224,16779264,16779272),
                c = new Array(0,2097152,134217728,136314880,8192,2105344,134225920,136323072,131072,2228224,134348800,136445952,139264,2236416,134356992,136454144),
                r = new Array(0,262144,16,262160,0,262144,16,262160,4096,266240,4112,266256,4096,266240,4112,266256),
                l = new Array(0,1024,32,1056,0,1024,32,1056,33554432,33555456,33554464,33555488,33554432,33555456,33554464,33555488),
                d = new Array(0,268435456,524288,268959744,2,268435458,524290,268959746,0,268435456,524288,268959744,2,268435458,524290,268959746),
                u = new Array(0,65536,2048,67584,536870912,536936448,536872960,536938496,131072,196608,133120,198656,537001984,537067520,537004032,537069568),
                h = new Array(0,262144,0,262144,2,262146,2,262146,33554432,33816576,33554432,33816576,33554434,33816578,33554434,33816578),
                p = new Array(0,268435456,8,268435464,0,268435456,8,268435464,1024,268436480,1032,268436488,1024,268436480,1032,268436488),
                f = new Array(0,32,0,32,1048576,1048608,1048576,1048608,8192,8224,8192,8224,1056768,1056800,1056768,1056800),
                m = new Array(0,16777216,512,16777728,2097152,18874368,2097664,18874880,67108864,83886080,67109376,83886592,69206016,85983232,69206528,85983744),
                v = new Array(0,4096,134217728,134221824,524288,528384,134742016,134746112,16,4112,134217744,134221840,524304,528400,134742032,134746128),
                g = new Array(0,4,256,260,0,4,256,260,1,5,257,261,1,5,257,261),
                y = e.length > 8 ? 3 : 1,
                b = new Array(32 * y),
                // Per-round flag: truthy rotates by two bits, falsy by one.
                C = new Array(0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0),
                _ = 0, w = 0, k = 0; k < y; k++) {
                // Read the next 8 key characters as two 32-bit words.
                var x = e.charCodeAt(_++) << 24 | e.charCodeAt(_++) << 16 | e.charCodeAt(_++) << 8 | e.charCodeAt(_++) , T = e.charCodeAt(_++) << 24 | e.charCodeAt(_++) << 16 | e.charCodeAt(_++) << 8 | e.charCodeAt(_++);
                x ^= (n = 252645135 & (x >>> 4 ^ T)) << 4, x ^= n = 65535 & ((T ^= n) >>> -16 ^ x), x ^= (n = 858993459 & (x >>> 2 ^ (T ^= n << -16))) << 2, x ^= n = 65535 & ((T ^= n) >>> -16 ^ x), x ^= (n = 1431655765 & (x >>> 1 ^ (T ^= n << -16))) << 1, x ^= n = 16711935 & ((T ^= n) >>> 8 ^ x), n = (x ^= (n = 1431655765 & (x >>> 1 ^ (T ^= n << 8))) << 1) << 8 | (T ^= n) >>> 20 & 240, x = T << 24 | T << 8 & 16711680 | T >>> 8 & 65280 | T >>> 24 & 240, T = n;
                for (var A = 0; A < C.length; A++)
                    C[A] ? (x = x << 2 | x >>> 26, T = T << 2 | T >>> 26) : (x = x << 1 | x >>> 27, T = T << 1 | T >>> 27), T &= -15, t = a[(x &= -15) >>> 28] | o[x >>> 24 & 15] | s[x >>> 20 & 15] | c[x >>> 16 & 15] | r[x >>> 12 & 15] | l[x >>> 8 & 15] | d[x >>> 4 & 15], i = u[T >>> 28] | h[T >>> 24 & 15] | p[T >>> 20 & 15] | f[T >>> 16 & 15] | m[T >>> 12 & 15] | v[T >>> 8 & 15] | g[T >>> 4 & 15], n = 65535 & (i >>> 16 ^ t), b[w++] = t ^ n, b[w++] = i ^ n << 16;
            }
            return b;
        }(e),
        L = 0, S = t.length, z = 0, I = 32 == $.length ? 3 : 9;
    // p holds (start, end, step) triples describing how each pass walks the
    // round keys; decryption walks them in the reverse direction.
    p = 3 == I ? i ? new Array(0,32,2) : new Array(30,-2,-2) : i ? new Array(0,32,2,62,30,-2,64,96,2) : new Array(94,62,-2,32,64,2,30,-2,-2), 2 == o ? t += " " : 1 == o ? i && (r = 8 - S % 8, t += String.fromCharCode(r, r, r, r, r, r, r, r), 8 === r && (S += 8)) : o || (t += "\0\0\0\0\0\0\0\0");
    var B = "" , F = "";
    // With chaining enabled, load the initial chaining block from `a`.
    for (1 == n && (f = a.charCodeAt(L++) << 24 | a.charCodeAt(L++) << 16 | a.charCodeAt(L++) << 8 | a.charCodeAt(L++), v = a.charCodeAt(L++) << 24 | a.charCodeAt(L++) << 16 | a.charCodeAt(L++) << 8 | a.charCodeAt(L++), L = 0); L < S; ) {
        // Read one 8-byte block, apply the initial bit shuffle, then run
        // the passes described by p.
        for (u = t.charCodeAt(L++) << 24 | t.charCodeAt(L++) << 16 | t.charCodeAt(L++) << 8 | t.charCodeAt(L++), h = t.charCodeAt(L++) << 24 | t.charCodeAt(L++) << 16 | t.charCodeAt(L++) << 8 | t.charCodeAt(L++), 1 == n && (i ? (u ^= f, h ^= v) : (m = f, g = v, f = u, v = h)), u ^= (r = 252645135 & (u >>> 4 ^ h)) << 4, u ^= (r = 65535 & (u >>> 16 ^ (h ^= r))) << 16, u ^= r = 858993459 & ((h ^= r) >>> 2 ^ u), u ^= r = 16711935 & ((h ^= r << 2) >>> 8 ^ u), u = (u ^= (r = 1431655765 & (u >>> 1 ^ (h ^= r << 8))) << 1) << 1 | u >>> 31, h = (h ^= r) << 1 | h >>> 31, c = 0; c < I; c += 3) {
            for (y = p[c + 1], b = p[c + 2], s = p[c]; s != y; s += b)
                l = h ^ $[s], d = (h >>> 4 | h << 28) ^ $[s + 1], r = u, u = h, h = r ^ (_[l >>> 24 & 63] | k[l >>> 16 & 63] | T[l >>> 8 & 63] | N[63 & l] | C[d >>> 24 & 63] | w[d >>> 16 & 63] | x[d >>> 8 & 63] | A[63 & d]);
            // Swap the halves after every pass.
            r = u, u = h, h = r;
        }
        // Undo the initial shuffle, apply chaining, and append the 8 output
        // bytes; F is flushed into B every 512 bytes.
        h = h >>> 1 | h << 31, h ^= r = 1431655765 & ((u = u >>> 1 | u << 31) >>> 1 ^ h), h ^= (r = 16711935 & (h >>> 8 ^ (u ^= r << 1))) << 8, h ^= (r = 858993459 & (h >>> 2 ^ (u ^= r))) << 2, h ^= r = 65535 & ((u ^= r) >>> 16 ^ h), h ^= r = 252645135 & ((u ^= r << 16) >>> 4 ^ h), u ^= r << 4, 1 == n && (i ? (f = u, v = h) : (u ^= m, h ^= g)), F += String.fromCharCode(u >>> 24, u >>> 16 & 255, u >>> 8 & 255, 255 & u, h >>> 24, h >>> 16 & 255, h >>> 8 & 255, 255 & h), 512 == (z += 8) && (B += F, F = "", z = 0);
    }
    // Trailing NULs are stripped in both directions.
    if (B = (B += F).replace(/\0*$/g, ""), !i) {
        if (1 === o) {
            // Scheme 1: the last byte is the pad count (at most 8).
            var j = 0;
            (S = B.length) && (j = B.charCodeAt(S - 1)), j <= 8 && (B = B.substring(0, S - j));
        }
        // Reinterpret the byte string as UTF-8 text.
        B = decodeURIComponent(escape(B));
    }
    return B;
}

Python 代码(主脚本)

from bs4 import BeautifulSoup
import  execjs
import requests
# 随机设备
from sjsb import User_Agent

# Scrape the Qimingpian home-page recommendations: fetch the encrypted feed,
# decrypt it with the JS helper in ./qm.js, download every linked article and
# write one record per article to ./企名片.txt.

# Seconds to wait for any single HTTP request; without a timeout `requests`
# can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Article hosts the script knows how to parse, as
# (URL prefix, CSS selector of the article body, force UTF-8 decoding).
ARTICLE_SOURCES = (
    ('https://media', '.article  article', True),
    ('https://mp', '.rich_media_wrp', False),
)


def _page_headers():
    """Return browser-like headers for fetching an article page.

    `User_Agent()` is called on every invocation, so each page request gets
    its own User-Agent value.
    """
    return {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh,zh-CN;q=0.9,zh-HK;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent(),
        'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }


# Headers for the JSON API request.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh,zh-CN;q=0.9,zh-HK;q=0.8',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://www.qimingpian.com',
    'Pragma': 'no-cache',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'cross-site',
    'User-Agent': User_Agent(),
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

data = {
    'unionid': '',
}

response = requests.post(
    'https://vipapi.qimingpian.cn/HomePage/recommendInfo',
    headers=headers,
    data=data,
    timeout=REQUEST_TIMEOUT,
).json()
# The feed itself arrives as an encrypted string under this key.
encrypt_data = response['encrypt_data']

# Load the JS decryptor and call its `s` function on the encrypted string.
with open('./qm.js', 'r', encoding='utf-8') as f:
    jscode = f.read()
ctx = execjs.compile(jscode).call('s', encrypt_data)

# The context manager guarantees the output file is flushed and closed even
# if a request or parse step raises part-way through.
with open('./企名片.txt', 'w', encoding='utf-8') as fp:
    for title in ctx:
        URL_s = title['link_url']

        # Pick the parsing rule whose prefix matches this article URL.
        source = next(
            (entry for entry in ARTICLE_SOURCES if URL_s.startswith(entry[0])),
            None,
        )
        if source is None:
            print("url不对,跳过")
            continue
        _prefix, selector, force_utf8 = source

        page = requests.get(url=URL_s, headers=_page_headers(), timeout=REQUEST_TIMEOUT)
        if force_utf8:
            # Without this the Chinese text of these pages decodes as mojibake.
            page.encoding = 'utf-8'

        soup = BeautifulSoup(page.text, 'lxml')
        matches = soup.select(selector)
        if not matches:
            # Page layout differs from what the selector expects; skip the
            # article instead of aborting the whole run with an IndexError.
            print(f"no element matches {selector!r} at {URL_s}, skipping")
            continue
        neirong = matches[0].get_text()

        bid = {
            '标头': title['content'],
            '详细URL': title['link_url'],
            '时间': title['open_time'],
            '图片URL': title['link_img'],
            '内容': neirong,
        }
        fp.write(f"{bid}" + '\n')

Python 随机 User-Agent 模块(文件名:sjsb.py)

#随机设备的使用模块
import random

# Pool of User-Agent strings that User_Agent() picks from at random.
# Every entry must end with a comma: two adjacent string literals without one
# are silently concatenated into a single, invalid User-Agent.
browsers = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',

    'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.2228.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36',
    'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.1234.567.890 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_3_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.123 Safari/537.36',
    'Mozilla/5.0 (Windows NT 7.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.123.45',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/90.1234.567.890 Chrome/91.0.4472.164 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 12; Pixel 5 Build/RQ3A.211001.001) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.105 Mobile Safari/537.36',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/92.123.456.78 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (iPad; CPU OS 14_4_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Coast/604.3.6 Mobile/15E148 Safari/7654.3.2',
    'Mozilla/5.0 (Linux; Android 11; SM-T510 Build/RP1A.200720.012) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/14.2 Chrome/87.0.4280.141 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.123.456',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:34.0) Gecko/20100101 Firefox/34.123.456 SeaMonkey/2.31',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    'Mozilla/5.0 (Linux; U; Android 9; en-gb; Nokia 6.1 Plus Build/PKQ1.180716.001) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/66.0.3359.126 Mobile Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36 Edge/88.0.705.81',
    'Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 930) like Gecko',
    'Mozilla/5.0 (Linux; Android 8.0.0; SM-G950F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.2 Chrome/68.0.3440.106 Mobile Safari/537.36',
    'Mozilla/5.0 (Windows NT 5.1; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.123.456.78',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
]

def User_Agent():
    """Return one User-Agent string drawn at random from `browsers`."""
    return random.choice(browsers)

  • 6
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值