python-字体反爬(一)

目标网站:http://glidedsky.com/level/web/crawler-font-puzzle-1

网上关于这网页的详细解析挺多的,就不一一说明了。

1.ttf字体文件以base64编码的形式内嵌在页面里(严格来说是编码而不是加密),需要先解码,再保存到本地。

2.观察得到字形名称是英文的zero,one,two.....,需要转变为数字0,1,2.....,然后取每个数字在字形顺序列表中的下标,该下标就是页面上这个数字对应的真实数字。

 

直接上代码。

import base64
from fontTools.ttLib import TTFont
import requests
import re

def spider(i):
    """Fetch page ``i`` of the font puzzle and return the sum of its real numbers.

    The page embeds a base64-encoded TTF font whose glyph order is shuffled,
    so the digits in the HTML are not the digits that get rendered. This
    function decodes the font, rebuilds the "shown digit -> real digit"
    table from the glyph order, translates every number on the page and
    adds them up.

    Args:
        i: Page number, interpolated into the ``page`` query parameter.

    Returns:
        The integer sum of all decoded numbers found on the page.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        ValueError: If the page contains no embedded base64 font.
        KeyError: If the font uses a glyph name that is not an English digit
            word, or the page shows a digit missing from the mapping.

    Side effects:
        Overwrites ``gs-1.ttf`` in the current working directory.
    """
    url = f"http://glidedsky.com/level/web/crawler-font-puzzle-1?page={i}"
    # NOTE(review): hard-coded login session cookie captured from a browser;
    # presumably it expires, so replace it with your own before running.
    headers = {
        'Cookie': '_ga=GA1.2.1509539893.1587014930; Hm_lvt_020fbaad6104bcddd1db12d6b78812f6=1587014930,1587108479; _gid=GA1.2.1660844838.1587200384; footprints=eyJpdiI6InhiY0dWRVQ0dFM2XC9QYjZsTDZNOG5RPT0iLCJ2YWx1ZSI6IlBLNjUyZmFwU2pnaFBXekJySllONkx4alhrV2xJSDNiajV3N29oR0xwdXlmeDlzK1R4S1wvdHZ5UGliRGk4RjZwIiwibWFjIjoiZmFmMmQ3NTU1MzhmYmJkODkyZmM2NTRlNjRkMmViM2VhZGMxYWUzYjYwNjJlZWQzYjViMTA3MjVlNDYxYjJiMiJ9; remember_web_59ba36addc2b2f9401580f014c7f58ea4e30989d=eyJpdiI6IlI1TUZsQ0NSVkpxYWZTeWhZSXVYeUE9PSIsInZhbHVlIjoiMWs5S0p3NUhic0UxdjU3OHdXVkR5NVwvMXFDZ3NUTmdrQ0E5TzE5N2MzamR0a1pLVzBJVXU0cmVUVHRLbFJFOFwvV1NuYnpEQVBQVnVLS2ZtOHo5RlwvdmlFWmd2UXF5TFE0MVMzV0FkckVRbmUxVlNDek1NK3cwRjkyWjRUejlPRlk3S0VoNHJycG5yRWNjRTBiTHRQZjk3RUFkZkxvSWFSZTFLQjBpVmRjU2RZPSIsIm1hYyI6ImNlZDUxYWJhNmVhODY1MzllYWNiYjQyOWUxOWE1OTk2NGY3NjdkMjFkOTc4NjBlZWZlNmFmMDAwZDg2MTJmNTgifQ%3D%3D; XSRF-TOKEN=eyJpdiI6InpuMFFUZmY1RHhrYlwva24rTUdYQmR3PT0iLCJ2YWx1ZSI6InpaSDRWUTk1MFVQeG1Mc1FydllzRkVaQ0tURTBaWktIcWVDcmZMWGZXRUt1NVBtRlFrUmhvS2w1Y1NPSlRwUUQiLCJtYWMiOiI4NzNkMjA0MDBlMGUzNTljZWViYWQ4MWIxOTc1ZTc0MTVhZjFjMzdiNjhkZjA1NGQ5NmRjN2NmYWI3YzVmMjYyIn0%3D; glidedsky_session=eyJpdiI6ImVESlVHR3ZkMktkZ0NncTcyVzZraUE9PSIsInZhbHVlIjoiRXZqV0lcL3pBdjAwOGJnYkt3d3hUbXJBcnVZMFZqTWh2S0ExQ1NzZ1RjREtsd3cxd0cwSTgzZG95VGw1MzRtMjgiLCJtYWMiOiIyYzE1MTAxOWE3MjMxMDdkNDdlNzg0NzEyNWQyNDY3ZWI0MzdhMjYyN2RlZTNjODcyZTFkNjRjYjM0ZmVjMWFhIn0%3D; _gat_gtag_UA_75859356_3=1; Hm_lpvt_020fbaad6104bcddd1db12d6b78812f6=1587291491'

    }
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    resp = response.text

    # Pull the base64 payload of the embedded font out of the page's CSS.
    font_match = re.search(r'\(data:font;charset=utf-8;base64,(.*?)\) format', resp, re.S)
    if font_match is None:
        raise ValueError(
            f"page {i}: no embedded base64 font found in the response "
            "(is the session cookie still valid?)"
        )

    # Decode the payload and store it on disk so fontTools can open it.
    data = base64.b64decode(font_match.group(1))
    with open('gs-1.ttf', 'wb') as f:
        f.write(data)

    # Parse the font; close it afterwards so the file handle is released.
    fonts = TTFont('gs-1.ttf')
    try:
        # fonts.saveXML('gs-1.xml')
        # Glyph names in font order, skipping the first entry (presumably
        # '.notdef'), e.g.
        # ['five', 'three', 'nine', 'zero', 'eight', 'seven', 'one', 'four', 'six', 'two']
        enNum_list = fonts.getGlyphOrder()[1:]
    finally:
        fonts.close()

    base_dict = {'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,
                 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9}
    # Digit named by each glyph, in font order, e.g. [5, 3, 9, 0, 8, 7, 1, 4, 6, 2]
    glyph_digits = [base_dict[name] for name in enNum_list]
    # Shown digit -> real digit: the real digit is the glyph's position in
    # the font, e.g. {'5': '0', '3': '1', '9': '2', '0': '3', ...}
    online_dict = {str(digit): str(position)
                   for position, digit in enumerate(glyph_digits)}

    # Every number cell on the page, still in its obfuscated form.
    base_num = re.findall(r'<div class="col-md-1">.*?(\d+).*?</div>', resp, re.S)

    # Translate each shown number digit by digit, then add them all up.
    return sum(
        int(''.join(online_dict[digit] for digit in shown))
        for shown in base_num
    )

if __name__ == '__main__':
    # Walk pages 1..1000, keeping a running grand total and printing the
    # page number with the total so far after every page.
    running_total = 0
    for page in range(1, 1001):
        running_total += spider(page)
        print(page, running_total)

但是速度真的不敢恭维。

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值