# 实习僧网站信息获取之字体解密
'''
@Author:风夏
@Gender:man
@Hobby:coding
@Time:2020-5-8 15:59
思路:
1.先爬取想要获取的信息页数,这里演示了2页
2.读取源代码,解析字体文件,得出code与字体的对应关系
3.将字体替换到之前爬取到的页面文件中
4.再从本地文件中对要获取的信息进行解析
5.本次获取7页数据并将获取的图片保存
'''
import re
import requests
from fontTools.ttLib import TTFont
import time
from lxml import etree
from urllib.request import urlretrieve
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
'Cookie': '__jsluid_s=5779fb0e1a075da3949b1e2bec8906d6; gr_user_id=9d9fe708-6d4d-47db-a208-bcaaf20bd294; MEIQIA_TRACK_ID=1lQGkxmu7QV60w6sUbREPF1XFUQ; SXS_XSESSION_ID=2|1:0|10:1607700819|15:SXS_XSESSION_ID|48:N2RjYjkyZTUtMzRmNS00YjIzLWJkMjItZDU0ZDZkZTYwOTZj|43d765c144beef7ef26c7bc53e173ecd9d3bdcd5ca4ad4109196756fd5ec26db; SXS_XSESSION_ID_EXP=2|1:0|10:1607700819|19:SXS_XSESSION_ID_EXP|16:MTYxMDI5MjgxOQ==|d6f6a9c89261743190141d48a4445473bec9550122079adf7774b0fc6bec08a5; affefdgx=usr_ynz64mjjtrzz; sxs_usr=2|1:0|10:1607700819|7:sxs_usr|24:dXNyX3luejY0bWpqdHJ6eg==|642eb1c9de29541a77bb7c1c8ed61ae8ed4d211770bf6ef13c41c2c191aa0fe2; xyz_usr=2|1:0|10:1607700819|7:xyz_usr|40:bzhSbncwQzUyS3V4T3FueFdTMUc1bVlTRF84bw==|f4a4e0fa6bc5ef5bf3cbc60393297f32c94e02e653ca7f156d3578b1a613fa97; utm_source_first=pc_baidusem_pinpai; uuid=2eb07cef-1183-2617-a6b7-4c596e11d9db; Hm_lvt_03465902f492a43ee3eb3543d81eba55=1607522041,1607608843,1607761339; uid1=null; uid2=1fccd5c8-d971-5683-a5d6-50d2d8a75e3a; search=; Hm_lpvt_03465902f492a43ee3eb3543d81eba55=1607761346; SXS_VISIT_XSESSION_ID_V3.0="2|1:0|10:1607761346|26:SXS_VISIT_XSESSION_ID_V3.0|48:MDg1NmYyYTktMjBiMi00ZDlmLWI1NDgtNzhkMGRlZWJmZDYw|3abbc24551777ca4344a731c26c88efa97253f099a1a22ccf290061f7b56ba49"; SXS_VISIT_XSESSION_I