猫眼的价格信息使用了woff自定义字体进行混淆:每次请求都会返回渲染当前页面所需的woff文件,并且每次响应中的woff文件都不同。
web平台返回woff文件地址:
@font-face {
font-family: stonefont;
src: url('//vfile.meituan.net/colorstone/b63689f6e4c98b8a05a78c87e6f251f23168.eot');
src: url('//vfile.meituan.net/colorstone/b63689f6e4c98b8a05a78c87e6f251f23168.eot?#iefix') format('embedded-opentype'),
url('//vfile.meituan.net/colorstone/5946834faed5b9cbe282b72a68b5f04e2080.woff') format('woff'); /* woff文件地址 */
}
.stonefont {
font-family: stonefont;
}
h5平台直接返回base64编码的woff文件内容,不过需要注意的是h5中woff内容是通过ajax加载的,在解析价格前一定要等待font-face信息加载完成:
@font-face {
font-family: stonefont;
src: url(data:application/font-woff;charset=utf-8;base64,d09GRgABAAAAAAggAAsAAAAAC7gAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAABHU1VCAAABCAAAADMAAABCsP6z7U9TLzIAAAE8AAAARAAAAFZW7lAMY21hcAAAAYAAAAC2AAACTCTQbBFnbHlmAAACOAAAA5cAAAQ0l9+jTWhlYWQAAAXQAAAALwAAADYT2aJCaGhlYQAABgAAAAAcAAAAJAeKAzlobXR4AAAGHAAAABIAAAAwGhwAAGxvY2EAAAYwAAAAGgAAABoG3AX8bWF4cAAABkwAAAAfAAAAIAEZADxuYW1lAAAGbAAAAVcAAAKFkAhoC3Bvc3QAAAfEAAAAXAAAAI++R+6ueJxjYGRgYOBikGPQYWB0cfMJYeBgYGGAAJAMY05meiJQDMoDyrGAaQ4gZoOIAgCKIwNPAHicY2Bk0mWcwMDKwMHUyXSGgYGhH0IzvmYwYuRgYGBiYGVmwAoC0lxTGBwYKj5MZdb5r8MQw6zDcAUozAiSAwDh1wuXeJzFkjsOgzAQRGfDJx9SpOQEVOk5Fx0lVU4QLhIJKeI8hsKWJW5AxixNJGiTtZ6lnbU81q4BJAAicicxIG8IQryoyqJHuCx6jAfzHDcqJ9RGTDHYsbO9y13jS19N7TzzxH5lK4Q3bq1Qyeh5pvuBfimO9OVrJd256Qch/7P+juuyP9csI/UKn2hWgm4KJcx2sAp7irFTwmxtr7DPcLnCjsM1CnsPXyqcAnylhH8ztQqSD26qQTwAAHicPZPfcxJXFMfvXRw2EiREdlkhCiyQvQskYbM/A2wAIaD5SUmAEKIYMoqYVk0zxkbNtEp/zGinf4C+ONOHvjh98N3OdOpTq6N56B/Qmb72rc74kiG9C6T7cGfPmTn3fM/ney6AABz9A0RAAQIATaIpD8UD/Bn5f+ER8R5EcN4DJVGVERfwkxojqoqM/8wkUjVVEhkPpMykB9KUOeDnUHvogposo5DuDlpssbWUKs1YqvZYvBQXJxVxMnXhcevK/snf5zOVfcRbFmFiWkglM0O16KT7THVj3jl0KX/56+2aocGENXwgANZAAS8YB8DBKbIqOmmq186PjFijnJKoqTLnN5soJ4MDtRe9/Gz71c5WJtf+83w6L2RkIcBmm+fP+kf9IZ9Eh0pfFuE3/NYnN24vtHjnlczl/aTeyNd/klM+bz2b7jxGOcpBU+jhchFrIQwtsIO1+MAYxsN1YZhJG6STGA7WZIN4fEXuInEykOoyUwwh8JmVDsphX5ixnvKtS6v78auZW08Wsp+XVcXaeYpynFos3C0RTpkZZbyxcyvq5ES7mb0z/fzVQX1ZmCh13oyVI7XF2dWK4QvufkS8Bg7MQ2FpmwmrCCShpmr4iMKDQHZGcrgG1uGw3ZvwpFniVjkXbNx7kK59Gm7qe7djl7i+vx+IE8RvmOyxvz1THSzNkv2ZSGwp4tB3llk1Xa1kI1lqJQevdv5GvplA/VEs98XmdHLgdS6z+bTCeS1wu/Srk3l0fePiqjpVO2Z22PcPOGTNuBbDwTcb5Iy7sXuiakccZhl2uVtLO4mzdrvVNnKtcF3P14r3V8L8g+A4bLTnlkrr4bR+M9VESytz1Tcv7+zCjURcyvT7dPckAKL9aTCLJJyGMjIbMxh7IonG0vS6ItgdlqYY3P37QV0IJ5DNTEJXdExbu//V5syunrhbKMuqBbaWpxKVUPhe4WddGU0qbnVk4IQ57HY/3Lr57fwP7Sc/lieiZZhYWKsv5UOR1f7bwccu1mMFgA0M2yCpdN2R4G7V1+Jnp0b4wRgheHR72S+6BOb/mo+4JgjAKM1iViaj5NiVJBT71EjsOPzYQYOWET7GxQt0aF5PLcDayb13e2yEygq8yJweKJW8Hlc0qviEuXNT12bn8pbmjZ3y+KLIpHh2/AxzqrfTuOch8RZYAI5ZhVWgNCzRARo
Nm2C28wfMX2w0qn+9KMKDjlB8cYhzvwDwH1tb4MIAeJxjYGRgYADivfV3H8fz23xl4GZhAIEbsfquCPr/GxYGpvNALgcDE0gUAEJjCskAeJxjYGRgYNb5r8MQw8IAAkCSkQEV8AAAM2IBzXicY2EAghQGBiYd4jAAN4wCNQAAAAAAAAAMAE4AlADYAPwBLgFiAaoBxAH+AhoAAHicY2BkYGDgYTBgYGYAASYg5gJCBob/YD4DAA6DAVYAeJxlkbtuwkAURMc88gApQomUJoq0TdIQzEOpUDokKCNR0BuzBiO/tF6QSJcPyHflE9Klyyekz2CuG8cr7547M3d9JQO4xjccnJ57vid2cMHqxDWc40G4Tv1JuEF+Fm6ijRfhM+oz4Ra6eBVu4wZvvMFpXLIa40PYQQefwjVc4Uu4Tv1HuEH+FW7i1mkKn6Hj3Am3sHC6wm08Ou8tpSZGe1av1PKggjSxPd8zJtSGTuinyVGa6/Uu8kxZludCmzxMEzV0B6U004k25W35fj2yNlCBSWM1paujKFWZSbfat+7G2mzc7weiu34aczzFNYGBhgfLfcV6iQP3ACkSaj349AxXSN9IT0j16JepOb01doiKbNWt1ovippz6sVYYwsXgX2rGVFIkq7Pl2PNrI6qW6eOshj0xaSq9mpNEZIWs8LZUfOouNkVXxp/d5woqebeYIf4D2J1ywQB4nG2LOQ6AMBAD1+EIBPhLArkoiQh/oaFD4vmIbIub0cg2CeIo+s8IgQo1GrSQ6NBDYcCIifDI+zrzlrbCPZhCO/uPh15d8RQ190tgN7HQZf75xDurDdELIFYXkw==) format("woff");
}
.stonefont {
font-family: 'stonefont';
}
先下载woff文件并解析,可使用百度Font查看woff文件,由于每次请求woff文件都会改变,因此我们需要建立woff字体编码与数字图像的映射关系。
首先下载一个woff文件,保存为用户主目录下的base.woff(即 ~/base.woff),用百度Font查看映射关系,作为基础映射信息。
# Parse the reference woff file with fontTools. It is expected to live in the
# current user's home directory as "base.woff".
_base_woff_path = os.path.expanduser('~') + '/base.woff'
base_font = ttLib.TTFont(_base_woff_path)

# Glyph names of the reference font, in the order the font declares them.
base_unicode_list = base_font.getGlyphOrder()

# Glyph name -> character it draws in base.woff. This table was built by hand
# by inspecting base.woff in a font viewer.
base_unicode2num = {
    'x': '.',
    'uniE541': '0',
    'uniE820': '1',
    'uniEBF1': '2',
    'uniF5BE': '3',
    'uniF0AE': '4',
    'uniE406': '5',
    'uniF74D': '6',
    'uniF5F7': '7',
    'uniE21D': '8',
    'uniF4D9': '9',
}
通过与base.woff中图像进行对比,为每次请求返回的woff建立图像编码与数字映射关系。
class MTFontParser:
    """Build a glyph-name -> character mapping for one per-request woff font.

    Each glyph of the supplied font is compared with the glyphs of the
    module-level reference font ``base_font``. When two outlines are equal,
    the glyph name of the supplied font is mapped to whatever
    ``base_unicode2num`` records for the matching reference glyph (``None``
    when the reference glyph has no entry there).

    The font is written to a temporary ``mt_font_<random>.woff`` file in the
    current working directory; the file is removed when the parser is
    garbage collected.
    """

    # Kept so that code reading the attribute on the class keeps working.
    # Every instance gets its own dict in __init__, so mappings from
    # different fonts can no longer leak into each other.
    mt_unicode2num = dict()

    def __init__(self, woff):
        """Download/decode ``woff`` and build ``self.mt_unicode2num``.

        Args:
            woff: Either a URL of the woff file (``http(s)://...`` or
                protocol-relative ``//host/path``) or the base64-encoded
                content of the woff file.
        """
        # The font changes on every response, so the mapping must be
        # per-instance rather than shared on the class.
        self.mt_unicode2num = {}
        self.woff_file = self.get_woff_file(woff)
        mt_font = ttLib.TTFont(self.woff_file)
        try:
            # Look the reference glyphs up once instead of once per
            # candidate glyph of the new font.
            base_glyf = base_font['glyf']
            base_glyphs = [(name, base_glyf[name]) for name in base_unicode_list]

            mt_glyf = mt_font['glyf']
            for mt_unicode in mt_font.getGlyphOrder():
                mt_glyph = mt_glyf[mt_unicode]
                # First reference glyph with an identical outline wins.
                for base_unicode, base_glyph in base_glyphs:
                    if base_glyph == mt_glyph:
                        self.mt_unicode2num[mt_unicode] = base_unicode2num.get(base_unicode)
                        break
        finally:
            # Release the file handle fontTools keeps open for lazy loading,
            # so the temporary file can be removed later.
            mt_font.close()

    def __del__(self):
        """Remove the temporary woff file, if one was created."""
        # woff_file is missing when get_woff_file() raised inside __init__.
        woff_file = getattr(self, 'woff_file', None)
        if woff_file and os.path.exists(woff_file):
            try:
                os.remove(woff_file)
            except OSError:
                # Best-effort cleanup: a destructor must not raise.
                pass

    def get_woff_file(self, woff):
        """Store the woff content in a local file and return the file name.

        Args:
            woff: URL of the woff file (``http(s)://...`` or
                protocol-relative ``//host/path``) or its base64-encoded
                content.

        Returns:
            Name of the created ``mt_font_<random>.woff`` file, relative to
            the current working directory.

        Raises:
            requests.RequestException: The download failed, timed out, or
                returned an HTTP error status.
            binascii.Error: ``woff`` is not a URL and is not valid base64.
        """
        # The web page references the font with a protocol-relative URL
        # ('//vfile.meituan.net/...'). Base64 woff data starts with the
        # encoded 'wOFF' signature, so a leading '//' is always a URL.
        if woff.startswith('//'):
            woff = 'https:' + woff
        if woff.startswith('http'):
            r = requests.get(woff, timeout=10)
            # Fail here instead of writing an error page to disk and
            # failing later inside the font parser.
            r.raise_for_status()
            code = r.content
        else:
            code = base64.b64decode(woff)
        woff_file = 'mt_font_{}.woff'.format(random.randint(0, sys.maxsize))
        with open(woff_file, 'wb') as f:
            f.write(code)
        return woff_file
页面中价格信息由多个字符组成,通过MTFontParser的parse_price方法解密:
def parse_price(self, price_code):
    """Translate an obfuscated price string into its readable form.

    Every character is looked up in ``self.mt_unicode2num`` under the glyph
    name ``'uni' + <uppercase hex code point>`` (for example U+E541 becomes
    ``'uniE541'``). A literal ``'.'`` is kept as is. Characters with no
    mapping (or mapped to ``None``) are passed through unchanged rather than
    raising ``TypeError``.

    Args:
        price_code: The price text as it appears in the page.

    Returns:
        The decoded price string; ``''`` for empty input.
    """
    mapping = self.mt_unicode2num
    decoded = []
    for c in price_code:
        if c == '.':
            decoded.append(c)
            continue
        # format(..., 'X') yields the bare uppercase hex digits; the previous
        # lstrip('0x') stripped a character set rather than a prefix.
        digit = mapping.get('uni{:X}'.format(ord(c)))
        decoded.append(c if digit is None else digit)
    return ''.join(decoded)
这样动态变化的woff字体价格信息也就可以动态解密了。
MTFontParser源码:
https://github.com/improvejin/hyspider/tree/master/hyspider/utils/font_util.py