import base64
import re
from io import BytesIO
import requests
from fontTools.ttLib import TTFont
# Browser-like User-Agent header sent with the page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0',
}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(
    'http://piaofang.maoyan.com/?ver=normal',
    headers=headers,
    timeout=10,
)

# The page embeds its font as base64 text; capture everything between
# "base64," and the next closing parenthesis.
_font_matches = re.findall(r'base64,(.*?)\)', response.text)
if not _font_matches:
    # Fail with a readable message instead of a bare IndexError.
    raise RuntimeError(
        'no base64-embedded font found in the response (HTTP %s)'
        % response.status_code
    )
font_str = _font_matches[0]
def make_font_bin(font_string):
    """Decode base64 font text into the raw bytes of the font file.

    Args:
        font_string: The font payload as base64 text (``str``).

    Returns:
        The decoded payload as ``bytes``.
    """
    # To inspect a font offline, write the returned bytes to a file.
    encoded = font_string.encode()
    return base64.decodebytes(encoded)
# Base64 text of the reference font whose glyph names are listed in
# base_code. The payload is kept as two adjacent literals (joined by the
# parser) so that no line break ends up inside a quoted string.
base_str = (
    'd09GRgABAAAAAAgcAAsAAAAAC7gAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAABHU1VCAAABCAAAADMAAABCsP6z7U9TLzIAAAE8AAAARAAAAFZW7lUqY21hcAAAAYAAAAC5AAACTEUIzOlnbHlmAAACPAAAA5MAAAQ0l9+jTWhlYWQAAAXQAAAALwAAADYUfVpyaGhlYQAABgAAAAAcAAAAJAeKAzlobXR4AAAGHAAAABIAAAAwGhwAAGxvY2EAAAYwAAAAGgAAABoGRgVGbWF4cAAABkwAAAAfAAAAIAEZADxuYW1lAAAGbAAAAVcAAAKFkAhoC3Bvc3QAAAfEAAAAWAAAAI/ZSQTPeJxjYGRgYOBikGPQYWB0cfMJYeBgYGGAAJAMY05meiJQDMoDyrGAaQ4gZoOIAgCKIwNPAHicY2Bk0mWcwMDKwMHUyXSGgYGhH0IzvmYwYuRgYGBiYGVmwAoC0lxTGBwYKr5uZtb5r8MQw6zDcAUozAiSAwDkdQu6eJzFkr0Ng0AMhd+FvySkSJkhMgpiDVZggkyQhp4JQpMtWIEGUSAjUVCTZ0wTCdrEp+8kv7Nsyz4AAQCP3IkPuDcc1F5U3aJ7OC+6jwf9G65UjsibsXV9IqlkUks3lFMxVfPMiP2XLXPMuHX0JWDNGAecELIHrRpRDncy/cDc/0p/22W5n6sXk3yFLTajobttnaG77RND4yU1OF1IZnDOkNrgxCGdofsfSkP/ylQY3AemykD0AVoWRsgAAAB4nD1TTW/aZhx/HlNB6lBCho0LaQFjYhtIguM3AjiG4kCbV0YChJCWhqilNFvbLGq6tI22lm6T2mkfoLtU6mGXaofeO2laT1unNYd9gEm77rZJvURkj50XHyw9j+X/7/UPIAAH/wAREAADICmRRIDgAXrQ6eA/DGDvAQMS6EsASqIOkzqchDJnd9iZMKvIqiQGIEm4IBPmWA7KLBO2kwTlFdVv+zUhluFcdgf0JUaSKw++XJ/a1jL3SlVZxWFncSJTi8bul37UlGFd8atDfafsMb//0catr2e/6z57WR1LVGFmbqW5UIzGl00+0Hp9QHwiAAyTtJzUbYhNUrVAHZwORS9J2B0u6HDZHPBDj+vHh/gUmy6R0VktOwcbp3f+2KHjhCHwIvVRX6USDPgSCSUkzJyfuD49U8TbN7eqo/MileXp0bPUmRPMbYTpBIBmBtFsRTVBJbhdD3X46Ykhvj+FCQHNXQ2LPoE69A39s4/9DnCAZtAKrUBpUCIZkhu0QaP3GyxearXqf70qw72eUH61j+5+OvYb9hBWCIygCSyHdDlMPaSOrPceuow8TyLTKS8FCTMRVZHZsB1+7yQjciwUo5xnQqvS8m76Wv72sznjs6qqOHvPuQKrlkv3KphXpoapYOr8kjo+1m0bdydfvNlrLgpjld67kWq8MT+9XDvisY94ECAIgAf5DC3AI3fNpJMEStjNsYhfzOfvLGxlzrndTtfQ9dINrdgoP1iK8Q8jo7DVnVmorMZy2q1sm1tYmqm/e313G65l0lLe8hb16xT2i4lites4SQ9N0o4jzWbPULO+wafVXL1mxA1iqQCv9f7mQlNM83Gq8Pn6pN73tpBff15jgzjcrPzspR7fWLu0rE40jvM7wN4CD1Ki0CRqht3BmK0xU0zAPcaYkjy+vlU46A5mAjkau10tRFr3H+Yan8Ta2s6d1GX2sAcH/6I570H8hKsVT5KyErBoq1YwAYhcMnfCXBCuO3BR1atcVPNHcFdqJatKU3jdnUpX0uK4Io5nLz7tXN09/etsvrbL8fg8zEwKWT0/0EiM+8/W12a9A5eLV55sNkwZtpN9NHMZRXrM/bM8suCQTeiMkpFEcyfCdhuBMkNMD0+vP918s7WRL3T/vJArCnlZYGijfeFceDgcDUlktPJFGX7Fb3x8885ch/dezV/Z1bVWsfmDnA0Fm0au95QrEB6S4B4tlgH4Hx0s4MIAeJxjYG'
    'RgYABit49lB+P5bb4ycLMwgMCN9dyxCPr/GxYGpvNALgcDE0gUADc1CscAeJxjYGRgYNb5r8MQw8IAAkCSkQEV8AAAM2IBzXicY2EAghQGBiYd4jAAN4wCNQAAAAAAAAAMAFQAjgCoAMQBCAE8AW4BkgHUAhoAAHicY2BkYGDgYTBgYGYAASYg5gJCBob/YD4DAA6DAVYAeJxlkbtuwkAURMc88gApQomUJoq0TdIQzEOpUDokKCNR0BuzBiO/tF6QSJcPyHflE9Klyyekz2CuG8cr7547M3d9JQO4xjccnJ57vid2cMHqxDWc40G4Tv1JuEF+Fm6ijRfhM+oz4Ra6eBVu4wZvvMFpXLIa40PYQQefwjVc4Uu4Tv1HuEH+FW7i1mkKn6Hj3Am3sHC6wm08Ou8tpSZGe1av1PKggjSxPd8zJtSGTuinyVGa6/Uu8kxZludCmzxMEzV0B6U004k25W35fj2yNlCBSWM1paujKFWZSbfat+7G2mzc7weiu34aczzFNYGBhgfLfcV6iQP3ACkSaj349AxXSN9IT0j16JepOb01doiKbNWt1ovippz6sVYYwsXgX2rGVFIkq7Pl2PNrI6qW6eOshj0xaSq9mpNEZIWs8LZUfOouNkVXxp/d5woqebeYIf4D2J1ywQB4nG3IOQ6AIBRF0f9wQBH3ghE0lDLtxcbOxOUb+a23OcklQZyi/zQEGrTo0ENiwAiFCRoz4ZH3deZkczUfturMUjWFTbF8FhdW1nv+285mNthI9AIoHxfm'
)
# (glyph name in the reference font, digit paired with that glyph).
# base_code[i] and base_num[i] are looked up by the same index elsewhere,
# so both lists are derived from this single table to keep them aligned.
_BASE_GLYPH_DIGITS = (
    ('uniED4E', '9'),
    ('uniEEA4', '2'),
    ('uniE501', '1'),
    ('uniE0F1', '4'),
    ('uniEDCF', '6'),
    ('uniF5B3', '0'),
    ('uniF599', '5'),
    ('uniED67', '7'),
    ('uniEDE7', '3'),
    ('uniEB4C', '8'),
)
base_num = [digit for _, digit in _BASE_GLYPH_DIGITS]
base_code = [glyph for glyph, _ in _BASE_GLYPH_DIGITS]

# Reference font (embedded above) and the font served with the current page.
base_font = TTFont(BytesIO(make_font_bin(base_str)))
match_font = TTFont(BytesIO(make_font_bin(font_str)))
# Debugging aid: print getGlyphOrder() / getBestCmap() of either font to
# inspect its glyph names and code-point table.
def match_font_base(font_s):
    """Map glyph names of a downloaded font to the digits they stand for.

    Every glyph of the downloaded font is compared with the glyphs of the
    reference font listed in ``base_code``. When two ``glyf`` entries
    compare equal, the downloaded glyph name is mapped to the digit stored
    at the same position in ``base_num``.

    Args:
        font_s: Base64 text of the downloaded font.

    Returns:
        A dict from glyph name (in the downloaded font) to digit string.
        Glyphs that equal no reference glyph are left out.
    """
    match_font = TTFont(BytesIO(make_font_bin(font_s)))
    match_glyphs = match_font['glyf']

    # Resolve the reference glyphs once rather than once per comparison,
    # and pair each with its digit so no index() lookup is needed.
    base_glyphs = base_font['glyf']
    references = [
        (base_glyphs[code], digit)
        for code, digit in zip(base_code, base_num)
    ]

    font_result = {}
    for match_name in match_font.getGlyphOrder():
        candidate = match_glyphs[match_name]
        for reference, digit in references:
            if reference == candidate:
                font_result[match_name] = digit
                # A glyph stands for one digit; stop at the first match.
                break
    return font_result
def decode_text(test_str, decode_map: dict, font_cmap=None):
    """Replace font-obfuscated characters in a string with real digits.

    Args:
        test_str: Text as scraped from the page.
        decode_map: Dict from glyph name to the digit that glyph stands for.
        font_cmap: Optional dict from code point (int) to glyph name. When
            omitted, the cmap of the module-level ``match_font`` is used.

    Returns:
        A list with one string per input character, in order. Characters
        that the font does not remap are returned unchanged.

    Raises:
        KeyError: If a character maps to a glyph name that is missing from
            ``decode_map``.
    """
    if font_cmap is None:
        font_cmap = match_font.getBestCmap()

    results = []
    for char in test_str:
        code_point = ord(char)
        # Only code points U+0100..U+FFFF are candidates for decoding;
        # these are exactly the characters whose "unicode-escape" form
        # starts with "\u". Everything else is plain text.
        if not 0x100 <= code_point <= 0xFFFF:
            results.append(char)
            continue
        # Look up the glyph name the font assigns to this code point.
        glyph_name = font_cmap.get(code_point)
        if glyph_name is None:
            # Ordinary non-ASCII text that the font does not cover: keep it
            # instead of failing with KeyError.
            results.append(char)
            continue
        # Translate the glyph name into the digit it stands for.
        results.append(decode_map[glyph_name])
    return results
# Glyph-name -> digit table for the font embedded in the fetched page.
d_map = match_font_base(font_str)

import lxml.html

parse_result = lxml.html.fromstring(response.text)
day_time = parse_result.cssselect('#dayStr')
print(day_time[0].text)


def _decode_cell(row, selector):
    """Return the decoded text of the first element matching selector in row.

    The whole cell text is decoded in one pass; characters such as "." or
    "%" pass through decode_text unchanged, so values without a decimal
    point no longer raise IndexError.
    """
    raw_text = row.cssselect(selector)[0].text or ''
    return ''.join(decode_text(raw_text, d_map))


ul_elements = parse_result.cssselect('ul.canTouch')
for ul_element in ul_elements:
    # Movie title
    file_name = ul_element.cssselect('li.c1 b')[0].text
    # Box office
    c = _decode_cell(ul_element, 'li.c2 i.cs')
    # Box-office share
    c1 = _decode_cell(ul_element, 'li.c3 i.cs')
    # Screening share
    c2 = _decode_cell(ul_element, 'li.c4 i.cs')
    # Seat occupancy rate
    c3 = _decode_cell(ul_element, 'li.c5 i.cs')

    print('电影名称:',file_name)
    print('票房为%s万元'% c )
    print('票房占比',c1)
    print('排片占比',c2)
    print('上座率',c3)
    print('*'*50)
# ticket_nums=parse_result.cssselect('li.c2 i.cs')
#
#
#
# for ticket_num in ticket_nums:
# print("item is :::", ticket_num.text)
# split_item = ticket_num.text.split(".")
# a=''.join(decode_text(split_item[0], d_map))
# b=''.join(decode_text(split_item[1], d_map))
# c=a+'.'+b
# print('票房为%s万元'% c )
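# Source article: "爬虫之字体解密(猫眼同58同城租房字形不变)" - font de-obfuscation for web scrapers (Maoyan, like 58.com rentals: the glyph shapes do not change).
# Blog page footer: "最新推荐文章于 2022-05-06 11:19:17 发布" (latest recommended article published 2022-05-06 11:19:17).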