##声明:
破解思路由同事提供, 原文章也是他整理的, 我拷贝过来, 并在文末附上了进一步完善、封装后的代码。如需引用或转载, 请附上原文地址, 或征得我们二人中任意一人的同意, 谢谢~
打开一个页面, 发现字体文件地址是动态的, 这个倒是好说, 写个正则, 就可以动态匹配出来
先下载下来一个新页面的字体文件, 做一下对比, 如图
头脑风暴ing.gif
(与伙伴对话ing...)
不着急, 还是要冷静下来, 再想想哪里还有突破点
同一个页面的字体文件地址是动态的, 但是, 里面的字体编码和顺序是不会变的呀
可以使用某一个页面的字体文件做一个标准的字体映射表呀!
好像发现了新世界的大门, 可门还没开开, 就被自己堵死了, 就想 做出来映射表然后呢!(又要奔腾了)
想呀想呀想呀想, 最后叫上小伙伴一起想
突然就想到了, 虽然那么多不一样, 但是, 但是, 相同文字的坐标点相同呀 ! 突然又打开了大门
首先排除特别的文字的情况下, 只是在这个字体文件的情况下, 60%的字坐标点一样
那剩下的怎么办呢! 先不管了, 先把这60%给弄出来
def extract_ttf_file(self, file_name, get_word_map=True):
    """Read a TTF file and return its glyph outlines (and optionally words).

    The first entry of the glyph order is skipped; every remaining glyph
    name is mapped to the list of its outline coordinates taken from the
    font's ``glyf`` table.

    Args:
        file_name: Path of the TTF file to load.
        get_word_map: When true, also map each glyph name to the character
            at the same position in the hard-coded word list below. That
            list matches the glyph order of the reference font only.

    Returns:
        ``(utf_word_map, utf_coordinates_map)`` when ``get_word_map`` is
        true, otherwise just ``utf_coordinates_map``.

    Raises:
        IndexError: If the word map is requested but the font has more
            glyphs than there are words in the list.
    """
    font = TTFont(file_name)
    glyph_names = font.getGlyphOrder()[1:]
    glyf_table = font['glyf']
    utf_coordinates_map = {
        name: list(glyf_table[name].coordinates) for name in glyph_names
    }
    # A font that is only compared by outline does not need the word list,
    # so a different glyph count must not make this call fail.
    if not get_word_map:
        return utf_coordinates_map

    # Characters hidden behind the custom font, in reference glyph order.
    word_list = [
        "坏", "少", "远", "大", "九", "左", "近", "呢", "十", "高",
        "着", "矮", "八", "二", "右", "是", "得", "的", "小", "短",
        "很", "一", "了", "地", "好", "多", "七", "不", "长", "低",
        "三", "五", "六", "下", "更", "和", "四", "上",
    ]
    if len(glyph_names) > len(word_list):
        raise IndexError(
            "font %r has %d glyphs but only %d words are known"
            % (file_name, len(glyph_names), len(word_list))
        )
    utf_word_map = dict(zip(glyph_names, word_list))
    return utf_word_map, utf_coordinates_map
#
self.local_utf_word_map, self.local_utf_coordinates_map = self.extract_ttf_file(self.local_ttf_name)
# Build a lookup table from one reference font file that maps each glyph code to its character, e.g. {font_code: character}. The codes of every newly downloaded font are resolved against this table.
font_rule = {
"edd2":"坏",...
"eca5":"四",
"ede5":"上"}
下载要破解的字体文件, 并替换标准编码字体映射表
会得到22个字体的映射表, 共38个:
接下来, 就用坐标点来解决, 以下为思路
使用两点坐标差来判断, 但是这个偏差值拿不准
相同文字的坐标点数量必须一致; 在数量一致的候选字形中, 逐点计算对应坐标点之间距离的平方并累加, 总和最小的那个就是同一个字。
公式: 对每一对对应坐标点计算 (x1-x2)**2 + (y1-y2)**2, 再把所有结果相加
来先试试
然后再重组标准编码, 标准坐标, 新的编码, 和新坐标
(思路是: 找出最相近的坐标, 用新坐标找到对应的标准编码, 再用标准编码取出对应的文字, 最后替换成本页所用编码的映射表)
提取所有坐标点加起来最小的元素
替换, 生成新的标准映射表
在以上替换60%的字体映射表再加入一个判断, 改成如下
输出一个标准的坐标值, 这里我就不上图进行对比了, 经过对比, 发现没什么问题
#
-*- coding: utf-8 -*-
#
@Author: Mehaei
#
@Date: 2020-01-10 14:51:53
#
@Last Modified by: Mehaei
#
@Last Modified time: 2020-01-13 10:10:13
import
re
import
os
import
requests
from lxml
import
etree
from fontTools.ttLib
import
TTFont
class NotFoundFontFileUrl(Exception):
    """Raised when no font file URL can be located in a downloaded page."""
class CarHomeFont(object):
    """Decode forum text that is hidden behind a per-page custom web font.

    A reference font file is kept locally. For each target page the page's
    own font file is downloaded, its glyph outlines are compared with the
    reference outlines, and the resulting glyph-code -> character table is
    used to replace the HTML entities in the page source.
    """

    # Matches the ``.ttf`` URL of the page's font declaration. ``[^']*``
    # keeps the match inside one quoted URL so it cannot run across several
    # ``url(...)`` entries that share a line.
    _FONT_URL_RE = re.compile(r",url\('(//[^']*\.ttf)'\) format")

    # Characters hidden behind the custom font, in the glyph order of the
    # reference font. Changing the reference font requires updating this.
    _STANDARD_WORDS = (
        "坏", "少", "远", "大", "九", "左", "近", "呢", "十", "高",
        "着", "矮", "八", "二", "右", "是", "得", "的", "小", "短",
        "很", "一", "了", "地", "好", "多", "七", "不", "长", "低",
        "三", "五", "六", "下", "更", "和", "四", "上",
    )

    def __init__(self, url, *args, **kwargs):
        """Load the reference font, then fetch ``url`` and its font file.

        Extra positional and keyword arguments are accepted and ignored.
        """
        self.local_ttf_name = "norm_font.ttf"
        self.download_ttf_name = 'new_font.ttf'
        self.new_unicode_map = {}
        self._making_local_code_map()
        self._download_ttf_file(url, self.download_ttf_name)

    def _download_ttf_file(self, url, file_name):
        """Download the page at ``url`` and save its font file as ``file_name``.

        The page source is kept in ``self.page_html``.

        Raises:
            NotFoundFontFileUrl: If the page has no font URL or the font
                file itself could not be downloaded.
        """
        self.page_html = self.download(url) or ""
        # Locate the font file link inside the page source.
        match = self._FONT_URL_RE.search(self.page_html)
        if match is None:
            raise NotFoundFontFileUrl("not found font file name")
        # Download the font file (the link is protocol-relative).
        font_url = "https:%s" % match.group(1)
        file_content = self.download(font_url, content=True)
        # Check before opening the target: opening truncates it, and an
        # empty reference font would break every later run.
        if not file_content:
            raise NotFoundFontFileUrl(
                "font file download failed: %s" % font_url
            )
        # Save the font file locally.
        with open(file_name, 'wb') as f:
            f.write(file_content)
        print("font file download success")

    def _making_local_code_map(self):
        """Load the reference font maps, downloading the font if missing."""
        if not os.path.exists(self.local_ttf_name):
            # Page that serves the reference font. If it is changed, the
            # word list must be updated by hand to match the new font.
            url = "https://club.autohome.com.cn/bbs/thread/62c48ae0f0ae73ef/75904283-1.html"
            self._download_ttf_file(url, self.local_ttf_name)
        self.local_utf_word_map, self.local_utf_coordinates_map = (
            self.extract_ttf_file(self.local_ttf_name)
        )
        print("local ttf load done")

    def get_distence(self, norm_coordinate, new_coordinate):
        """Return the summed |dx| + |dy| between index-matched points.

        Every point of ``norm_coordinate`` is compared with the point at
        the same index in ``new_coordinate``.
        """
        return sum(
            abs(new_coordinate[index][0] - point[0])
            + abs(new_coordinate[index][1] - point[1])
            for index, point in enumerate(norm_coordinate)
        )

    def handle_subtraction(self, coordinate_equal_list):
        """Return the candidate whose two outlines are closest.

        Each candidate is a dict with ``norm_coordinate`` and
        ``new_coordinate`` entries; on a tie the earliest candidate wins.
        """
        return min(
            coordinate_equal_list,
            key=lambda candidate: self.get_distence(
                candidate.get('norm_coordinate'),
                candidate.get('new_coordinate'),
            ),
        )

    def replace_ttf_map(self):
        """Fill ``self.new_unicode_map`` for the downloaded font.

        For every reference glyph, the downloaded glyphs with the same
        number of outline points are candidates; the closest one is mapped
        to the reference glyph's character.
        """
        new_utf_coordinates_map = self.extract_ttf_file(
            self.download_ttf_name, get_word_map=False
        )
        for local_unicode, local_coordinate in self.local_utf_coordinates_map.items():
            candidates = [
                {
                    "norm_key": local_unicode,
                    "norm_coordinate": local_coordinate,
                    "new_key": new_unicode,
                    "new_coordinate": new_coordinate,
                }
                for new_unicode, new_coordinate in new_utf_coordinates_map.items()
                if len(new_coordinate) == len(local_coordinate)
            ]
            if not candidates:
                continue
            if len(candidates) == 1:
                best = candidates[0]
            else:
                best = self.handle_subtraction(candidates)
            self.new_unicode_map[best["new_key"]] = (
                self.local_utf_word_map[best["norm_key"]]
            )
        print("new unicode map extract success\n", self.new_unicode_map)

    def extract_ttf_file(self, file_name, get_word_map=True):
        """Read a TTF file and return its glyph outlines (and optionally words).

        The first entry of the glyph order is skipped; every remaining
        glyph name is mapped to the list of its ``glyf`` coordinates.

        Args:
            file_name: Path of the TTF file to load.
            get_word_map: When true, also map each glyph name to the
                character at the same position in the standard word list.

        Returns:
            ``(utf_word_map, utf_coordinates_map)`` when ``get_word_map``
            is true, otherwise just ``utf_coordinates_map``.

        Raises:
            IndexError: If the word map is requested but the font has more
                glyphs than there are standard words.
        """
        font = TTFont(file_name)
        glyph_names = font.getGlyphOrder()[1:]
        glyf_table = font['glyf']
        utf_coordinates_map = {
            name: list(glyf_table[name].coordinates) for name in glyph_names
        }
        # A font that is only compared by outline does not need the word
        # list, so a different glyph count must not make this call fail.
        if not get_word_map:
            return utf_coordinates_map
        if len(glyph_names) > len(self._STANDARD_WORDS):
            raise IndexError(
                "font %r has %d glyphs but only %d words are known"
                % (file_name, len(glyph_names), len(self._STANDARD_WORDS))
            )
        utf_word_map = dict(zip(glyph_names, self._STANDARD_WORDS))
        return utf_word_map, utf_coordinates_map

    def repalce_source_code(self):
        """Return the page source with font entities replaced by characters.

        The first three characters of each glyph name are dropped and the
        rest is lower-cased to build the ``&#x....;`` entity to replace.
        """
        replaced_html = self.page_html
        for utf_code, word in self.new_unicode_map.items():
            entity = "&#x%s;" % utf_code[3:].lower()
            replaced_html = replaced_html.replace(entity, word)
        return replaced_html

    def get_subject_content(self):
        """Return the text of the main post, decoded to plain characters."""
        normal_html = self.repalce_source_code()
        # Use XPath to pick the paragraphs of the main post.
        xp_html = etree.HTML(normal_html)
        return ''.join(
            xp_html.xpath(
                '//div[@xname="content"]//div[@class="tz-paragraph"]//text()'
            )
        )

    def download(self, url, *args, try_time=5, method="GET", content=False, **kwargs):
        """Fetch ``url``, retrying on failure.

        Args:
            url: Address to request.
            *args: Extra positional arguments passed to ``requests.request``.
            try_time: Maximum number of attempts.
            method: HTTP method name (case-insensitive).
            content: Return the raw bytes instead of the decoded text.
            **kwargs: Extra keyword arguments for ``requests.request``. A
                fixed browser ``User-Agent`` header is always applied, and a
                10 second timeout is used unless ``timeout`` is given.

        Returns:
            The response body, or ``None`` when every attempt failed.
        """
        # Copy the headers so the caller's dict is never modified.
        headers = dict(kwargs.get("headers") or {})
        headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
        })
        kwargs["headers"] = headers
        # Without a timeout a stalled connection would block forever.
        kwargs.setdefault("timeout", 10)
        # Every attempt counts, including non-OK responses, so the loop
        # always terminates.
        for _ in range(try_time):
            try:
                response = requests.request(method.upper(), url, *args, **kwargs)
            except requests.RequestException as e:
                print("download error: %s" % e)
                continue
            if response.ok:
                return response.content if content else response.text
            print("download error: %s" % response.status_code)
        return None
if __name__ == "__main__":
    # Decode one forum thread and print the text of its main post.
    url = "https://club.autohome.com.cn/bbs/thread/34d6bcc159b717a9/85794510-1.html#pvareaid=6830286"
    car = CarHomeFont(url)
    # Build the glyph-code -> character table for this page's font.
    car.replace_ttf_map()
    text = car.get_subject_content()
    print(text)
1 import os
2
3 from fontTools.ttLib import TTFont
4 from utils.font_rule import font_rule
5 import logging
6 logging.getLogger("chardet").setLevel(logging.WARNING)
7 logging.getLogger("fontTools").setLevel(logging.WARNING)
8
9
def get_distance(t_lst, t_lst2):
    """Return the summed squared distance between index-matched points.

    Each point of ``t_lst`` is compared with the point at the same index
    in ``t_lst2``; points are indexable as ``(x, y)``.
    """
    return sum(
        (t_lst2[idx][0] - point[0]) ** 2 + (t_lst2[idx][1] - point[1]) ** 2
        for idx, point in enumerate(t_lst)
    )
20
21
def handle_subtraction(lst):
    """Return the candidate whose two outlines are closest.

    Each candidate is a dict holding the outlines under ``old_index`` and
    ``new_index``. On a tie the earliest candidate in ``lst`` is returned.
    """
    return min(
        lst,
        key=lambda candidate: get_distance(
            candidate.get('old_index'), candidate.get('new_index')
        ),
    )
38
39
def get_font_map_lst(old_font_lst, new_font_lst, old_font, new_font):
    """Pair each old glyph with its closest glyph in the new font.

    Args:
        old_font_lst: Glyph names of the reference font.
        new_font_lst: Glyph names of the font to decode.
        old_font: Reference font; ``old_font['glyf'][name].coordinates``
            gives a glyph's outline points.
        new_font: Font to decode, accessed the same way.

    Returns:
        A list of dicts with ``old_key`` / ``new_key`` (the last four
        characters of the glyph names) and ``old_index`` / ``new_index``
        (the outlines). Only new glyphs with the same number of points are
        candidates for an old glyph; an old glyph without any candidate is
        left out of the result.
    """
    old_glyf = old_font['glyf']
    new_glyf = new_font['glyf']
    # Read each new glyph's outline once rather than once per old glyph.
    new_outlines = [
        (name, list(new_glyf[name].coordinates)) for name in new_font_lst
    ]

    font_map_lst = []
    for old_name in old_font_lst:
        old_points = list(old_glyf[old_name].coordinates)
        candidates = [
            {
                'old_key': old_name[-4:],
                'new_key': new_name[-4:],
                'old_index': old_points,
                'new_index': new_points,
            }
            for new_name, new_points in new_outlines
            if len(new_points) == len(old_points)
        ]
        if len(candidates) > 1:
            # Several glyphs share the point count: keep the closest one.
            font_map_lst.append(handle_subtraction(candidates))
        elif candidates:
            font_map_lst.append(candidates[0])
    return font_map_lst
58
59
def get_font_map(font_map_lst):
    """Build the new-glyph-code -> character table.

    Args:
        font_map_lst: Dicts with ``old_key`` and ``new_key`` entries, as
            produced by ``get_font_map_lst``.

    Returns:
        A dict from the lower-cased ``new_key`` to the character that
        ``font_rule`` lists for the lower-cased ``old_key``. Pairs whose
        old code is missing from ``font_rule`` are printed and skipped, so
        the table never contains the literal text "None".
    """
    font_dic = {}
    for map_dic in font_map_lst:
        old_key = map_dic.get('old_key')
        new_key = map_dic.get('new_key')
        font = font_rule.get(old_key.lower())
        if not font:
            # Unknown reference code: report it but do not map it.
            print(map_dic)
            continue
        font_dic[new_key.lower()] = str(font)
    return font_dic
70
71
def decrypt_font(new_file_name):
    """Return the glyph-code -> character table for ``new_file_name``.

    The reference font is read from ``old_font.ttf`` in the working
    directory, or from ``./utils/old_font.ttf`` when that file is absent.
    The glyph orders of both fonts (minus their first entry) are printed.
    """
    reference_path = 'old_font.ttf'
    if not os.path.exists(reference_path):
        reference_path = './utils/old_font.ttf'
    old_font, new_font = TTFont(reference_path), TTFont(new_file_name)

    old_font_lst = old_font.getGlyphOrder()[1:]
    print('[old_font_lst]', old_font_lst)
    new_font_lst = new_font.getGlyphOrder()[1:]
    print('[new_font_lst]', new_font_lst)

    matched_pairs = get_font_map_lst(old_font_lst, new_font_lst, old_font, new_font)
    return get_font_map(matched_pairs)
89
90
if __name__ == '__main__':
    # Decode a sample font file and show the resulting table and its size.
    new_file_name = 'new.ttf'
    font_map = decrypt_font(new_file_name)
    print(font_map)
    print('[len_font_map]', len(font_map))
进一步封装完善代码