from xml import etree
import requests
import re
import base64
from fontTools.ttLib import TTFont, BytesIO
# Request headers for su.58.com. A realistic browser fingerprint plus a
# logged-in session cookie — the search page now requires authentication.
headers = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9",
"cache-control": "max-age=0",
# The cookie is required because the page now demands a logged-in state.
# NOTE(review): this is a captured session cookie — it expires; refresh it
# from a browser session before running.
"cookie": 'f=n; commontopbar_new_city_info=5%7C%E8%8B%8F%E5%B7%9E%7Csu; id58=UvTEUl/zCqajUJaybRC9bw==; 58tj_uuid=7009bbe1-76f8-4b73-8376-1f7a69d5f496; als=0; wmda_new_uuid=1; wmda_uuid=c67559bfe142299dbd374d9dab028df8; xxzl_deviceid=KaV0c6cV6yrVONO9yTVEfuxpPimKScDGVOLYxOl3UXSNS5GD%2BMfPxb30M3qNp5Fi; xxzl_smartid=e80e9b7ddd64b4b4d1a2cd8041cebe27; gr_user_id=b371d8ff-2cf7-43ca-9097-e7dd72fc6224; xxzl_cid=e61ae401e08e40388d40a8e34b29956a; xzuid=7f33cba3-b8fd-43f1-839e-fa2c1cc9cafa; Hm_lvt_3bb04d7a4ca3846dcc66a99c3e861511=1610121167,1610166089,1610166404,1610369641; Hm_lvt_e15962162366a86a6229038443847be7=1610121167,1610166090,1610166405,1610369641; Hm_lvt_e2d6b2d0ec536275bb1e37b421085803=1610120643,1610166104,1610369657; ppStore_fingerprint=43A259D40092AFB43E772FCC9350C463700EAEBE5D26276C%EF%BC%BF1610369657573; 58home=su; f=n; commontopbar_new_city_info=5%7C%E8%8B%8F%E5%B7%9E%7Csu; city=su; new_uv=6; utm_source=12345; spm=164128813067.zhaopin_baidu; wmda_session_id_1731916484865=1615820398660-8edb2386-ae73-b2ed; wmda_visited_projects=%3B11187958619315%3B10104579731767%3B7790950805815%3B3381039819650%3B1409632296065%3B1731916484865; commontopbar_ipcity=su%7C%E8%8B%8F%E5%B7%9E%7C0; sessionid=99a3f5e1-24e3-4673-9285-97c9ffdd3501; param8716kop=1; wmda_session_id_10104579731767=1615820515565-2d473dec-cfe9-e637; new_session=0; init_refer=; crmvip=""; dk_cookie=""; isSmartSortTipShowed=true; PPU="UID=57606948143127&UN=%E9%83%AD%E6%88%90%E4%BC%9F18567839070&TT=393add6da5f4879dfc0ba40e1c64c563&PBODY=cdiJiPxHFQ_N7xQnaMGLnXGVihtEYl982at6jeFRhTVsWMOJY2Lsy9f_XmThf5RZgWrPwQLMw33MnptMvXSDwDNlQsMGZAcHd4iBcopZtNFHDI_n_0cswApG1qXvW9Ksb81wcjYnrbyslFqGz6Yi4-O0jUZQCJXbnY1tCMafLjE&VER=1"; www58com="UserID=57606948143127&UserName=%E9%83%AD%E6%88%90%E4%BC%9F18567839070"; 58cooper="userid=57606948143127&username=%E9%83%AD%E6%88%90%E4%BC%9F18567839070"; 58uname=%E9%83%AD%E6%88%90%E4%BC%9F18567839070; Hm_lvt_a3013634de7e7a5d307653e15a0584cf=1615820690,1615820708,1615820829; param8616=0; JSESSIONID=A819D6FD17545AB36C96E42D3923EA40; jl_list_left_banner=11; Hm_lpvt_a3013634de7e7a5d307653e15a0584cf=1615821167',
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
}
# The x-delta and y-delta between a glyph's first two outline coordinates do
# not change when the site rotates the font file, so the (dx, dy) pair is a
# stable fingerprint used as the key mapping to the real character.
data_map = {
(0, 1549): "B", # deliberately incomplete — each glyph must be matched to its character by hand
}
# Fetch the job-search page, pull the base64-encoded WOFF font embedded in its
# @font-face rule, and load it for glyph inspection.
url = "https://su.58.com/searchjob/"
response = requests.get(url=url, headers=headers).text

# The font payload sits between "base64," and the closing ")" of the CSS url().
# Guard explicitly: an unmatched page would otherwise die with an opaque
# AttributeError on .group().
match = re.search(r"base64,(.*?)\)", response, flags=re.S)
if match is None:
    raise ValueError("no base64 font payload found in the page source — "
                     "layout may have changed or the session cookie expired")
woff = base64.b64decode(match.group(1))

# Parse the font straight from memory. Glyph names look like 'uniE025';
# the first and last entries are 'glyph00000' and 'x'.
# NOTE(review): BytesIO comes from the `fontTools.ttLib` import above — a
# legacy re-export; modern fontTools drops it, prefer `io.BytesIO`.
tf = TTFont(BytesIO(woff))

# Keep on-disk copies for manual inspection / hand-building data_map.
with open("ztku01.woff", "wb") as f:
    f.write(woff)
tf.saveXML("ztku01.xml")  # dump the already-parsed font as XML; no need to re-read the .woff
# Build font_map: HTML entity form of each glyph name (e.g. "&#xe025") ->
# the real character it renders, keyed through the (dx, dy) fingerprint of
# the glyph's first two outline coordinates (stable across font refreshes).
font_map = {}
for glyph_name in tf.getGlyphNames()[1:-1]:  # slice skips 'glyph00000' and 'x'
    # Hoist the glyph lookup; coordinates is a GlyphCoordinates sequence of
    # (x, y) pairs, e.g. [(1990, 244), (1960, 205), ...].
    coords = tf["glyf"][glyph_name].coordinates
    x1, y1 = coords[0]
    x2, y2 = coords[1]
    fingerprint = (x2 - x1, y2 - y1)
    real_char = data_map.get(fingerprint)
    if real_char is None:
        # data_map is filled in by hand and intentionally partial — skip
        # glyphs that have not been decoded yet instead of raising KeyError.
        continue
    # Page source encodes the glyph as an HTML entity, e.g. "uniE025" -> "&#xe025".
    font_map[glyph_name.replace("uni", "&#x").lower()] = real_char
# Substitute every obfuscated entity in the page with its decoded character.
# BUG FIX: the original did `html = response.replace(...)` inside the loop,
# restarting from the raw page on every pass so only the LAST substitution
# survived; accumulate into `html` instead.
html = response
for entity, real_char in font_map.items():
    html = html.replace(entity + ";", real_char)
print(html)
# NOTE(review): `from xml import etree` (L1) provides no HTML(); this call
# needs `from lxml import etree` — confirm the intended dependency.
data = etree.HTML(html)
# Source article: 《封号码罗》python爬虫之某同城字体反爬 (part 12)
# Latest recommended revision published 2023-04-27 11:18:24