目标网站:http://www.porters.vip/confusion/movie.html
目标网站的字体文件不会更新,因此字形与数字的对应关系可以直接写死在代码中。
如果网页源代码中的数字变成了 &#x.... 形式的字符实体,说明网站使用了字体反爬。
import re

import requests
from fontTools.ttLib import TTFont  # noqa: F401  (only used for the one-off glyph inspection described below)

# Page whose digits are hidden behind a custom web font.
MOVIE_URL = 'http://www.porters.vip/confusion/movie.html'

# Glyph name -> the digit that glyph actually draws.
#
# How this table was obtained (one-off, not needed at runtime):
#   1. Download http://www.porters.vip/confusion/font/movie.woff.
#   2. TTFont(path).getGlyphOrder()[2:] lists the glyph names below.
#   3. Open the font in a font editor (e.g. FontCreator) to see which digit
#      each glyph renders.
# The table is hard-coded on the assumption that the site does not rotate
# its font file; if it does, the table must be regenerated.
_GLYPH_DIGITS = {
    'uniE9C7': '7',
    'uniF57B': '1',
    'uniE7DF': '2',
    'uniE339': '6',
    'uniE624': '9',
    'uniEA16': '5',
    'uniF19A': '3',
    'uniEE76': '0',
    'uniF593': '4',
    'uniEFD4': '8',
}

# Lower-case hex code point (glyph name without the 'uni' prefix) -> digit,
# e.g. 'e9c7' -> '7'.
_CODE_TO_DIGIT = {
    glyph[len('uni'):].lower(): digit for glyph, digit in _GLYPH_DIGITS.items()
}

# Matches the character reference of any known glyph, e.g. '&#xe9c7'.
# Hex digits in HTML character references are case-insensitive and the
# terminating ';' is optional in this page's markup, so accept both forms and
# swallow the ';' when present to avoid leaving a stray one in the output.
_ENTITY_RE = re.compile(
    '&#x(' + '|'.join(re.escape(code) for code in _CODE_TO_DIGIT) + ');?',
    re.IGNORECASE,
)


def _decode_digits(html):
    """Return *html* with every known obfuscated entity replaced by its digit."""
    return _ENTITY_RE.sub(
        lambda match: _CODE_TO_DIGIT[match.group(1).lower()], html
    )


def spider(url: str = MOVIE_URL, timeout: float = 10) -> str:
    """Fetch the page, undo the font-based digit obfuscation, and print it.

    Args:
        url: Page to download. Defaults to the porters.vip movie page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The page source with the obfuscated entities replaced by digits
        (the same text that is printed).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Do not try to decode an error page as if it were the real content.
    response.raise_for_status()

    decoded = _decode_digits(response.text)
    print(decoded)
    return decoded


# Run the scrape as soon as the file is executed (note: this also fires on
# import).
spider()