from fontTools.ttLib import TTFont
import ddddocr
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont
def convert_cmap_to_image(cmap_code, font_path, img_size=1024):
    """Render the character for one cmap code point onto a square bitmap.

    Args:
        cmap_code: Unicode code point (int) of the character to draw.
        font_path: Path of the font file handed to ``ImageFont.truetype``.
        img_size: Side length in pixels of the square image; the same value
            is used as the font size. Defaults to 1024, the value that was
            previously hard-coded.

    Returns:
        A mode ``"1"`` PIL image of ``img_size`` x ``img_size`` pixels,
        created with fill value 255, with the character drawn centred.
    """
    img = Image.new("1", (img_size, img_size), 255)
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype(font_path, img_size)
    character = chr(cmap_code)

    left, top, right, bottom = draw.textbbox((0, 0), character, font=font)
    # textbbox reports the ink bounds relative to the drawing origin, and
    # left/top are usually non-zero. Subtract them so the ink itself (not
    # the text origin) is centred; otherwise the glyph is shifted by that
    # offset and can be clipped at the image edge.
    x = (img_size - (right - left)) // 2 - left
    y = (img_size - (bottom - top)) // 2 - top
    draw.text((x, y), character, font=font)
    return img
def extract_text_from_font(font_path):
    """Map each code point in a font's cmap to the text OCR reads from its glyph.

    Every code point of the font's best cmap is rendered to an image with
    ``convert_cmap_to_image``, encoded as PNG in memory and passed to
    ddddocr for classification. Progress is printed to stdout.

    Side effects:
        Writes an XML dump of the font to ``xxx.xml`` (relative path, so it
        lands in the current working directory).

    Args:
        font_path: Path of the font file to analyse.

    Returns:
        A dict mapping each code point (int) to the OCR result for its
        glyph. Empty when the font exposes no Unicode cmap.
    """
    # Close the font file deterministically instead of leaking the handle.
    with TTFont(font_path) as font:
        font.saveXML("xxx.xml")
        # getBestCmap() returns None when no Unicode cmap subtable exists;
        # fall back to an empty mapping rather than crashing on .items().
        cmap = font.getBestCmap() or {}

    ocr = ddddocr.DdddOcr(beta=True, show_ad=False)
    print("font.getBestCmap().items():", cmap.items())

    font_map = {}
    for cmap_code, glyph_name in cmap.items():
        image = convert_cmap_to_image(cmap_code, font_path)
        with BytesIO() as buffer:
            image.save(buffer, "PNG")
            png_bytes = buffer.getvalue()
        text = ocr.classification(png_bytes)
        print("text:", text)
        print(f"Unicode码点:{cmap_code} - Unicode字符:{glyph_name},识别结果:{text}")
        font_map[cmap_code] = text
    return font_map
# Script entry: run the OCR mapping over the bundled font and print the result.
# NOTE(review): this executes at import time as well; consider moving it
# behind an ``if __name__ == "__main__":`` guard if the module gets imported.
font_file_path = "font.woff"
font_map = extract_text_from_font(font_file_path)
print(font_map)