关键点:
- pdf2image.convert_from_path：把 PDF 文件逐页转换为图片（返回 PIL Image 列表，每页一张）
- 尺寸限制：img.thumbnail((宽, 高)) 原地缩小图片，使其不超过给定尺寸并保持宽高比（不会放大）
from pdf2image import convert_from_path
def process(split):
    """Render every PDF in directory ``split`` to PNG page images under ``images/``.

    Each page of ``<split>/<name>.pdf`` is written to
    ``images/<split>_<name>_<i>.png``, where ``<i>`` is the 0-based position
    of the page in the list returned by ``convert_from_path``. Pages are
    passed through ``img.thumbnail((3840, 2160))`` before saving, which
    shrinks oversized pages to fit that box while keeping the aspect ratio.

    Args:
        split: Path of the directory to scan for ``*.pdf`` files. The value
            is also embedded verbatim in the output file names.
    """
    fnames = [fname for fname in os.listdir(split) if fname.endswith('.pdf')]

    # Image.save does not create missing directories, so make sure the target
    # exists before any page is written. Using dirname of the output prefix
    # also covers a ``split`` value that itself contains path separators.
    os.makedirs(os.path.dirname(f'images/{split}_'), exist_ok=True)

    def process_pdf(fname):
        """Convert one PDF (a file name inside ``split``) into per-page PNGs."""
        # Strip only the trailing extension: str.replace('.pdf', '') would
        # also delete a '.pdf' in the middle of the name (e.g. 'a.pdf.v2.pdf')
        # and could make two different inputs collide on the same output name.
        uid = os.path.splitext(fname)[0]
        imgs = convert_from_path(f'{split}/{fname}')
        for i, img in enumerate(imgs):
            # thumbnail() modifies the image in place and never enlarges it.
            img.thumbnail((3840, 2160))
            # NOTE(review): PNG is lossless and Pillow's PNG writer documents
            # no `quality` option, so this argument presumably has no effect.
            # It is kept to leave output untouched; use `optimize=True` or
            # `compress_level` (or switch to JPEG) if smaller files are wanted.
            img.save(f'images/{split}_{uid}_{i}.png', quality=50)

    for fname in tqdm.tqdm(fnames):
        process_pdf(fname)