1、将我们的pdf转换成图片:
def main(self, pic_path, cropped_pic_path, pgn=None):
    """
    Render one page to an image and save a crop for every located figure.

    :param pic_path: path passed on to ``self.to_pic`` for the page image
        that the crops are taken from
    :param cropped_pic_path: location where the cropped images are saved
    :param pgn: index into ``self.doc_pdfs`` / ``self.doc_pics`` selecting
        the object to process; when ``None`` nothing is done
    :return: None
    """
    if pgn is None:
        return

    doc_pdf = self.doc_pdfs[pgn]
    doc_pic = self.doc_pics[pgn]
    # NOTE(review): the literal 2 is presumably a zoom/scale factor for the
    # rendered image -- confirm against ``to_pic``.
    path = self.to_pic(doc_pic, 2, pgn, pic_path)
    loc_name_pic, canvas_size = self.get_pic_loc(doc_pdf)
    if not loc_name_pic:
        # No figure locations were reported, so there is nothing to crop.
        return

    for entry in loc_name_pic:
        position = entry[1]
        # '/' would be read as a directory separator in the output file name.
        cropped_pic_name = entry[0].replace('/', '_')
        self.get_crops(path, canvas_size, position, cropped_pic_name, cropped_pic_path)
2、通过正则匹配到图表对应的位置:
# Walk the layout elements and record, for every figure caption, the bbox that
# bounds the figure from above (loc_top) and the caption itself (loc_bottom).
# NOTE(review): nesting was reconstructed from a flattened listing; the
# accumulators (loc_top, loc_bottom, panduan, value, ...) are initialised
# outside this excerpt -- confirm against the full function.
for i in layout:
    if hasattr(i, 'get_text'):
        text = i.get_text().strip()
        # Skip elements the `zhongwenjiance` pattern does not match
        # (presumably a "contains Chinese text" check -- confirm).
        if zhongwenjiance.search(text) is None:
            continue
        if re.search(r'Fig.\d', text):
            # Caption-like element. The second line of its text is stored as
            # the name paired with the upper bound.
            if panduan:
                # The previous recorded element was also a caption, so the
                # upper bound of this figure is that caption's bbox.
                shifouduogetu = True
                loc_top.append((value_bottom_bbox, text.split('\n')[1]))
            else:
                # Otherwise the upper bound is the last long text block seen.
                loc_top.append((value, text.split('\n')[1]))
            loc_bottom.append((i.bbox, text))
            value_bottom_bbox = i.bbox
            value_bottom_text = text.split('\n')[1]
            panduan = True
        elif len(re.sub(' +', '', text)) > 100:
            # More than 100 characters once runs of spaces are removed:
            # remember this block as the candidate upper bound.
            panduan = False
            value = i.bbox
            value_text = text
3、在原来的图片上进行截取:
def get_crops(self, pic_path, canvas_size, position, cropped_pic_name, cropped_pic_path):
    """
    Crop a full-width horizontal band out of an image and save it as a PNG.

    :param pic_path: path of the image to crop
    :param canvas_size: size of the page as a PDF, tuple (0, 0, width, height);
        only the height (index 3) is used
    :param position: vertical extent to crop, tuple (y1, y2), in the same
        units as ``canvas_size``
    :param cropped_pic_name: name of the cropped image (without extension)
    :param cropped_pic_path: directory in which the cropped image is saved
    :return: None
    """
    def _clean(part):
        # Drop surrounding whitespace and every newline and space.
        return part.strip().replace('\n', '').replace(' ', '')

    # Extra margin, in canvas units, added beyond both ends of the band.
    size_increase = 10
    canvas_height = canvas_size[3]
    path = os.path.join(_clean(cropped_pic_path), _clean(cropped_pic_name)) + '.png'

    # The context manager closes the underlying file once the crop is saved.
    with Image.open(pic_path) as img:
        width, height = img.size
        # Canvas coordinates are rescaled to pixels and the y axis is flipped
        # (1 - y / canvas_height) to get image rows.
        y1 = height * (1 - (position[1] + size_increase) / canvas_height)
        y2 = height * (1 - (position[0] - size_increase) / canvas_height)
        # Keep the box inside the image so the margin cannot add empty rows.
        y1 = max(0, y1)
        y2 = min(height, y2)
        cropped_img = img.crop((0, y1, width, y2))
        print(path)
        cropped_img.save(path)
    print('成功截取图片:', cropped_pic_name)
4、效果图: