# _*_ encoding:utf-8 _*_
"""Capture PDF screenshots around reference strings and export them to Excel.

The input workbook must contain a sheet named ``Result``. Each data row
(row 1 is treated as a header) is read as:

    column 0: customer id
    column 1: PDF file name
    column 2: directory containing the PDF
    column 3: comma-separated reference strings wrapped in one leading and
              one trailing character (presumably brackets, e.g. ``[a,b]``
              -- the code only drops the first and last character)

For every reference found in a PDF, a cropped PNG of the matching line is
written to ``<workbook dir>/Image``; afterwards all PNGs are placed into
``<workbook dir>/picture_export.xlsx``, one worksheet per customer.
"""
__author__ = 'Jeff.xie'

import os
import shutil
import sys
import time
import traceback

import fitz
import openpyxl
import xlsxwriter

# References containing this fragment are never captured.
_SKIP_MARKER = "eive money"
# Horizontal crop margins, in PDF points, taken off the left/right page edge.
_LEFT_MARGIN = 60
_RIGHT_MARGIN = 50
# Vertical padding around a hit as a fraction of the page height.
# Smaller values produce a shorter screenshot.
_PAD_ABOVE_RATIO = 0.02
_PAD_BELOW_RATIO = 0.015
# Render zoom factor; larger values produce a sharper (and bigger) image.
_ZOOM = 3.2
# Vertical spacing, in worksheet rows, between consecutive images.
_ROWS_PER_IMAGE = 10


def _first_attr(obj, *names):
    """Return the first attribute of *obj* that exists among *names*.

    Lets the code use PyMuPDF's current snake_case API while still running
    on older releases that only provide the legacy camelCase names.
    """
    for name in names:
        if hasattr(obj, name):
            return getattr(obj, name)
    raise AttributeError(
        "{!r} has none of the attributes {}".format(type(obj).__name__, names)
    )


class Ref():
    """One workbook row: a PDF file, its customer id and its references."""

    def __init__(self, source_file, Customer_ID, Ref_list):
        self.Customer_ID = Customer_ID
        self.source_file = source_file
        self.Ref_list = Ref_list


class Shot():
    """Create reference screenshots from PDFs and collect them in Excel."""

    # Class-level names are kept for backward compatibility only; every
    # instance gets its own lists in __init__ so state is not shared.
    ref_summary = []
    sheet_list = []
    Image_path = ""

    def __init__(self):
        # References that have already been captured (used for de-duplication).
        self.ref_summary = []
        # Customer ids in the order their rows were read.
        self.sheet_list = []

    def get_file_shot(self, sourcefile, customer_id, ref_list, image_dir):
        """Capture every not-yet-seen reference of *ref_list* in one PDF.

        References that are blank, contain ``"eive money"`` or were already
        captured by this instance are skipped. The PDF is always closed,
        even when a capture fails.
        """
        doc = fitz.open(sourcefile)
        try:
            for ref in ref_list:
                if not ref.strip():
                    continue
                if _SKIP_MARKER in ref or ref in self.ref_summary:
                    continue
                self.ref_summary.append(ref)
                self.get_ref_shot(doc, customer_id, ref, image_dir)
        finally:
            doc.close()

    def get_ref_shot(self, doc, customer_id, ref_text, image_dir):
        """Write a PNG of each page region where *ref_text* occurs in *doc*.

        The file is named ``<customer_id>_<ref_text stripped>.png`` inside
        *image_dir*. All hits share that name, so when the text occurs more
        than once only the last hit remains on disk.
        """
        target = os.path.join(image_dir, customer_id + "_" + ref_text.strip() + ".png")
        zoom_mat = fitz.Matrix(_ZOOM, _ZOOM)
        for page in doc:
            page_rect = page.rect
            page_height = page_rect.br.y - page_rect.tl.y
            pad_above = page_height * _PAD_ABOVE_RATIO
            pad_below = page_height * _PAD_BELOW_RATIO
            for inst in _first_attr(page, "search_for", "searchFor")(ref_text):
                # Highlight first so the marking is visible in the rendering.
                _first_attr(page, "add_highlight_annot", "addHighlightAnnot")(inst)
                # Crop box: nearly the full page width, padded vertically
                # around the hit and clamped to the page.
                tl_pt = fitz.Point(page_rect.tl.x + _LEFT_MARGIN,
                                   max(page_rect.tl.y, inst.tl.y - pad_above))
                br_pt = fitz.Point(page_rect.br.x - _RIGHT_MARGIN,
                                   min(page_rect.br.y, inst.br.y + pad_below))
                hl_clip = fitz.Rect(tl_pt, br_pt)
                pix = _first_attr(page, "get_pixmap", "getPixmap")(
                    matrix=zoom_mat, clip=hl_clip)
                _first_attr(pix, "save", "writePNG")(target)

    def write_picture(self, excel_dir, image_dir, customer_list):
        """Create ``picture_export.xlsx`` in *excel_dir* from the PNGs.

        A ``Test Result`` sheet is added first, then one sheet per distinct
        customer holding the images whose file name starts with
        ``<customer>_``. Empty cells are ignored and repeated customers are
        collapsed, because a workbook cannot hold two sheets of one name.
        """
        cleaned = (str(c).strip() for c in customer_list if c is not None)
        customers = [c for c in dict.fromkeys(cleaned) if c]
        # Sorted so the image order inside a sheet is deterministic.
        picture_files = sorted(os.listdir(image_dir))
        with xlsxwriter.Workbook(os.path.join(excel_dir, "picture_export.xlsx")) as book:
            book.add_worksheet("Test Result")
            for customer in customers:
                sheet_pic = book.add_worksheet(customer)
                # Include the separator so "123" does not match "1234_...".
                prefix = customer + "_"
                matches = [f for f in picture_files if f.startswith(prefix)]
                for index, picture in enumerate(matches):
                    self.write_picture_to_excel(
                        sheet_pic, os.path.join(image_dir, picture), index)

    def generate_picture(self, sheet, refs_list, wk, image_dir):
        """Read the data rows of *sheet* and capture all their references.

        One ``Ref`` per usable row is appended to *refs_list* (mutated in
        place); rows lacking any of the first four cells are skipped. *wk*
        is closed once the rows are read, then every entry of *refs_list* is
        processed with ``get_file_shot``.
        """
        try:
            # min_row=2 skips the header row.
            for row in sheet.iter_rows(min_row=2, values_only=True):
                if len(row) < 4 or any(cell is None for cell in row[:4]):
                    continue
                customer_id = str(row[0]).strip()
                # Drop the first and last character, then split on commas.
                ref_items = str(row[3]).strip()[1:-1].split(",")
                # normpath yields backslashes on Windows and leaves POSIX
                # paths usable elsewhere.
                folder = os.path.normpath(str(row[2]).strip())
                abs_path = os.path.join(folder, str(row[1])).strip()
                refs_list.append(Ref(abs_path, customer_id, ref_items))
                self.sheet_list.append(customer_id)
        finally:
            wk.close()
        for refs in refs_list:
            self.get_file_shot(refs.source_file, refs.Customer_ID, refs.Ref_list, image_dir)

    def write_picture_to_excel(self, sheet, picture_file, index):
        """Insert *picture_file* into column A, ten rows below the previous one.

        Cell rows are 1-based (there is no ``A0``), so image 0 lands in A1.
        """
        sheet.insert_image('A{}'.format(index * _ROWS_PER_IMAGE + 1), picture_file)

    def main_shot(self, path):
        """Run the whole export for the workbook at *path*.

        The ``Image`` directory next to the workbook is recreated empty, the
        screenshots are generated, and ``picture_export.xlsx`` is written to
        the workbook's directory.
        """
        refs_list = []
        wk = openpyxl.load_workbook(path)
        try:
            sheet = wk['Result']
            # First column without its header cell.
            customer_list = [
                row[0]
                for row in sheet.iter_rows(min_row=2, max_col=1, values_only=True)
            ]
        except Exception:
            wk.close()
            raise

        excel_dir = os.path.dirname(path)
        image_dir = os.path.join(excel_dir, "Image")
        if os.path.exists(image_dir):
            shutil.rmtree(image_dir)
            # NOTE(review): the short sleeps presumably give the file system
            # time to settle after the delete -- confirm they are needed.
            time.sleep(0.2)
            os.mkdir(image_dir)
            print("image_dir is exist")
        else:
            os.mkdir(image_dir)
        time.sleep(0.2)

        self.generate_picture(sheet, refs_list, wk, image_dir)
        self.write_picture(excel_dir, image_dir, customer_list)


def main():
    """Command-line entry point: ``script.py <path to Result workbook>``."""
    start_time = time.time()
    if len(sys.argv) < 2:
        print("did not execute")
        print("usage: {} <path to workbook with a 'Result' sheet>".format(sys.argv[0]),
              file=sys.stderr)
    else:
        try:
            Shot().main_shot(sys.argv[1])
        except Exception:
            # Top-level boundary: keep the original message, but show why.
            print("did not execute")
            traceback.print_exc()
    end_time = time.time()
    print("cost time: {}".format(end_time - start_time))


if __name__ == '__main__':
    main()
Python操作Excel和PDF截图功能
最新推荐文章于 2024-10-24 17:31:49 发布