import fitz
import tkinter as tk
from tkinter import scrolledtext
from PIL import Image, ImageTk
import io
def extract_text_and_images_from_pdf(pdf_path):
    """Extract the plain text and the embedded images of a PDF file.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the ``get_text("text")``
        output of every page, concatenated in page order. ``images`` is a
        list of PIL images, one per entry returned by ``page.get_images()``
        (an image referenced on several pages therefore appears several
        times).
    """
    # ``extract_image`` reports the colorspace as a number; the original
    # code treated the value 4 as CMYK and converted those images to RGB.
    cmyk_colorspace = 4

    page_texts = []
    images = []
    # The context manager closes the document once extraction is finished,
    # even if a page or image raises.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            page_texts.append(page.get_text("text"))
            for image_info in page.get_images():
                xref = image_info[0]  # first field of the entry is the xref
                base_image = doc.extract_image(xref)
                # The image bytes are copied into a BytesIO owned by the PIL
                # image, so the image stays usable after the document closes.
                pil_image = Image.open(io.BytesIO(base_image["image"]))
                if base_image["colorspace"] == cmyk_colorspace:
                    pil_image = pil_image.convert("RGB")
                images.append(pil_image)
    return "".join(page_texts), images
def display_text_and_images_in_ui(text, images):
    """Show extracted text and image thumbnails side by side in a Tk window.

    The left side is a scrollable text area filled with ``text``; the right
    side is a canvas with a vertical scrollbar on which the images are
    stacked from top to bottom. The call blocks in ``root.mainloop()`` until
    the window is closed.

    Args:
        text: String inserted into the text area.
        images: Iterable of PIL images. The images themselves are left
            untouched; thumbnails are produced from copies.
    """
    thumbnail_size = (300, 300)  # maximum width/height of each thumbnail
    image_gap = 50  # vertical spacing between consecutive images
    image_center_x = 200  # horizontal center of the 400 px wide canvas

    root = tk.Tk()
    root.title("PDF 内容和图像")
    root.geometry("1200x600")

    # Left pane: the extracted text.
    text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, font=("Helvetica", 12))
    text_area.insert(tk.INSERT, text)
    text_area.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # Right pane: a canvas plus vertical scrollbar for the images.
    canvas_frame = tk.Frame(root, width=400, height=600)
    canvas_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True)
    canvas = tk.Canvas(canvas_frame, width=400, height=600)
    canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    scrollbar = tk.Scrollbar(canvas_frame, orient=tk.VERTICAL, command=canvas.yview)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    canvas.configure(yscrollcommand=scrollbar.set)
    # Recompute the scrollable area whenever the canvas is (re)configured.
    canvas.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))

    next_top = 10  # y coordinate where the next image starts
    # Hold references to the PhotoImage objects for as long as the window is
    # shown, so the canvas items keep a live image to draw.
    photo_images = []
    for image in images:
        # ``thumbnail`` resizes in place, so work on a copy to avoid
        # shrinking the caller's images.
        thumb = image.copy()
        thumb.thumbnail(thumbnail_size)
        photo = ImageTk.PhotoImage(thumb)
        photo_images.append(photo)
        # create_image anchors at the image center by default, hence the
        # half-height offset.
        canvas.create_image(image_center_x, next_top + photo.height() // 2, image=photo)
        next_top += photo.height() + image_gap

    root.mainloop()
# Read the target PDF and pull out its text content and embedded images.
pdf_file_path = "恐龙.pdf"
extracted_text, extracted_images = extract_text_and_images_from_pdf(
    pdf_path=pdf_file_path
)
# Show the extracted text and images in the UI window.
display_text_and_images_in_ui(text=extracted_text, images=extracted_images)
# Python 读取并在 UI 界面显示读取的文字和图像
# 最新推荐文章于 2023-10-24 16:24:33 发布