Python 读取 PDF 并在 UI 界面显示读取的文字和图像

import fitz
import tkinter as tk
from tkinter import scrolledtext
from PIL import Image, ImageTk
import io

def extract_text_and_images_from_pdf(pdf_path):
    """Extract the plain text and the embedded images of a PDF file.

    Args:
        pdf_path: Path of the PDF document; passed straight to ``fitz.open``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every page
        concatenated in page order. ``images`` is a list of PIL images in
        the order the pages reference them; an image referenced on several
        pages appears once per reference.
    """
    page_texts = []
    images = []
    doc = fitz.open(pdf_path)
    try:
        for page in doc:
            page_texts.append(page.get_text("text"))
            for image_info in page.get_images():
                xref = image_info[0]
                base_image = doc.extract_image(xref)
                # The extracted bytes are a complete encoded image, so PIL
                # can decode them directly from an in-memory buffer.
                pil_image = Image.open(io.BytesIO(base_image["image"]))
                # Normalise CMYK data to RGB. The decoded mode is checked
                # in addition to the PDF-reported component count so the
                # conversion does not depend on the metadata alone.
                if base_image["colorspace"] == 4 or pil_image.mode == "CMYK":
                    pil_image = pil_image.convert("RGB")
                images.append(pil_image)
    finally:
        # Release the file handle even when extraction fails part-way.
        doc.close()
    return "".join(page_texts), images

def display_text_and_images_in_ui(text, images):
    """Show extracted text and images side by side in a Tk window.

    The text goes into a scrollable text widget on the left. The images are
    drawn as thumbnails of at most 300x300 pixels, stacked vertically on a
    scrollable canvas on the right. The call blocks in the Tk main loop
    until the window is closed.

    Args:
        text: String inserted into the text widget.
        images: Iterable of PIL images. They are not modified; thumbnails
            are made from copies.
    """
    root = tk.Tk()
    root.title("PDF 内容和图像")
    root.geometry("1200x600")

    text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, font=("Helvetica", 12))
    text_area.insert(tk.INSERT, text)
    text_area.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    canvas_frame = tk.Frame(root, width=400, height=600)
    canvas_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True)

    canvas = tk.Canvas(canvas_frame, width=400, height=600)
    canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    scrollbar = tk.Scrollbar(canvas_frame, orient=tk.VERTICAL, command=canvas.yview)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    canvas.configure(yscrollcommand=scrollbar.set)
    canvas.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))

    # Tk only keeps a weak link to PhotoImage objects, so hold references
    # here for as long as the window is open.
    photo_images = []
    next_top = 10  # y coordinate of the top edge of the next thumbnail

    for image in images:
        # thumbnail() resizes in place; work on a copy so the caller's
        # images keep their original size.
        thumb = image.copy()
        thumb.thumbnail((300, 300))
        photo = ImageTk.PhotoImage(thumb)
        photo_images.append(photo)

        # create_image anchors at the centre by default, hence the half
        # height offset.
        canvas.create_image(200, next_top + photo.height() // 2, image=photo)
        next_top += photo.height() + 50  # vertical gap between images

    if photo_images:
        # Make the whole stack scrollable right away instead of waiting for
        # the next <Configure> event.
        canvas.configure(scrollregion=canvas.bbox("all"))

    root.mainloop()

# Read the specified PDF file and extract its text content and images.
# NOTE: these statements sit at module top level, so they run on import too.
pdf_file_path = "恐龙.pdf"
extracted_text, extracted_images = extract_text_and_images_from_pdf(pdf_file_path)

# Display the extracted text content and images in the UI window.
display_text_and_images_in_ui(extracted_text, extracted_images)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

司南锤

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值