问题背景
媳妇平时需要制作课件,下载收费,自己做又太浪费时间。试用了几个下载器,大概摸清了工作原理,打算自己写个脚本。大部分文库网页显示的PPT都是纯图片(爬取源文件有一定技术难度),生成ppt的一种常用的方法是爬取这些图片保存到本地,再批量导入到ppt中。导入word的操作类似。爬取部分就不写了,已经写过几个类似的。因为word和ppt文档跨设备跨平台时会出现显示异常的情况,这时需要转为pdf再使用。在移动端(手机)上使用时,往往图片比文件方便快捷。实现导出pdf很简单,ppt也支持导出图片,调用com接口在后台导出即可。剩下要解决的便是如何实现docx、pdf转图片,快速浏览了下网友们分享的源码,暂时还没发现一个很好的解决方案。
既然有现成的转换器和下载器,为什么还要重复造轮子?
答:主要有两点原因:一是现成工具对分辨率和使用次数有限制,解除限制需要充钱;二是有的工具会强制要求关注公众号、下载捆绑软件等。
思路
- 批量图片导入ppt
- 批量图片导入word
- ppt、word文档转pdf
- ppt转图片
- pdf转图片
- ppt转长图
以下是尚未写完的代码,以后有时间、也还有心思写文章的话,再来完善。
未写完的代码
import os
# import sys
import win32com
from comtypes.client import CreateObject
from win32com.client import gencache, constants
from docx import Document
from docx.shared import Cm, Inches, Pt
from PIL import Image
# method one
class Conversion:
    """Convert Office documents to PDF/images and merge pictures into a .docx.

    Creating an instance starts Word and PowerPoint through COM (Windows
    only).  Call :meth:`close` when finished to quit both applications.
    """

    # Extensions are matched case-insensitively (see ``conversion``).
    _WORD_EXTENSIONS = (".doc", ".docx")
    _PPT_EXTENSIONS = (".ppt", ".pptx")
    _PICTURE_EXTENSIONS = (".jpg", ".png")
    # PowerPoint SaveAs file type used for the per-slide image export
    # (PpSaveAsFileType.ppSaveAsJPG).
    _PPT_FORMAT_JPG = 17

    def __init__(self):
        # WdSaveFormat.wdFormatPDF
        self.word_format_pdf = 17
        self.word_to_pdf = CreateObject("Word.Application", dynamic=True)
        # PpSaveAsFileType.ppSaveAsPDF
        self.ppt_format_pdf = 32
        self.ppt_to_pdf = CreateObject("Powerpoint.Application", dynamic=True)
        # A4 height / width.  Kept for backward compatibility; ``merge`` now
        # derives the aspect ratio from its own ``page``/``margins`` arguments.
        self.ratio = 29.7 / 21
        # self.ppt_to_pdf.Visible = True

    def close(self):
        """Quit the Word and PowerPoint applications started by ``__init__``."""
        for application in (self.word_to_pdf, self.ppt_to_pdf):
            try:
                application.Quit()
            except Exception as e:  # COM errors vary by binding; best effort
                print(e)

    def conversion(self, load, mode=None):
        """Convert every Office document in directory *load* to PDF.

        Args:
            load: Directory to scan (not recursive).
            mode: ``None`` converts Word files (.doc/.docx).  Any other value
                converts PowerPoint files (.ppt/.pptx); a string starting
                with ``"im"`` additionally exports the slides as JPG images
                into a folder named after the presentation.

        Documents whose PDF already exists are skipped entirely (including
        the image export).  Failures are printed and do not stop the batch.
        """
        load = os.path.abspath(load)
        is_word = mode is None
        extensions = self._WORD_EXTENSIONS if is_word else self._PPT_EXTENSIONS
        basename_list = [
            name for name in os.listdir(load)
            # "~$name.docx" files are Office lock files, not documents.
            if name.lower().endswith(extensions) and not name.startswith("~$")
        ]
        print(load, basename_list)
        export_images = isinstance(mode, str) and mode.startswith("im")

        for basename in basename_list:
            input_absname = os.path.join(load, basename)
            print(input_absname)
            # splitext keeps dots inside the name ("a.b.docx" -> "a.b").
            stem = os.path.splitext(basename)[0]
            pdf_absname = os.path.join(load, stem + '.pdf')
            if os.path.exists(pdf_absname):
                continue

            document = None
            try:
                if is_word:
                    document = self.word_to_pdf.Documents.Open(input_absname)
                    document.SaveAs(pdf_absname, self.word_format_pdf)
                else:
                    document = self.ppt_to_pdf.Presentations.Open(input_absname)
                    document.SaveAs(pdf_absname, self.ppt_format_pdf)
                if export_images:
                    document.SaveAs(os.path.join(load, stem), self._PPT_FORMAT_JPG)
            except Exception as e:  # COM errors vary by binding; keep going
                print(e)
            finally:
                # Always release the document, even when SaveAs failed.
                if document is not None:
                    try:
                        document.Close()
                    except Exception as e:
                        print(e)

    def merge(self, root=os.curdir, mode=None, page=(21, 29.7), margins=(0, 0, 0, 0)):
        """Insert every .jpg/.png found in *root* into a new ``Merge.docx``.

        Args:
            root: Directory holding the pictures (not recursive).
            mode: When not ``None``, Word documents in the current directory
                (including the new ``Merge.docx``) are converted to PDF.
            page: ``(width, height)`` of the page in centimetres.
            margins: ``(top, bottom, left, right)`` margins in centimetres.

        Raises:
            ValueError: If the margins leave no usable area on the page.

        ``Merge.docx`` is written to the current working directory.  Each
        picture is scaled to fit the usable page area while keeping its
        aspect ratio.  Pictures that cannot be read are reported and skipped.
        """
        width, height = page
        top, bottom, left, right = margins
        usable_width = width - left - right
        usable_height = height - top - bottom
        if usable_width <= 0 or usable_height <= 0:
            raise ValueError(f'margins {margins} leave no usable area on page {page}')

        document = Document()
        section = document.sections[0]
        section.page_width = Cm(width)
        section.page_height = Cm(height)
        section.top_margin = Cm(top)
        section.bottom_margin = Cm(bottom)
        section.left_margin = Cm(left)
        section.right_margin = Cm(right)

        root = os.path.abspath(root)
        # Join with root: abspath(name) alone would resolve against the
        # current directory.  Sorted so the page order is deterministic.
        pictures = [os.path.join(root, name) for name in sorted(os.listdir(root))
                    if name.lower().endswith(self._PICTURE_EXTENSIONS)]
        print(pictures)

        for pic in pictures:
            try:
                with Image.open(pic) as image:
                    w, d = image.size
                print(f'adding the picture {pic} to document')
                # Cross-multiplied form of w/d >= usable_width/usable_height:
                # a picture relatively wider than the usable area is limited
                # by width, otherwise by height.  add_picture only accepts
                # width/height, so no vertical offset can be passed.
                if w * usable_height >= d * usable_width:
                    document.add_picture(pic, width=Cm(usable_width))
                else:
                    document.add_picture(pic, height=Cm(usable_height))
            except Exception as e:  # unreadable/unsupported image: skip it
                print(f'the picture {pic} is not recognized.\n ', e)

        document.save('Merge.docx')
        if mode is not None:
            self.conversion('.')
# method two
def create_pdf(word_path, pdf_path):
    """Export the Word document at *word_path* to a PDF at *pdf_path*.

    Args:
        word_path: Path of the source document; opened read-only.
        pdf_path: Path of the PDF to write.

    The export includes markup and creates bookmarks from headings.  The
    document is closed and Word is quit even when opening or exporting
    fails; the original exception then propagates to the caller.
    """
    word = gencache.EnsureDispatch("Word.Application")
    try:
        # Absolute paths: Word does not resolve relative paths against the
        # Python process's working directory.
        doc = word.Documents.Open(os.path.abspath(word_path), ReadOnly=1)
        try:
            doc.ExportAsFixedFormat(os.path.abspath(pdf_path),
                                    constants.wdExportFormatPDF,
                                    Item=constants.wdExportDocumentWithMarkup,
                                    CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        finally:
            doc.Close(constants.wdDoNotSaveChanges)
    finally:
        word.Quit(constants.wdDoNotSaveChanges)
def ppt_to_img(root=os.path.abspath(os.curdir)):
    """Export every presentation in *root* as slide images and as a PDF.

    Args:
        root: Directory to scan (not recursive) for .ppt/.pptx files.

    For ``name.pptx`` the slide images are saved under ``root/name`` and the
    PDF as ``root/name.pdf``.  Each presentation is closed even if an export
    fails.  PowerPoint itself is left running, as before.
    """
    # PowerPoint needs absolute paths; a caller may pass a relative root.
    root = os.path.abspath(root)
    # powerpoint = win32com.client.Dispatch("PowerPoint.Application")
    powerpoint = gencache.EnsureDispatch("PowerPoint.Application")
    powerpoint.Visible = True
    ppt_files = [
        name for name in os.listdir(root)
        # "~$name.pptx" files are Office lock files, not presentations.
        if name.lower().endswith(('.ppt', '.pptx')) and not name.startswith('~$')
    ]
    for name in ppt_files:
        ppt = powerpoint.Presentations.Open(os.path.join(root, name))
        try:
            # splitext keeps dots inside the name ("a.b.pptx" -> "a.b").
            basename = os.path.splitext(name)[0]
            print(basename)
            ppt.SaveAs(os.path.join(root, basename), 17)  # ppSaveAsJPG
            ppt.SaveAs(os.path.join(root, basename + '.pdf'), 32)  # ppSaveAsPDF
        finally:
            ppt.Close()
if __name__ == '__main__':
    # Script entry point: merge the pictures in the current directory into
    # Merge.docx, then convert the Word documents found there to PDF
    # (any non-None mode triggers the conversion step).
    converter = Conversion()
    converter.merge(mode=2)

    # Other entry points, kept for reference:
    #   converter.conversion('.', 'imgs')
    #   create_pdf(r"G:\Desktop\ls\project 202007\Merge.docx", r"G:\Desktop\ls\project 202007\Merge.pdf")
    #   ppt_to_img()