采用com唤起 ppt进行转pdf,从pdf中提取图片
- 安装
bash
pip install PyMuPDF
pip install comtypes
- code
python
import os
import shutil
import comtypes.client
import fitz
# 逻辑
'''
1. 根据输入的文件夹提取ppt,转成pdf
2. pdf保留和ppt一样的文件夹结构
3. 从pdf提取图片,保留一样的结构,以ppt文件名命名文件夹
'''
def ensure_dir(path):
    """Create the directory ``path``, including missing parents, if needed.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
        OSError: If the directory cannot be created.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same directory in between.
    os.makedirs(path, exist_ok=True)
def ppt_to_pdf(ppt_path, pdf_path):
    """Convert a PowerPoint file to PDF by driving PowerPoint through COM.

    Args:
        ppt_path: Path of the source presentation (.ppt / .pptx).
        pdf_path: Path of the PDF file to write.

    Raises:
        Exception: Any error raised by comtypes / PowerPoint while starting
            the application, opening the presentation or saving it. The
            presentation and the application are still closed in that case.
    """
    # Second argument of Presentation.SaveAs: PpSaveAsFileType.ppSaveAsPDF.
    pp_save_as_pdf = 32

    # NOTE(review): PowerPoint is a separate process, so relative paths are
    # presumably not resolved against this script's working directory; pass
    # absolute paths to be safe.
    ppt_path = os.path.abspath(ppt_path)
    pdf_path = os.path.abspath(pdf_path)

    powerpoint = comtypes.client.CreateObject("PowerPoint.Application")
    try:
        powerpoint.Visible = 1
        presentation = powerpoint.Presentations.Open(ppt_path, WithWindow=False)
        try:
            presentation.SaveAs(pdf_path, pp_save_as_pdf)
        finally:
            # Close the presentation even when SaveAs fails.
            presentation.Close()
    finally:
        # Always shut the application down so no PowerPoint process lingers.
        powerpoint.Quit()
# 从pdf中提取图片,保存在文件名目录下
def extract_images_from_pdf(pdf_path, image_dir):
    """Extract every image referenced by the pages of a PDF into a directory.

    Files are named ``page<NNN>_img<NNN>.<ext>`` using the 1-based page number
    and the 1-based position of the image in that page's image list. An image
    referenced by several pages is written once per page.

    Args:
        pdf_path: Path of the PDF file to read.
        image_dir: Directory that receives the image files; created if missing.

    Returns:
        The number of image files written.
    """
    ensure_dir(image_dir)
    img_count = 0
    doc = fitz.open(pdf_path)
    try:
        for page_number, page in enumerate(doc, start=1):
            for img_index, img in enumerate(page.get_images(full=True), start=1):
                xref = img[0]  # first item of each entry is the image xref
                base_image = doc.extract_image(xref)
                image_name = (
                    f"page{page_number:03d}_img{img_index:03d}.{base_image['ext']}"
                )
                image_path = os.path.join(image_dir, image_name)
                with open(image_path, "wb") as f:
                    f.write(base_image["image"])
                img_count += 1
    finally:
        # Release the PDF handle even if extraction or a write fails.
        doc.close()
    return img_count
# 遍历整个文件夹
def process_ppt_directory(src_root, dst_root, error_dir):
    """Convert every PPT/PPTX under ``src_root`` to PDF and extract its images.

    The directory layout of ``src_root`` is mirrored under ``dst_root``. For a
    presentation ``name.pptx`` the PDF is written as ``name.pdf`` and its
    images go into a sibling ``name_images`` directory. A presentation whose
    conversion or image extraction fails is copied into the matching
    sub-directory of ``error_dir`` and processing continues with the next file.

    Args:
        src_root: Root directory that is searched recursively for presentations.
        dst_root: Root directory for the generated PDFs and image folders.
        error_dir: Root directory that collects presentations that failed.
    """
    for root, _dirs, files in os.walk(src_root):
        rel_path = os.path.relpath(root, src_root)
        # normpath drops the "." segment produced for the top-level directory.
        target_dir = os.path.normpath(os.path.join(dst_root, rel_path))
        # Derive the failure directory from the unchanged ``error_dir`` on
        # every iteration; reassigning ``error_dir`` itself would accumulate
        # the relative paths of all previously visited directories.
        failed_dir = os.path.normpath(os.path.join(error_dir, rel_path))
        ensure_dir(target_dir)

        for file in files:
            if not file.lower().endswith(('.ppt', '.pptx')):
                continue

            ppt_path = os.path.join(root, file)
            base_name = os.path.splitext(file)[0]
            pdf_path = os.path.join(target_dir, base_name + ".pdf")
            image_dir = os.path.join(target_dir, base_name + "_images")

            print(f"▶ 转换: {ppt_path}")
            try:
                ppt_to_pdf(ppt_path, pdf_path)
                print(f" ✅ 生成 PDF: {pdf_path}")
            except Exception as e:
                # Best-effort batch run: report, keep the file, move on.
                print(f"❌ PPT {ppt_path} 转 PDF 失败: {e}")
                _copy_failed_ppt(ppt_path, failed_dir)
                continue

            try:
                count = extract_images_from_pdf(pdf_path, image_dir)
                print(f" ✅ 提取图片 {count} 张 → {image_dir}")
            except Exception as e:
                print(f"❌ PDF {pdf_path} 提取图片失败: {e}")
                _copy_failed_ppt(ppt_path, failed_dir)


def _copy_failed_ppt(ppt_path, failed_dir):
    """Copy a presentation that could not be processed into ``failed_dir``."""
    ensure_dir(failed_dir)
    shutil.copy2(ppt_path, failed_dir)
    print(f" ⚠️ 已复制失败文件到: {failed_dir}")
if __name__ == "__main__":
    import sys

    # Usage: script.py [src_dir [dst_dir [error_dir]]]
    # Any argument that is omitted falls back to the default below.
    default_dirs = [
        r"C:\Users\84977\Desktop\ceshi",  # directory containing the PPT files
        r"C:\Users\84977\Desktop\123",  # output directory
        r"C:\Users\84977\Desktop\345",  # directory for files that failed
    ]
    cli_dirs = sys.argv[1:4]
    src_dir, dst_dir, error_dir = cli_dirs + default_dirs[len(cli_dirs):]
    process_ppt_directory(src_dir, dst_dir, error_dir)