python
import cv2
import numpy as np
from PIL import Image
import os
from reportlab.lib.pagesizes import A4, landscape
from reportlab.pdfgen import canvas
def crop_by_white_contour(img_path, save_path, threshold=60):
    """Crop an image to its largest bright region and save the result.

    The image is converted to grayscale and binarised; the external contour
    with the largest area is taken to be the slide body, and the image is
    cropped to that contour's bounding rectangle.

    Args:
        img_path: Path of the source image.
        save_path: Path the cropped image is written to; the output format
            is chosen by PIL from the file extension.
        threshold: Gray level (0-255) used for binarisation. Pixels brighter
            than this become 255 (content), the rest become 0 (background).

    Raises:
        ValueError: If no region brighter than ``threshold`` is found, so
            there is nothing to crop to.
    """
    # Read with PIL (as the original code does) to avoid garbled-path
    # problems with Chinese / non-ASCII file names. The context manager
    # releases the file handle as soon as the pixels are copied to NumPy.
    with Image.open(img_path) as pil_img:
        img = np.array(pil_img.convert("RGB"))

    # The array is in RGB order, so convert straight to grayscale instead of
    # going through an intermediate BGR copy.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Binarise: bright (slide) area -> 255, dark background -> 0.
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        # Without this guard max() fails with an opaque
        # "max() arg is an empty sequence"; keep the exception type
        # (ValueError) but explain what actually went wrong.
        raise ValueError("未检测到亮色区域,无法裁剪")

    # The contour with the largest area is treated as the slide body.
    largest = max(contours, key=cv2.contourArea)
    x_min, y_min, w, h = cv2.boundingRect(largest)

    # Crop the original RGB pixels (not the grayscale copy) and save.
    cropped = img[y_min:y_min + h, x_min:x_min + w]
    Image.fromarray(cropped).save(save_path)
def batch_screenshot_to_pdf(folder, pdf_name="PPT无黑边汇总.pdf"):
    """Crop every screenshot in ``folder`` and merge the results into one PDF.

    Image files (.jpg/.jpeg/.png/.bmp, case-insensitive) directly inside
    ``folder`` are processed in sorted file-name order. Each one is cropped
    with ``crop_by_white_contour`` into the ``crop_temp_clean`` sub-folder,
    then placed on its own A4 page (landscape when the image is wider than
    tall, portrait otherwise), scaled to fit inside the margins and centred.

    Images that fail to crop are reported and skipped. If the folder holds
    no images, or no image could be cropped, a message is printed and no PDF
    is written.

    Args:
        folder: Directory containing the screenshots.
        pdf_name: Output PDF path; a relative path is resolved against the
            current working directory, not ``folder``.
    """
    ext = (".jpg", ".jpeg", ".png", ".bmp")
    file_list = sorted(f for f in os.listdir(folder) if f.lower().endswith(ext))
    if not file_list:
        print("❌ 文件夹内未找到图片文件")
        return

    temp_dir = os.path.join(folder, "crop_temp_clean")
    os.makedirs(temp_dir, exist_ok=True)

    crop_paths = []
    print("===== 轮廓识别裁剪,清除全部四周黑边 =====")
    for idx, fname in enumerate(file_list):
        src = os.path.join(folder, fname)
        # The index prefix keeps the temp files in processing order.
        dst = os.path.join(temp_dir, f"fixed_{idx:03d}_{fname}")
        try:
            crop_by_white_contour(src, dst)
        except Exception as e:
            # Best-effort batch: one unreadable image must not abort the run.
            print(f"⚠️ {fname} 处理异常,跳过:{str(e)}")
        else:
            crop_paths.append(dst)
            print(f"✅ 处理完成:{fname}")

    if not crop_paths:
        # Every image failed; do not write a content-less PDF and then
        # report success.
        print("❌ 没有图片裁剪成功,未生成PDF")
        return

    # Build the PDF, choosing landscape or portrait per image.
    pdf = canvas.Canvas(pdf_name)
    page_margin = 15  # in ReportLab's default unit (points)
    print("\n===== 生成横版自适应PDF =====")
    for pic_p in crop_paths:
        # Only the dimensions are needed here; close the file right away so
        # a large batch does not accumulate open handles.
        with Image.open(pic_p) as pic:
            pw, ph = pic.size

        # Wide images go on a landscape A4 page, the rest on portrait A4.
        page_w, page_h = landscape(A4) if pw > ph else A4
        pdf.setPageSize((page_w, page_h))

        # Largest uniform scale that fits inside the margins.
        max_w = page_w - page_margin * 2
        max_h = page_h - page_margin * 2
        scale = min(max_w / pw, max_h / ph)
        draw_w = pw * scale
        draw_h = ph * scale

        # Centre the image on the page.
        x = (page_w - draw_w) / 2
        y = (page_h - draw_h) / 2
        pdf.drawImage(pic_p, x, y, width=draw_w, height=draw_h)
        pdf.showPage()

    pdf.save()
    print(f"\n🎉 PDF生成完毕,路径:{os.path.abspath(pdf_name)}")
if __name__ == "__main__":
    # Script entry point: process the screenshots found in the current
    # working directory and write the PDF under its default name.
    WORK_FOLDER = r"./"
    batch_screenshot_to_pdf(folder=WORK_FOLDER)
该代码实现了一个批量处理PPT截图并生成PDF的工具。主要功能包括:1) 通过轮廓识别自动裁剪图片四周的黑边,保留PPT主体白色区域;2) 批量处理文件夹中的图片文件(支持JPG/PNG等格式);3) 智能判断图片方向,自适应生成横版或竖版A4尺寸的PDF文档,并保持图片居中显示。处理过程会先创建临时文件夹存储裁剪后的图片,最后将所有图片整合到一个PDF文件中输出。程序具有错误处理机制,对异常图片会跳过并提示。