将多张图片拼接成一个pdf文件输出

前言

最近因为工作需要，要把电脑上的多张照片拼接成一个 PDF 文件上传。使用在线 PDF 转换工具担心图片内容泄露；用微软 Word 也能转换，但需要把图片逐张粘贴到文档里，比较耗时。因此自己写了一个小程序，在本地完成转换。

使用场景

多张图片拼接成1个pdf文件
可根据需要自行调节输出 PDF 的文件大小和图片质量（DPI、JPEG 压缩质量、最大宽度）

准备工作

安装 Pillow 库：

bash 命令：

pip install Pillow

代码

Python 代码：

import os
import re

from PIL import Image
from PIL import ImageOps

# Splits a file name into alternating text / digit-run pieces.
_DIGIT_RUNS = re.compile(r'(\d+)')

# EXIF tag id holding the display orientation of a camera image.
_EXIF_ORIENTATION_TAG = 0x0112


def _natural_sort_key(name):
    """Return a sort key that orders embedded numbers numerically (2 < 10)."""
    # Splitting on a capturing group always yields text at even indices and
    # digit runs at odd indices. Converting by position (instead of asking
    # str.isdigit) guarantees ints are only ever compared with ints, and
    # avoids int() failing on characters such as superscript digits.
    return [
        int(piece) if index % 2 else piece.lower()
        for index, piece in enumerate(_DIGIT_RUNS.split(name))
    ]


def _flatten_to_rgb(img):
    """Return *img* as an RGB image, compositing transparency onto white."""
    if img.mode == 'RGB':
        return img
    if img.mode in ('RGBA', 'LA', 'P'):
        # Going through RGBA gives every one of these modes a real alpha
        # band, so 'LA' transparency is honoured the same way as 'RGBA'.
        rgba = img.convert('RGBA')
        background = Image.new('RGB', rgba.size, (255, 255, 255))
        background.paste(rgba, mask=rgba.getchannel('A'))
        return background
    return img.convert('RGB')


def _load_page(file_path):
    """Open one image file and return it upright and in RGB mode."""
    source = Image.open(file_path)
    try:
        page = source
        try:
            rotated = page.getexif().get(_EXIF_ORIENTATION_TAG) in range(2, 9)
        except Exception:
            # Unreadable metadata must not cost us the page itself.
            rotated = False
        if rotated:
            # The PDF writer ignores EXIF, so bake the rotation into pixels.
            page = ImageOps.exif_transpose(page)
        page = _flatten_to_rgb(page)
    except Exception:
        source.close()
        raise
    if page is not source:
        # The processed copy is independent of the file; release the handle.
        source.close()
    return page


def images_to_pdf(image_folder, output_pdf, dpi=150, quality=85, max_width=None):
    """
    Merge every image in a folder into one compressed PDF, one image per page.

    Files are taken in natural order of their names ("2.jpg" before
    "10.jpg"). Files that cannot be loaded are reported and skipped. Progress
    and errors are printed; nothing is returned.

    :param image_folder: path of the folder that contains the images
    :param output_pdf: path of the PDF file to write
    :param dpi: resolution recorded for the PDF pages (default 150)
    :param quality: JPEG quality, 1-100 (default 85); lower means smaller
                    files and lower image quality
    :param max_width: maximum page width in pixels; wider images are scaled
                      down proportionally. ``None`` keeps the original width.
    """
    supported_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff')
    image_files = sorted(
        (f for f in os.listdir(image_folder)
         if f.lower().endswith(supported_extensions)),
        key=_natural_sort_key,
    )

    if not image_files:
        print(f"在文件夹 '{image_folder}' 中未找到任何图片。")
        return

    print(f"找到 {len(image_files)} 张图片，开始处理 (目标DPI: {dpi}, 质量: {quality})...")

    image_list = []
    for filename in image_files:
        file_path = os.path.join(image_folder, filename)
        try:
            img = _load_page(file_path)

            # Optional downscale: only shrink, never enlarge.
            if max_width and img.width > max_width:
                ratio = max_width / float(img.width)
                # Clamp to 1 so extremely wide, flat images stay resizable.
                new_height = max(1, int(float(img.height) * ratio))
                img = img.resize((max_width, new_height), Image.Resampling.LANCZOS)
                print(f"  -> 缩放 {filename}: {img.width}x{img.height}")
        except Exception as e:
            # Deliberate best effort: one bad file must not abort the batch.
            print(f"跳过文件 {filename}，发生错误: {e}")
            continue
        image_list.append(img)

    if not image_list:
        print("没有成功加载任何图片，无法生成 PDF。")
        return

    first_image = image_list[0]
    other_images = image_list[1:]

    try:
        # The compression-related options are all passed to the PDF writer.
        first_image.save(
            output_pdf,
            "PDF",
            resolution=dpi,
            save_all=True,
            append_images=other_images,
            optimize=True,
            quality=quality
        )

        file_size_mb = os.path.getsize(output_pdf) / (1024 * 1024)
        print(f"\n成功！PDF 已保存至: {output_pdf}")
        print(f"最终文件大小: {file_size_mb:.2f} MB")

    except Exception as e:
        print(f"\n生成 PDF 时出错: {e}")
    finally:
        # Release pixel buffers and any file handles still held by pages
        # that were used exactly as opened.
        for img in image_list:
            img.close()


if __name__ == "__main__":
    # === Configuration ===
    folder_path = "./images"
    output_filename = "merged_output_compressed.pdf"

    # === Compression settings ===
    # DPI:
    #   72  - smallest files, fine for on-screen reading, blurry when printed
    #   150 - balanced choice for screen reading and ordinary documents
    #   300 - high quality for printing, large files
    TARGET_DPI = 150

    # JPEG quality (1-100):
    #   60-75 - strong compression, very small files, slight noise possible
    #   85    - loss is barely visible, still a clear size reduction
    #   95+   - close to lossless, large files
    # 60 is used here to favour a small upload size.
    TARGET_QUALITY = 60

    # Maximum width in pixels:
    #   An A4 page at 150 DPI is about 1240 px wide (210 mm * 150 / 25.4).
    #   Limiting high-resolution phone photos (e.g. 4000 px) to 1500 px
    #   shrinks the file considerably. Use None for no limit.
    MAX_WIDTH = 1500

    # isdir (not exists): a regular file at this path would otherwise get
    # through and make the folder listing inside images_to_pdf fail.
    if not os.path.isdir(folder_path):
        print(f"文件夹 '{folder_path}' 不存在。请创建该文件夹并放入图片。")
    else:
        images_to_pdf(folder_path, output_filename, dpi=TARGET_DPI, quality=TARGET_QUALITY, max_width=MAX_WIDTH)