该脚本主要功能:
1、读取excel文件获取水印列表;
2、在pdf文档的每一页添加水印;
3、每个水印生成一个pdf文档。
1、安装并导入所需第三方库(pandas、reportlab、PyPDF2)
python
import pandas as pd
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import os
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.utils import ImageReader
import io
2、注册水印字体
python
def register_fonts():
    """Register Chinese TrueType fonts with ReportLab and return their names.

    ``simsun.ttc`` is tried first without a directory, so that attempt only
    succeeds if ReportLab can locate the file on its own.  After that a fixed
    list of font files in a hard-coded per-user Windows font directory is
    tried.  Files that do not exist are ignored; files that exist but cannot
    be loaded are reported and skipped.

    Returns:
        list[str]: Names of the successfully registered fonts, in registration
        order.  ``['Helvetica']`` is returned when nothing could be registered
        or when an unexpected error occurred.
    """
    try:
        # Per-user Windows font directory (hard-coded for the "admin" account).
        FONTS_ROOT_PATH = "C:/Users/admin/AppData/Local/Microsoft/Windows/Fonts/"
        windows_fonts = [
            FONTS_ROOT_PATH + "仿宋_GB2312_0.TTF",  # FangSong_GB2312
            FONTS_ROOT_PATH + "simsun.ttc",  # SimSun
            FONTS_ROOT_PATH + "simhei.ttf",  # SimHei
            FONTS_ROOT_PATH + "msyh.ttc",  # Microsoft YaHei
            FONTS_ROOT_PATH + "simkai.ttf",  # KaiTi
        ]
        registered_fonts = []

        # Best-effort attempt with a bare file name.  A failure here is
        # expected on machines without the font, so it is not reported.
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        try:
            pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
            registered_fonts.append('SimSun')
        except Exception:
            pass

        # Register every candidate file that is actually present.
        for font_path in windows_fonts:
            if not os.path.exists(font_path):
                continue
            # The font is registered under its file name without extension.
            font_name = os.path.splitext(os.path.basename(font_path))[0]
            try:
                pdfmetrics.registerFont(TTFont(font_name, font_path))
            except Exception as exc:
                # The file exists but could not be loaded: say so and move on.
                print(f"字体加载失败,已跳过: {font_path} ({exc})")
                continue
            registered_fonts.append(font_name)
            print(f"注册字体: {font_name}")

        # Nothing registered: fall back to a font name that needs no file.
        if not registered_fonts:
            print("警告: 未找到中文字体,使用默认字体")
            return ['Helvetica']
        return registered_fonts
    except Exception as e:
        print(f"字体注册失败: {str(e)}")
        return ['Helvetica']
3、编写给PDF添加水印函数
python
def add_text_watermark(input_pdf_path, output_pdf_path, watermark_text, current_font,
                       font_size=40, opacity=0.3, rotation=45,
                       color=(0.5, 0.5, 0.5)):
    """Stamp a rotated text watermark onto every page of a PDF.

    The text is drawn five times per page: once around the page centre and
    once around each of the four quarter points of the page.  The overlay is
    sized from each page's own media box, so documents with mixed page sizes
    are handled correctly.

    Args:
        input_pdf_path: Path of the PDF to watermark.
        output_pdf_path: Path the watermarked PDF is written to.
        watermark_text: Text to draw.
        current_font: Font name handed to the canvas ``setFont`` call.
        font_size: Font size handed to the canvas.
        opacity: Fill alpha handed to ``setFillAlpha``.
        rotation: Angle handed to the canvas ``rotate`` call.
        color: Three RGB components handed to ``setFillColorRGB``.

    Returns:
        bool: True when the output file was written; False when any step
        raised (the error message is printed).
    """
    try:
        reader = PdfReader(input_pdf_path)
        writer = PdfWriter()

        # An empty document cannot be watermarked; report it as a failure.
        if len(reader.pages) == 0:
            raise ValueError("PDF文件没有页面")

        for page in reader.pages:
            # Size the overlay from this page, not from the first page.
            page_width = float(page.mediabox.width)
            page_height = float(page.mediabox.height)

            # Build a one-page overlay PDF in memory.
            packet = io.BytesIO()
            can = canvas.Canvas(packet, pagesize=(page_width, page_height))
            can.setFillColorRGB(*color)
            can.setFont(current_font, font_size)
            can.setFillAlpha(opacity)
            text_width = can.stringWidth(watermark_text, current_font, font_size)

            # Page centre followed by the four quarter points.
            anchors = [
                (page_width / 2, page_height / 2),
                (page_width / 4, page_height / 4),
                (3 * page_width / 4, page_height / 4),
                (page_width / 4, 3 * page_height / 4),
                (3 * page_width / 4, 3 * page_height / 4),
            ]
            for anchor_x, anchor_y in anchors:
                can.saveState()
                # Move the origin to the anchor and rotate around it, then
                # draw the text horizontally centred on the anchor.
                can.translate(anchor_x, anchor_y)
                can.rotate(rotation)
                can.drawString(-text_width / 2, 0, watermark_text)
                can.restoreState()
            can.save()

            # Re-read the overlay from the start of the buffer and merge it
            # onto the original page.
            packet.seek(0)
            watermark_page = PdfReader(packet).pages[0]
            page.merge_page(watermark_page)
            writer.add_page(page)

        with open(output_pdf_path, 'wb') as output_file:
            writer.write(output_file)
        print(f"成功生成: {output_pdf_path}")
        return True
    except Exception as e:
        print(f"添加水印失败: {str(e)}")
        return False
4、编写给PDF添加图片水印函数(未调试)
python
def add_image_watermark(input_pdf_path, output_pdf_path, watermark_image_path,
                        opacity=0.3, position='center', scale=0.5):
    """Stamp an image watermark onto every page of a PDF.

    The overlay is sized from each page's own media box, so documents with
    mixed page sizes are handled correctly.

    Args:
        input_pdf_path: Path of the PDF to watermark.
        output_pdf_path: Path the watermarked PDF is written to.
        watermark_image_path: Path of the image to draw.
        opacity: Fill alpha handed to ``setFillAlpha`` before drawing.
        position: ``'center'``, ``'top-left'`` or ``'bottom-right'``; any
            other value is treated as ``'center'``.
        scale: Factor applied to the image's width and height.

    Returns:
        bool: True when the output file was written; False when any step
        raised (the error message is printed).
    """
    try:
        reader = PdfReader(input_pdf_path)
        writer = PdfWriter()

        # An empty document cannot be watermarked; report it as a failure.
        if len(reader.pages) == 0:
            raise ValueError("PDF文件没有页面")

        # The image is opened once here only to learn its size.
        img = ImageReader(watermark_image_path)
        img_width, img_height = img.getSize()
        scaled_width = img_width * scale
        scaled_height = img_height * scale

        for page in reader.pages:
            # Size the overlay from this page, not from the first page.
            page_width = float(page.mediabox.width)
            page_height = float(page.mediabox.height)

            packet = io.BytesIO()
            can = canvas.Canvas(packet, pagesize=(page_width, page_height))

            # Lower-left corner of the image; the corners keep a margin of 50.
            if position == 'top-left':
                x = 50
                y = page_height - scaled_height - 50
            elif position == 'bottom-right':
                x = page_width - scaled_width - 50
                y = 50
            else:
                # 'center' and every unrecognised value.
                x = (page_width - scaled_width) / 2
                y = (page_height - scaled_height) / 2

            can.setFillAlpha(opacity)
            can.drawImage(watermark_image_path, x, y,
                          width=scaled_width, height=scaled_height,
                          mask='auto')
            can.save()

            # Re-read the overlay from the start of the buffer and merge it
            # onto the original page.
            packet.seek(0)
            watermark_page = PdfReader(packet).pages[0]
            page.merge_page(watermark_page)
            writer.add_page(page)

        with open(output_pdf_path, 'wb') as output_file:
            writer.write(output_file)
        print(f"成功生成: {output_pdf_path}")
        return True
    except Exception as e:
        print(f"添加图片水印失败: {str(e)}")
        return False
5、编写批量添加水印函数
python
def batch_generate_watermarked_pdfs(pdf_path, excel_path, output_folder='watermarked_pdfs',
                                    watermark_type='text', watermark_columns_name='name', **kwargs):
    """Generate one watermarked copy of a PDF per distinct value in an Excel column.

    The first sheet of the workbook is read, the non-null unique values of
    ``watermark_columns_name`` are collected, and one output PDF is written
    for each of them.  A text report is produced at the end through
    ``generate_report``.

    Args:
        pdf_path: Path of the source PDF.
        excel_path: Path of the Excel workbook holding the watermark values.
        output_folder: Folder the generated files are written to; created if
            it does not exist.
        watermark_type: ``'text'`` or ``'image'``.
        watermark_columns_name: Name of the column holding the watermark values.
        **kwargs: Extra options forwarded to the watermark function.  For
            ``'image'`` this must contain ``watermark_image_path``.

    Returns:
        bool: True when the batch ran to completion (individual files may
        still have failed; see the printed summary).  False when the column
        is missing, the watermark type or its arguments are invalid, or an
        unexpected error occurred.
    """
    # exist_ok avoids the check-then-create race.
    os.makedirs(output_folder, exist_ok=True)

    try:
        df = pd.read_excel(excel_path, sheet_name=0)  # first sheet only

        if watermark_columns_name not in df.columns:
            print(f"错误: Excel文件中没有'{watermark_columns_name}'列")
            return False

        # Validate the request once, before any file is produced.
        if watermark_type not in ('text', 'image'):
            print(f"错误: 不支持的水印类型: {watermark_type}")
            return False
        if watermark_type == 'image' and 'watermark_image_path' not in kwargs:
            print("错误: 图片水印需要指定watermark_image_path参数")
            return False

        watermarks = df[watermark_columns_name].dropna().unique().tolist()
        print(f"找到 {len(watermarks)} 个不同的水印")

        available_fonts = register_fonts()
        # Prefer the second registered font as before, but fall back to the
        # first one when only a single font is available (for example the
        # ['Helvetica'] fallback), which used to raise IndexError.
        current_font = available_fonts[1] if len(available_fonts) > 1 else available_fonts[0]
        print(f"可用字体: {available_fonts}")
        print(f"使用字体:{current_font}")

        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        # Characters that are not allowed in Windows file names.
        unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
        image_options = {k: v for k, v in kwargs.items() if k != 'watermark_image_path'}

        success_count = 0
        for index, watermark in enumerate(watermarks, start=1):
            safe_label = str(watermark).translate(unsafe_chars)
            output_filename = f"{base_name}_watermark_{index}_{safe_label}.pdf"
            output_path = os.path.join(output_folder, output_filename)
            print(f"正在处理: {watermark}")

            if watermark_type == 'text':
                success = add_text_watermark(
                    pdf_path,
                    output_path,
                    str(watermark),
                    current_font,
                    **kwargs
                )
            else:
                success = add_image_watermark(
                    pdf_path,
                    output_path,
                    kwargs['watermark_image_path'],
                    **image_options
                )

            # Keep going after each file: every watermark gets its own PDF.
            if success:
                success_count += 1

        print(f"\n处理完成! 成功生成 {success_count}/{len(watermarks)} 个文件")
        print(f"输出文件夹: {os.path.abspath(output_folder)}")

        generate_report(watermarks, output_folder, success_count)
        return True
    except Exception as e:
        print(f"处理过程中出现错误: {str(e)}")
        return False
6、编写主程序入口(调用示例)
python
# Usage example
if __name__ == "__main__":
    ROOT_PATH = "D:\\Python\\Python_Projects\\批量生成水印"

    # Input and output locations; replace these with your own paths.
    PDF_PATH = os.path.join(ROOT_PATH, "初稿-20260127.pdf")
    EXCEL_PATH = os.path.join(ROOT_PATH, "names.xlsx")
    OUTPUT_FOLDER = os.path.join(ROOT_PATH, "加水印文件夹")

    # Example 1: text watermark
    print("=== 示例1: 文本水印 ===")
    text_watermark_options = {
        "font_size": 40,
        "opacity": 0.2,
        "rotation": 45,
        "color": (0.3, 0.3, 0.3),  # RGB colour
    }
    batch_generate_watermarked_pdfs(
        pdf_path=PDF_PATH,
        excel_path=EXCEL_PATH,
        output_folder=OUTPUT_FOLDER,
        watermark_type='text',
        watermark_columns_name="姓名",
        **text_watermark_options
    )

    # Example 2: image watermark (enable if you have an image)
    # print("\n=== 示例2: 图片水印 ===")
    # batch_generate_watermarked_pdfs(
    #     pdf_path=PDF_PATH,
    #     excel_path=EXCEL_PATH,
    #     output_folder=OUTPUT_FOLDER + "_image",
    #     watermark_type='image',
    #     watermark_image_path="watermark.png",  # replace with your image path
    #     opacity=0.3,
    #     position='center',
    #     scale=0.5
    # )
7、代码汇总
python
import pandas as pd
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import os
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.utils import ImageReader
import io
def register_fonts():
    """Register Chinese TrueType fonts with ReportLab and return their names.

    ``simsun.ttc`` is tried first without a directory, so that attempt only
    succeeds if ReportLab can locate the file on its own.  After that a fixed
    list of font files in a hard-coded per-user Windows font directory is
    tried.  Files that do not exist are ignored; files that exist but cannot
    be loaded are reported and skipped.

    Returns:
        list[str]: Names of the successfully registered fonts, in registration
        order.  ``['Helvetica']`` is returned when nothing could be registered
        or when an unexpected error occurred.
    """
    try:
        # Per-user Windows font directory (hard-coded for the "admin" account).
        FONTS_ROOT_PATH = "C:/Users/admin/AppData/Local/Microsoft/Windows/Fonts/"
        windows_fonts = [
            FONTS_ROOT_PATH + "仿宋_GB2312_0.TTF",  # FangSong_GB2312
            FONTS_ROOT_PATH + "simsun.ttc",  # SimSun
            FONTS_ROOT_PATH + "simhei.ttf",  # SimHei
            FONTS_ROOT_PATH + "msyh.ttc",  # Microsoft YaHei
            FONTS_ROOT_PATH + "simkai.ttf",  # KaiTi
        ]
        # macOS/Linux candidates, currently disabled.  To use them, append
        # these paths to the list that the loop below iterates over.
        # mac_linux_fonts = [
        #     "/System/Library/Fonts/PingFang.ttc",  # PingFang (macOS)
        #     "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",  # Droid (Linux)
        #     "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",  # WenQuanYi Micro Hei (Linux)
        # ]
        registered_fonts = []

        # Best-effort attempt with a bare file name.  A failure here is
        # expected on machines without the font, so it is not reported.
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        try:
            pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
            registered_fonts.append('SimSun')
        except Exception:
            pass

        # Register every candidate file that is actually present.
        for font_path in windows_fonts:
            if not os.path.exists(font_path):
                continue
            # The font is registered under its file name without extension.
            font_name = os.path.splitext(os.path.basename(font_path))[0]
            try:
                pdfmetrics.registerFont(TTFont(font_name, font_path))
            except Exception as exc:
                # The file exists but could not be loaded: say so and move on.
                print(f"字体加载失败,已跳过: {font_path} ({exc})")
                continue
            registered_fonts.append(font_name)
            print(f"注册字体: {font_name}")

        # Nothing registered: fall back to a font name that needs no file.
        if not registered_fonts:
            print("警告: 未找到中文字体,使用默认字体")
            return ['Helvetica']
        return registered_fonts
    except Exception as e:
        print(f"字体注册失败: {str(e)}")
        return ['Helvetica']
def add_text_watermark(input_pdf_path, output_pdf_path, watermark_text, current_font,
                       font_size=40, opacity=0.3, rotation=45,
                       color=(0.5, 0.5, 0.5)):
    """Stamp a rotated text watermark onto every page of a PDF.

    The text is drawn five times per page: once around the page centre and
    once around each of the four quarter points of the page.  The overlay is
    sized from each page's own media box, so documents with mixed page sizes
    are handled correctly.

    Args:
        input_pdf_path: Path of the PDF to watermark.
        output_pdf_path: Path the watermarked PDF is written to.
        watermark_text: Text to draw.
        current_font: Font name handed to the canvas ``setFont`` call.
        font_size: Font size handed to the canvas.
        opacity: Fill alpha handed to ``setFillAlpha``.
        rotation: Angle handed to the canvas ``rotate`` call.
        color: Three RGB components handed to ``setFillColorRGB``.

    Returns:
        bool: True when the output file was written; False when any step
        raised (the error message is printed).
    """
    try:
        reader = PdfReader(input_pdf_path)
        writer = PdfWriter()

        # An empty document cannot be watermarked; report it as a failure.
        if len(reader.pages) == 0:
            raise ValueError("PDF文件没有页面")

        for page in reader.pages:
            # Size the overlay from this page, not from the first page.
            page_width = float(page.mediabox.width)
            page_height = float(page.mediabox.height)

            # Build a one-page overlay PDF in memory.
            packet = io.BytesIO()
            can = canvas.Canvas(packet, pagesize=(page_width, page_height))
            can.setFillColorRGB(*color)
            can.setFont(current_font, font_size)
            can.setFillAlpha(opacity)
            text_width = can.stringWidth(watermark_text, current_font, font_size)

            # Page centre followed by the four quarter points.
            anchors = [
                (page_width / 2, page_height / 2),
                (page_width / 4, page_height / 4),
                (3 * page_width / 4, page_height / 4),
                (page_width / 4, 3 * page_height / 4),
                (3 * page_width / 4, 3 * page_height / 4),
            ]
            for anchor_x, anchor_y in anchors:
                can.saveState()
                # Move the origin to the anchor and rotate around it, then
                # draw the text horizontally centred on the anchor.
                can.translate(anchor_x, anchor_y)
                can.rotate(rotation)
                can.drawString(-text_width / 2, 0, watermark_text)
                can.restoreState()
            can.save()

            # Re-read the overlay from the start of the buffer and merge it
            # onto the original page.
            packet.seek(0)
            watermark_page = PdfReader(packet).pages[0]
            page.merge_page(watermark_page)
            writer.add_page(page)

        with open(output_pdf_path, 'wb') as output_file:
            writer.write(output_file)
        print(f"成功生成: {output_pdf_path}")
        return True
    except Exception as e:
        print(f"添加水印失败: {str(e)}")
        return False
def add_image_watermark(input_pdf_path, output_pdf_path, watermark_image_path,
                        opacity=0.3, position='center', scale=0.5):
    """Stamp an image watermark onto every page of a PDF.

    The overlay is sized from each page's own media box, so documents with
    mixed page sizes are handled correctly.

    Args:
        input_pdf_path: Path of the PDF to watermark.
        output_pdf_path: Path the watermarked PDF is written to.
        watermark_image_path: Path of the image to draw.
        opacity: Fill alpha handed to ``setFillAlpha`` before drawing.
        position: ``'center'``, ``'top-left'`` or ``'bottom-right'``; any
            other value is treated as ``'center'``.
        scale: Factor applied to the image's width and height.

    Returns:
        bool: True when the output file was written; False when any step
        raised (the error message is printed).
    """
    try:
        reader = PdfReader(input_pdf_path)
        writer = PdfWriter()

        # An empty document cannot be watermarked; report it as a failure.
        if len(reader.pages) == 0:
            raise ValueError("PDF文件没有页面")

        # The image is opened once here only to learn its size.
        img = ImageReader(watermark_image_path)
        img_width, img_height = img.getSize()
        scaled_width = img_width * scale
        scaled_height = img_height * scale

        for page in reader.pages:
            # Size the overlay from this page, not from the first page.
            page_width = float(page.mediabox.width)
            page_height = float(page.mediabox.height)

            packet = io.BytesIO()
            can = canvas.Canvas(packet, pagesize=(page_width, page_height))

            # Lower-left corner of the image; the corners keep a margin of 50.
            if position == 'top-left':
                x = 50
                y = page_height - scaled_height - 50
            elif position == 'bottom-right':
                x = page_width - scaled_width - 50
                y = 50
            else:
                # 'center' and every unrecognised value.
                x = (page_width - scaled_width) / 2
                y = (page_height - scaled_height) / 2

            can.setFillAlpha(opacity)
            can.drawImage(watermark_image_path, x, y,
                          width=scaled_width, height=scaled_height,
                          mask='auto')
            can.save()

            # Re-read the overlay from the start of the buffer and merge it
            # onto the original page.
            packet.seek(0)
            watermark_page = PdfReader(packet).pages[0]
            page.merge_page(watermark_page)
            writer.add_page(page)

        with open(output_pdf_path, 'wb') as output_file:
            writer.write(output_file)
        print(f"成功生成: {output_pdf_path}")
        return True
    except Exception as e:
        print(f"添加图片水印失败: {str(e)}")
        return False
def batch_generate_watermarked_pdfs(pdf_path, excel_path, output_folder='watermarked_pdfs',
                                    watermark_type='text', watermark_columns_name='name', **kwargs):
    """Generate one watermarked copy of a PDF per distinct value in an Excel column.

    The first sheet of the workbook is read, the non-null unique values of
    ``watermark_columns_name`` are collected, and one output PDF is written
    for each of them.  A text report is produced at the end through
    ``generate_report``.

    Args:
        pdf_path: Path of the source PDF.
        excel_path: Path of the Excel workbook holding the watermark values.
        output_folder: Folder the generated files are written to; created if
            it does not exist.
        watermark_type: ``'text'`` or ``'image'``.
        watermark_columns_name: Name of the column holding the watermark values.
        **kwargs: Extra options forwarded to the watermark function.  For
            ``'image'`` this must contain ``watermark_image_path``.

    Returns:
        bool: True when the batch ran to completion (individual files may
        still have failed; see the printed summary).  False when the column
        is missing, the watermark type or its arguments are invalid, or an
        unexpected error occurred.
    """
    # exist_ok avoids the check-then-create race.
    os.makedirs(output_folder, exist_ok=True)

    try:
        df = pd.read_excel(excel_path, sheet_name=0)  # first sheet only

        if watermark_columns_name not in df.columns:
            print(f"错误: Excel文件中没有'{watermark_columns_name}'列")
            return False

        # Validate the request once, before any file is produced.
        if watermark_type not in ('text', 'image'):
            print(f"错误: 不支持的水印类型: {watermark_type}")
            return False
        if watermark_type == 'image' and 'watermark_image_path' not in kwargs:
            print("错误: 图片水印需要指定watermark_image_path参数")
            return False

        watermarks = df[watermark_columns_name].dropna().unique().tolist()
        print(f"找到 {len(watermarks)} 个不同的水印")

        available_fonts = register_fonts()
        # Prefer the second registered font as before, but fall back to the
        # first one when only a single font is available (for example the
        # ['Helvetica'] fallback), which used to raise IndexError.
        current_font = available_fonts[1] if len(available_fonts) > 1 else available_fonts[0]
        print(f"可用字体: {available_fonts}")
        print(f"使用字体:{current_font}")

        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        # Characters that are not allowed in Windows file names.
        unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
        image_options = {k: v for k, v in kwargs.items() if k != 'watermark_image_path'}

        success_count = 0
        for index, watermark in enumerate(watermarks, start=1):
            safe_label = str(watermark).translate(unsafe_chars)
            output_filename = f"{base_name}_watermark_{index}_{safe_label}.pdf"
            output_path = os.path.join(output_folder, output_filename)
            print(f"正在处理: {watermark}")

            if watermark_type == 'text':
                success = add_text_watermark(
                    pdf_path,
                    output_path,
                    str(watermark),
                    current_font,
                    **kwargs
                )
            else:
                success = add_image_watermark(
                    pdf_path,
                    output_path,
                    kwargs['watermark_image_path'],
                    **image_options
                )

            # Keep going after each file: every watermark gets its own PDF.
            if success:
                success_count += 1

        print(f"\n处理完成! 成功生成 {success_count}/{len(watermarks)} 个文件")
        print(f"输出文件夹: {os.path.abspath(output_folder)}")

        generate_report(watermarks, output_folder, success_count)
        return True
    except Exception as e:
        print(f"处理过程中出现错误: {str(e)}")
        return False
def generate_report(watermarks, output_folder, success_count):
    """Write a plain-text summary of a batch run to ``processing_report.txt``.

    The report lists the total number of watermarks, how many files were
    generated successfully, how many failed, and then every watermark value
    numbered from 1.

    Args:
        watermarks: Sequence of watermark values that were processed.
        output_folder: Folder the report file is written into.
        success_count: Number of files that were generated successfully.
    """
    report_path = os.path.join(output_folder, "processing_report.txt")
    with open(report_path, 'w', encoding='utf-8') as report_file:
        total = len(watermarks)
        lines = [
            "PDF水印批量处理报告\n",
            "=" * 50 + "\n",
            f"总水印数量: {total}\n",
            f"成功生成: {success_count}\n",
            f"失败数量: {total - success_count}\n",
            "\n水印列表:\n",
        ]
        # One numbered line per watermark, counting from 1.
        lines.extend(
            f"{position}. {label}\n"
            for position, label in enumerate(watermarks, 1)
        )
        report_file.write("".join(lines))
    print(f"处理报告已生成: {report_path}")
# Usage example
if __name__ == "__main__":
    ROOT_PATH = "D:\\Python\\Python_Projects\\批量生成水印"

    # Input and output locations; replace these with your own paths.
    PDF_PATH = os.path.join(ROOT_PATH, "初稿-20260127.pdf")
    EXCEL_PATH = os.path.join(ROOT_PATH, "names.xlsx")
    OUTPUT_FOLDER = os.path.join(ROOT_PATH, "加水印文件夹")

    # Example 1: text watermark
    print("=== 示例1: 文本水印 ===")
    text_watermark_options = {
        "font_size": 40,
        "opacity": 0.2,
        "rotation": 45,
        "color": (0.3, 0.3, 0.3),  # RGB colour
    }
    batch_generate_watermarked_pdfs(
        pdf_path=PDF_PATH,
        excel_path=EXCEL_PATH,
        output_folder=OUTPUT_FOLDER,
        watermark_type='text',
        watermark_columns_name="姓名",
        **text_watermark_options
    )

    # Example 2: image watermark (enable if you have an image)
    # print("\n=== 示例2: 图片水印 ===")
    # batch_generate_watermarked_pdfs(
    #     pdf_path=PDF_PATH,
    #     excel_path=EXCEL_PATH,
    #     output_folder=OUTPUT_FOLDER + "_image",
    #     watermark_type='image',
    #     watermark_image_path="watermark.png",  # replace with your image path
    #     opacity=0.3,
    #     position='center',
    #     scale=0.5
    # )