使用Python编写脚本,为Excel表格添加水印

简介

这是本人实习中的一个小任务,经过无数努力,终于搞出来了。网上很多资料和博客都是lese,完全没有理清楚水印在excel中的定义是什么,插个图片就是水印吗?当然不是!如果帮助到佬们请点个赞吧。

Excel中水印的定义

与PDF,Word文件不同,由于Excel没有直接提供水印添加功能,所以目前而言在Excel中添加水印实际上就是为excel文件添加背景。

所以我们目前要做的其实就是如何将图片设置为excel背景。

解决方案

方案1

如果只是希望在windows环境下使用,那么可以直接使用:

python 复制代码
import win32com.client as win32
from PIL import Image, ImageDraw, ImageFont

# Build a transparent RGBA canvas tiled with the watermark text.
watermark_text = "123456789"
font_path = "arial.ttf"
# Use an integer point size: older Pillow releases reject a float size.
font_size = int(0.5 * 50)
image_size = (1000, 800)
text_color = (0, 0, 0, 20)  # (R, G, B, A): black with alpha 20
watermark_image_path = r'D:\Projects\PY_Projects\wm\watermark.png'
watermark = Image.new("RGBA", image_size, (255, 255, 255, 0))
draw = ImageDraw.Draw(watermark)

# Fall back to Pillow's built-in font when the TrueType file cannot be opened.
try:
    font = ImageFont.truetype(font_path, font_size)
except IOError:
    font = ImageFont.load_default()
bbox = draw.textbbox((0, 0), watermark_text, font=font)
text_width, text_height = bbox[2] - bbox[0], bbox[3] - bbox[1]

# Repeat the text on a grid whose pitch is twice the text extent.
# The pitch is clamped to >= 1 so a zero-sized measurement cannot stall the loop.
x_step = max(1, int(text_width * 2))
y_step = max(1, int(text_height * 2))
for y_text in range(0, image_size[1], y_step):
    for x_text in range(0, image_size[0], x_step):
        draw.text((x_text, y_text), watermark_text, font=font, fill=text_color)

# Rotate the tiled canvas by 45 degrees and write it to disk.
watermark = watermark.rotate(45, expand=True, resample=Image.Resampling.BICUBIC, fillcolor=(255, 255, 255, 0))
watermark.save(watermark_image_path)

# Drive a hidden Excel instance through COM to set the sheet background.
excel = win32.Dispatch('Excel.Application')
excel.Visible = False
workbook = None
try:
    workbook = excel.Workbooks.Open(r'D:\Projects\PY_Projects\wm\ex1.xlsx')
    sheet = workbook.Sheets('Sheet1')

    # Use the generated image as the worksheet background.
    sheet.SetBackgroundPicture(watermark_image_path)

    workbook.Save()
finally:
    # Always release Excel, even when a step above fails; otherwise the
    # hidden Excel process stays alive in the background.
    if workbook is not None:
        # Changes were already saved above; never prompt on close.
        workbook.Close(False)
    excel.Quit()

相当于在后台开了一个客户端,然后在后台进行了背景设置。

虽然可以使用,但是在实际开发环境中,多为Linux系统,这样一来这种方法就不行了。

方案2

这个解决方案需要我们明白office文件底层是什么。本质上它们都是 `.zip` 文件。所有的具体数据信息和样式信息都存储在压缩包中的xml文件中。通过对加水印和不加水印的底层xml文件进行比对,我得出我们总共需要对解压后的zip文件进行如下修改:

  • [Content_Types].xml:添加图片类型:

    XML 复制代码
    <Default Extension="png" ContentType="image/png"/> 
  • 添加media目录,用于存储水印图片

  • 在worksheets目录下,添加_rels目录,其中存放一个名为sheet1.xml.rels的xml文件:

    XML 复制代码
    <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
    <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="../media/image1.png"/></Relationships>

    用于保存图片引用

  • 然后在worksheets目录下的sheet1.xml文件中添加图片引用

    XML 复制代码
    <picture r:id="rId1"/>

然后我们再对解压后zip文件进行压缩改名,即可得到添加水印的xlsx文件,

代码实现如下:

python 复制代码
import shutil
import zipfile
import os
import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw, ImageFont
import argparse
from datetime import datetime
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font

def csv_to_xlsx(csv_file_path, xlsx_file_path):
    """Convert a CSV file into an .xlsx workbook and strip cell styling.

    The CSV is written to a sheet named ``Sheet1``; afterwards every cell
    border is cleared and the first row is set to a non-bold font.

    Args:
        csv_file_path: Path of the CSV file to read.
        xlsx_file_path: Path of the .xlsx file to create (overwritten).
    """
    # Local import: the file-level imports only bring in ``Font``.
    from openpyxl.styles import Border

    df = pd.read_csv(csv_file_path)
    df.to_excel(xlsx_file_path, index=False, sheet_name='Sheet1')

    workbook = load_workbook(xlsx_file_path)
    sheet = workbook['Sheet1']

    # An empty Border() is the documented way to clear borders; assigning
    # None instead depends on how openpyxl happens to coerce the value.
    no_border = Border()
    for row in sheet.iter_rows():
        for cell in row:
            cell.border = no_border

    # Un-bold the first (header) row.
    for cell in sheet[1]:
        cell.font = Font(bold=False)

    workbook.save(xlsx_file_path)

def createwm(watermark_content, output_path="watermark.png", font_path="SimSun.ttc",
             font_size=80, image_size=(400, 200)):
    """Render *watermark_content* as a rotated, semi-transparent PNG.

    The text is centred on a transparent canvas, rotated by 45 degrees and
    saved to *output_path*.

    Args:
        watermark_content: Text to draw; may contain newlines.
        output_path: Where the PNG is written. Defaults to ``watermark.png``
            in the current working directory.
        font_path: TrueType font to use. Pillow's default font is used when
            it cannot be opened.
        font_size: Font size passed to ``ImageFont.truetype``.
        image_size: Minimum ``(width, height)`` of the canvas before
            rotation. The canvas grows when the text would not fit.
    """
    text_color = (0, 0, 0, 128)
    background_color = (255, 255, 255, 0)
    margin = 20  # transparent padding kept around text that outgrows image_size

    try:
        font = ImageFont.truetype(font_path, font_size)
    except OSError:
        font = ImageFont.load_default()

    # Measure the text on a throw-away canvas before sizing the real one.
    probe = ImageDraw.Draw(Image.new("RGBA", (1, 1), background_color))
    left, top, right, bottom = probe.textbbox((0, 0), watermark_content, font=font)
    text_width, text_height = right - left, bottom - top

    # A fixed canvas would clip long or multi-line text, so enlarge it on demand.
    width = max(int(image_size[0]), int(text_width) + 2 * margin)
    height = max(int(image_size[1]), int(text_height) + 2 * margin)
    watermark = Image.new("RGBA", (width, height), background_color)
    draw = ImageDraw.Draw(watermark)

    # textbbox reports the ink box relative to the drawing origin; subtract
    # its top-left offset so the ink itself is what ends up centred.
    x_text = (width - text_width) // 2 - left
    y_text = (height - text_height) // 2 - top
    draw.text((x_text, y_text), watermark_content, font=font, fill=text_color)

    watermark = watermark.rotate(45, expand=True, resample=Image.Resampling.BICUBIC, fillcolor=background_color)
    watermark.save(output_path)
def cgSuffix(filePath, newSfx):
    """Rename *filePath* so that its extension becomes *newSfx*.

    Args:
        filePath: Path of the existing file.
        newSfx: New extension, with or without the leading dot.

    Returns:
        The path of the renamed file.
    """
    base_name, _ = os.path.splitext(filePath)
    if not newSfx.startswith('.'):
        newSfx = '.' + newSfx
    newFilePath = base_name + newSfx
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename raises on Windows when the target already exists.
    os.replace(filePath, newFilePath)
    return newFilePath

def unzip_file(zip_path):
    """Extract a ZIP archive into a sibling directory and delete the archive.

    The target directory sits next to the archive and is named after it
    without the extension (``a/b/book.zip`` -> ``a/b/book``).

    Args:
        zip_path: Path of the ZIP archive.

    Returns:
        The path of the directory the archive was extracted into.
    """
    zip_filename = os.path.basename(zip_path)
    base_name = os.path.splitext(zip_filename)[0]
    extract_to = os.path.join(os.path.dirname(zip_path), base_name)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(extract_to, exist_ok=True)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # Removing the archive is best effort: report a failure but keep going.
    try:
        os.remove(zip_path)
        print(f"已成功删除ZIP文件: {zip_path}")
    except OSError as e:
        print(f"删除ZIP文件时出错: {e}")

    return extract_to

def add_contenttype(xmlPath):
    """Declare the PNG content type in a package's ``[Content_Types].xml``.

    Adds ``<Default Extension="png" ContentType="image/png"/>`` unless a
    ``Default`` entry for the png extension is already present, then rewrites
    the file in place.

    Args:
        xmlPath: Path of the ``[Content_Types].xml`` file to update.
    """
    ns = {'ct': 'http://schemas.openxmlformats.org/package/2006/content-types'}
    # Serialise the content-types namespace as the default (unprefixed) one.
    ET.register_namespace('', ns['ct'])

    tree = ET.parse(xmlPath)
    root = tree.getroot()

    default_tag = f'{{{ns["ct"]}}}Default'
    # A second Default entry for the same extension would duplicate the
    # declaration, so only add one when none exists yet.
    already_declared = any(
        (entry.get('Extension') or '').lower() == 'png'
        for entry in root.findall(default_tag)
    )
    if not already_declared:
        new_default = ET.SubElement(root, default_tag)
        new_default.set('Extension', 'png')
        new_default.set('ContentType', 'image/png')

    tree.write(xmlPath, encoding='utf-8', xml_declaration=True)

def create_folder(folder_path):
    """Create *folder_path* (including parents) and report the outcome.

    An already existing directory is not an error. Any failure is printed
    rather than raised.
    """
    try:
        os.makedirs(folder_path, exist_ok=True)
        success_message = f"Folder '{folder_path}' created successfully."
        print(success_message)
    except Exception as err:
        failure_message = f"An error occurred while creating the folder: {err}"
        print(failure_message)

def move_image(image_name, media_folder):
    """Move an image file into *media_folder*, keeping its file name.

    Outcomes are printed; a failed move is reported rather than raised.

    Args:
        image_name: Path of the image to move. A bare file name is resolved
            against the current working directory.
        media_folder: Directory that receives the image.
    """
    # Join with the file name only: joining with an absolute image path
    # would yield the source path itself and the file would never move.
    destination_path = os.path.join(media_folder, os.path.basename(image_name))

    if not os.path.exists(image_name):
        print(f"Image '{image_name}' does not exist in the current directory.")
        return

    try:
        shutil.move(image_name, destination_path)
        print(f"Image '{image_name}' moved to '{destination_path}'.")
    except Exception as e:
        print(f"An error occurred while moving the image: {e}")

def create_rels(relationships_folder, target_path, id):
    """Write ``sheet1.xml.rels`` containing a single image relationship.

    Args:
        relationships_folder: Directory in which the .rels file is created.
        target_path: Value of the relationship's ``Target`` attribute.
        id: Value of the relationship's ``Id`` attribute.
    """
    package_ns = "http://schemas.openxmlformats.org/package/2006/relationships"
    image_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'

    # The namespace is emitted as a plain xmlns attribute on the root element.
    rels_root = ET.Element('Relationships', {'xmlns': package_ns})
    ET.SubElement(
        rels_root,
        'Relationship',
        {'Id': id, 'Type': image_type, 'Target': target_path},
    )

    rels_file_path = os.path.join(relationships_folder, 'sheet1.xml.rels')
    ET.ElementTree(rels_root).write(rels_file_path, encoding='utf-8', xml_declaration=True)
    print(f"File 'sheet1.xml.rels' created at '{rels_file_path}' with Target='{target_path}'.")

def modify_sheet_xml(sheet_xml_path, rid):
    """Reference a background picture from a worksheet XML part.

    Ensures the worksheet has a ``<picture r:id="...">`` element pointing at
    relationship *rid* and rewrites the file in place. An existing
    ``<picture>`` element is re-pointed instead of being duplicated.

    Args:
        sheet_xml_path: Path of the worksheet XML file (e.g. ``sheet1.xml``).
        rid: Relationship id of the image, as declared in the sheet's .rels.
    """
    main_ns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    rel_ns = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'

    # Register the prefixes so ElementTree writes them back unchanged
    # instead of inventing ns0/ns1.
    namespaces = {
        '': main_ns,
        'r': rel_ns,
        'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
        'x14ac': 'http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac'
    }
    for prefix, uri in namespaces.items():
        ET.register_namespace(prefix, uri)

    tree = ET.parse(sheet_xml_path)
    root = tree.getroot()

    picture_tag = f'{{{main_ns}}}picture'
    picture = root.find(picture_tag)
    if picture is None:
        picture = ET.Element(picture_tag)
        # The worksheet schema fixes the order of child elements: <picture>
        # must come before these trailing ones, so insert ahead of the first
        # one present rather than blindly appending.
        must_follow = {
            f'{{{main_ns}}}{name}'
            for name in ('oleObjects', 'controls', 'webPublishItems', 'tableParts', 'extLst')
        }
        position = next(
            (index for index, child in enumerate(root) if child.tag in must_follow),
            len(root),
        )
        root.insert(position, picture)
    picture.set(f'{{{rel_ns}}}id', rid)

    tree.write(sheet_xml_path, encoding='utf-8', xml_declaration=True)

def zip_contents_of_directory(folder_path, output_zip_path):
    """Pack every file below *folder_path* into a deflated ZIP archive.

    Entries are stored under their path relative to *folder_path*, so the
    folder itself does not appear as a top-level entry.
    """
    archive = zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED)
    with archive:
        for current_dir, _subdirs, filenames in os.walk(folder_path):
            for filename in filenames:
                absolute_path = os.path.join(current_dir, filename)
                entry_name = os.path.relpath(absolute_path, start=folder_path)
                archive.write(absolute_path, entry_name)


def main():
    """Convert a CSV file to .xlsx and add a text watermark as sheet background.

    Command-line arguments: ``-i`` input CSV, ``-u`` user role, ``-n`` real
    name, ``-o`` output .xlsx. The watermark text is the role, the name and
    the current timestamp on three lines.

    All intermediate files live in a temporary directory that is removed even
    when a step fails, so nothing next to the input file is touched.
    """
    # Local import: only this entry point needs it.
    import tempfile

    parser = argparse.ArgumentParser(description="Process some files.")
    parser.add_argument('-i', '--input', type=str, required=True, help='The input CSV file.')
    parser.add_argument('-u', '--user', type=str, required=True, help='The role of user.')
    parser.add_argument('-n', '--name', type=str, required=True, help='The real name associated with the user.')
    parser.add_argument('-o', '--output', type=str, required=True, help='The output XLSX file.')
    args = parser.parse_args()
    input_csv = args.input
    role = args.user
    name = args.name
    output = args.output

    image_name = 'watermark.png'
    rid = 'rId1'

    with tempfile.TemporaryDirectory() as work_dir:
        # Work on <tmp>/workbook.* rather than deriving names from the input
        # path: splitting the input on '.' breaks for paths such as
        # './data.csv' or 'report.v2.csv'.
        path = os.path.join(work_dir, 'workbook')
        csv_to_xlsx(input_csv, path + '.xlsx')

        # An .xlsx file is a ZIP package: rename it and unpack it into `path`.
        cgSuffix(path + '.xlsx', "zip")
        unzip_file(path + '.zip')

        # 1. Declare the PNG content type in [Content_Types].xml.
        content_types_path = os.path.join(path, '[Content_Types].xml')
        add_contenttype(content_types_path)

        # 2. Create xl/media and put the watermark image there.
        #    createwm writes watermark.png into the current working directory.
        mediaFolder = os.path.join(path, 'xl', 'media')
        create_folder(mediaFolder)
        fmted_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        wm_content = role + '\n' + name + '\n' + fmted_time
        createwm(wm_content)
        move_image(image_name, mediaFolder)

        # 3. Create xl/worksheets/_rels with the relationship to the image.
        refFolder = os.path.join(path, 'xl', 'worksheets', '_rels')
        create_folder(refFolder)
        create_rels(refFolder, "../media/" + image_name, rid)

        # 4. Reference the image from sheet1.xml.
        sheetPath = os.path.join(path, 'xl', 'worksheets', 'sheet1.xml')
        modify_sheet_xml(sheetPath, rid)

        # 5. Re-pack the directory into a ZIP archive.
        output_zip_file = path + '.zip'
        zip_contents_of_directory(path, output_zip_file)

        # 6. Deliver the archive under the requested .xlsx name. shutil.move
        #    also works when the output is on a different filesystem than the
        #    temporary directory, where os.rename would fail.
        shutil.move(output_zip_file, output)


if __name__ == "__main__":
    main()

我是将csv转为xlsx然后再添加水印,如果是希望直接使用xlsx使用者自行修改即可(IG野区栓条🐕都能赢)。

相关推荐
枫哥和java1 小时前
python serializer, model drf通过序列化器, 模型获取mysql 一张表某个字段数据库现存的最大值
数据库·python·mysql
无忧无虑Coding2 小时前
pyinstall 打包Django程序
后端·python·django
ad禥思妙想5 小时前
如何运行python脚本
开发语言·python
威威猫的栗子5 小时前
用 Python 与 Turtle 创作属于你的“冰墩墩”!
开发语言·python·turtle
IT古董5 小时前
【机器学习】超简明Python基础教程
开发语言·人工智能·python·机器学习
qq_q9922502776 小时前
django基于python 语言的酒店推荐系统
后端·python·django
小李L6 小时前
Python3 Flask 应用中使用阿里短信发送
后端·python·flask
好看资源平台6 小时前
动态网站数据爬取——Selenium的使用
爬虫·python
威威猫的栗子6 小时前
Python Turtle绘图:重现汤姆劈树的经典瞬间
开发语言·python
沙度灬6 小时前
python之sklearn--鸢尾花数据集之数据降维(PCA主成分分析)
开发语言·python·sklearn