批量生成N天前的多个word文件，并用Python根据excel统计数据修改word模板，最后合并多个word文件

1，需求

根据word模板文件，生成多个带日期后缀的word文件
根据excel-每日告警统计数量，逐个修改当日的文档

2，实现

shell脚本：根据word模板文件，生成多个带日期后缀的word文件

bash 复制代码

#!/bin/bash
# Copy the report template once per calendar day in [start_date, end_date],
# naming each copy "<prefix>-YYYYMMDD.docx", and print/count the days that are
# not listed as holidays. Requires GNU date (relies on `date -d`).

# Base name (without extension) of the template document to copy
baogao_doc_prename="巡检报告"
baogao_doc="${baogao_doc_prename}.docx"

# Directory that receives the generated files.
# A plain "." works on Linux, macOS and Git Bash alike; a trailing backslash
# would become part of the directory name on POSIX shells.
dest_dir="."

# First and last day to generate (inclusive), normalised to YYYYMMDD
start_date=$(date -d "20250725" +%Y%m%d)
end_date=$(date -d "20250726" +%Y%m%d)

# Holiday list, to be filled in by the user.
# Format: ("YYYYMMDD" "YYYYMMDD" ...)
holidays=(
  "20240101"  #"20250101" "20250201"   # examples: New Year's Day, Spring Festival
)

# Fail early instead of printing one cp error per day
if [ ! -f "$baogao_doc" ]; then
  echo "错误: 未找到模板文件 $baogao_doc" >&2
  exit 1
fi

current_date=$start_date
day_count=0

echo "近一年日期（排除节假日）:"
# YYYYMMDD strings compare correctly as integers
while [ "$current_date" -le "$end_date" ]; do
  # Copy the template for this day. NOTE: a copy is made for every day in the
  # range; the holiday list below only affects what is printed and counted.
  cp -- "$baogao_doc" "$dest_dir/${baogao_doc_prename}-${current_date}.docx"

  # Print and count the day unless it is listed as a holiday
  if [[ ! " ${holidays[*]} " =~ " $current_date " ]]; then
    echo "$current_date"
    day_count=$((day_count + 1))
  fi

  # Advance by one calendar day. Adding a fixed 86400 seconds would repeat or
  # drop a date when a daylight-saving change falls inside the range.
  current_date=$(date -d "$current_date +1 day" +%Y%m%d) || break
done

echo "生成完成！有效日期数: $day_count"

python脚本：根据excel-每日告警统计数量，逐个修改当日的文档

python 复制代码

import pandas as pd
from docx import Document
from docx.table import _Cell
from docx.text.paragraph import Paragraph
import os
import re
from datetime import datetime, timedelta

def get_previous_day_filename(filename):
    """Return *filename* with its YYYYMMDD date moved back by one day.

    The date is looked up in the file-name part of the path first, so a dated
    directory name (e.g. ``./20250101/report-20250725.docx``) is left alone.
    If the file name carries no date, the first 8-digit run anywhere in the
    path is used instead. Only the matched occurrence is rewritten.

    :param filename: file name or path containing an 8-digit date
    :return: the rewritten name, or ``None`` when no 8-digit run is found or
        the digits are not a valid calendar date (a message is printed)
    """
    date_pattern = re.compile(r'(\d{8})')  # 8 consecutive digits

    # Everything before the basename is kept byte-for-byte, so the caller's
    # path separators are never normalised.
    basename_start = len(filename) - len(os.path.basename(filename))
    match = date_pattern.search(filename, basename_start) or date_pattern.search(filename)
    if not match:
        print("错误: 文件名中未找到日期部分!")
        return None

    date_str = match.group(1)
    try:
        date_obj = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"错误: 日期格式不正确，应为YYYYMMDD，但得到了{date_str}")
        return None

    previous_day_str = (date_obj - timedelta(days=1)).strftime('%Y%m%d')

    # Splice by position rather than re.sub, which would rewrite every
    # 8-digit run in the path.
    return filename[:match.start(1)] + previous_day_str + filename[match.end(1):]

# Walk the body paragraphs and substitute placeholder text
def replact_word_item( doc,  replacements ):
    """Replace placeholder strings in the paragraphs of *doc*.

    :param doc: object exposing ``paragraphs``; each paragraph exposes
        ``text`` and ``runs``, and each run a writable ``text``
    :param replacements: mapping of placeholder -> value; values are
        converted with ``str()``

    Runs that contain a placeholder are edited in place, so their formatting
    is kept. When a placeholder is split across several runs, the paragraph's
    run texts are merged into the first run (which keeps its formatting) and
    the remaining runs are emptied; otherwise the placeholder would be found
    in ``paragraph.text`` but never replaced.
    """
    for paragraph in doc.paragraphs:
        for key, value in replacements.items():
            if key not in paragraph.text:
                continue
            runs = paragraph.runs
            if not runs:
                continue
            new_text = str(value)

            holders = [run for run in runs if key in run.text]
            if holders:
                # Only touch runs that actually hold the placeholder
                for run in holders:
                    run.text = run.text.replace(key, new_text)
                continue

            # Placeholder straddles run boundaries
            joined = "".join(run.text for run in runs)
            if key not in joined:
                # The text lives outside the runs; nothing to edit here
                continue
            runs[0].text = joined.replace(key, new_text)
            for run in runs[1:]:
                run.text = ""
                  
def _alert_count(excel_row, column):
    """Return the alert count stored in *column* of *excel_row* (empty cell -> 0)."""
    value = excel_row[column]
    if pd.isna(value):
        return 0
    return int(value)


def _previous_alert_conclusion(word_filename):
    """Return the previous day's alert conclusion, reworded as "already handled".

    Reads the '巡检结论' row of the first table in the previous day's report.
    Returns '' when there is no such report, it cannot be opened, it has no
    table, or its conclusion does not mention any alert.
    """
    # python-docx signals a missing/invalid package with its own exception type
    from docx.opc.exceptions import PackageNotFoundError

    former_word_filename = get_previous_day_filename(word_filename)
    # get_previous_day_filename returns None when the name carries no valid date
    if not former_word_filename or not os.path.exists(former_word_filename):
        return ''

    print(f"获取前一天巡检报告文件{former_word_filename}")
    try:
        doc_former = Document(former_word_filename)
    except (OSError, PackageNotFoundError):
        print(f"未找到文件: {former_word_filename}，跳过处理")
        return ''
    if not doc_former.tables:
        return ''

    former_result = ''
    # Skip the header row; column 0 = system, column 1 = conclusion text
    for table_row in doc_former.tables[0].rows[1:]:
        if table_row.cells[0].text.strip() == '巡检结论':
            former_result = table_row.cells[1].text
    if "告警" not in former_result:
        # Only an alert conclusion is carried over to the next day
        return ''
    return former_result.replace("有", "上次出现的").replace("均已反馈", "均已处理")


def edit_xjbaogao_table(doc, excel_row, word_filename):
    """Fill the inspection table and conclusion of one daily report.

    Table layout (first table of *doc*, header row skipped):
    column 0 = system, column 1 = inspection item, column 2 = normal/abnormal,
    column 3 = remarks.

    For the 'Test1' and 'Test2' rows the alert count is read from the Excel
    columns 'Test1告警' / 'Test2告警'; a positive count marks the row abnormal
    and writes a remark. The combined result is written to the '巡检结论' row
    (wherever it sits in the table) and, together with the previous day's
    alert conclusion, replaces the sentence "各业务模块运行正常。" in the body
    paragraphs.

    :param doc: opened Word document, modified in place (not saved here)
    :param excel_row: row of the alert statistics sheet for this day
    :param word_filename: path of *doc*; used to locate the previous day's report
    """
    inspection_table = doc.tables[0]
    total_result = []
    conclusion_row = None

    for table_row in inspection_table.rows[1:]:
        system = table_row.cells[0].text.strip()
        if system == '巡检结论':
            # Remember it; it can only be filled once all systems are counted
            conclusion_row = table_row
            continue
        if system not in ('Test1', 'Test2'):
            continue

        alert_cnt = _alert_count(excel_row, system + "告警")
        if alert_cnt > 0:
            result = str(alert_cnt) + "次" + system + "告警"
            # Column 3: remarks
            table_row.cells[3].text = "有" + result + "，均已反馈"
            # Column 2: tick "abnormal"
            table_row.cells[2].text = "\r正常□\r不正常☑"
            total_result.append(result)

    # Today's conclusion; stays empty on a day without alerts so that no
    # dangling "有，均已反馈。" prefix is ever written.
    xunjian_result = ""
    if total_result:
        xunjian_result = "有" + "，".join(total_result) + "，均已反馈。"
        if conclusion_row is not None:
            conclusion_row.cells[1].text = xunjian_result
            print("xunjian_result===>", xunjian_result)

    # Append what was reported the day before, reworded as already handled
    xunjian_result += _previous_alert_conclusion(word_filename)

    if "告警" in xunjian_result:
        replact_word_item(doc, {"各业务模块运行正常。": xunjian_result})
                       
def _timestamp_to_yyyymmdd(value):
    """Render an '@timestamp' cell as the YYYYMMDD text used in file names."""
    if hasattr(value, "strftime"):
        # datetime / pandas.Timestamp cells
        return value.strftime('%Y%m%d')
    if isinstance(value, float) and value.is_integer():
        # iterrows() may upcast integer cells to float: 20250725.0 -> "20250725"
        return str(int(value))
    return str(value).strip()


def update_word_remarks(excel_path, word_dir, word_file_prefix, sheet_name='sheet1'):
    """
    Read the alert statistics from Excel and update the Word report of each day.

    For every Excel row the file ``<word_dir>/<prefix>-<timestamp>.docx`` is
    opened, edited via ``edit_xjbaogao_table`` and saved back in place. Rows
    whose report file is missing or unreadable are reported and skipped.

    :param excel_path: path of the Excel workbook
    :param word_dir: directory that holds the Word files
    :param word_file_prefix: report name prefix; either a string or a sequence
        whose first element is used
    :param sheet_name: worksheet to read (defaults to 'sheet1')
    """
    # python-docx signals a missing/invalid package with its own exception type
    from docx.opc.exceptions import PackageNotFoundError

    df = pd.read_excel(excel_path, sheet_name=sheet_name)

    # Indexing a plain string would silently yield its first character
    if isinstance(word_file_prefix, str):
        f1 = word_file_prefix
    else:
        f1 = word_file_prefix[0]

    for _, excel_row in df.iterrows():
        raw_timestamp = excel_row['@timestamp']
        if pd.isna(raw_timestamp):
            # Blank row: there is no file name to build
            continue
        timestamp = _timestamp_to_yyyymmdd(raw_timestamp)

        word_filename = f"{word_dir}/{f1}-{timestamp}.docx"
        if not os.path.exists(word_filename):
            print(f"未找到文件: {word_filename}，跳过处理")
            continue

        print(f"处理文件{word_filename}")
        try:
            doc = Document(word_filename)
        except (OSError, PackageNotFoundError):
            print(f"未找到文件: {word_filename}，跳过处理")
            continue

        # Update the table rows and the conclusion text for this day
        edit_xjbaogao_table(doc, excel_row, word_filename)

        # Save the modified report over the original file
        doc.save(word_filename)

if __name__ == "__main__":
    # Settings for a local run; adjust to the actual environment.
    EXCEL_PATH = "告警统计.xlsx"      # Excel workbook with the daily alert counts
    WORD_DIRECTORY = "."              # directory holding the Word files ("." = current)
    word_file_prefix = ["巡检报告"]   # report name prefix(es)
    update_word_remarks(
        excel_path=EXCEL_PATH,
        word_dir=WORD_DIRECTORY,
        word_file_prefix=word_file_prefix,
    )

3，结果

4，合并多个word文件

pip install docxcompose

python 复制代码

import os,re
from docx import Document
from docxcompose.composer import Composer

def extract_date_from_filename(filename):
    """Extract a date from a file name and return it as 'YYYY-MM-DD'.

    Accepts names such as ``2024-03-15``, ``20240315`` or
    ``report_2024_03_15.docx``. Only the file-name part of a path is examined,
    so dated directory names do not interfere, and digit runs that are not a
    real calendar date are skipped.

    :param filename: file name or path
    :return: the first valid date found, or "未知日期" when there is none
    """
    from datetime import date

    basename = os.path.splitext(os.path.basename(filename))[0]
    # Year, month and day, optionally separated by '-' or '_'
    patterns = [
        r'(\d{4})[-_]?(\d{2})[-_]?(\d{2})',
    ]
    for pattern in patterns:
        for match in re.finditer(pattern, basename):
            year, month, day = match.groups()
            try:
                date(int(year), int(month), int(day))
            except ValueError:
                # e.g. an 8-digit serial number; keep looking
                continue
            return f"{year}-{month}-{day}"
    return "未知日期"
    
def merge_word_files_with_titles(source_dir, output_file="merged_document.docx"):
    """
    Merge every .docx file of a directory into one document, ordered by the
    date in the file name, with each source file name inserted as a level-1
    heading before its content and a page break between documents.

    Word lock files (``~$*.docx``) and the output file itself are skipped, so
    the function can be re-run even when the result is stored in *source_dir*.

    :param source_dir: directory containing the Word files to merge
    :param output_file: path of the merged document
    """
    output_path = os.path.abspath(output_file)

    files = []
    for name in os.listdir(source_dir):
        if not name.lower().endswith(".docx"):
            continue
        if name.startswith("~$"):
            # Temporary lock file created while a document is open in Word
            continue
        path = os.path.join(source_dir, name)
        if os.path.abspath(path) == output_path:
            # Never feed a previous merge result back into the merge
            continue
        files.append(path)

    if not files:
        print("目录中未找到.docx文件")
        return

    # Sort by the date in the file name; the name itself breaks ties so the
    # order does not depend on the directory listing order.
    files.sort(key=lambda path: (
        extract_date_from_filename(os.path.basename(path)),
        os.path.basename(path),
    ))

    # Master document that receives all appended parts
    master = Document()
    composer = Composer(master)

    last_index = len(files) - 1
    for index, file_path in enumerate(files):
        # Source file name (without extension) becomes the heading
        file_name = os.path.splitext(os.path.basename(file_path))[0]

        title_doc = Document()
        title_doc.add_heading(file_name, level=1)
        composer.append(title_doc)

        # Content of the source document
        composer.append(Document(file_path))

        # Page break after every document except the last one
        if index != last_index:
            page_break = Document()
            page_break.add_page_break()
            composer.append(page_break)

    composer.save(output_file)
    print(f"合并完成！文件已保存至: {output_file}")

# Example usage: merge everything under ./old2 into one report
if __name__ == "__main__":
    # First argument is the source directory (replace with the real path),
    # second argument is the merged output file.
    merge_word_files_with_titles(r"./old2", "./old-合并报告.docx")