Word 格式规范检测 + 自动修改【Python】

一、环境准备（必须）

只支持 .docx（.wps 先让 WPS 另存为 .docx 即可）

bash 复制代码

pip install python-docx

二、完整代码：检测 + 自动修改（公文常用规则）

python 复制代码

from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn

# ---------------------- Configuration: your official-document standard ----------------------
# One entry per paragraph category. "size" is in points, "color" is an
# (R, G, B) tuple, and "align" is a WD_ALIGN_PARAGRAPH member.
RULES = {
    "title": {          # document title (first paragraph)
        "font": "黑体",
        "size": 22,     # 22 pt (Chinese size No. 2)
        "color": (0, 0, 0),
        "bold": True,
        "align": WD_ALIGN_PARAGRAPH.CENTER
    },
    "header": {         # level-one heading (from paragraph 2 on, e.g. "一、xxxx")
        "font": "黑体",
        "size": 16,     # 16 pt (Chinese size No. 3)
        "color": (0, 0, 0),
        "bold": True,
        "align": WD_ALIGN_PARAGRAPH.LEFT
    },
    "content": {        # body text
        "font": "仿宋_GB2312",
        "size": 12,     # 12 pt (Chinese size Small No. 4)
        "color": (0, 0, 0),
        "bold": False,
        "align": WD_ALIGN_PARAGRAPH.LEFT,
        # Two-character first-line indent: 2 x 12 pt body size = 24 pt.
        # (0.5 inch is 36 pt, which is three characters at this size.)
        "first_line_indent": Pt(24),
        "line_spacing": 1.5,                # 1.5x line spacing
        "space_after": Pt(6)                # 6 pt after each paragraph
    }
}

# ---------------------- Helper: check the character formatting of one run ----------------------
def check_run(run, rule):
    """Compare one run's character formatting against a rule.

    Args:
        run: A python-docx run; its ``font`` and ``_element`` are inspected.
        rule: A rule dict providing the ``font``, ``size`` (points),
            ``color`` (RGB tuple) and ``bold`` keys.

    Returns:
        A list of issue descriptions; empty when the run matches the rule.
    """
    issues = []
    font = run.font
    expected_font = rule["font"]

    # Typeface
    if font.name != expected_font:
        issues.append(f"字体错误：{font.name} → 应为 {expected_font}")
    else:
        # fix_run sets the East Asian font slot separately from font.name,
        # so verify it as well; otherwise a run with the right Latin font
        # but the wrong Chinese font would pass the check and never be fixed.
        r_pr = run._element.rPr
        r_fonts = r_pr.rFonts if r_pr is not None else None
        east_asia = r_fonts.get(qn('w:eastAsia')) if r_fonts is not None else None
        if east_asia != expected_font:
            issues.append(f"字体错误：中文字体 {east_asia} → 应为 {expected_font}")

    # Size
    size = font.size
    if size != Pt(rule["size"]):
        # size is None when the run does not set a size itself; guard the
        # .pt access so the report line does not raise AttributeError.
        current = size.pt if size is not None else None
        issues.append(f"字号错误：{current} → 应为 {rule['size']}")

    # Color
    if font.color.rgb != RGBColor(*rule["color"]):
        issues.append(f"颜色错误 → 应为 {rule['color']}")

    # Bold
    if font.bold != rule["bold"]:
        issues.append(f"加粗错误 → 应为 {rule['bold']}")
    return issues

# ---------------------- Helper: apply a rule's character formatting to one run ----------------------
def fix_run(run, rule):
    """Overwrite the run's typeface, size, color and bold flag with the rule's values.

    Args:
        run: The python-docx run to modify in place.
        rule: A rule dict providing ``font``, ``size``, ``color`` and ``bold``.
    """
    typeface = rule["font"]
    font = run.font

    font.name = typeface
    # Also write the w:eastAsia font attribute (Chinese-font compatibility).
    east_asia_attr = qn('w:eastAsia')
    run._element.rPr.rFonts.set(east_asia_attr, typeface)

    font.size = Pt(rule["size"])
    font.color.rgb = RGBColor(*rule["color"])
    font.bold = rule["bold"]

# ---------------------- Helper: check + fix paragraph-level formatting ----------------------
def check_and_fix_para(para, rule_type):
    """Check a paragraph against ``RULES[rule_type]`` and correct it in place.

    Alignment is checked for every rule type. First-line indent, line
    spacing and space-after are checked only for ``"content"`` paragraphs.

    Args:
        para: The python-docx paragraph to inspect and modify.
        rule_type: Key into ``RULES`` ("title", "header" or "content").

    Returns:
        A list of issue descriptions, one per setting that was corrected.
    """
    spec = RULES[rule_type]
    found = []

    # Alignment
    wanted_align = spec["align"]
    if para.alignment != wanted_align:
        found.append(f"对齐错误 → 应为 {wanted_align}")
        para.alignment = wanted_align

    # The remaining settings apply to body text only.
    if rule_type != "content":
        return found

    fmt = para.paragraph_format
    # (paragraph_format attribute / rule key, message reported on mismatch)
    body_checks = (
        ("first_line_indent", "首行缩进错误 → 改为2字符"),
        ("line_spacing", f"行距错误 → 改为{spec['line_spacing']}倍"),
        ("space_after", "段后间距错误 → 改为6磅"),
    )
    for attr, message in body_checks:
        wanted = spec[attr]
        if getattr(fmt, attr) != wanted:
            found.append(message)
            setattr(fmt, attr, wanted)
    return found

# ---------------------- Main routine: batch check + fix ----------------------
def process_word(docx_path, output_path):
    """Check and fix every non-empty paragraph, save the result, print a report.

    Paragraphs are classified as follows:
      * the first non-empty paragraph            -> "title"
      * text starting with one or more Chinese
        numerals followed by "、" (一、 二、 ... 十一、) -> "header"
      * everything else                          -> "content"

    Args:
        docx_path: Path of the .docx file to read.
        output_path: Path the corrected document is saved to.
    """
    doc = Document(docx_path)
    report = []
    title_seen = False

    for i, para in enumerate(doc.paragraphs):
        text = para.text.strip()
        if not text:
            continue

        # Classify the paragraph. The title is the first paragraph that has
        # text, so leading blank paragraphs no longer prevent title detection.
        if not title_seen:
            rule_type = "title"
            title_seen = True
        else:
            # Strip leading Chinese numerals; a heading is "<numerals>、...".
            # Requiring that something was stripped rejects text that merely
            # starts with "、".
            after_numeral = text.lstrip("一二三四五六七八九十")
            if after_numeral != text and after_numeral.startswith("、"):
                rule_type = "header"
            else:
                rule_type = "content"
        rule = RULES[rule_type]

        report.append(f"\n--- 第{i+1}段 [{rule_type}] ---")
        # Check + fix paragraph-level formatting
        report.extend(check_and_fix_para(para, rule_type))

        # Check + fix each run (character-level formatting)
        for run in para.runs:
            run_issues = check_run(run, rule)
            if run_issues:
                report.extend(run_issues)
                fix_run(run, rule)

    # Save the corrected document
    doc.save(output_path)
    # Print the report
    print("=== 检测报告 ===")
    print("\n".join(report))
    print(f"\n✅ 修正完成，已保存到：{output_path}")

# ---------------------- Run ----------------------
if __name__ == "__main__":
    import os
    import sys

    # Default input/output paths. They can be overridden on the command line:
    #   python this_script.py INPUT.docx [OUTPUT.docx]
    INPUT_DOCX = r"C:\Users\49432\Desktop\降水专报.docx"
    OUTPUT_DOCX = r"C:\Users\49432\Desktop\降水专报_已修正.docx"

    cli_args = sys.argv[1:]
    if cli_args:
        INPUT_DOCX = cli_args[0]
        if len(cli_args) > 1:
            OUTPUT_DOCX = cli_args[1]
        else:
            # No output given: write next to the input with a "_已修正" suffix
            # so the original file is never overwritten.
            stem, ext = os.path.splitext(INPUT_DOCX)
            OUTPUT_DOCX = f"{stem}_已修正{ext}"

    process_word(INPUT_DOCX, OUTPUT_DOCX)

三、检测+修改哪些内容（全覆盖）

1）字符级（每个字）

✅ 字体（标题/一级标题用黑体，正文用仿宋_GB2312）
✅ 字号（二号/三号/小四）
✅ 颜色（强制黑色）
✅ 加粗（标题/一级标题加粗，正文不加）

2）段落级

✅ 对齐（标题居中、正文左对齐）
✅ 首行缩进（正文强制2字符）
✅ 行距（正文1.5倍）
✅ 段后间距（6磅）

3）自动分类

第1段 → 标题规则
以“一、”“二、”“三、”等序号开头 → 一级标题规则
其他 → 正文规则

四、运行效果

控制台会输出：

复制代码

=== 检测报告 ===
--- 第1段 [title] ---
对齐错误 → 应为 CENTER
字体错误：宋体 → 应为 黑体
字号错误：12.0 → 应为 22
--- 第2段 [header] ---
加粗错误 → 应为 True
...
✅ 修正完成，已保存到：C:\Users\49432\Desktop\降水专报_已修正.docx