一、环境准备(必须)
本脚本只支持 .docx 格式;如果是 .wps 文件,请先用 WPS 另存为 .docx。
bash
pip install python-docx
二、完整代码:检测 + 自动修改(公文常用规则)
python
from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
# ---------------------- Configuration: your document standard ----------------------
# Formatting rules keyed by paragraph type ("title", "header", "content").
# Every entry carries the run-level settings (font name, size in points,
# RGB color tuple, bold flag) plus the paragraph alignment. The "content"
# entry additionally carries paragraph-format settings (indent and spacing).
RULES = {
    "title": {  # document title
        "font": "黑体",
        "size": 22,  # 22 pt = Chinese size No. 2
        "color": (0, 0, 0),
        "bold": True,
        "align": WD_ALIGN_PARAGRAPH.CENTER,
    },
    "header": {  # level-1 heading, e.g. a paragraph starting with "一、"
        "font": "黑体",
        "size": 16,  # 16 pt = Chinese size No. 3
        "color": (0, 0, 0),
        "bold": True,
        "align": WD_ALIGN_PARAGRAPH.LEFT,
    },
    "content": {  # body text
        "font": "仿宋_GB2312",
        "size": 12,  # 12 pt = Chinese size "small No. 4"
        "color": (0, 0, 0),
        "bold": False,
        "align": WD_ALIGN_PARAGRAPH.LEFT,
        # Two-character first-line indent: 2 x the 12 pt body size = 24 pt.
        # (The previous value, Inches(0.5) = 36 pt, was three characters wide.)
        "first_line_indent": Pt(24),
        "line_spacing": 1.5,  # 1.5x line spacing
        "space_after": Pt(6),
    },
}
# ---------------------- Helper: check the formatting of one run ----------------------
def check_run(run, rule):
    """Compare one run's character formatting against a rule.

    Args:
        run: A python-docx run object.
        rule: One entry of ``RULES``; the keys ``font``, ``size``, ``color``
            and ``bold`` are read.

    Returns:
        A list of human-readable issue strings, empty when the run complies.
    """
    issues = []
    font = run.font

    # Latin font name.
    if font.name != rule["font"]:
        issues.append(f"字体错误:{font.name} → 应为 {rule['font']}")

    # East Asian font name. It lives in a separate w:eastAsia attribute, so a
    # run can have the right Latin font while its Chinese text uses another.
    rPr = run._element.rPr
    rFonts = rPr.rFonts if rPr is not None else None
    east_asia = rFonts.get(qn('w:eastAsia')) if rFonts is not None else None
    if east_asia != rule["font"]:
        issues.append(f"中文字体错误:{east_asia} → 应为 {rule['font']}")

    # Font size. ``font.size`` is None when the run inherits its size, so
    # guard before reading ``.pt`` to avoid an AttributeError.
    if font.size != Pt(rule["size"]):
        actual_size = font.size.pt if font.size is not None else "未设置"
        issues.append(f"字号错误:{actual_size} → 应为 {rule['size']}")

    # Color.
    if font.color.rgb != RGBColor(*rule["color"]):
        issues.append(f"颜色错误 → 应为 {rule['color']}")

    # Bold.
    if font.bold != rule["bold"]:
        issues.append(f"加粗错误 → 应为 {rule['bold']}")

    return issues
# ---------------------- Helper: fix the formatting of one run ----------------------
def fix_run(run, rule):
    """Overwrite one run's character formatting with the values from a rule.

    Args:
        run: A python-docx run object; modified in place.
        rule: One entry of ``RULES``; the keys ``font``, ``size``, ``color``
            and ``bold`` are read.
    """
    typeface = rule["font"]
    font = run.font

    font.name = typeface
    # Also set the East Asian font attribute so the typeface applies to
    # Chinese text.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), typeface)

    font.size = Pt(rule["size"])
    font.color.rgb = RGBColor(*rule["color"])
    font.bold = rule["bold"]
# ---------------------- Helper: check and fix paragraph formatting ----------------------
def check_and_fix_para(para, rule_type):
    """Check a paragraph against ``RULES[rule_type]`` and fix it in place.

    Alignment is checked for every paragraph type. First-line indent, line
    spacing and space-after are checked only when ``rule_type`` is
    ``"content"``.

    Args:
        para: A python-docx paragraph object; modified in place.
        rule_type: Key into ``RULES``.

    Returns:
        A list of issue strings, one per setting that had to be corrected.
    """
    spec = RULES[rule_type]
    found = []

    # Alignment applies to all paragraph types.
    wanted_align = spec["align"]
    if para.alignment != wanted_align:
        found.append(f"对齐错误 → 应为 {wanted_align}")
        para.alignment = wanted_align

    # The remaining settings exist only in the body-text rule.
    if rule_type != "content":
        return found

    fmt = para.paragraph_format
    # (paragraph_format attribute / rule key, message template) in report order.
    checks = (
        ("first_line_indent", "首行缩进错误 → 改为2字符"),
        ("line_spacing", "行距错误 → 改为{}倍"),
        ("space_after", "段后间距错误 → 改为6磅"),
    )
    for attr, template in checks:
        target = spec[attr]
        if getattr(fmt, attr) != target:
            found.append(template.format(target))
            setattr(fmt, attr, target)

    return found
# ---------------------- Main routine: batch check and fix ----------------------
# Chinese numerals accepted in a level-1 heading prefix such as "一、" or "十二、".
_HEADER_NUMERALS = frozenset("一二三四五六七八九十")


def _classify_paragraph(text, is_first):
    """Return the ``RULES`` key ("title", "header" or "content") for a paragraph."""
    if is_first:
        return "title"
    prefix, sep, _ = text.partition("、")
    # A heading starts with one to three Chinese numerals followed by "、".
    if sep and 0 < len(prefix) <= 3 and all(ch in _HEADER_NUMERALS for ch in prefix):
        return "header"
    return "content"


def process_word(docx_path, output_path):
    """Check and fix every non-empty paragraph of a .docx, then save a copy.

    The first non-empty paragraph is treated as the title, paragraphs that
    start with a Chinese numeral followed by "、" as level-1 headings, and
    everything else as body text. A report of all corrected settings is
    printed to stdout.

    Args:
        docx_path: Path of the .docx file to read.
        output_path: Path the corrected document is saved to.
    """
    doc = Document(docx_path)
    report = []
    # Track the title by "first non-empty paragraph" rather than index 0, so
    # a leading blank paragraph does not cause the title to be missed.
    title_pending = True

    for i, para in enumerate(doc.paragraphs):
        text = para.text.strip()
        if not text:
            continue

        rule_type = _classify_paragraph(text, title_pending)
        title_pending = False
        rule = RULES[rule_type]

        report.append(f"\n--- 第{i+1}段 [{rule_type}] ---")

        # Paragraph-level formatting.
        report.extend(check_and_fix_para(para, rule_type))

        # Character-level formatting, run by run; only non-compliant runs
        # are rewritten.
        for run in para.runs:
            run_issues = check_run(run, rule)
            if run_issues:
                report.extend(run_issues)
                fix_run(run, rule)

    # Save the corrected document.
    doc.save(output_path)

    # Print the report.
    print("=== 检测报告 ===")
    print("\n".join(report))
    print(f"\n✅ 修正完成,已保存到:{output_path}")
# ---------------------- Run ----------------------
if __name__ == "__main__":
    # Input document and the destination for the corrected copy.
    INPUT_DOCX, OUTPUT_DOCX = (
        r"C:\Users\49432\Desktop\降水专报.docx",
        r"C:\Users\49432\Desktop\降水专报_已修正.docx",
    )
    process_word(docx_path=INPUT_DOCX, output_path=OUTPUT_DOCX)
三、检测+修改哪些内容(仅覆盖文档正文中的段落,不含表格、页眉页脚)
1)字符级(每个字)
- ✅ 字体(标题/一级标题用黑体,正文用仿宋_GB2312)
- ✅ 字号(二号/三号/小四)
- ✅ 颜色(强制黑色)
- ✅ 加粗(标题/一级标题加粗,正文不加)
2)段落级
- ✅ 对齐(标题居中、正文左对齐)
- ✅ 首行缩进(正文强制2字符)
- ✅ 行距(正文1.5倍)
- ✅ 段后间距(正文6磅)
3)自动分类
- 第1段 → 标题规则
- 以"一、二、三、"开头 → 一级标题规则
- 其他 → 正文规则
四、运行效果
控制台会输出:
=== 检测报告 ===
--- 第1段 [title] ---
对齐错误 → 应为 CENTER (1)
字体错误:宋体 → 应为 黑体
字号错误:12.0 → 应为 22
--- 第2段 [header] ---
加粗错误 → 应为 True
...
✅ 修正完成,已保存到:C:\Users\49432\Desktop\降水专报_已修正.docx