# Python module: HTML-to-PDF conversion helper built on wkhtmltopdf.
import os
import re
import subprocess
class HtmlToPdfConverter:
    """Convert an HTML report to PDF with the ``wkhtmltopdf`` command-line tool.

    Before converting, the HTML is checked for ``<table>`` tags. Only when a
    table is present is a pagination stylesheet injected, so that table rows
    are not split (and rendered overlapping) across page boundaries.
    """

    # Stylesheet injected into table-bearing documents: rows and cells are
    # kept whole, while the table itself stays free to span several pages.
    _PRECISE_CSS = (
        "\n<style>\n"
        "tr {\n"
        "page-break-inside: avoid !important;\n"
        "page-break-after: auto !important;\n"
        "}\n"
        "td, th {\n"
        "page-break-inside: avoid !important;\n"
        "white-space: normal !important;\n"
        "word-wrap: break-word !important;\n"
        "}\n"
        "table {\n"
        "border-collapse: collapse !important;\n"
        "page-break-inside: auto !important;\n"
        "width: 100% !important;\n"
        "}\n"
        "body {\n"
        "page-break-inside: auto !important;\n"
        "margin: 0;\n"
        "padding: 0;\n"
        "}\n"
        "</style>\n"
    )

    # Case-insensitive so <TABLE>, <Table ...> and <table> are all detected.
    _TABLE_TAG = re.compile(r"<table\b", re.IGNORECASE)

    # Opening tags after which the stylesheet may be inserted, in order of
    # preference. Tags with attributes (e.g. <head lang="en">) match as well.
    _ANCHOR_TAGS = (
        re.compile(r"<head\b[^>]*>", re.IGNORECASE),
        re.compile(r"<html\b[^>]*>", re.IGNORECASE),
        re.compile(r"<!doctype\b[^>]*>", re.IGNORECASE),
    )

    def __init__(self):
        """Create a converter; no configuration is required."""

    def _has_table(self, html_content: str) -> bool:
        """Report whether the HTML text contains a table tag.

        :param html_content: HTML text to inspect.
        :return: True if a ``<table`` tag is present (any letter case),
            False otherwise, including for empty input.
        """
        if not html_content:
            return False
        return self._TABLE_TAG.search(html_content) is not None

    def _apply_precise_css(self, html_content: str) -> str:
        """Return ``html_content`` with the pagination stylesheet inserted.

        The text is returned unchanged when it has no table or when the
        stylesheet is already present, so repeated calls do not stack
        duplicate ``<style>`` blocks.

        :param html_content: HTML text to process.
        :return: The (possibly) augmented HTML text.
        """
        if not self._has_table(html_content):
            return html_content

        css = self._PRECISE_CSS
        if css.strip() in html_content:
            return html_content

        # Insert right after the first suitable opening tag. Falling back to
        # <html> or the doctype keeps the stylesheet from landing ahead of
        # <!DOCTYPE>, which would push the renderer into quirks mode.
        for anchor in self._ANCHOR_TAGS:
            found = anchor.search(html_content)
            if found is not None:
                cut = found.end()
                return html_content[:cut] + css + html_content[cut:]

        # Bare fragment with no head/html/doctype: prepend the stylesheet.
        return css + html_content

    def _inject_precise_css(self, html_path: str) -> bool:
        """Inject the pagination stylesheet into the HTML file, in place.

        The file is rewritten only when its content actually changes, i.e.
        when it contains a table and has not been processed before.

        :param html_path: Path of the UTF-8 encoded HTML file.
        :return: True if the file was rewritten, False if left untouched.
        :raises OSError: If the file cannot be read or written.
        """
        with open(html_path, "r", encoding="utf-8") as src:
            original = src.read()

        updated = self._apply_precise_css(original)
        if updated == original:
            return False

        with open(html_path, "w", encoding="utf-8") as dst:
            dst.write(updated)
        return True

    def html2pdf_new(self, group_path: str, msg_path: str) -> int:
        """Convert ``report.html`` in ``group_path`` to ``html_pdf.pdf`` in ``msg_path``.

        :param group_path: Directory that contains ``report.html``.
        :param msg_path: Directory that receives ``html_pdf.pdf``.
        :return: Exit status of ``wkhtmltopdf``, or 127 when the executable
            cannot be found (the shell's "command not found" status).
        :raises OSError: If ``report.html`` cannot be read or rewritten.
        """
        report_path = os.path.join(group_path, "report.html")
        target_path = os.path.join(msg_path, "html_pdf.pdf")

        # The stylesheet is added only when the report contains a table.
        self._inject_precise_css(report_path)

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact and removes the reliance on shell quoting.
        cmd = [
            "wkhtmltopdf",
            "--encoding", "utf-8",         # avoid garbled non-ASCII text
            "--disable-smart-shrinking",   # keep tables from being squeezed
            "--enable-local-file-access",  # allow local CSS/image resources
            report_path,
            target_path,
        ]
        try:
            return subprocess.run(cmd, check=False).returncode
        except FileNotFoundError:
            # Conversion stays best-effort: report a missing executable via
            # the return value instead of raising.
            return 127
# ===================== Usage example =====================
if __name__ == "__main__":
    # Build a converter; the constructor takes no arguments.
    converter = HtmlToPdfConverter()
    # Then run a conversion, passing your real directories:
    # converter.html2pdf_new("directory containing report.html", "directory to write the PDF into")
    # For instance:
    # converter.html2pdf_new(r"C:\data\group", r"C:\data\msg")