【python】网页批量转PDF

安装wkhtmltopdf

网站:wkhtmltopdf(https://wkhtmltopdf.org/)

安装完成后,可以先在命令行中测试(把网页转换为 PDF):

```shell
wkhtmltopdf http://www.baidu.com/ D:\website1.pdf
```

安装pdfkit库

```shell
pip install pdfkit
```

批量转换代码

python 复制代码
import os
import pdfkit
# Absolute path to the wkhtmltopdf executable on this machine; adjust it to
# match the local installation.
path_wkthmltopdf = r'E:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
# pdfkit configuration bound to that executable; convert_html_to_pdf passes it
# to pdfkit.from_file through the `configuration` argument.
config = pdfkit.configuration(wkhtmltopdf=path_wkthmltopdf)

def _pdf_name_for(file_name, extensions):
    """Return the PDF file name for *file_name*, or None if it does not match.

    Only the trailing extension is swapped for ``.pdf``; any other occurrence
    of the extension text inside the name is left untouched. Matching is
    case-insensitive so that e.g. ``INDEX.HTM`` is recognised as well.
    """
    lowered = file_name.lower()
    for ext in extensions:
        if ext and lowered.endswith(ext.lower()):
            return file_name[:-len(ext)] + ".pdf"
    return None


def convert_html_to_pdf(input_folder, output_folder, extensions=(".htm",)):
    """Convert every HTML file under *input_folder* into a PDF in *output_folder*.

    The input folder is walked recursively. Each file whose name ends with one
    of *extensions* is rendered with ``pdfkit.from_file`` using the
    module-level ``config``. A failed conversion is reported on stdout and the
    batch continues with the next file.

    Args:
        input_folder: Directory that is searched (including subdirectories).
        output_folder: Directory receiving the PDFs; created if missing.
        extensions: File-name suffixes to convert, matched case-insensitively.
            A single string is accepted as well. Defaults to ``(".htm",)``;
            pass ``(".htm", ".html")`` to include ``.html`` files too.

    Note:
        The output is flat: every PDF is written directly into
        *output_folder*, so files with the same name in different
        subdirectories overwrite each other.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)

    # Make sure the output folder exists (no separate existence check needed).
    os.makedirs(output_folder, exist_ok=True)

    # The options never change between files, so build them once.
    # "enable-local-file-access" is the wkhtmltopdf switch that allows a local
    # page to load other local files (images, stylesheets, ...).
    options = {'encoding': 'utf-8', "enable-local-file-access": True}

    # Visit every file in the input folder and all of its subfolders.
    for root, _dirs, files in os.walk(input_folder):
        for file in files:
            pdf_name = _pdf_name_for(file, extensions)
            if pdf_name is None:
                continue

            html_file_path = os.path.join(root, file)
            pdf_file_path = os.path.join(output_folder, pdf_name)

            try:
                # Render the HTML file to PDF through wkhtmltopdf.
                pdfkit.from_file(html_file_path, pdf_file_path, configuration=config, options=options)
                print(f"成功转换: {html_file_path} -> {pdf_file_path}")
            except Exception as e:
                # Batch boundary: report the failure and carry on with the rest.
                print(f"转换失败: {html_file_path} -> {pdf_file_path}, 错误信息: {e}")


if __name__ == "__main__":
    # Input folder: the root of drive D:, searched recursively. The backslash
    # is doubled because a single trailing backslash would escape the closing
    # quote and leave the string literal unterminated.
    input_folder = 'D:\\'
    # Output folder for the generated PDFs; the raw string keeps the backslash
    # literal instead of relying on the invalid escape sequence "\p".
    output_folder = r'D:\pdf'
    convert_html_to_pdf(input_folder, output_folder)

报错解决

Python OSError: wkhtmltopdf reported an error: Exit with code 1 due to network error: ProtocolUnknownError(CSDN博客)

解决方法:在 pdfkit 的 options 中加入 "enable-local-file-access" 选项,允许 wkhtmltopdf 读取本地文件(上面的批量转换代码已包含该选项)。

参考

pdfkit | 利用python实现html文件转pdf (zhihu.com)

Python OSError: wkhtmltopdf reported an error: Exit with code 1 due to network error: ProtocolUnknownError(CSDN博客)