方法1:pdfkit(基于 wkhtmltopdf)
安装依赖
bash
# 安装 wkhtmltopdf(系统级)
# Ubuntu/Debian
sudo apt install wkhtmltopdf
# macOS
brew install wkhtmltopdf
# Windows 下载安装:https://wkhtmltopdf.org/downloads.html
# 安装 Python 库
pip install pdfkit
python
import pdfkit
# 将网页转为 PDF
def url_to_pdf(url, output_path):
    """Convert the web page at ``url`` into a PDF written to ``output_path``.

    The conversion is delegated to ``pdfkit.from_url``. Failures are handled
    on a best-effort basis: the error is printed instead of being propagated,
    and the outcome is reported through the return value so callers can
    react to it.

    Args:
        url: Address of the page to convert.
        output_path: Destination path of the generated PDF file.

    Returns:
        True if the conversion finished without raising, False otherwise.
    """
    try:
        pdfkit.from_url(url, output_path)
    except Exception as e:
        # Deliberately broad: this is the top-level boundary of the script,
        # and a failed conversion should be reported, not crash the caller.
        print(f"转换失败: {e}")
        return False
    else:
        print(f"PDF 已保存为: {output_path}")
        return True
# Example call (replace the URL and output file name with your own)
url_to_pdf(
"https://etcnew.sdut.edu.cn/meol/common/script/preview/download_preview.jsp?fileid=15400633&resid=1451996&lid=65791&preview=preview",
"水泥工艺学-第五章硅酸盐水泥熟料的煅烧.pdf"
)
方法2:pyppeteer(较高级),适用于 JavaScript 渲染的网页(如 Vue、React 页面)。需先安装:pip install pyppeteer
python
import asyncio
from pyppeteer import launch
async def html_to_pdf(url, output_path):
    """Load ``url`` in a headless browser and save the page as an A4 PDF.

    Args:
        url: Address of the page to render.
        output_path: Destination path of the generated PDF file.

    Raises:
        Any exception raised by pyppeteer while opening the page, navigating
        or printing is propagated to the caller; the browser is closed
        before it propagates.
    """
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        # Wait for network activity to settle so content rendered by
        # JavaScript is present before printing.
        await page.goto(url, {'waitUntil': 'networkidle2'})
        # printBackground keeps background colors/images in the PDF.
        await page.pdf({'path': output_path, 'format': 'A4', 'printBackground': True})
    finally:
        # Always shut the browser down, even when navigation or printing
        # fails, so no browser process is left running.
        await browser.close()
# Usage example: run the coroutine to completion; the PDF is written to
# "output.pdf".
asyncio.run(html_to_pdf("https://example.com", "output.pdf"))
方法3:playwright(最高级、最稳定的方案)
bash
# 更新 pip
python3 -m pip install -U pip
# 安装 Playwright Python 库
pip install playwright
# 下载并安装所需的浏览器(Chromium、Firefox、WebKit)
playwright install
playwright --version
python
from playwright.sync_api import sync_playwright
# Print a single page to an A4 PDF using Playwright's synchronous API.
with sync_playwright() as playwright:
    # Start a headless Chromium instance and open one page in it.
    chromium_browser = playwright.chromium.launch(headless=True)
    tab = chromium_browser.new_page()

    # Navigate to the target page, then write it out as "page.pdf".
    tab.goto("https://example.com")
    tab.pdf(path="page.pdf", format="A4")

    chromium_browser.close()