网上搜到的很多方法都已经过时了，这里贴一下我的方法。
参考 PyMuPDF 文档中的 Pixmap.tobytes：
https://pymupdf.readthedocs.io/en/latest/pixmap.html#Pixmap.tobytes
核心方法
python
def pdf2png(pdfPath, img_dir, zoom_x=1, zoom_y=1, rotation_angle=0):
    """Render every page of a PDF to a PNG file inside ``img_dir``.

    Pages are written in document order as ``1.png``, ``2.png``, ...

    Args:
        pdfPath: Path of the PDF file to open.
        img_dir: Directory that receives the PNG files; it must already
            exist, because the files are opened for writing directly.
        zoom_x: Horizontal scale factor applied on top of the 200 dpi base.
        zoom_y: Vertical scale factor applied on top of the 200 dpi base.
        rotation_angle: Rotation in degrees passed to ``Matrix.prerotate``.
    """
    # Passing dpi= to get_pixmap makes PyMuPDF ignore matrix=, which silently
    # disabled zoom and rotation. PDF user space is 72 units per inch, so the
    # 200 dpi base resolution is folded into the matrix instead.
    base_scale = 200 / 72
    # The matrix does not depend on the page, so build it once.
    # ``prerotate`` replaces the legacy ``preRotate`` alias.
    trans = fitz.Matrix(zoom_x * base_scale, zoom_y * base_scale)
    trans = trans.prerotate(rotation_angle)

    pdf = fitz.open(pdfPath)
    try:
        # Page indices are 0-based; only the output file names are 1-based.
        for idx in range(pdf.page_count):
            page = pdf[idx]
            pm = page.get_pixmap(matrix=trans, alpha=False)
            bt = pm.tobytes('png')
            img_path = os.path.join(img_dir, f'{idx + 1}.png')
            with open(img_path, 'wb') as f:
                f.write(bt)
    finally:
        # Close the document even when rendering or writing fails.
        pdf.close()
整个处理文件
python
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File : pdf2img.py
@Time : 2024-09-28 11:10:57
@Author : shushu
@Version : 1.0
@Desc :
'''
import os
import sys
import os
import fitz
def pdf2png(pdfPath, img_dir, zoom_x=1, zoom_y=1, rotation_angle=0):
    """Render every page of a PDF to a PNG file inside ``img_dir``.

    Pages are written in document order as ``1.png``, ``2.png``, ...

    Args:
        pdfPath: Path of the PDF file to open.
        img_dir: Directory that receives the PNG files; it must already
            exist, because the files are opened for writing directly.
        zoom_x: Horizontal scale factor of the render matrix.
        zoom_y: Vertical scale factor of the render matrix.
        rotation_angle: Rotation in degrees passed to ``Matrix.prerotate``.
    """
    # The matrix does not depend on the page, so build it once.
    # ``prerotate`` replaces the legacy ``preRotate`` alias.
    trans = fitz.Matrix(zoom_x, zoom_y).prerotate(rotation_angle)

    pdf = fitz.open(pdfPath)
    try:
        # Page indices are 0-based. Incrementing the index before the lookup
        # skipped the first page and ran one past the last page; only the
        # output file names are 1-based.
        for idx in range(pdf.page_count):
            page = pdf[idx]
            pm = page.get_pixmap(matrix=trans, alpha=False)
            bt = pm.tobytes('png')
            img_path = os.path.join(img_dir, f'{idx + 1}.png')
            with open(img_path, 'wb') as f:
                f.write(bt)
    finally:
        # Close the document even when rendering or writing fails.
        pdf.close()
def test():
    """Manual smoke run: convert one hard-coded sample PDF with defaults."""
    sample_pdf = '/Users/xxx表格_图片_page_2.pdf'
    output_dir = '/Users/xxx/04'
    pdf2png(pdfPath=sample_pdf, img_dir=output_dir)
def prcs(file_path):
    """Convert one PDF into PNG files in a directory named after the PDF.

    The output directory is the input path with its trailing ``.pdf``
    extension removed; it is created when it does not exist yet.

    Args:
        file_path: Path of a single PDF file.
    """
    print('-- ', file_path)
    # Strip only the trailing extension: str.replace('.pdf', '') would also
    # remove '.pdf' appearing elsewhere in the path (e.g. a parent folder).
    root, ext = os.path.splitext(file_path)
    if ext.lower() == '.pdf':
        img_dir = root
    else:
        # Without a .pdf suffix the directory name would equal the input file
        # itself and makedirs would fail, so use a sibling directory instead.
        img_dir = file_path + '_png'
    os.makedirs(img_dir, exist_ok=True)
    pdf2png(file_path, img_dir)
if __name__ == '__main__':
    # Every command-line argument is treated as the path of one PDF.
    paths = sys.argv[1:]
    print('-- ', paths)
    # prcs() works on a single path string, so convert the files one by one
    # instead of handing it the whole argument list.
    for path in paths:
        prcs(path)
伊织 2024-09-28(六)