前言
本文整理并记录一些用 Python 实现的小脚本。网上的格式转换工具大多需要开通会员,不想折腾,于是自己写了脚本一键生成,还可以打包成 exe 文件使用。
单张图片转换成pdf、图片批量转换成pdf
python
# coding = UTF-8
import os
from io import BytesIO
from PIL import Image
# Set NLS_LANG to a Simplified-Chinese UTF-8 value; per the original author's
# note this is intended to prevent garbled Chinese text.
os.environ["NLS_LANG"] = "SIMPLIFIED CHINESE_CHINA.UTF8"

# Lower-case file extensions (without the leading dot) accepted as images.
SUPPORT_SUFFIX = [
    "jpg",
    "jpeg",
    "png",
]
def pic_to_pdf(image_bytes: bytes) -> bytes:
    """Convert one encoded image into a single-page PDF.

    The image is decoded from memory and written with Pillow's PDF writer
    using ``resolution=100.0``. Pillow rejects some image modes with
    ``ValueError`` (for example ``RGBA`` PNGs on older Pillow releases); in
    that case the image is flattened onto a white background (or simply
    converted when it has no alpha band) and saved again as ``RGB``.

    :param image_bytes: raw bytes of an image file
    :return: bytes of the generated PDF document
    :raises ValueError: if the image cannot be written as PDF even as RGB
    """

    def _encode(picture) -> bytes:
        # Use a fresh buffer per attempt: a failed save may already have
        # written part of the document into the buffer it was given.
        with BytesIO() as pdf_buffer:
            picture.save(pdf_buffer, "PDF", resolution=100.0)
            return pdf_buffer.getvalue()

    with Image.open(BytesIO(image_bytes)) as image_object:
        try:
            return _encode(image_object)
        except ValueError:
            if image_object.mode == "RGB":
                # Already RGB, so the failure is not a mode problem.
                raise
            if "A" in image_object.getbands():
                # Composite over white so transparent areas do not turn black.
                rgba = image_object.convert("RGBA")
                flattened = Image.new("RGB", rgba.size, (255, 255, 255))
                flattened.paste(rgba, mask=rgba.split()[-1])
            else:
                flattened = image_object.convert("RGB")
            return _encode(flattened)
def batch_convert(image_path: str, pdf_path: str) -> None:
    """Convert every supported image under a folder into its own PDF.

    ``image_path`` is walked recursively. Each file whose extension
    (case-insensitive) is listed in ``SUPPORT_SUFFIX`` is converted with
    ``pic_to_pdf`` and written to ``pdf_path`` as ``<stem>.pdf``. All PDFs go
    directly into ``pdf_path``, so images sharing a stem (e.g. in different
    sub-folders, or ``a.jpg`` and ``a.png``) overwrite one another.

    :param image_path: folder containing the images
    :param pdf_path: folder the PDF files are written to; created if missing
    """
    # Create the destination up front so a missing folder does not abort the
    # run on the first image. An empty path means "current directory".
    if pdf_path:
        os.makedirs(pdf_path, exist_ok=True)

    for root, _dirs, files in os.walk(image_path, topdown=False):
        for name in files:
            stem, extension = os.path.splitext(name)
            if extension.lstrip(".").lower() not in SUPPORT_SUFFIX:
                continue

            source_file_path = os.path.join(root, name)
            target_file_path = os.path.join(pdf_path, f"{stem}.pdf")

            with open(source_file_path, "rb") as source:
                pdf_data = pic_to_pdf(source.read())

            # Open the target only after conversion succeeded, so a failing
            # image never leaves an empty .pdf file behind.
            with open(target_file_path, "wb") as target:
                target.write(pdf_data)
# Note: pic_to_pdf() takes the image's bytes, not a file path; read the file
# in binary mode first when converting a single image.
# Raw strings are used because "\p" is not a valid escape sequence and
# triggers a warning on recent Python versions; the path value is unchanged.
batch_convert(r'E:\pdf\f1669413880707', r'E:\pdf\f1669413880707')
多张图片合并为1个pdf文件
python
import os
import re
import time
import PIL.ExifTags
import PIL.Image
from reportlab.lib.pagesizes import A4
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
from reportlab.platypus import Image
def img_search(mypath, filenames):
    """Recursively collect image files under ``mypath`` into ``filenames``.

    A file counts as an image when its extension is ``.jpg``, ``.jpeg`` or
    ``.png`` (case-insensitive). The extension is checked with
    ``os.path.splitext`` so names such as ``photo (1).jpg`` are found even
    though the character before the dot is not alphanumeric.

    :param mypath: directory to scan
    :param filenames: list that receives the matching paths; it is appended
        to in place and nothing is returned
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    # os.listdir() returns entries in arbitrary order; sort so the collected
    # list (and therefore the page order of the PDF) is deterministic.
    for entry in sorted(os.listdir(mypath)):
        path = os.path.join(mypath, entry)
        if os.path.isfile(path):
            if os.path.splitext(entry)[1].lower() in image_extensions:
                filenames.append(path)
        elif os.path.isdir(path):
            img_search(path, filenames)
def img_search1(mypath, filenames):
    """Recursively collect image files under ``mypath`` into ``filenames``.

    Variant of the image search that checks the extension directly instead of
    using a regular expression. Extensions ``.jpg``, ``.jpeg`` and ``.png``
    are accepted regardless of letter case.

    :param mypath: directory to scan
    :param filenames: list that receives the matching paths; it is appended
        to in place and nothing is returned
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    # Sorted for a deterministic result; os.listdir() order is arbitrary.
    for entry in sorted(os.listdir(mypath)):
        path = os.path.join(mypath, entry)
        if os.path.isfile(path):
            # Lower-case the extension so "x.JPG" and "x.jpeg" both match.
            if os.path.splitext(entry)[1].lower() in image_extensions:
                filenames.append(path)
        elif os.path.isdir(path):
            img_search1(path, filenames)
def rotate_img_to_proper(image):
    """Return ``image`` rotated upright according to its EXIF orientation.

    EXIF orientation values 3, 6 and 8 are corrected by rotating the image
    180, 270 and 90 degrees respectively (``expand=True`` so the canvas takes
    the rotated dimensions). Any other value, missing EXIF data, or an object
    without ``_getexif`` leaves the image untouched.

    The rotation goes through ``image.rotate`` rather than module-level
    transpose constants: in this file the bare name ``Image`` is bound to
    ``reportlab.platypus.Image``, which does not provide ``ROTATE_*``.

    :param image: an opened PIL image
    :return: the rotated image, or the same object when no rotation applies
    """
    orientation_tag = 0x0112  # numeric id of the EXIF "Orientation" tag
    # Degrees passed to image.rotate() for each orientation that needs fixing.
    rotation_for_orientation = {3: 180, 6: 270, 8: 90}
    try:
        if not hasattr(image, '_getexif'):  # only present in JPEGs
            return image
        exif = image._getexif()  # returns None if no EXIF data
        if not exif:
            return image
        angle = rotation_for_orientation.get(exif.get(orientation_tag))
        if angle is not None:
            image = image.rotate(angle, expand=True)
    except Exception:
        # Orientation correction is best-effort: unreadable or corrupt EXIF
        # data must not prevent the image from being used as-is.
        pass
    return image
def main(src_folder=None, output_file_name=None):
    """Merge all images found under a folder into one A4 PDF, one per page.

    Images are collected with ``img_search``, rotated upright with
    ``rotate_img_to_proper`` and scaled, keeping their aspect ratio, to the
    largest size that fits on an A4 page. Each image is anchored to the top
    of its page and centred horizontally. An image that fails to load or
    draw is reported on stdout and skipped.

    :param src_folder: folder to scan recursively; when ``None`` the folder
        is read interactively from stdin
    :param output_file_name: path of the PDF to write; when ``None`` the
        original hard-coded location is used
    """
    if output_file_name is None:
        # Raw string: same value as before, without the invalid "\p" escape.
        output_file_name = r'E:\pdf\f1671228232790.pdf'
    output_dir = os.path.dirname(output_file_name)
    if output_dir:
        # Fail early (or create the folder) instead of after all the work.
        os.makedirs(output_dir, exist_ok=True)

    imgDoc = canvas.Canvas(output_file_name)
    imgDoc.setPageSize(A4)
    document_width, document_height = A4

    if src_folder is None:
        mypath = input('Input the image folder please:')
    else:
        mypath = src_folder

    filenames = []
    start = time.perf_counter()
    img_search(mypath, filenames)
    end = time.perf_counter()
    print('find file cost time: ', end - start, 'find files: ', len(filenames))

    for image in filenames:
        try:
            # The context manager releases the file handle once the image
            # has been drawn onto the canvas.
            with PIL.Image.open(image) as opened_image:
                image_file = rotate_img_to_proper(opened_image)
                image_width, image_height = image_file.size
                print('img size:', image_file.size)
                if not (image_width > 0 and image_height > 0):
                    raise ValueError('image has an empty dimension')

                # Fit inside the page in both directions; scaling by width
                # alone pushes tall images past the bottom edge.
                scale = min(document_width / float(image_width),
                            document_height / float(image_height))
                print_width = image_width * scale
                print_height = image_height * scale

                imgDoc.drawImage(ImageReader(image_file),
                                 (document_width - print_width) / 2.0,
                                 document_height - print_height,
                                 width=print_width, height=print_height,
                                 preserveAspectRatio=True)
                # Tell reportlab the current page is finished.
                imgDoc.showPage()
        except Exception as e:
            # Keep going: one bad image should not abort the whole document.
            print('error:', e, image)

    imgDoc.save()
    print('Done')
if __name__ == '__main__':
    # Raw string: "\p" is not a valid escape sequence and triggers a warning
    # on recent Python versions; the resulting path is unchanged.
    main(src_folder=r'E:\pdf\f1671228232790')