Python PDF批量转图片工具
1.简介:
这是一个简单易用的PDF转图片工具,主要功能:
- 可以批量处理多个PDF文件
- 可以选择需要转换的具体页面
- 支持两种设置方式:
  - 统一设置:所有PDF使用相同的页码
  - 单独设置:每个PDF单独设置页码
使用方法:
- 点击"选择文件夹",选择PDF所在文件夹
- 选择设置方式并输入要转换的页码
  例如:1,2,3 或 1-5 或 1,3-5
- 点击"开始转换"即可
转换后的图片会自动保存在原PDF所在目录下的"文件名_images"文件夹中
2.运行效果:
3.相关源码:
python
import gc
import os
import queue
import threading
import tkinter as tk
from queue import Queue
from tkinter import ttk, filedialog, messagebox
from typing import Dict, List

import fitz
class PDFConverter:
    """Tkinter application that renders selected pages of PDF files to PNG.

    The user picks a folder, assigns page numbers either to every PDF at once
    ("batch" mode) or per file ("individual" mode), and starts the conversion.
    Images are written to a ``<pdf name>_images`` directory next to each PDF.

    Threading model: rendering runs on background worker threads, but every
    Tk call is executed on the thread that created the window. Workers hand
    UI work over through ``_run_on_ui``; the window drains that hand-over
    queue periodically from the Tk event loop.
    """

    # PyMuPDF documents that it does not support use from multiple threads,
    # so rendering is restricted to a single background worker.
    MAX_WORKERS = 1
    # Zoom factor applied on both axes when rasterising a page.
    RENDER_ZOOM = 2
    # Interval (milliseconds) between drains of the UI hand-over queue.
    UI_POLL_MS = 50
    # Treeview column identifiers (these are also the visible headings).
    NAME_COLUMN = "文件名"
    PAGES_COLUMN = "页码设置"

    def __init__(self):
        """Create the main window, initialise state and build the widgets."""
        self.root = tk.Tk()
        self.root.title("PDF批量转图片工具")
        self.root.geometry("900x700")
        # Maps the full path of each listed PDF to its selected 1-based pages.
        self.pdf_settings: Dict[str, List[int]] = {}
        # Pending conversion jobs as (pdf_path, pages) tuples.
        self.work_queue = Queue()
        # Progress bookkeeping shared with the worker threads.
        self.completed_files = 0
        self.total_files = 0
        self.lock = threading.Lock()
        # Thread that owns the Tk widgets, and the queue other threads use to
        # hand (callback, args) pairs over to it.
        self._ui_thread = threading.current_thread()
        self._ui_queue = Queue()
        # Maps a Treeview item id to the full path of the PDF it represents.
        self._item_paths: Dict[str, str] = {}
        # True while a conversion run is in progress.
        self._converting = False
        # True while the "invalid pages" dialog of on_entry_change is open.
        self._validating = False
        self.setup_ui()
        self._poll_ui_queue()

    def setup_ui(self):
        """Build every widget of the main window."""
        # Folder selection row.
        folder_frame = ttk.Frame(self.root)
        folder_frame.pack(fill=tk.X, padx=5, pady=5)
        self.folder_path = tk.StringVar()
        ttk.Entry(folder_frame, textvariable=self.folder_path).pack(
            side=tk.LEFT, fill=tk.X, expand=True)
        ttk.Button(folder_frame, text="选择文件夹",
                   command=self.select_folder).pack(side=tk.RIGHT)

        # Help button.
        help_frame = ttk.Frame(self.root)
        help_frame.pack(fill=tk.X, padx=5)
        ttk.Button(help_frame, text="使用帮助",
                   command=self.show_help).pack(side=tk.RIGHT)

        # Setting-mode selector ("batch" or "individual").
        self.mode_frame = ttk.LabelFrame(self.root, text="设置模式")
        self.mode_frame.pack(fill=tk.X, padx=5, pady=5)
        self.setting_mode = tk.StringVar(value="batch")
        ttk.Radiobutton(self.mode_frame, text="统一设置",
                        variable=self.setting_mode, value="batch",
                        command=self.toggle_setting_mode).pack(
            side=tk.LEFT, padx=10)
        ttk.Radiobutton(self.mode_frame, text="单独设置",
                        variable=self.setting_mode, value="individual",
                        command=self.toggle_setting_mode).pack(
            side=tk.LEFT, padx=10)

        # Batch page-number row (only shown in batch mode).
        self.batch_setting_frame = ttk.LabelFrame(self.root, text="统一页码设置")
        self.batch_setting_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Label(self.batch_setting_frame,
                  text="页码格式(例如: 1,2,3-5):").pack(side=tk.LEFT)
        self.batch_pages = ttk.Entry(self.batch_setting_frame)
        self.batch_pages.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
        ttk.Button(self.batch_setting_frame, text="应用",
                   command=self.apply_batch_settings).pack(side=tk.RIGHT)

        # Control row: start button, progress bar, memory clean-up button.
        control_frame = ttk.Frame(self.root)
        control_frame.pack(fill=tk.X, padx=5, pady=5)
        # Kept as an attribute so it can be disabled while a run is active.
        self.convert_button = ttk.Button(control_frame, text="开始转换",
                                         command=self.start_conversion)
        self.convert_button.pack(side=tk.LEFT, padx=5)
        self.progress = ttk.Progressbar(control_frame, mode='determinate')
        self.progress.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
        ttk.Button(control_frame, text="清理内存",
                   command=self.clean_memory).pack(side=tk.RIGHT, padx=5)

        # Horizontal split: file list on the left, log on the right.
        main_pane = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        main_pane.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

        left_frame = ttk.Frame(main_pane)
        main_pane.add(left_frame, weight=2)
        list_frame = ttk.LabelFrame(left_frame, text="PDF文件列表")
        list_frame.pack(fill=tk.BOTH, expand=True)

        # File list with one column for the name and one for the pages.
        columns = (self.NAME_COLUMN, self.PAGES_COLUMN)
        self.tree = ttk.Treeview(list_frame, columns=columns, show="headings")
        self.tree.heading(self.NAME_COLUMN, text="文件名")
        self.tree.heading(self.PAGES_COLUMN, text="页码设置")
        self.tree.column(self.NAME_COLUMN, width=400)
        self.tree.column(self.PAGES_COLUMN, width=200)

        # Per-row Entry widgets (item id -> Entry) overlaid on the pages
        # column while in individual mode.
        self.page_entries = {}

        self._tree_scrollbar = ttk.Scrollbar(
            list_frame, orient=tk.VERTICAL, command=self.tree.yview)
        self._tree_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        # Route scroll updates through a hook so the overlaid entries can
        # follow their rows; do the same when the tree is resized.
        self.tree.configure(yscrollcommand=self._on_tree_scroll)
        self.tree.bind('<Configure>', self._schedule_entry_reposition)
        self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        # Processing log.
        right_frame = ttk.LabelFrame(main_pane, text="处理日志")
        main_pane.add(right_frame, weight=1)
        self.log_text = tk.Text(right_frame, wrap=tk.WORD, width=40)
        log_scrollbar = ttk.Scrollbar(right_frame, command=self.log_text.yview)
        self.log_text.configure(yscrollcommand=log_scrollbar.set)
        self.log_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Apply the initial visibility for the default mode.
        self.toggle_setting_mode()

    def _run_on_ui(self, callback, *args):
        """Run ``callback(*args)`` on the thread that owns the widgets.

        Called from the UI thread the callback runs immediately; called from
        any other thread it is queued and executed by ``_poll_ui_queue``.
        """
        if threading.current_thread() is self._ui_thread:
            callback(*args)
        else:
            self._ui_queue.put((callback, args))

    def _poll_ui_queue(self):
        """Execute callbacks queued by other threads, then reschedule itself."""
        # Reschedule first so that an exception in a callback, or a callback
        # that opens a modal dialog, does not stop future polling.
        self.root.after(self.UI_POLL_MS, self._poll_ui_queue)
        while True:
            try:
                callback, args = self._ui_queue.get_nowait()
            except queue.Empty:
                return
            callback(*args)

    def _on_tree_scroll(self, first, last):
        """Forward a scroll update to the scrollbar and realign the entries."""
        self._tree_scrollbar.set(first, last)
        self._schedule_entry_reposition()

    def _schedule_entry_reposition(self, event=None):
        """Queue a realignment of the per-row entries for when Tk is idle."""
        if self.setting_mode.get() == "individual":
            self.tree.after_idle(self._reposition_entries)

    def _reposition_entries(self):
        """Realign the per-row entries if individual mode is still active."""
        if self.setting_mode.get() == "individual":
            self._place_entries()

    def _place_entries(self):
        """Place each entry over its row's pages cell; hide off-screen ones."""
        for item in self.tree.get_children():
            entry = self.page_entries.get(item)
            if entry is None:
                continue
            bbox = self.tree.bbox(item, self.PAGES_COLUMN)
            if bbox:
                x, y, w, h = bbox
                entry.place(x=x, y=y, width=w, height=h)
            else:
                # An empty bbox means the row is not currently visible.
                entry.place_forget()

    def toggle_setting_mode(self):
        """Show the widgets that belong to the selected setting mode."""
        if self.setting_mode.get() == "batch":
            self.batch_setting_frame.pack(after=self.mode_frame, fill=tk.X,
                                          padx=5, pady=5)
            # Hide every per-row entry.
            for entry in self.page_entries.values():
                entry.place_forget()
        else:
            self.batch_setting_frame.pack_forget()
            # Show the per-row entries.
            self.update_page_entries()

    def load_pdf_files(self):
        """Reload the file list from the folder in ``self.folder_path``."""
        # Drop the previous rows, settings and entry widgets. The entries
        # must be destroyed explicitly, otherwise they stay on screen.
        for item in self.tree.get_children():
            self.tree.delete(item)
        for entry in self.page_entries.values():
            entry.destroy()
        self.page_entries.clear()
        self._item_paths.clear()
        self.pdf_settings.clear()

        folder = self.folder_path.get()
        try:
            names = sorted(os.listdir(folder))
        except OSError as exc:
            messagebox.showerror("错误", f"无法读取文件夹: {exc}")
            return

        for name in names:
            full_path = os.path.join(folder, name)
            if not name.lower().endswith('.pdf') or not os.path.isfile(full_path):
                continue
            item = self.tree.insert("", tk.END, values=(name, ""))
            self._item_paths[item] = full_path
            self.pdf_settings[full_path] = []
            self.create_entry_for_item(item)

        if self.setting_mode.get() == "individual":
            self.update_page_entries()

    def create_entry_for_item(self, item):
        """Create the (initially unplaced) page entry for a tree row."""
        entry = ttk.Entry(self.tree)
        entry.bind('<Return>', lambda e, i=item: self.on_entry_change(e, i))
        entry.bind('<FocusOut>', lambda e, i=item: self.on_entry_change(e, i))
        self.page_entries[item] = entry

    def update_page_entries(self):
        """Sync every entry's text with the tree and place it over its row."""
        # Flush pending geometry work so bbox() sees rows that were just
        # inserted.
        self.tree.update_idletasks()
        for item in self.tree.get_children():
            entry = self.page_entries[item]
            entry.delete(0, tk.END)
            # tree.set() returns the cell text as a string.
            entry.insert(0, self.tree.set(item, self.PAGES_COLUMN))
        self._place_entries()

    def on_entry_change(self, event, item):
        """Validate and store the page string typed into a per-row entry.

        Bound to ``<Return>`` and ``<FocusOut>`` of the entry for ``item``.
        On invalid input an error dialog is shown, focus returns to the
        entry and the previously stored setting is left untouched.
        """
        if self._validating:
            # The error dialog below takes focus away from the entry, which
            # can fire <FocusOut> and re-enter this handler.
            return
        full_path = self._item_paths.get(item)
        if full_path is None:
            # Event for a row that has since been removed.
            return
        entry = event.widget
        page_string = entry.get()
        try:
            pages = self.parse_page_numbers(page_string)
        except ValueError:
            self._validating = True
            try:
                messagebox.showerror("错误", "页码格式错误")
            finally:
                self._validating = False
            entry.focus_set()
            return
        self.pdf_settings[full_path] = pages
        shown = page_string if page_string.strip() else ""
        self.tree.set(item, self.PAGES_COLUMN, shown)

    def parse_page_numbers(self, page_string: str) -> List[int]:
        """Parse a page specification such as ``"1,3-5"``.

        Args:
            page_string: Comma-separated 1-based page numbers and inclusive
                ``start-end`` ranges. Whitespace around parts is ignored.

        Returns:
            The selected page numbers, de-duplicated and sorted ascending.
            A blank string yields an empty list.

        Raises:
            ValueError: If a part is not an integer or a two-ended range, if
                a page number is smaller than 1, or if a range is reversed.
        """
        if not page_string.strip():
            return []
        pages = set()
        for part in page_string.split(','):
            part = part.strip()
            if '-' in part:
                bounds = part.split('-')
                if len(bounds) != 2:
                    raise ValueError(f"invalid page range: {part!r}")
                start, end = int(bounds[0]), int(bounds[1])
                if start > end:
                    raise ValueError(f"reversed page range: {part!r}")
            else:
                start = end = int(part)
            if start < 1:
                raise ValueError(f"page numbers start at 1: {part!r}")
            pages.update(range(start, end + 1))
        return sorted(pages)

    def apply_batch_settings(self):
        """Apply the batch page string to every listed PDF."""
        page_string = self.batch_pages.get()
        try:
            pages = self.parse_page_numbers(page_string)
        except ValueError:
            messagebox.showerror("错误", "页码格式错误")
            return
        for pdf_path in self.pdf_settings:
            # Each file gets its own list so later edits cannot alias.
            self.pdf_settings[pdf_path] = list(pages)
        for item in self.tree.get_children():
            self.tree.set(item, self.PAGES_COLUMN, page_string)

    def convert_pdf_to_images(self, pdf_path: str, pages: List[int]):
        """Render the given 1-based pages of one PDF to PNG files.

        Output goes to ``<pdf name>_images`` next to the PDF. Page numbers
        outside the document are logged and skipped. Any exception is logged
        and reported in an error dialog instead of being raised. Safe to
        call from a worker thread: all UI work is routed to the UI thread.
        """
        # Computed before the try block so the error handler can use it even
        # when opening the document fails.
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        doc = None
        try:
            doc = fitz.open(pdf_path)
            output_dir = os.path.join(os.path.dirname(pdf_path),
                                      f"{base_name}_images")
            os.makedirs(output_dir, exist_ok=True)
            self.log_message(f"开始处理文件: {base_name}")
            total_pages = len(pages)
            page_count = len(doc)
            zoom = fitz.Matrix(self.RENDER_ZOOM, self.RENDER_ZOOM)
            for idx, page_num in enumerate(pages, 1):
                # The lower bound matters: 0 or a negative number would
                # otherwise index from the end of the document.
                if not 1 <= page_num <= page_count:
                    self.log_message(
                        f"跳过不存在的页面 {page_num} ({idx}/{total_pages})")
                    continue
                self.log_message(f"处理页面 {page_num} ({idx}/{total_pages})")
                pix = doc[page_num - 1].get_pixmap(matrix=zoom)
                pix.save(os.path.join(
                    output_dir, f"{base_name}_page_{page_num}.png"))
                pix = None  # Drop the pixmap before rendering the next page.
            self.log_message(f"文件 {base_name} 处理完成")
        except Exception as e:
            error_msg = f"转换文件 {base_name} 时出错: {str(e)}"
            self.log_message(error_msg)
            self._run_on_ui(messagebox.showerror, "错误", error_msg)
        finally:
            if doc is not None:
                doc.close()
            gc.collect()

    def _set_progress(self, value):
        """Set the progress bar value (must run on the UI thread)."""
        self.progress['value'] = value

    def process_queue(self):
        """Worker loop: convert queued jobs until the queue is empty.

        Every job taken from the queue is marked done and counted towards
        the progress bar, even if converting it raised.
        """
        while True:
            try:
                pdf_path, pages = self.work_queue.get_nowait()
            except queue.Empty:
                return
            try:
                self.convert_pdf_to_images(pdf_path, pages)
            except Exception as e:
                self.log_message(f"处理出错: {str(e)}")
            finally:
                self.work_queue.task_done()
                with self.lock:
                    self.completed_files += 1
                    progress = (self.completed_files / self.total_files) * 100
                    # Posted while holding the lock so updates stay ordered.
                    self._run_on_ui(self._set_progress, progress)

    def start_conversion(self):
        """Queue every PDF that has pages selected and start the workers."""
        if self._converting:
            return
        if not self.pdf_settings:
            messagebox.showwarning("警告", "没有选择PDF文件")
            return
        # Snapshot the settings so later edits in the UI cannot affect a run
        # that is already in progress.
        jobs = [(path, list(pages))
                for path, pages in self.pdf_settings.items() if pages]
        self.completed_files = 0
        self.total_files = len(jobs)
        self.lock = threading.Lock()
        if not jobs:
            messagebox.showwarning("警告", "没有设置页码的文件")
            return

        self._converting = True
        self.convert_button.configure(state=tk.DISABLED)
        self.progress['value'] = 0
        self.log_message("开始转换处理...")
        for job in jobs:
            self.work_queue.put(job)

        num_threads = min(self.MAX_WORKERS, self.total_files)
        self.log_message(f"启动 {num_threads} 个工作线程")
        threads = []
        for _ in range(num_threads):
            t = threading.Thread(target=self.process_queue, daemon=True)
            t.start()
            threads.append(t)

        # A separate thread waits for the workers so the UI stays responsive.
        monitor_thread = threading.Thread(target=self.monitor_conversion,
                                          args=(threads,), daemon=True)
        monitor_thread.start()

    def monitor_conversion(self, threads):
        """Wait for the worker threads, then signal completion to the UI."""
        try:
            # Workers only exit once the job queue is empty, so joining them
            # is enough to know every job has been handled.
            for t in threads:
                t.join()
        except Exception as e:
            self.log_message(f"监控线程出错: {str(e)}")
        finally:
            # Always hand control back to the UI so the start button is
            # re-enabled.
            self._run_on_ui(self.conversion_completed)

    def conversion_completed(self):
        """Finish a conversion run: reset the UI state and notify the user."""
        self._converting = False
        self.convert_button.configure(state=tk.NORMAL)
        self.clean_memory()
        self.log_message("所有文件处理完成!")
        self.progress['value'] = 0
        messagebox.showinfo("完成", "所有PDF文件转换完成!")

    def run(self):
        """Enter the Tk main loop."""
        self.root.mainloop()

    def select_folder(self):
        """Ask for a folder and, if one was chosen, load its PDF files."""
        folder = filedialog.askdirectory()
        if folder:
            self.folder_path.set(folder)
            self.load_pdf_files()

    def _append_log(self, message):
        """Append one line to the log widget (must run on the UI thread)."""
        self.log_text.insert(tk.END, f"{message}\n")
        self.log_text.see(tk.END)
        # Redraw without processing other events, which avoids the
        # re-entrancy that a full update() would allow.
        self.log_text.update_idletasks()

    def log_message(self, message):
        """Append a message to the processing log from any thread."""
        self._run_on_ui(self._append_log, message)

    def clean_memory(self):
        """Run a garbage collection pass and log that it happened."""
        gc.collect()
        self.log_message("已执行内存清理")

    def show_help(self):
        """Open a modal, read-only help window centred on the screen."""
        help_text = (
            "\n"
            "PDF批量转图片工具\n"
            "这是一个简单易用的PDF转图片工具,主要功能:\n"
            "1. 可以批量处理多个PDF文件\n"
            "2. 可以选择需要转换的具体页面\n"
            "3. 支持两种设置方式:\n"
            "- 统一设置:所有PDF使用相同的页码\n"
            "- 单独设置:每个PDF单独设置页码\n"
            "使用方法:\n"
            '1. 点击"选择文件夹",选择PDF所在文件夹\n'
            "2. 选择设置方式并输入要转换的页码\n"
            "例如:1,2,3 或 1-5 或 1,3-5\n"
            '3. 点击"开始转换"即可\n'
            '转换后的图片会自动保存在原PDF所在目录下的"文件名_images"文件夹中。\n'
        )
        help_window = tk.Toplevel(self.root)
        help_window.title("使用帮助")
        help_window.geometry("600x500")

        # Scrollable text area.
        text_frame = ttk.Frame(help_window)
        text_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        help_text_widget = tk.Text(text_frame, wrap=tk.WORD, width=60,
                                   height=25)
        scrollbar = ttk.Scrollbar(text_frame, orient=tk.VERTICAL,
                                  command=help_text_widget.yview)
        help_text_widget.configure(yscrollcommand=scrollbar.set)
        help_text_widget.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        help_text_widget.insert(tk.END, help_text)
        help_text_widget.configure(state='disabled')  # Read-only.

        ttk.Button(help_window, text="关闭",
                   command=help_window.destroy).pack(pady=10)

        # Make the window modal relative to the main window.
        help_window.transient(self.root)
        help_window.grab_set()

        # Centre the window on the screen.
        help_window.update_idletasks()
        width = help_window.winfo_width()
        height = help_window.winfo_height()
        x = (help_window.winfo_screenwidth() // 2) - (width // 2)
        y = (help_window.winfo_screenheight() // 2) - (height // 2)
        help_window.geometry(f'{width}x{height}+{x}+{y}')
if __name__ == "__main__":
    # Build the window and stay in the Tk event loop until it is closed.
    app = PDFConverter()
    app.run()