Python PDF转换工具箱(PDF转图片,word,拆分,删除,提取)

Python PDF转换工具箱(PDF转图片,word,拆分,删除,提取)

1.简介:

使用 Python 编写的 PDF 工具箱,功能包括:PDF 转 Word、PDF 转图片、合并、页面拆分、页面删除、页面提取,并支持将文件直接拖入窗口。

文章末尾已附源码以及打包好的 exe 文件,大家需要可自行下载学习,喜欢的话给博主点个小小的关注哦,主页还将会更新更多 Python 相关干货资源,关注不迷路哦!

功能介绍:

合并:添加顺序就是合并顺序,可多次添加。

拆分:将输入页码范围内的每一页分别保存为一个独立的 PDF 文件,单次可输入多个范围。

删除:将输入页码的范围删除,单次可输入多个范围,保存删除后的文件。

提取:将每个输入的页码范围分别提取为一个独立的 PDF 文件(一个范围对应一个文件),单次可输入多个范围。

2.运行效果:

3.相关源码:

python 复制代码
import os
import re
import sys
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QVBoxLayout, QWidget, QFileDialog, QListWidget, \
    QMessageBox, QLineEdit, QHBoxLayout
from PyQt5.QtCore import QThread, pyqtSignal
from PyPDF2 import PdfReader, PdfWriter, PdfMerger
from pdf2docx import Converter
import fitz  # 用于PDF转JPG的处理

class CustomListWidget(QListWidget):
    """List widget that accepts dropped PDF files.

    Every dropped URL whose text ends in ``.pdf`` (case-insensitive) is
    converted to a local path and handed to ``parent.addPDFFile``.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        # Kept so dropEvent can forward paths to the owning window.
        self.parentWindow = parent
        self.setAcceptDrops(True)

    @staticmethod
    def _looks_like_pdf(url):
        """Return True when the URL text ends with '.pdf', ignoring case."""
        return url.toString().lower().endswith('.pdf')

    def _carries_pdf(self, event):
        """Return True when at least one URL in the drag payload is a PDF."""
        for url in event.mimeData().urls():
            if self._looks_like_pdf(url):
                return True
        return False

    def dragEnterEvent(self, event):
        """Accept the drag when it contains at least one PDF URL."""
        if self._carries_pdf(event):
            event.acceptProposedAction()

    def dragMoveEvent(self, event):
        """Keep accepting the drag while it moves over the widget."""
        if self._carries_pdf(event):
            event.acceptProposedAction()

    def dropEvent(self, event):
        """Forward every dropped PDF path to the parent window."""
        for url in event.mimeData().urls():
            if self._looks_like_pdf(url):
                self.parentWindow.addPDFFile(url.toLocalFile())

class Worker(QThread):
    """Background thread that runs one PDF operation and reports the outcome.

    Exactly one of the signals is emitted per run: ``finished`` with a
    success message, or ``error`` with the failure text.

    Args:
        pdf_files: Paths of the input PDFs. Split/delete/extract use only the
            first entry.
        range_str: 1-based page ranges such as ``"1,3-4,8-15"`` (used by
            split/delete/extract).
        save_path: Output file (merge, delete) or output directory
            (split, extract, jpg, word).
        operation: One of ``'merge'``, ``'split'``, ``'delete'``,
            ``'extract'``, ``'jpg'``, ``'word'``.
    """

    # NOTE: this str-carrying signal shadows QThread's built-in no-argument
    # ``finished`` signal; callers connect to this one.
    finished = pyqtSignal(str)
    error = pyqtSignal(str)

    def __init__(self, pdf_files, range_str=None, save_path=None, operation=None):
        super().__init__()
        # Snapshot the list so later edits to the caller's list (the GUI
        # clears it after a run) cannot affect a job in progress.
        self.pdf_files = list(pdf_files)
        self.range_str = range_str
        self.save_path = save_path
        self.operation = operation

    def run(self):
        """Dispatch to the handler for ``self.operation``; never raises."""
        handlers = {
            'merge': self.merge_pdfs,
            'split': self.split_pdfs,
            'delete': self.delete_pages,
            'extract': self.extract_pages,
            'jpg': self.pdf_to_jpg,
            'word': self.pdf_to_word,
        }
        handler = handlers.get(self.operation)
        if handler is None:
            # Previously an unknown operation ended silently, leaving the
            # caller without any feedback.
            self.error.emit(f'未知操作:{self.operation}')
            return
        try:
            handler()
        except Exception as e:  # thread boundary: report instead of crashing
            # Some exceptions have an empty str(); fall back to repr().
            self.error.emit(str(e) or repr(e))

    def merge_pdfs(self):
        """Concatenate all input PDFs, in list order, into ``save_path``."""
        merger = PdfMerger()
        try:
            for pdf in self.pdf_files:
                merger.append(pdf)
            merger.write(self.save_path)
        finally:
            # Release the source file handles even when append/write fails.
            merger.close()
        self.finished.emit('PDF文件已成功合并。')

    def split_pdfs(self):
        """Write every page of the requested ranges as its own PDF file.

        Files are named ``split_page_<n>.pdf`` inside ``save_path``, numbered
        in the order the pages are requested.
        """
        ranges = self.parse_ranges(self.range_str)
        reader = PdfReader(self.pdf_files[0])
        self._ensure_within(ranges, len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)
        file_index = 1  # running counter keeps output names unique
        for start_page, end_page in ranges:
            for page_num in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(reader.pages[page_num])
                split_save_path = os.path.join(self.save_path, f'split_page_{file_index}.pdf')
                with open(split_save_path, 'wb') as f:
                    writer.write(f)
                file_index += 1
        self.finished.emit('PDF文件已成功拆分并保存。')

    def delete_pages(self):
        """Save a copy of the first input PDF without the requested pages.

        Pages beyond the end of the document are ignored, as before.

        Raises:
            ValueError: If the ranges would remove every page.
        """
        ranges = self.parse_ranges(self.range_str)
        reader = PdfReader(self.pdf_files[0])
        pages_to_delete = {page for start, end in ranges for page in range(start, end + 1)}
        kept_pages = [page for i, page in enumerate(reader.pages) if i not in pages_to_delete]
        if not kept_pages:
            raise ValueError('不能删除全部页面。')
        writer = PdfWriter()
        for page in kept_pages:
            writer.add_page(page)
        with open(self.save_path, 'wb') as f:
            writer.write(f)
        self.finished.emit('指定页面已从PDF中删除。')

    def extract_pages(self):
        """Write each requested range as ``extract_<n>.pdf`` inside ``save_path``."""
        ranges = self.parse_ranges(self.range_str)
        reader = PdfReader(self.pdf_files[0])
        self._ensure_within(ranges, len(reader.pages))
        os.makedirs(self.save_path, exist_ok=True)
        for i, (start_page, end_page) in enumerate(ranges, start=1):
            writer = PdfWriter()
            for page_num in range(start_page, end_page + 1):
                writer.add_page(reader.pages[page_num])
            extract_save_path = os.path.join(self.save_path, f'extract_{i}.pdf')
            with open(extract_save_path, 'wb') as f:
                writer.write(f)
        self.finished.emit('指定页面已从PDF中提取。')

    def pdf_to_jpg(self):
        """Render every page of every input PDF to ``<save_path>/<pdf name>/Page_<n>.jpg``."""
        trans = fitz.Matrix(2, 2)  # 2x zoom on both axes; same for every page
        for file in self.pdf_files:
            pdf = fitz.open(file)
            try:
                img_folder = os.path.join(self.save_path, os.path.splitext(os.path.basename(file))[0])
                os.makedirs(img_folder, exist_ok=True)
                for pg in range(pdf.page_count):
                    pm = pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(os.path.join(img_folder, f'Page_{pg + 1}.jpg'))
            finally:
                # The document was previously never closed, keeping the
                # source file open for the life of the process.
                pdf.close()
        self.finished.emit('PDF文件已成功转换为图片。')

    def pdf_to_word(self):
        """Convert every input PDF to a ``.docx`` with the same base name.

        The document is written into ``save_path`` when one was given
        (previously the chosen directory was ignored); otherwise it is
        written next to the source PDF.
        """
        if self.save_path:
            os.makedirs(self.save_path, exist_ok=True)
        for file in self.pdf_files:
            stem = os.path.splitext(os.path.basename(file))[0]
            target_dir = self.save_path or os.path.dirname(file)
            docx_name = os.path.join(target_dir, stem + '.docx')
            cv = Converter(file)
            try:
                cv.convert(docx_name, start=0, end=None)
            finally:
                cv.close()
        self.finished.emit('PDF文件已成功转换为Word文档。')

    def parse_ranges(self, ranges_str):
        """Parse ``"1,3-4,8-15"`` into zero-based inclusive ``(start, end)`` tuples.

        Parts may be separated by an ASCII or a full-width comma; surrounding
        whitespace and empty parts are ignored.

        Raises:
            ValueError: If a part is not a page number or ``start-end`` pair,
                a page number is below 1, a range is reversed, or no range
                was given at all.
        """
        ranges = []
        # \uff0c is the full-width comma produced by Chinese input methods.
        for part in re.split(r'[,\uff0c]', ranges_str or ''):
            part = part.strip()
            if not part:
                continue
            try:
                if '-' in part:
                    start_text, end_text = part.split('-')
                    start_page, end_page = int(start_text), int(end_text)
                else:
                    start_page = end_page = int(part)
            except ValueError:
                raise ValueError(f'无效的页码范围:{part}') from None
            # Page 0 would become index -1 and silently select the last page;
            # a reversed range would silently select nothing.
            if start_page < 1 or end_page < start_page:
                raise ValueError(f'无效的页码范围:{part}')
            ranges.append((start_page - 1, end_page - 1))
        if not ranges:
            raise ValueError('请输入页码范围。')
        return ranges

    @staticmethod
    def _ensure_within(ranges, page_count):
        """Raise ValueError when a zero-based range reaches past the last page."""
        for start, end in ranges:
            if end >= page_count:
                raise ValueError(
                    f'页码范围 {start + 1}-{end + 1} 超出文档总页数({page_count} 页)。'
                )

class PDFMergerApp(QMainWindow):
    """Main window of the PDF toolbox.

    Holds the list of selected PDF paths and starts a ``Worker`` thread for
    each operation (merge, split, delete pages, extract pages, convert to
    images, convert to Word).
    """

    def __init__(self):
        super().__init__()
        # State first, so every slot wired up in initUI can rely on it.
        self.pdf_files = []
        # Reference to the current background job. Stored as ``worker``
        # rather than ``thread`` because the latter shadows QObject.thread().
        self.worker = None
        self.initUI()

    def initUI(self):
        """Build the widgets and connect their signals."""
        self.setWindowTitle('PDF 工具箱')
        self.setGeometry(100, 100, 800, 600)

        mainLayout = QVBoxLayout()

        self.addButton = QPushButton('添加 PDF', self)
        self.addButton.clicked.connect(self.addPDF)
        mainLayout.addWidget(self.addButton)

        self.listWidget = CustomListWidget(self)
        mainLayout.addWidget(self.listWidget)

        deleteLayout = QHBoxLayout()
        self.removeButton = QPushButton('删除选定', self)
        self.removeButton.clicked.connect(self.removeSelected)
        deleteLayout.addWidget(self.removeButton)

        self.removeAllButton = QPushButton('删除全部', self)
        self.removeAllButton.clicked.connect(self.removeAll)
        deleteLayout.addWidget(self.removeAllButton)
        mainLayout.addLayout(deleteLayout)

        convertLayout = QHBoxLayout()
        self.convertJPGButton = QPushButton('转换为图片', self)
        self.convertJPGButton.clicked.connect(self.convertToJPG)
        convertLayout.addWidget(self.convertJPGButton)

        self.convertWordButton = QPushButton('转换为Word', self)
        self.convertWordButton.clicked.connect(self.convertToWord)
        convertLayout.addWidget(self.convertWordButton)
        mainLayout.addLayout(convertLayout)

        self.mergeButton = QPushButton('合并 PDFs', self)
        self.mergeButton.clicked.connect(self.mergePDFs)
        mainLayout.addWidget(self.mergeButton)

        self.splitInput, self.splitButton = self._add_range_row(
            mainLayout, '输入拆分页码范围可输入多个范围,如1,3-4,8-15', '拆分页面', self.splitPDF)
        self.deleteInput, self.deleteButton = self._add_range_row(
            mainLayout, '输入删除页码范围可输入多个范围,如1,3-4,8-15', '删除页面', self.deletePages)
        self.extractInput, self.extractButton = self._add_range_row(
            mainLayout, '输入提取页码范围可输入多个范围,如1,3-4,8-15', '提取页面', self.extractPages)

        container = QWidget()
        container.setLayout(mainLayout)
        self.setCentralWidget(container)

    def _add_range_row(self, layout, placeholder, button_text, handler):
        """Append a "page-range input + action button" row and return both widgets."""
        row = QHBoxLayout()
        line_edit = QLineEdit(self)
        line_edit.setPlaceholderText(placeholder)
        row.addWidget(line_edit)
        button = QPushButton(button_text, self)
        button.clicked.connect(handler)
        row.addWidget(button)
        layout.addLayout(row)
        return line_edit, button

    def addPDF(self):
        """Let the user pick one or more PDFs and add them to the list."""
        files, _ = QFileDialog.getOpenFileNames(self, '打开文件', '', 'PDF files (*.pdf)')
        for file_path in files:
            self.addPDFFile(file_path)

    def addPDFFile(self, file_path):
        """Add a path to the list unless it is empty or already present."""
        if file_path and file_path not in self.pdf_files:
            self.pdf_files.append(file_path)
            self.listWidget.addItem(file_path)

    def removeSelected(self):
        """Remove the selected entries from both the list widget and the path list."""
        for item in self.listWidget.selectedItems():
            self.pdf_files.remove(item.text())
            self.listWidget.takeItem(self.listWidget.row(item))

    def removeAll(self):
        """Remove every entry."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def _ready(self, single_file_message=None):
        """Return True when a new operation may start; otherwise warn and return False.

        Args:
            single_file_message: When given, exactly one PDF must be listed
                and this text is shown otherwise. When omitted, at least one
                PDF must be listed.
        """
        # Replacing a running QThread drops its last reference while it is
        # still executing, so refuse to start a second job.
        if self.worker is not None and self.worker.isRunning():
            QMessageBox.warning(self, "错误", "上一个操作仍在进行中,请稍候。")
            return False
        if single_file_message is not None:
            if len(self.pdf_files) != 1:
                QMessageBox.warning(self, "错误", single_file_message)
                return False
        elif not self.pdf_files:
            QMessageBox.warning(self, "错误", "请先添加PDF文件。")
            return False
        return True

    def _range_text(self, line_edit):
        """Return the stripped range text, warning the user when it is empty."""
        text = line_edit.text().strip()
        if not text:
            QMessageBox.warning(self, "错误", "请输入页码范围。")
        return text

    def _start_worker(self, operation, save_path, range_str=None):
        """Create, wire up and start the background worker for one operation."""
        self.worker = Worker(self.pdf_files, range_str=range_str, save_path=save_path, operation=operation)
        self.worker.finished.connect(self.onFinished)
        self.worker.error.connect(self.onError)
        self.worker.start()

    def mergePDFs(self):
        """Merge all listed PDFs into a file chosen by the user."""
        if not self._ready():
            return
        save_path, _ = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')
        if save_path:
            self._start_worker('merge', save_path)

    def splitPDF(self):
        """Split the requested pages of the single listed PDF into a chosen folder."""
        if not self._ready("请只选择一个PDF文件进行拆分。"):
            return
        # Validate the range before opening a dialog the user would
        # otherwise fill in for nothing.
        range_str = self._range_text(self.splitInput)
        if not range_str:
            return
        folder_path = self.getFolderName()
        if folder_path:
            self._start_worker('split', folder_path, range_str)

    def deletePages(self):
        """Save a copy of the single listed PDF without the requested pages."""
        if not self._ready("请只选择一个PDF文件进行删除操作。"):
            return
        range_str = self._range_text(self.deleteInput)
        if not range_str:
            return
        save_path = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')[0]
        if save_path:
            self._start_worker('delete', save_path, range_str)

    def extractPages(self):
        """Extract each requested range of the single listed PDF into a chosen folder."""
        if not self._ready("请只选择一个PDF文件进行提取操作。"):
            return
        range_str = self._range_text(self.extractInput)
        if not range_str:
            return
        # The worker treats the target as a directory and writes
        # extract_<n>.pdf into it; asking for a file name here used to
        # create a folder called "<name>.pdf".
        save_path = QFileDialog.getExistingDirectory(self, "选择保存提取文件的位置")
        if save_path:
            self._start_worker('extract', save_path, range_str)

    def convertToJPG(self):
        """Render every listed PDF to images inside a chosen folder."""
        if not self._ready():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存图片的位置")
        if save_path:
            self._start_worker('jpg', save_path)

    def convertToWord(self):
        """Convert every listed PDF to a Word document."""
        if not self._ready():
            return
        save_path = QFileDialog.getExistingDirectory(self, "选择保存Word的位置")
        if save_path:
            self._start_worker('word', save_path)

    def getFolderName(self):
        """Ask for the folder that receives the split files; '' when cancelled."""
        folder_path = QFileDialog.getExistingDirectory(self, "选择保存拆分文件的位置")
        return folder_path

    def onFinished(self, message):
        """Show the success message, then reset the file list and the range inputs."""
        QMessageBox.information(self, "操作完成", message)
        self.clear_pdf_list()
        self.clear_text_inputs()

    def onError(self, error_message):
        """Show the failure text; the file list and inputs are left untouched."""
        QMessageBox.warning(self, "操作失败", error_message)

    def clear_pdf_list(self):
        """Empty both the path list and the list widget."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def clear_text_inputs(self):
        """Clear every page-range QLineEdit."""
        self.splitInput.clear()
        self.deleteInput.clear()
        self.extractInput.clear()
def main():
    """Create the Qt application, show the toolbox window and run the event loop.

    The process exits with the event loop's return code.
    """
    qt_app = QApplication(sys.argv)
    window = PDFMergerApp()
    window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)

# Start the GUI only when this file is run as a script, not when it is imported.
if "__main__" == __name__:
    main()
相关推荐
Dovir多多6 小时前
Python数据处理——re库与pydantic的使用总结与实战,处理采集到的思科ASA防火墙设备信息
网络·python·计算机网络·安全·网络安全·数据分析
沐霜枫叶8 小时前
解决pycharm无法识别miniconda
ide·python·pycharm
途途途途8 小时前
精选9个自动化任务的Python脚本精选
数据库·python·自动化
蓝染然9 小时前
jax踩坑指南——人类早期驯服jax实录
python
许野平9 小时前
Rust: enum 和 i32 的区别和互换
python·算法·rust·enum·i32
问道飞鱼9 小时前
【Python知识】Python进阶-什么是装饰器?
开发语言·python·装饰器
AI视觉网奇10 小时前
Detected at node ‘truediv‘ defined at (most recent call last): Node: ‘truediv‘
人工智能·python·tensorflow
GuYue.bing10 小时前
网络下载ts流媒体
开发语言·python
牛顿喜欢吃苹果10 小时前
linux创建虚拟串口
python
-Mr_X-10 小时前
FFmpeg在python里推流被处理过的视频流
python·ffmpeg