Python实现Word、Excel、PPT批量转为PDF

今天看见了一个有意思的脚本Python批量实现Word、EXCLE、PPT转PDF文件

因为我平时word用的比较的多,所以深有体会,具体怎么实现的我们就不讨论了,因为这个去学了也没什么提升,不然也不会当作脚本了。这里我将其放入了pyzjr库中,也方便大家进行调用。

你可以去下载pyzjr:

python 复制代码
pip install pyzjr -i https://pypi.tuna.tsinghua.edu.cn/simple

调用方法:

python 复制代码
import pyzjr as pz

# 实例化对象
Mpdf = pz.Microsoft2PDF()
# 调用类的方法
Mpdf.Word2Pdf()  # word -> pdf
Mpdf.Excel2Pdf()  # excel -> pdf
Mpdf.PPt2Pdf()  # ppt -> pdf
Mpdf.WEP2Pdf()  # word,excel,ppt -> pdf

上面就是api的调用了,统一会将文件存放在目标文件夹下新建的名为pdf文件夹中。

pyzjr中的源码:

python 复制代码
import win32com.client, gc, os

class Microsoft2PDF():
    """Convert Microsoft Office documents (Word, Excel, PowerPoint) to PDF format"""
    def __init__(self,filePath = ""):
        """
        :param filePath: 如果默认是空字符,就默认当前路径
        """
        self.flagW = self.flagE = self.flagP = 1
        self.words = []
        self.ppts = []
        self.excels = []

        if filePath == "":
            filePath = os.getcwd()
        folder = filePath + '\\pdf\\'
        self.folder = CreateFolder(folder,debug=False)

        self.filePath = filePath
        for i in os.listdir(self.filePath):
            if i.endswith(('.doc', 'docx')):
                self.words.append(i)
            if i.endswith(('.ppt', 'pptx')):
                self.ppts.append(i)
            if i.endswith(('.xls', 'xlsx')):
                self.excels.append(i)

        if len(self.words) < 1:
            print("\n[pyzjr]:No Word files\n")
            self.flagW = 0
        if len(self.ppts) < 1:
            print("\n[pyzjr]:No PPT file\n")
            self.flagE = 0
        if len(self.excels) < 1:
            print("\n[pyzjr]:No Excel file\n")
            self.flagP = 0

    def Word2Pdf(self):
        if self.flagW == 0:
            return 0
        else:
            print("\n[Start Word ->PDF conversion]")
            try:
                print("Open Word Process...")
                word = win32com.client.Dispatch("Word.Application")
                word.Visible = 0
                word.DisplayAlerts = False
                doc = None
                for i in range(len(self.words)):
                    print(i)
                    fileName = self.words[i]  # file name
                    fromFile = os.path.join(self.filePath, fileName)  # file address
                    toFileName = self.changeSufix2Pdf(fileName)  # Generated file name
                    toFile = self.toFileJoin(toFileName)  # Generated file address

                    print("Conversion:" + fileName + "in files...")
                    try:
                        doc = word.Documents.Open(fromFile)
                        doc.SaveAs(toFile, 17)
                        print("Convert to:" + toFileName + "file completion")
                    except Exception as e:
                        print(e)

                print("All Word files have been printed")
                print("End Word Process...\n")
                doc.Close()
                doc = None
                word.Quit()
                word = None
            except Exception as e:
                print(e)
            finally:
                gc.collect()

    def Excel2Pdf(self):
        if self.flagE == 0:
            return 0
        else:
            print("\n[Start Excel -> PDF conversion]")
            try:
                print("open Excel Process...")
                excel = win32com.client.Dispatch("Excel.Application")
                excel.Visible = 0
                excel.DisplayAlerts = False
                wb = None
                ws = None
                for i in range(len(self.excels)):
                    print(i)
                    fileName = self.excels[i]
                    fromFile = os.path.join(self.filePath, fileName)

                    print("Conversion:" + fileName + "in files...")
                    try:
                        wb = excel.Workbooks.Open(fromFile)
                        for j in range(wb.Worksheets.Count):  # Number of worksheets, one workbook may have multiple worksheets
                            toFileName = self.addWorksheetsOrder(fileName, j + 1)
                            toFile = self.toFileJoin(toFileName)

                            ws = wb.Worksheets(j + 1)
                            ws.ExportAsFixedFormat(0, toFile)
                            print("Convert to:" + toFileName + "file completion")
                    except Exception as e:
                        print(e)
                # 关闭 Excel 进程
                print("All Excel files have been printed")
                print("Ending Excel process...\n")
                ws = None
                wb.Close()
                wb = None
                excel.Quit()
                excel = None
            except Exception as e:
                print(e)
            finally:
                gc.collect()

    def PPt2Pdf(self):
        if self.flagP == 0:
            return 0
        else:
            print("\n[Start PPT ->PDF conversion]")
            try:
                print("Opening PowerPoint process...")
                powerpoint = win32com.client.Dispatch("PowerPoint.Application")
                ppt = None
                for i in range(len(self.ppts)):
                    print(i)
                    fileName = self.ppts[i]
                    fromFile = os.path.join(self.filePath, fileName)
                    toFileName = self.changeSufix2Pdf(fileName)
                    toFile = self.toFileJoin(toFileName)

                    print("Conversion:" + fileName + "in files...")
                    try:
                        ppt = powerpoint.Presentations.Open(fromFile, WithWindow=False)
                        if ppt.Slides.Count > 0:
                            ppt.SaveAs(toFile, 32)
                            print("Convert to:" + toFileName + "file completion")
                        else:
                            print("Error, unexpected: This file is empty, skipping this file")
                    except Exception as e:
                        print(e)
                print("All PPT files have been printed")
                print("Ending PowerPoint process...\n")
                ppt.Close()
                ppt = None
                powerpoint.Quit()
                powerpoint = None
            except Exception as e:
                print(e)
            finally:
                gc.collect()

    def WEP2Pdf(self):
        """
        Word, Excel and PPt are all converted to PDF.
        If there are many files, it may take some time
        """
        print("Convert Microsoft Three Musketeers to PDF")
        self.Word2Pdf()
        self.Excel2Pdf()
        self.PPt2Pdf()
        print(f"All files have been converted, you can find them in the {self.folder}")

    def changeSufix2Pdf(self,file):
        """将文件后缀更改为.pdf"""
        return file[:file.rfind('.')] + ".pdf"

    def addWorksheetsOrder(self,file, i):
        """在文件名中添加工作表顺序"""
        return file[:file.rfind('.')] + "_worksheet" + str(i) + ".pdf"

    def toFileJoin(self, file):
        """将文件路径和文件名连接为完整的文件路径"""
        return os.path.join(self.filePath, 'pdf', file[:file.rfind('.')] + ".pdf")

这里我对原先博主的代码进行了一定的优化,使其可供我们调用。

这是控制台打印出来的信息,我们可以发现在调用WEP2Pdf时,如果当前文件夹中没有word的文件也能继续去转换。

相关推荐
沉到海底去吧Go3 小时前
【PDF识别改名】PDF指定区域OCR识别重命名工具使用教程和注意事项
python·pdf·ocr
空中湖3 小时前
免费批量PDF转Word工具
pdf·word
mangge083 小时前
python读取SQLite表个并生成pdf文件
pdf
开开心心就好3 小时前
免费PDF转图片软件
javascript·智能手机·pdf·flask·word·excel·scikit-learn
Eiceblue10 小时前
Python读取PDF:文本、图片与文档属性
数据库·python·pdf
AAA_自动化工程师13 小时前
TIA博途中的程序导出为PDF格式的具体方法示例
pdf·tia博途·程序导出·pdf格式·具体方法
行云流水剑14 小时前
【学习记录】如何使用 Python 提取 PDF 文件中的内容
python·学习·pdf
IDRSolutions_CN14 小时前
PDF 转 HTML5 —— HTML5 填充图形不支持 Even-Odd 奇偶规则?(第二部分)
java·经验分享·pdf·软件工程·团队开发
upp16 小时前
【bug】Error: /undefinedfilename in (/tmp/ocrmypdf.io.9xfn1e3b/origin.pdf)
ubuntu·pdf·bug·ghostscript
沉到海底去吧Go19 小时前
【工具教程】多个条形码识别用条码内容对图片重命名,批量PDF条形码识别后用条码内容批量改名,使用教程及注意事项
pdf