ppt转换成pdf文件

最近用到了,记一下;

ppt 转 pdf 分为两种情况:PowerPoint 2007 之前版本使用的 .ppt 格式(97-2003),以及 2007 及之后版本使用的 .pptx 格式

.ppt格式为 二进制文件

.pptx 格式基于 XML(OOXML,本质上是一个 zip 压缩包)。在 Java 中,这两种格式需要使用 POI 的不同模块处理:.ppt 对应 HSLF(poi-scratchpad),.pptx 对应 XSLF(poi-ooxml)

引入 jar

xml 复制代码
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.0.1</version>
</dependency>
<dependency>
	<groupId>com.itextpdf</groupId>
	<artifactId>itextpdf</artifactId>
	<version>5.5.6</version>
</dependency>

代码

java 复制代码
package cc.vace.cloud.utils;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.*;
import org.apache.poi.hslf.usermodel.HSLFSlide;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFTextShape;
import org.apache.poi.xslf.usermodel.*;

import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;
import java.util.Objects;

/**
 * Converts PowerPoint decks to PDF: every slide is rendered to a bitmap with Apache POI
 * and the bitmaps are appended to an iText document.
 *
 * @author vace cc
 */
public class PptUtil {

    /** Font family forced onto every text run; without it CJK text comes out garbled in the PDF. */
    private static final String FONT_FAMILY = "宋体";

    /** Percentage each rendered slide image is scaled to before it is added to the PDF. */
    private static final float SLIDE_SCALE_PERCENT = 50f;

    /**
     * Demo entry point: converts one sample .ppt and one sample .pptx file.
     *
     * @param args unused
     * @throws IOException       if a sample file cannot be opened, read or written
     * @throws DocumentException if iText rejects the content of the .ppt conversion
     */
    public static void main(String[] args) throws IOException, DocumentException {
        // try-with-resources: the converters do not own the streams, so the caller closes them.
        try (InputStream pptIn = new FileInputStream("F:\\file\\text.ppt");
             OutputStream pdfOut = new FileOutputStream("F:\\file\\text0.pdf")) {
            pptToPdf(pptIn, pdfOut);
        }
        try (InputStream pptxIn = new FileInputStream("F:/file/text.pptx");
             OutputStream pdfOut = new FileOutputStream("F:/file/text1.pdf")) {
            pptxToPdf(pptxIn, pdfOut);
        }
    }

    /**
     * Converts a binary .ppt deck to PDF.
     *
     * @param pptIs stream of the source .ppt file
     * @param pdfOs stream the PDF is written to
     * @return {@code true} when the conversion succeeded; failures are reported by rethrowing
     * @throws IOException       on read/write failure
     * @throws DocumentException if iText rejects the generated content
     */
    public static boolean pptToPdf(InputStream pptIs, OutputStream pdfOs) throws IOException, DocumentException {
        Document doc = new Document();
        HSLFSlideShow slideShow = null;
        boolean closeAttempted = false;
        try {
            slideShow = new HSLFSlideShow(pptIs);
            Dimension slideSize = slideShow.getPageSize();
            PdfWriter.getInstance(doc, pdfOs);
            doc.open();
            List<HSLFSlide> slides = slideShow.getSlides();
            for (HSLFSlide slide : slides) {
                applyFont(slide);
                addSlideImage(doc, render(slideSize, slide::draw));
            }
            if (slides.isEmpty()) {
                // A document without any page cannot be closed ("The document has no pages").
                addBlankPage(doc, slideSize);
            }
            // Close inside the try block so that a failure while finalising the PDF is
            // reported like any other conversion error instead of escaping from finally.
            closeAttempted = true;
            doc.close();
            System.out.println("---------- 转换成功 -------------");
            return true;
        } catch (Throwable e) {
            // Throwable rather than Exception: drawing a slide can fail with an Error.
            e.printStackTrace();
            System.out.println("---------- 转换失败 -------------");

            throw e;
        } finally {
            try {
                if (!closeAttempted) {
                    closeAfterFailure(doc);
                }
            } finally {
                if (slideShow != null) {
                    slideShow.close();
                }
            }
        }
    }

    /**
     * Converts an XML-based .pptx deck to PDF.
     *
     * @param pptIs stream of the source .pptx file
     * @param pdfOs stream the PDF is written to
     * @return {@code true} when the conversion succeeded, {@code false} when it failed
     *         (the failure is printed to stderr)
     * @throws IOException if closing the slide show fails
     */
    public static boolean pptxToPdf(InputStream pptIs, OutputStream pdfOs) throws IOException {
        Document doc = new Document();
        XMLSlideShow slideShow = null;
        boolean closeAttempted = false;
        try {
            slideShow = new XMLSlideShow(pptIs);
            PdfWriter.getInstance(doc, pdfOs);
            Dimension slideSize = slideShow.getPageSize();
            doc.open();
            List<XSLFSlide> slides = slideShow.getSlides();
            for (XSLFSlide slide : slides) {
                applyFont(slide);
                addSlideImage(doc, render(slideSize, slide::draw));
            }
            if (slides.isEmpty()) {
                // A document without any page cannot be closed ("The document has no pages").
                addBlankPage(doc, slideSize);
            }
            // Close inside the try block so a failure while finalising the PDF yields false.
            closeAttempted = true;
            doc.close();
            System.out.println("---------- 转换成功 -------------");
            return true;
        } catch (Throwable e) {
            // Throwable rather than Exception: drawing a slide can fail with an Error.
            e.printStackTrace();
            return false;
        } finally {
            try {
                if (!closeAttempted) {
                    closeAfterFailure(doc);
                }
            } finally {
                if (slideShow != null) {
                    slideShow.close();
                }
            }
        }
    }

    /** Sets {@link #FONT_FAMILY} on every text run of the slide's top-level text shapes. */
    private static void applyFont(HSLFSlide slide) {
        slide.getShapes().stream()
                .filter(shape -> shape instanceof HSLFTextShape)
                .map(shape -> (HSLFTextShape) shape)
                .forEach(textShape -> textShape.getTextParagraphs().forEach(
                        paragraph -> paragraph.getTextRuns().forEach(run -> run.setFontFamily(FONT_FAMILY))));
    }

    /** Sets {@link #FONT_FAMILY} on every text run of the slide's top-level text shapes. */
    private static void applyFont(XSLFSlide slide) {
        slide.getShapes().stream()
                .filter(shape -> shape instanceof XSLFTextShape)
                .map(shape -> (XSLFTextShape) shape)
                .forEach(textShape -> textShape.getTextParagraphs().forEach(
                        paragraph -> paragraph.getTextRuns().forEach(run -> run.setFontFamily(FONT_FAMILY))));
    }

    /** Paints one slide (via {@code painter}) onto a white bitmap of the given size. */
    private static BufferedImage render(Dimension size, java.util.function.Consumer<Graphics2D> painter) {
        BufferedImage canvas = new BufferedImage((int) size.getWidth(), (int) size.getHeight(), BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = canvas.createGraphics();
        try {
            // A TYPE_INT_RGB image starts out black; fill it so slides without their own
            // background end up on white.
            graphics.setPaint(Color.white);
            graphics.fillRect(0, 0, canvas.getWidth(), canvas.getHeight());
            graphics.setFont(new java.awt.Font(FONT_FAMILY, java.awt.Font.PLAIN, 12));
            painter.accept(graphics);
        } finally {
            graphics.dispose();
        }
        return canvas;
    }

    /** Appends a rendered slide to the PDF, scaled to {@link #SLIDE_SCALE_PERCENT}. */
    private static void addSlideImage(Document doc, BufferedImage rendered) throws IOException, DocumentException {
        com.itextpdf.text.Image image = com.itextpdf.text.Image.getInstance(rendered, null);
        image.scalePercent(SLIDE_SCALE_PERCENT);
        doc.add(image);
    }

    /** Appends a fully transparent slide-sized image so that an empty deck still yields one page. */
    private static void addBlankPage(Document doc, Dimension size) throws IOException, DocumentException {
        // TYPE_INT_ARGB keeps the placeholder transparent; TYPE_INT_RGB would show up as a black box.
        BufferedImage transparent = new BufferedImage((int) size.getWidth(), (int) size.getHeight(), BufferedImage.TYPE_INT_ARGB);
        doc.add(com.itextpdf.text.Image.getInstance(transparent, null));
    }

    /**
     * Closes the document after a failed conversion. Closing a document that has no pages
     * throws a runtime exception ("The document has no pages"); it is dropped here so it
     * cannot replace the exception that actually caused the failure.
     */
    private static void closeAfterFailure(Document doc) {
        try {
            doc.close();
        } catch (RuntimeException ignored) {
            // The original failure has already been reported by the caller.
        }
    }

}

遇到的exception

遇见报错document 是空的

java 复制代码
The document has no pages.

遇见这个问题,一般是因为 slide.draw 抛出了异常,但 catch (Exception e) 捕获不到它;随后 finally 中的 doc.close() 在一个还没有写入任何页面的文档上执行,又抛出了 "The document has no pages",把真正的异常掩盖掉了

这里做几个问题点去切入

一、

首先要注意的是这里catch 的是Throwable 而不是Exception

因为 slide.draw() 抛出的可能是 Error(例如缺少依赖时的 NoClassDefFoundError),它继承自 Throwable 而不是 Exception,所以用 catch (Exception e) 是捕获不到的

java 复制代码
catch (Throwable e) {
    // Catch Throwable (not Exception) so that Errors raised while drawing a slide are caught too.
    e.printStackTrace();
    // Rethrow to report the failure to the caller. To report it through the return value
    // instead, replace this line with `return false;` - keeping both does not compile,
    // because a statement after `throw` is unreachable.
    throw e;
}

二、

针对jdk8以上的版本:如jdk11

报错

java 复制代码
javax/xml/bind/JAXBException

需要添加额外的jar包

XML 复制代码
<dependency>
    <groupId>javax.activation</groupId>
    <artifactId>activation</artifactId>
    <version>1.1.1</version>
</dependency>
<dependency>
    <groupId>javax.xml.bind</groupId>
    <artifactId>jaxb-api</artifactId>
    <version>2.3.0</version>
</dependency>
<dependency>
    <groupId>com.sun.xml.bind</groupId>
    <artifactId>jaxb-core</artifactId>
    <version>2.3.0.1</version>
</dependency>
<dependency>
    <groupId>com.sun.xml.bind</groupId>
    <artifactId>jaxb-impl</artifactId>
    <version>2.3.0.1</version>
</dependency>

如果上传的是一个空 ppt(没有任何幻灯片),生成的 pdf 文件一定是损坏的。有两种解决办法,这里采用第一种:初始化一个空白的 pdf 页面

这里选择TYPE_INT_ARGB,这样底色就是空白的啦,如果使用TYPE_INT_RGB那就是一块黑

bash 复制代码
// Empty deck: add one slide-sized placeholder image so the document contains something.
if (slides.size() == 0) {
                // TYPE_INT_ARGB: the placeholder is created with an alpha channel and nothing is drawn on it.
                BufferedImage bufferedImage = new BufferedImage((int) dimension.getWidth(), (int) dimension.getHeight(), BufferedImage.TYPE_INT_ARGB);
                com.itextpdf.text.Image image = com.itextpdf.text.Image.getInstance(bufferedImage, null);
                // NOTE(review): the cell goes into pdfpTable, but this snippet never adds the table to doc - confirm it is needed.
                pdfpTable.addCell(new PdfPCell(image, true));
                doc.add(image);
            }

poi的5.X版本

这里介绍的poi适合4.X的版本,如果使用 5.X版本则需要加入xmlbeans包

bash 复制代码
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>5.0.3</version>
</dependency>

完整的 5.X包:截止博主写文的时间,最新的poi版本如下

bash 复制代码
<!-- Keep every org.apache.poi artifact on the same version. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>5.2.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.2.2</version>
</dependency>
<!-- POI 5.x renamed poi-ooxml-schemas to poi-ooxml-lite; the 4.x schemas jar must not be mixed with POI 5. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-lite</artifactId>
    <version>5.2.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>5.2.2</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13.3</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>5.0.3</version>
</dependency>

设置页面大小

当我们把上边的步骤实现之后,会发现两张幻灯片被合到了同一个 pdf 页面里,而我们想要的是一张幻灯片对应一个 pdf 页面

实现方式:

java 复制代码
Document 在new 的 时候可以直接将页面的尺寸放进去
Document 可以使用 setPageSize来设置大小
setPageSize 有一个问题:它不会影响已经创建的当前页,只对之后新建的页面生效。第一页在 doc.open() 时就已经创建,所以在 open() 之后调用 setPageSize,要从第二页开始才会生效;想让第一页也生效,需要在 doc.open() 之前调用,或者直接在 new Document(...) 时传入页面尺寸

代码实现:只列出 pptx格式的

ppt 转也是一样的

java 复制代码
/**
 * Converts an XML-based .pptx deck to PDF, using the slide size as the PDF page size.
 *
 * @param pptIs stream of the source .pptx file
 * @param pdfOs stream the PDF is written to
 * @return {@code true} when the conversion succeeded, {@code false} when it failed
 *         (the failure is printed to stderr)
 * @throws IOException if closing the slide show fails
 */
public static boolean pptxToPdf(InputStream pptIs, OutputStream pdfOs) throws IOException {
    Document doc = null;
    XMLSlideShow slideShow = null;
    boolean closeAttempted = false;
    try {
        slideShow = new XMLSlideShow(pptIs);
        Dimension dimension = slideShow.getPageSize();
        int width = (int) dimension.getWidth();
        int height = (int) dimension.getHeight();

        // A transparent slide-sized image serves two purposes: its dimensions define the
        // PDF page size, and it is the placeholder page for an empty deck.
        BufferedImage blankCanvas = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
        com.itextpdf.text.Image blankPage = com.itextpdf.text.Image.getInstance(blankCanvas, null);
        // The page size is passed to the constructor so that it already applies to the first page.
        doc = new Document(new com.itextpdf.text.Rectangle(blankPage));
        PdfWriter.getInstance(doc, pdfOs);
        doc.open();

        List<XSLFSlide> slides = slideShow.getSlides();
        if (slides.isEmpty()) {
            // A document without any page cannot be closed ("The document has no pages").
            doc.add(blankPage);
        }
        for (XSLFSlide slide : slides) {
            // Force the font on every text run of the slide's top-level text shapes.
            slide.getShapes().stream()
                    .filter(shape -> shape instanceof XSLFTextShape)
                    .map(shape -> (XSLFTextShape) shape)
                    .forEach(textShape -> textShape.getTextParagraphs().forEach(
                            paragraph -> paragraph.getTextRuns().forEach(run -> run.setFontFamily("宋体"))));

            BufferedImage canvas = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = canvas.createGraphics();
            try {
                // A TYPE_INT_RGB image starts out black; fill it so slides without their own
                // background end up on white.
                graphics.setPaint(Color.white);
                graphics.fillRect(0, 0, width, height);
                graphics.setFont(new java.awt.Font("宋体", java.awt.Font.PLAIN, 12));
                slide.draw(graphics);
            } finally {
                graphics.dispose();
            }
            com.itextpdf.text.Image image = com.itextpdf.text.Image.getInstance(canvas, null);
            image.scalePercent(90f);
            doc.add(image);
        }
        // Close inside the try block so a failure while finalising the PDF yields false
        // instead of escaping from finally.
        closeAttempted = true;
        doc.close();
        System.out.println("---------- 转换成功 -------------");
        return true;
    } catch (Throwable e) {
        // Throwable rather than Exception: drawing a slide can fail with an Error.
        e.printStackTrace();
        return false;
    } finally {
        try {
            // doc is still null when reading the deck itself failed.
            if (doc != null && !closeAttempted) {
                try {
                    doc.close();
                } catch (RuntimeException ignored) {
                    // Closing a page-less document throws "The document has no pages";
                    // the original failure has already been reported above.
                }
            }
        } finally {
            if (slideShow != null) {
                slideShow.close();
            }
        }
    }
}
相关推荐
belldeep6 小时前
python:reportlab 将多个图片合并成一个PDF文件
python·pdf·reportlab
墨染辉11 小时前
pdf处理2
pdf
墨染辉1 天前
10.2 如何解决从复杂 PDF 文件中提取数据的问题?
pdf
shandianchengzi1 天前
【记录】Excel|Excel 打印成 PDF 页数太多怎么办
pdf·excel
PythonFun1 天前
Python批量下载PPT模块并实现自动解压
开发语言·python·powerpoint
bianshaopeng1 天前
android 原生加载pdf
android·pdf
卢卡斯2331 天前
在线PDF怎么转换成JPG图片?分享14种转换操作!
pdf
J不A秃V头A2 天前
iTextPDF中,要实现表格中的内容在数据长度超过边框时自动换行
java·pdf
嘻嘻仙人2 天前
【杂谈一之概率论】CDF、PDF、PMF和PPF概念解释与分析
pdf·概率论·pmf·cdf
资深前端之路3 天前
vue2 将页面生成pdf下载
前端·vue.js·pdf