将 Word(doc/docx)和 PDF 转成一张垂直拼接的长 PNG 图片的工具类

支持Word (doc/docx) 和 PDF 转成一张垂直拼接的长PNG图片

1.依赖

xml 复制代码
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>5.2.5</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>5.2.5</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.2.5</version> <!-- 如果有 DOCX 相关 -->
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>3.0.5</version>
</dependency>
<!-- Word转PDF:Docx4J核心 -->
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-JAXB-ReferenceImpl</artifactId>
            <version>8.3.10</version>
        </dependency>

        <!-- Word转PDF:FO导出支持(用于PDF渲染) -->
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-export-fo</artifactId>
            <version>8.3.10</version>
        </dependency>
        <dependency>
            <groupId>javax.xml.bind</groupId>
            <artifactId>jaxb-api</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.glassfish.jaxb</groupId>
            <artifactId>jaxb-runtime</artifactId>
            <version>2.3.3</version>
        </dependency>
        <!-- JDK 11 起已移除 javax.activation(JAF)模块,JAXB 2.x 运行时依赖它,因此在较新的 JDK 上需显式引入 -->
        <dependency>
            <groupId>com.sun.activation</groupId>
            <artifactId>javax.activation</artifactId>
            <version>1.2.0</version>
        </dependency>

2.代码

java 复制代码
import lombok.extern.slf4j.Slf4j;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToFoConverter;
import org.docx4j.Docx4J;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.w3c.dom.Document;

import javax.imageio.ImageIO;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Converts Word (doc/docx) and PDF documents into one long PNG image by rendering
 * every page and stacking the pages vertically.
 *
 * <p>Word input is first converted to PDF (DOCX via Docx4J, DOC via POI HWPF plus
 * Apache FOP); the resulting PDF is then rasterized page by page with PDFBox.
 */
@Slf4j
public class DocumentToImageUtil {

    /** Rendering resolution used when the caller does not supply one. */
    private static final float DEFAULT_DPI = 150f;

    /**
     * Converts a document (Word or PDF) into a vertically stitched PNG rendered at
     * the default resolution of 150 DPI.
     *
     * @param content  raw bytes of the document
     * @param fileType file type; any value containing "doc" (case-insensitive) is handled
     *                 as Word, every other value is handled as PDF
     * @return PNG image bytes, or {@code null} when the PDF contains no pages
     * @throws IOException          if the conversion fails
     * @throws NullPointerException if {@code fileType} is {@code null}
     */
    public byte[] convertToStitchedImage(byte[] content, String fileType) throws IOException {
        return convertToStitchedImage(content, fileType, DEFAULT_DPI);
    }

    /**
     * Converts a document (Word or PDF) into a vertically stitched PNG rendered at
     * the requested resolution.
     *
     * @param content  raw bytes of the document
     * @param fileType file type; any value containing "doc" (case-insensitive) is handled
     *                 as Word, every other value is handled as PDF
     * @param dpi      rendering resolution in dots per inch; must be positive
     * @return PNG image bytes, or {@code null} when the PDF contains no pages
     * @throws IOException              if the conversion fails
     * @throws NullPointerException     if {@code fileType} is {@code null}
     * @throws IllegalArgumentException if {@code dpi} is not a positive number
     */
    public byte[] convertToStitchedImage(byte[] content, String fileType, float dpi) throws IOException {
        java.util.Objects.requireNonNull(fileType, "fileType");
        // Written as a negated comparison so that NaN is rejected as well.
        if (!(dpi > 0)) {
            throw new IllegalArgumentException("dpi must be positive: " + dpi);
        }

        // Normalize once; Locale.ROOT keeps the result independent of the JVM default locale.
        String normalizedType = fileType.toLowerCase(java.util.Locale.ROOT);

        // "docx" contains "doc", so a single check covers both Word formats.
        byte[] pdfContent = normalizedType.contains("doc")
                ? convertWordToPdf(content, normalizedType)
                : content;
        return convertPdfToStitchedImage(pdfContent, dpi);
    }

    /**
     * Converts a Word document (doc or docx) to PDF.
     *
     * @param wordContent raw bytes of the Word document
     * @param fileType    lower-cased file type, as passed in by the public entry point
     * @return PDF bytes
     * @throws IOException if the type is neither doc nor docx, or the conversion fails
     */
    private byte[] convertWordToPdf(byte[] wordContent, String fileType) throws IOException {
        // Order matters: "docx" must be tested first because it also contains "doc".
        if (fileType.contains("docx")) {
            return convertDocxToPdf(wordContent);
        }
        if (fileType.contains("doc")) {
            return convertDocToPdf(wordContent);
        }
        throw new IOException("不支持的文件类型: " + fileType);
    }

    /** Converts DOCX bytes to PDF bytes using Docx4J. */
    private byte[] convertDocxToPdf(byte[] docxContent) throws IOException {
        try (ByteArrayInputStream in = new ByteArrayInputStream(docxContent)) {
            WordprocessingMLPackage wordPackage = WordprocessingMLPackage.load(in);
            ByteArrayOutputStream pdfOut = new ByteArrayOutputStream();
            Docx4J.toPDF(wordPackage, pdfOut);
            return pdfOut.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOCX转PDF失败", e);
        }
    }

    /** Converts legacy DOC bytes to PDF bytes: POI HWPF builds XSL-FO, Apache FOP renders it. */
    private byte[] convertDocToPdf(byte[] docContent) throws IOException {
        try (ByteArrayInputStream in = new ByteArrayInputStream(docContent);
             HWPFDocument document = new HWPFDocument(in)) {

            // 1. Build the XSL-FO DOM. No custom font resolver is configured here,
            //    so the converter's defaults apply.
            Document foDoc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToFoConverter converter = new WordToFoConverter(foDoc);
            converter.processDocument(document);

            // 2. Serialize the FO DOM to bytes. Pretty-printing is deliberately NOT enabled:
            //    indentation adds whitespace text nodes to the FO tree, which may become
            //    visible in the rendered PDF wherever the FO preserves whitespace.
            ByteArrayOutputStream foOut = new ByteArrayOutputStream();
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer serializer = transformerFactory.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.METHOD, "xml");
            serializer.transform(new DOMSource(foDoc), new StreamResult(foOut));
            byte[] foBytes = foOut.toByteArray();

            // 3. Feed the FO through FOP to produce the PDF. The working directory is
            //    used as the base URI for the factory.
            FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI());
            ByteArrayOutputStream pdfOut = new ByteArrayOutputStream();
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, pdfOut);
            Transformer identity = transformerFactory.newTransformer(); // identity transform
            Source foSource = new StreamSource(new ByteArrayInputStream(foBytes));
            Result pdfResult = new SAXResult(fop.getDefaultHandler());
            identity.transform(foSource, pdfResult);

            return pdfOut.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOC转PDF失败", e);
        }
    }

    /**
     * Renders every page of a PDF and stitches the pages top-to-bottom into one PNG.
     *
     * @param pdfContent raw bytes of the PDF
     * @param dpi        rendering resolution in dots per inch
     * @return PNG image bytes, or {@code null} when the PDF contains no pages
     * @throws IOException if loading, rendering or encoding fails
     */
    private byte[] convertPdfToStitchedImage(byte[] pdfContent, float dpi) throws IOException {
        try (ByteArrayInputStream in = new ByteArrayInputStream(pdfContent);
             RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(in);
             PDDocument document = Loader.loadPDF(buffer)) {
            int numPages = document.getNumberOfPages();
            if (numPages == 0) {
                // Kept for backward compatibility: callers receive null for an empty PDF.
                return null;
            }

            PDFRenderer renderer = new PDFRenderer(document);
            List<BufferedImage> pageImages = new ArrayList<>(numPages);
            int totalHeight = 0;
            int maxWidth = 0;
            for (int pageIndex = 0; pageIndex < numPages; pageIndex++) {
                BufferedImage pageImage = renderer.renderImageWithDPI(pageIndex, dpi);
                pageImages.add(pageImage);
                totalHeight += pageImage.getHeight();
                maxWidth = Math.max(maxWidth, pageImage.getWidth());
            }

            BufferedImage stitchedImage = stitchVertically(pageImages, maxWidth, totalHeight);

            ByteArrayOutputStream pngOut = new ByteArrayOutputStream();
            // ImageIO.write reports a missing writer by returning false, not by throwing.
            if (!ImageIO.write(stitchedImage, "png", pngOut)) {
                throw new IOException("没有可用的PNG图片写入器");
            }
            return pngOut.toByteArray();
        } catch (Exception e) {
            throw new IOException("PDF转图片失败", e);
        }
    }

    /**
     * Draws the given page images one below another, left-aligned, on a white canvas.
     *
     * @param pageImages pages in top-to-bottom order
     * @param width      canvas width (the widest page)
     * @param height     canvas height (sum of all page heights)
     * @return the stitched image
     */
    private BufferedImage stitchVertically(List<BufferedImage> pageImages, int width, int height) {
        BufferedImage canvas = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics2D g2d = canvas.createGraphics();
        try {
            // Fill with white so areas beside narrower pages are not left black.
            g2d.setBackground(java.awt.Color.WHITE);
            g2d.clearRect(0, 0, width, height);

            int yOffset = 0;
            for (BufferedImage pageImage : pageImages) {
                g2d.drawImage(pageImage, 0, yOffset, null);
                yOffset += pageImage.getHeight();
            }
        } finally {
            // Always release the graphics context, even if drawing fails.
            g2d.dispose();
        }
        return canvas;
    }
}

3.如何使用

java 复制代码
// `content` holds the raw document bytes and `fileType` its type (doc, docx or pdf).
// convertToStitchedImage lower-cases fileType itself, so the toLowerCase() here is optional.
// The call throws IOException when conversion fails and returns null for a PDF with no pages.
byte[] imageBytes = new DocumentToImageUtil().convertToStitchedImage(content, fileType.toLowerCase());
相关推荐
__XYZ5 小时前
RedisTemplate 实现分布式锁
java·spring boot·redis·分布式·junit
闭着眼睛学算法6 小时前
【双机位A卷】华为OD笔试之【模拟】双机位A-新学校选址【Py/Java/C++/C/JS/Go六种语言】【欧弟算法】全网注释最详细分类最全的华子OD真题题解
java·c语言·javascript·c++·python·算法·华为od
VBAMatrix6 小时前
报告工具更新!Word附注一键期末转期初
word·办公自动化·审计报告·审计·会计师事务所·报告工具·word附注
源码_V_saaskw6 小时前
JAVA校园跑腿校园外卖源码校园外卖小程序校园代买帮忙外卖源码社区外卖源码小程序+公众号+h5
java·开发语言·微信小程序·小程序
源码哥_博纳软云6 小时前
JAVA同城预约服务家政服务美容美发洗车保洁搬家维修家装系统源码小程序+公众号+h5
java·开发语言·微信小程序·小程序
红尘客栈26 小时前
Kubernetes 集群调度
java·linux·网络·容器·kubernetes
编程岁月6 小时前
java面试-0203-java集合并发修改异常、快速/安全失败原理、解决方法?
java·开发语言·面试
whltaoin7 小时前
AI 超级智能体全栈项目阶段五:RAG 四大流程详解、最佳实践与调优(基于 Spring AI 实现)
java·人工智能·spring·rag·springai
junnhwan7 小时前
【苍穹外卖笔记】Day05--Redis入门与店铺营业状态设置
java·数据库·redis·笔记·后端·苍穹外卖