// Java source: converts Word/PDF documents into a single stitched PNG image.
import lombok.extern.slf4j.Slf4j;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToFoConverter;
import org.docx4j.Docx4J;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.w3c.dom.Document;
import javax.imageio.ImageIO;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Utility for rendering a document as a single long image.
 *
 * <p>Accepts Word ({@code doc}/{@code docx}) and PDF content and produces one PNG in which
 * all pages are stacked vertically. Word input is converted to PDF first, then every PDF
 * page is rasterized and stitched together.
 */
@Slf4j
public class DocumentToImageUtil {

    /** Resolution, in dots per inch, at which each PDF page is rasterized. */
    private static final float RENDER_DPI = 150f;

    /**
     * Converts a document (Word or PDF) into the bytes of one long PNG image.
     *
     * <p>Any {@code fileType} containing {@code "doc"} (case-insensitive, so this also
     * covers {@code "docx"}) is treated as Word and converted to PDF first; every other
     * value is passed straight to the PDF renderer.
     *
     * @param content  raw bytes of the document
     * @param fileType file type, e.g. {@code doc}, {@code docx} or {@code pdf}; must not be null
     * @return PNG image bytes, or {@code null} when the PDF contains no pages
     * @throws IOException          if the conversion fails
     * @throws NullPointerException if {@code fileType} is null
     */
    public byte[] convertToStitchedImage(byte[] content, String fileType) throws IOException {
        java.util.Objects.requireNonNull(fileType, "fileType must not be null");
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        String normalizedType = fileType.toLowerCase(java.util.Locale.ROOT);
        // "docx" contains "doc", so one check is enough to detect both Word formats.
        byte[] pdfContent = normalizedType.contains("doc")
                ? convertWordToPdf(content, normalizedType)
                : content;
        return convertPdfToStitchedImage(pdfContent);
    }

    /**
     * Converts Word content to PDF, dispatching on the file type.
     *
     * @param wordContent raw bytes of the Word document
     * @param fileType    file type; matched case-insensitively against "docx", then "doc"
     * @return PDF bytes
     * @throws IOException if the type is not a Word type or the conversion fails
     */
    private byte[] convertWordToPdf(byte[] wordContent, String fileType) throws IOException {
        String normalizedType = fileType.toLowerCase(java.util.Locale.ROOT);
        // "docx" must be tested first because it also matches "doc".
        if (normalizedType.contains("docx")) {
            return convertDocxToPdf(wordContent);
        }
        if (normalizedType.contains("doc")) {
            return convertDocToPdf(wordContent);
        }
        throw new IOException("不支持的文件类型: " + fileType);
    }

    /** Converts DOCX bytes to PDF bytes using Docx4J. */
    private byte[] convertDocxToPdf(byte[] wordContent) throws IOException {
        try (ByteArrayInputStream in = new ByteArrayInputStream(wordContent);
             ByteArrayOutputStream pdfOut = new ByteArrayOutputStream()) {
            WordprocessingMLPackage loadedPackage = WordprocessingMLPackage.load(in);
            Docx4J.toPDF(loadedPackage, pdfOut);
            return pdfOut.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOCX转PDF失败", e);
        }
    }

    /** Converts legacy DOC bytes to PDF bytes via POI HWPF (DOC to XSL-FO) and Apache FOP (FO to PDF). */
    private byte[] convertDocToPdf(byte[] wordContent) throws IOException {
        try (ByteArrayInputStream in = new ByteArrayInputStream(wordContent);
             HWPFDocument document = new HWPFDocument(in)) {
            // 1. Build the XSL-FO DOM with WordToFoConverter. No custom font resolver is
            //    installed; the converter's default is used.
            Document foDoc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToFoConverter converter = new WordToFoConverter(foDoc);
            converter.processDocument(document);

            // 2. Serialize the FO DOM to UTF-8 XML bytes.
            ByteArrayOutputStream foOut = new ByteArrayOutputStream();
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer serializer = transformerFactory.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "xml");
            serializer.transform(new DOMSource(foDoc), new StreamResult(foOut));
            byte[] foBytes = foOut.toByteArray();

            // 3. Feed the FO XML through FOP to produce the PDF. The factory is created
            //    with the current working directory as its base URI.
            FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI());
            ByteArrayOutputStream pdfOut = new ByteArrayOutputStream();
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, pdfOut);
            // A transformer created without a stylesheet copies the source to the result unchanged.
            Transformer identity = transformerFactory.newTransformer();
            Source foSource = new StreamSource(new ByteArrayInputStream(foBytes));
            Result fopResult = new SAXResult(fop.getDefaultHandler());
            identity.transform(foSource, fopResult);
            return pdfOut.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOC转PDF失败", e);
        }
    }

    /**
     * Renders every page of a PDF and stitches the pages vertically into one PNG.
     *
     * <p>The output is as wide as the widest page and as tall as the sum of all page
     * heights; areas not covered by a page are white.
     *
     * @param pdfContent raw PDF bytes
     * @return PNG image bytes, or {@code null} when the PDF contains no pages
     * @throws IOException if loading, rendering or encoding fails
     */
    private byte[] convertPdfToStitchedImage(byte[] pdfContent) throws IOException {
        try (ByteArrayInputStream in = new ByteArrayInputStream(pdfContent);
             RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(in);
             PDDocument document = Loader.loadPDF(buffer)) {
            PDFRenderer renderer = new PDFRenderer(document);
            int numPages = document.getNumberOfPages();
            if (numPages == 0) {
                return null;
            }

            // First pass: rasterize each page and accumulate the canvas dimensions.
            List<BufferedImage> pageImages = new ArrayList<>(numPages);
            int totalHeight = 0;
            int maxWidth = 0;
            for (int pageIndex = 0; pageIndex < numPages; pageIndex++) {
                BufferedImage pageImage = renderer.renderImageWithDPI(pageIndex, RENDER_DPI);
                pageImages.add(pageImage);
                totalHeight += pageImage.getHeight();
                maxWidth = Math.max(maxWidth, pageImage.getWidth());
            }

            // Second pass: paint the pages top to bottom onto a white canvas.
            BufferedImage stitchedImage =
                    new BufferedImage(maxWidth, totalHeight, BufferedImage.TYPE_INT_RGB);
            Graphics2D g2d = stitchedImage.createGraphics();
            try {
                g2d.setBackground(java.awt.Color.WHITE);
                g2d.clearRect(0, 0, maxWidth, totalHeight);
                int yOffset = 0;
                for (BufferedImage pageImage : pageImages) {
                    g2d.drawImage(pageImage, 0, yOffset, null);
                    yOffset += pageImage.getHeight();
                }
            } finally {
                // Release the graphics context even if drawing fails.
                g2d.dispose();
            }

            ByteArrayOutputStream pngOut = new ByteArrayOutputStream();
            // ImageIO.write returns false (without throwing) when no suitable writer exists.
            if (!ImageIO.write(stitchedImage, "png", pngOut)) {
                throw new IOException("No ImageIO writer available for format: png");
            }
            return pngOut.toByteArray();
        } catch (Exception e) {
            throw new IOException("PDF转图片失败", e);
        }
    }
}