JAVA 有关PDF文件和图片文件合并并生成一个PDF

情景:

1.文件列表包含多个图片和PDF时需要对文件进行合并

2.合并时保持文件顺序

开淦:

一、导入POM

XML 复制代码
  <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.24</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>fontbox</artifactId>
            <version>2.0.24</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>xmpbox</artifactId>
            <version>2.0.24</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.24</version>
        </dependency>

二、Java 代码

java 复制代码
package com.aisino.datadocking;

import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

/**
 * Merges a mixed list of PDF and image files into a single PDF, keeping the
 * order of the input list.
 *
 * <p>Images (png / jpg / jpeg) are first converted to one-page PDFs; those
 * intermediate PDFs and the real PDFs are then concatenated with
 * {@link PDFMergerUtility}.
 *
 * <p>Created: 2023-12-20 16:57
 *
 * @author Stiven
 */
public class MergeImgPDFUntil {

    /** Maximum time to wait for the HTTP connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;

    /** Maximum time to wait for more data on an open HTTP connection, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30_000;

    /** Size of the buffer used when copying the download to disk. */
    private static final int COPY_BUFFER_SIZE = 8192;

    /**
     * Demo entry point: downloads every URL into a working directory and merges
     * the downloaded files, in order, into one PDF.
     *
     * @param args unused
     * @throws Exception if a download, an image conversion or the merge fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): "+IP+" looks like a redacted host placeholder — replace it with
        // the real file-server address before running.
        String[] urls = {"http://+IP+wKgFOGV7uYuAMWldAAMsi8rvZ3Y062.jpg"
                , "http:+IP+/group1/M01/03/76/wKgFOGWKOA6AZ5i-eecoQ405.pdf"
                , "http://+IP+/group1/M01/02/FF/wKgFOGV7ubyAb6q3AATSEcwOiu8024.jpg"
        };
        // Working directory for downloads and merged output (required). Downloaded
        // files and merged PDFs are not removed here, so clean it up periodically.
        String target = "/Users/stiven/IdeaProjects/tmp/";
        List<String> fileList = new ArrayList<>();
        for (String url : urls) {
            fileList.add(downloadFile(url, target));
        }
        // Run the merge and report where the result was written.
        String merged = mergePDFAndImages(fileList, target);
        System.out.println("Merged PDF written to: " + merged);
    }

    /**
     * Merges PDFs and images into one PDF, preserving the order of {@code FileList}.
     *
     * <p>Entries whose extension is not pdf, png, jpg or jpeg (case-insensitive)
     * are skipped. The intermediate PDFs created for images are deleted once the
     * merge has finished or failed.
     *
     * @param FileList       paths of the files to merge, in output order
     * @param mergedFilePath directory that receives the intermediate and merged PDFs
     * @return path of the merged PDF, named {@code <uuid>_merged.pdf} inside {@code mergedFilePath}
     * @throws IllegalArgumentException if an argument is null or no entry is a PDF or supported image
     * @throws Exception                if an image cannot be converted or the merge fails
     */
    public static String mergePDFAndImages(List<String> FileList, String mergedFilePath) throws Exception {
        if (FileList == null || FileList.isEmpty()) {
            throw new IllegalArgumentException("FileList must contain at least one file to merge");
        }
        if (mergedFilePath == null) {
            throw new IllegalArgumentException("mergedFilePath must not be null");
        }

        PDFMergerUtility merger = new PDFMergerUtility();
        // Intermediate PDFs produced from images; removed in the finally block.
        List<File> tempPdfs = new ArrayList<>();
        try {
            int sourceCount = 0;
            for (String filePath : FileList) {
                String extension = extensionOf(filePath);
                if ("pdf".equalsIgnoreCase(extension)) {
                    merger.addSource(new File(filePath));
                    sourceCount++;
                } else if ("png".equalsIgnoreCase(extension)
                        || "jpg".equalsIgnoreCase(extension)
                        || "jpeg".equalsIgnoreCase(extension)) {
                    String tempPdf = convertImageToPdf(new File(filePath), mergedFilePath);
                    tempPdfs.add(new File(tempPdf));
                    merger.addSource(tempPdf);
                    sourceCount++;
                }
                // Any other extension is silently skipped, as before.
            }
            if (sourceCount == 0) {
                // Fail fast rather than return the path of a merged file with no content behind it.
                throw new IllegalArgumentException("FileList contains no PDF or supported image file");
            }

            String mergeTmpFilePath = resolveInDirectory(mergedFilePath, UUID.randomUUID() + "_merged.pdf");
            merger.setDestinationFileName(mergeTmpFilePath);
            merger.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly());
            return mergeTmpFilePath;
        } finally {
            for (File tempPdf : tempPdfs) {
                // Best-effort cleanup: fall back to removal at JVM exit if the file is still locked.
                if (!tempPdf.delete()) {
                    tempPdf.deleteOnExit();
                }
            }
        }
    }

    /**
     * Converts one image into a single-page PDF stored in {@code mergedFilePath}.
     *
     * <p>An image larger than the default page is scaled down (keeping its aspect
     * ratio) and centred; a smaller image is drawn unscaled at the page origin.
     *
     * @param imageFile      image to convert
     * @param mergedFilePath directory that receives the generated PDF
     * @return path of the generated PDF, named {@code <uuid>.pdf}
     * @throws IOException if the image cannot be read or decoded, or the PDF cannot be written
     */
    private static String convertImageToPdf(File imageFile, String mergedFilePath) throws IOException {
        // Decode once; ImageIO.read returns null when no registered reader understands the data.
        BufferedImage image = ImageIO.read(imageFile);
        if (image == null) {
            throw new IOException("Unsupported or corrupt image file: " + imageFile);
        }
        float width = image.getWidth();
        float height = image.getHeight();

        String tempPdfFilename = resolveInDirectory(mergedFilePath, UUID.randomUUID() + ".pdf");
        // try-with-resources closes the document and content stream even if drawing or saving fails.
        try (PDDocument document = new PDDocument()) {
            PDPage page = new PDPage();
            document.addPage(page);
            PDImageXObject pdImage = LosslessFactory.createFromImage(document, image);
            float pageWidth = page.getMediaBox().getWidth();
            float pageHeight = page.getMediaBox().getHeight();

            try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
                if (width > pageWidth || height > pageHeight) {
                    // Shrink to fit the page and centre the result.
                    float scale = Math.min(pageWidth / width, pageHeight / height);
                    float scaledWidth = width * scale;
                    float scaledHeight = height * scale;
                    float x = (pageWidth - scaledWidth) / 2;
                    float y = (pageHeight - scaledHeight) / 2;
                    contentStream.drawImage(pdImage, x, y, scaledWidth, scaledHeight);
                } else {
                    contentStream.drawImage(pdImage, 0, 0);
                }
            }
            document.save(tempPdfFilename);
        }
        return tempPdfFilename;
    }

    /**
     * Downloads an HTTP(S) resource into {@code targetDirectory}.
     *
     * <p>The local file name is the last path segment of the URL; an existing file
     * with the same name is overwritten.
     *
     * @param fileUrl         http or https URL of the file
     * @param targetDirectory directory that receives the downloaded file
     * @return path of the downloaded file
     * @throws IOException if the URL is malformed or not http(s), no usable file name can be
     *                     derived from it, the server does not answer with a 2xx status, or
     *                     the transfer fails
     */
    public static String downloadFile(String fileUrl, String targetDirectory) throws IOException {
        String fileName = getFileNameFromUrl(fileUrl);
        // Refuse names that are empty or could point outside the target directory.
        if (fileName.isEmpty() || ".".equals(fileName) || "..".equals(fileName)
                || fileName.indexOf('\\') >= 0) {
            throw new IOException("Cannot derive a safe file name from URL: " + fileUrl);
        }

        URL url = new URL(fileUrl);
        String protocol = url.getProtocol();
        if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
            throw new IOException("Only http and https URLs are supported: " + fileUrl);
        }

        String filePath = resolveInDirectory(targetDirectory, fileName);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        // Without timeouts a stalled server would block the caller indefinitely.
        connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
        connection.setReadTimeout(READ_TIMEOUT_MS);
        try {
            int status = connection.getResponseCode();
            if (status < 200 || status >= 300) {
                throw new IOException("Download failed with HTTP " + status + " for " + fileUrl);
            }
            try (InputStream inputStream = new BufferedInputStream(connection.getInputStream());
                 FileOutputStream outputStream = new FileOutputStream(filePath)) {
                byte[] buffer = new byte[COPY_BUFFER_SIZE];
                int bytesRead;
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, bytesRead);
                }
            }
        } finally {
            // Release the connection on success and on failure alike.
            connection.disconnect();
        }
        return filePath;
    }

    /**
     * Extracts the file name (last path segment) from a URL string.
     *
     * <p>A query string ({@code ?...}) or fragment ({@code #...}) is ignored, so
     * {@code http://host/a/b.pdf?token=1} yields {@code b.pdf}.
     *
     * @param fileUrl URL string; may be null
     * @return the text after the last {@code '/'} of the path, or an empty string when
     *         {@code fileUrl} is null, contains no {@code '/'}, or ends with {@code '/'}
     */
    public static String getFileNameFromUrl(String fileUrl) {
        if (fileUrl == null) {
            return "";
        }
        // Cut at the first '?' or '#', whichever comes first.
        int end = fileUrl.length();
        int queryStart = fileUrl.indexOf('?');
        if (queryStart >= 0) {
            end = Math.min(end, queryStart);
        }
        int fragmentStart = fileUrl.indexOf('#');
        if (fragmentStart >= 0) {
            end = Math.min(end, fragmentStart);
        }
        String path = fileUrl.substring(0, end);

        int lastIndexOfSlash = path.lastIndexOf('/');
        if (lastIndexOfSlash != -1 && lastIndexOfSlash < path.length() - 1) {
            return path.substring(lastIndexOfSlash + 1);
        }
        return "";
    }

    /**
     * Returns the extension of the file-name part of {@code filePath}, without the dot.
     *
     * @param filePath file path; may be null
     * @return the text after the last dot of the file name, or an empty string when the
     *         path is null, has no dot in its file name, or ends with a dot
     */
    private static String extensionOf(String filePath) {
        if (filePath == null) {
            return "";
        }
        // Look only at the file name so a dot in a directory name is not mistaken for an extension.
        String name = new File(filePath).getName();
        int dotIndex = name.lastIndexOf('.');
        if (dotIndex >= 0 && dotIndex < name.length() - 1) {
            return name.substring(dotIndex + 1);
        }
        return "";
    }

    /**
     * Joins a directory and a file name, whether or not the directory ends with a separator.
     *
     * @param directory directory path; an empty or null value yields the bare file name
     * @param fileName  file name to place inside the directory
     * @return the combined path
     */
    private static String resolveInDirectory(String directory, String fileName) {
        if (directory == null || directory.isEmpty()) {
            return fileName;
        }
        return new File(directory, fileName).getPath();
    }
}
相关推荐
小奥超人10 小时前
PDF无法打印!怎么办?
windows·经验分享·pdf·办公技巧·pdf加密解密
m0_748241231 天前
ElasticPDF-新国产 PDF 编辑器开发框架(基于 pdf.js Web PDF批注开发,实现高亮多边形橡皮擦历史记录保存注释文字)
前端·pdf·编辑器
ComPDFKit1 天前
开源 JS PDF 库比较
pdf
杨浦老苏1 天前
开源PDF翻译工具PDFMathTranslate
人工智能·docker·ai·pdf·群晖·翻译
LostSpeed1 天前
在福昕(pdf)阅读器中导航到上次阅读页面的方法
pdf
旭久1 天前
SpringBoot的Thymeleaf做一个可自定义合并td的pdf表格
pdf·html·springboot
神色自若2 天前
Net9为PDF文字替换,使用Spire.PDF版本10.12.4.1360
pdf
机器懒得学习2 天前
解析交通事故报告:利用 PDF、AI 与数据标准化技术构建智能分析系统
pdf
合合技术团队3 天前
高效准确的PDF解析工具,赋能企业非结构化数据治理
人工智能·科技·pdf·aigc·文档
jingling5553 天前
如何使用免费资源--知网篇
开发语言·经验分享·搜索引擎·pdf·开源