JAVA实现将富文本内容插入已有word文档并下载(docx4j+jsoup)

JAVA实现将富文本内容插入已有word文档并下载(docx4j+jsoup)

需求描述:

最近公司项目需要开发一个功能:将前端保存的富文本内容和目录插入到已有的word文档模版里,并提供下载功能。参考了很多方法,也踩了一些坑,最后使用docx4j+jsoup实现了。因为图片在富文本里保存的是相对路径,需要先用jsoup把富文本中的标签解析出来并处理;docx4j无法直接将HTML中以路径引用的图片转换到word里,所以需要先把图片下载下来,再转换成base64编码格式嵌入HTML。

引用依赖:

此处依赖是针对JDK8的,其实也写了一个JDK11的,提交代码的时候发现编译不通过,才想起公司运行的JDK版本是JDK1.8的。(一定要注意依赖版本)

xml 复制代码
 <!-- docx4j XHTML importer; its transitive JAXB artifacts are excluded here and declared explicitly further down -->
 <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-ImportXHTML</artifactId>
            <version>8.3.10</version>
            <exclusions>
                <exclusion>
                    <groupId>com.sun.xml.bind</groupId>
                    <artifactId>jaxb-impl</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.xml.bind</groupId>
                    <artifactId>jaxb-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-JAXB-Internal</artifactId>
            <version>8.3.10</version>
            <exclusions>
                <exclusion>
                    <groupId>com.sun.xml.bind</groupId>
                    <artifactId>jaxb-impl</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
  	 <!-- Manually pin the newer JAXB dependencies -->
        <dependency>
            <groupId>javax.xml.bind</groupId>
            <artifactId>jaxb-api</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>com.sun.xml.bind</groupId>
            <artifactId>jaxb-impl</artifactId>
            <version>2.3.8</version>
        </dependency>
        <dependency>
            <groupId>javax.activation</groupId>
            <artifactId>activation</artifactId>
            <version>1.1.1</version>
        </dependency>
        <!-- NOTE(review): docx4j-JAXB-Internal is also declared above. The docx4j docs appear to
             recommend exactly one docx4j-JAXB-* artifact on the classpath; confirm which is intended. -->
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-JAXB-ReferenceImpl</artifactId>
            <version>8.3.10</version>
        </dependency>
        <!-- Other utilities -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>

代码实现

java 复制代码
 // Cache of image network URL -> base64 data-URI string, shared by all exports.
 // NOTE(review): nothing in the visible code evicts entries, so this grows with every distinct image.
 private static final Map<String, String> IMAGE_CACHE = new ConcurrentHashMap<>();
    // Fixed-size pool (2 x available processors) used for image work.
    // NOTE(review): no shutdown of this pool is visible in this part of the file.
    private static final ExecutorService IMAGE_EXECUTOR = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2);


    /**
     * Renders the catalog tree and its rich-text content for one version into the
     * {@code templates/sow_V2.0.docx} template, generates a table of contents and hands the
     * resulting document to {@code buildResponse}.
     *
     * @param fileName file name passed on to {@code buildResponse}
     * @param param    request parameters; {@code versionId} selects the catalog tree
     *                 (NOTE(review): falls back to the literal {@code "test"} when absent —
     *                 confirm this default is intended outside of testing)
     * @return whatever {@code buildResponse} returns for the generated document
     * @throws Exception if loading the template, converting the HTML or saving the document fails
     */
    public String exportSowToWord(String fileName, HashMap<String, String> param)
            throws Exception {

        // 1. Load the catalog tree for the requested version
        String versionId = param.getOrDefault("versionId", "test");
        List<CatalogTressDTO> catalogList = zentaoProSowCatalogMapper.queryTreeMode(versionId);

        // Fetch the content of all catalog entries in a single query
        List<String> catalogIds = catalogList.stream()
                .map(CatalogTressDTO::getId)
                .collect(Collectors.toList());

        Map<String, ZentaoProSowContent> contentMap;
        if (catalogIds.isEmpty()) {
            // Nothing to look up; also avoids handing an empty collection to the IN condition.
            contentMap = java.util.Collections.emptyMap();
        } else {
            // NOTE(review): toMap throws IllegalStateException if two rows share a catalogId.
            contentMap = zentaoProSowContentMapper.selectList(
                            new LambdaQueryWrapper<ZentaoProSowContent>()
                                    .in(ZentaoProSowContent::getCatalogId, catalogIds))
                    .stream()
                    .collect(Collectors.toMap(ZentaoProSowContent::getCatalogId, Function.identity()));
        }

        // 2. Concatenate heading + body HTML for every catalog entry, in tree order
        StringBuilder contentHtml = new StringBuilder();
        for (CatalogTressDTO catalog : catalogList) {
            // Heading
            if (StringUtils.isNotBlank(catalog.getIndentedTitle())) {
                contentHtml.append(buildHeadingTag(catalog));
            }

            // Body
            ZentaoProSowContent content = contentMap.get(catalog.getId());
            if (content != null && StringUtils.isNotBlank(content.getContent())) {
                contentHtml.append(content.getContent());
            }
        }

        // 3. Wrap into a full HTML document and inline all images
        String fullHtml = "<!DOCTYPE html><html><head><meta charset='UTF-8'></head><body>"
                + contentHtml.toString() + "</body></html>";
        String processedHtml = processHtmlWithImages(fullHtml);

        // 4. Build the Word document from the template
        ClassPathResource templateResource = new ClassPathResource("templates/sow_V2.0.docx");
        WordprocessingMLPackage wordPackage;
        try (InputStream templateStream = templateResource.getInputStream()) {
            wordPackage = WordprocessingMLPackage.load(templateStream);
        }

        MainDocumentPart mainDoc = wordPackage.getMainDocumentPart();
        // Locate (and remove) the placeholder paragraph
        int insertIndex = findInsertPosition(mainDoc);
        // The HTML declares charset UTF-8, so the bytes must be UTF-8 regardless of the platform default.
        byte[] htmlBytes = processedHtml.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        mainDoc.addAltChunk(AltChunkType.Html, htmlBytes, mainDoc, insertIndex);
        // NOTE(review): the return value of convertAltChunks() is ignored here — confirm against
        // the docx4j API whether the conversion happens in place or on a returned package.
        mainDoc.convertAltChunks();

        // Generate the table of contents
        generateTableOfContents(wordPackage, insertIndex);

        // Serialize the document and hand it over
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        wordPackage.save(outputStream);
        return buildResponse(fileName, outputStream.toByteArray());
    }
    /**
     * Builds the HTML heading element for one catalog entry.
     *
     * <p>The heading level is taken from {@code catalog.getLevel()} and clamped to the valid
     * HTML range 1..6. A missing or non-numeric level falls back to 1 instead of aborting
     * the whole export.
     *
     * <p>NOTE(review): the title is inserted verbatim; if titles can contain {@code <} or
     * {@code &} they should be HTML-escaped — confirm what the title field may contain.
     *
     * @param catalog catalog entry providing the level and the indented title
     * @return an {@code <hN>} element carrying a matching {@code mso-style-name}
     */
    private String buildHeadingTag(CatalogTressDTO catalog) {
        int level = 1;
        String rawLevel = catalog.getLevel();
        if (rawLevel != null) {
            try {
                // Clamp on both sides: h0 or negative levels are not valid heading tags.
                level = Math.max(1, Math.min(Integer.parseInt(rawLevel.trim()), 6));
            } catch (NumberFormatException ignored) {
                // Malformed level: keep the default of 1.
            }
        }
        // Locale.ROOT keeps the digits ASCII whatever the JVM default locale is.
        return String.format(java.util.Locale.ROOT, "<h%d style='mso-style-name:标题%d'>%s</h%d>",
                level, level, catalog.getIndentedTitle(), level);
    }

    /**
     * Finds the first top-level paragraph whose text contains the placeholder
     * {@code "插入的内容"}, removes it, and returns the index at which new content should be added.
     *
     * <p>The returned index is one past the removed paragraph's former position, clamped to the
     * size of the content list so that it is always a valid argument for {@code List.add(int, E)}
     * (the unclamped value is out of range when the placeholder was the last element).
     *
     * @param mainDoc main document part whose content list is searched and modified
     * @return insertion index; the current content size when no placeholder is found
     */
    private int findInsertPosition(MainDocumentPart mainDoc) {
        List<Object> content = mainDoc.getContent();
        for (int i = 0; i < content.size(); i++) {
            Object element = content.get(i);
            if (!(element instanceof P)) {
                continue;
            }
            String text = TextUtils.getText((P) element);
            if (text != null && text.contains("插入的内容")) {
                content.remove(i);  // drop the placeholder paragraph
                return Math.min(i + 1, content.size());
            }
        }
        return content.size();  // no placeholder: append at the end of the document
    }

    /**
     * Generates a table of contents (heading levels 1-3, hyperlinked) one position before
     * the given insertion index.
     *
     * @param wordPackage package to add the table of contents to
     * @param insertIndex index at which the HTML content was inserted; the table of contents is
     *                    placed at {@code insertIndex - 1}, but never below 0
     * @throws Exception if docx4j fails to generate the table of contents
     */
    private void generateTableOfContents(WordprocessingMLPackage wordPackage, int insertIndex) throws Exception {
        TocGenerator tocGenerator = new TocGenerator(wordPackage);
        // Static setter on Toc: the heading text is not scoped to this generator instance.
        Toc.setTocHeadingText("目录");
        // insertIndex can be 0 (e.g. empty template); a negative list index would be invalid.
        int tocIndex = Math.max(0, insertIndex - 1);
        tocGenerator.generateToc(tocIndex, "TOC \\o \"1-3\" \\h \\z \\u ", true);
    }

    /**
     * Parses the HTML, runs {@code processImageTag} for every {@code <img>} element on the
     * shared image executor, waits for all of them, and returns the serialized document.
     *
     * @param html HTML document to process
     * @return the HTML as serialized by jsoup after all image tags were processed
     */
    private String processHtmlWithImages(String html) {
        Document parsed = Jsoup.parse(html);

        // One asynchronous task per <img> element
        CompletableFuture<?>[] pending = parsed.select("img").stream()
                .map(tag -> CompletableFuture.runAsync(() -> processImageTag(tag), IMAGE_EXECUTOR))
                .toArray(CompletableFuture[]::new);

        // Block until every image has been handled
        CompletableFuture.allOf(pending).join();
        return parsed.html();
    }

    /**
     * Replaces the {@code src} of one {@code <img>} element with an inline base64 data URI and
     * sets {@code width}/{@code height} to 90% of the image's pixel size.
     *
     * <p>Differences from a naive implementation, all deliberate:
     * <ul>
     *   <li>The dimensions are decoded from the bytes that were already downloaded, so each
     *       image is fetched once and no second task is queued on the executor this method may
     *       itself be running on (which could starve the fixed-size pool).</li>
     *   <li>Failed downloads ({@code "#error"}) are not cached, so a transient failure does not
     *       stick for later exports.</li>
     *   <li>Sources that are already {@code data:} URIs are left untouched.</li>
     * </ul>
     *
     * <p>On any failure the {@code src} is set to {@code "#error"}.
     *
     * @param img the image element to rewrite in place
     */
    private void processImageTag(Element img) {
        try {
            String src = img.attr("src");
            if (StringUtils.isBlank(src)) {
                return;
            }
            if (src.regionMatches(true, 0, "data:", 0, 5)) {
                return; // already embedded, nothing to download
            }

            String networkUrl = convertToNetworkUrl(src);
            String base64 = IMAGE_CACHE.get(networkUrl);
            if (base64 == null) {
                // Fetch outside of the map so no map-internal lock is held during network I/O.
                base64 = fetchImageBase64(networkUrl);
                if (!"#error".equals(base64)) {
                    IMAGE_CACHE.put(networkUrl, base64);
                }
            }
            if ("#error".equals(base64)) {
                img.attr("src", "#error");
                return;
            }

            BufferedImage image = decodeDataUriImage(base64);
            if (image != null) {
                int scaledWidth = (int) (image.getWidth() * 0.9);
                int scaledHeight = (int) (image.getHeight() * 0.9);
                img.attr("width", String.valueOf(scaledWidth))
                        .attr("height", String.valueOf(scaledHeight));
            }
            img.attr("src", base64);
        } catch (Exception e) {
            // Best effort: one broken image must not abort the export.
            img.attr("src", "#error");
        }
    }

    /** Decodes the payload of a base64 data URI into an image; returns null if it cannot be decoded. */
    private BufferedImage decodeDataUriImage(String dataUri) {
        try {
            int separator = dataUri.indexOf(',');
            if (separator < 0) {
                return null;
            }
            byte[] raw = Base64.getDecoder().decode(dataUri.substring(separator + 1));
            return ImageIO.read(new ByteArrayInputStream(raw));
        } catch (Exception e) {
            // Unknown size only means width/height are not set.
            return null;
        }
    }

    /**
     * Downloads an image and returns it as a base64 data URI.
     *
     * <p>Connect and read timeouts are set explicitly so an unresponsive image server cannot
     * block the export indefinitely.
     *
     * @param imageUrl absolute URL of the image
     * @return {@code data:<mime>;base64,<payload>}, or the sentinel {@code "#error"} when the
     *         download fails for any reason
     */
    private String fetchImageBase64(String imageUrl) {
        try {
            java.net.URLConnection connection = new URL(imageUrl).openConnection();
            connection.setConnectTimeout(3_000);
            connection.setReadTimeout(10_000);
            try (InputStream in = connection.getInputStream()) {
                byte[] bytes = IOUtils.toByteArray(in);
                String mimeType = getMimeType(imageUrl);
                return "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(bytes);
            }
        } catch (Exception e) {
            // Callers rely on the sentinel instead of an exception.
            return "#error";
        }
    }

    // 以下为原有工具方法保持不变
    /**
     * Turns the relative image path stored in the rich text into an absolute URL on the
     * hard-coded image host, after dropping any leading {@code ../} segments.
     *
     * @param relativePath path as found in the {@code src} attribute
     * @return absolute URL on the image host
     */
    private String convertToNetworkUrl(String relativePath) {
        final String baseUrl = "http://10.80.88.93:8090/";
        // Only a leading run of "../" is removed; the rest of the path is kept as is.
        final String withoutParentSegments = relativePath.replaceFirst("^(?:\\.\\./)+", "");
        return baseUrl.concat(withoutParentSegments);
    }

    /**
     * Guesses the image MIME type from the file extension of a URL.
     *
     * <p>The extension is matched case-insensitively and any query string or fragment is
     * ignored, so {@code a.PNG} and {@code a.jpg?v=2} are recognized.
     *
     * @param url image URL or path; may be null
     * @return {@code image/png}, {@code image/jpeg} or {@code image/gif}, otherwise
     *         {@code application/octet-stream}
     */
    private String getMimeType(String url) {
        if (url == null) {
            return "application/octet-stream";
        }
        // Cut at the first '?' or '#', whichever comes first.
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = url.substring(0, end).toLowerCase(java.util.Locale.ROOT);

        if (path.endsWith(".png")) {
            return "image/png";
        }
        if (path.endsWith(".jpg") || path.endsWith(".jpeg")) {
            return "image/jpeg";
        }
        if (path.endsWith(".gif")) {
            return "image/gif";
        }
        return "application/octet-stream";
    }

    /**
     * Uploads the generated document through {@code fileFeign} and returns the id of the
     * stored file.
     *
     * <p>An alternative would be to stream the bytes straight back to the client as an
     * attachment; for a .docx the content type would then be
     * {@code application/vnd.openxmlformats-officedocument.wordprocessingml.document}.
     *
     * @param fileName name under which the document is uploaded
     * @param content  serialized document
     * @return the {@code id} entry of the upload response data, as a string
     * @throws UnsupportedEncodingException kept in the signature for existing callers; not
     *                                      thrown by the current implementation
     * @throws IllegalStateException        if the upload response carries no usable {@code id}
     */
    private String buildResponse(String fileName, byte[] content) throws UnsupportedEncodingException {
        // Upload to the file service (the original note says "MINISO" — presumably MinIO; confirm).
        MultipartFile multipartFile = convertByteArrayToMultipartFile(content, fileName);
        Result result = fileFeign.addFileByInfo(multipartFile);

        Object data = result == null ? null : result.getData();
        if (!(data instanceof Map)) {
            throw new IllegalStateException("File upload returned no data for " + fileName);
        }
        Object id = ((Map<?, ?>) data).get("id");
        if (id == null) {
            throw new IllegalStateException("File upload response has no id for " + fileName);
        }
        return id.toString();
    }
    /**
     * Wraps a byte array in an in-memory {@link MultipartFile}.
     *
     * <p>A {@code null} array is treated as empty content, so every accessor is safe to call
     * (previously only {@code isEmpty()} tolerated {@code null}).
     *
     * @param fileBytes file content; {@code null} is treated as an empty file
     * @param filename  value reported by {@code getOriginalFilename()}
     * @return a multipart file with form field name {@code "file"} and content type
     *         {@code application/octet-stream}
     */
    public MultipartFile convertByteArrayToMultipartFile(byte[] fileBytes, String filename) {
        final byte[] payload = fileBytes == null ? new byte[0] : fileBytes;
        return new MultipartFile() {
            @Override
            public String getName() {
                return "file"; // form field name
            }

            @Override
            public String getOriginalFilename() {
                return filename;
            }

            @Override
            public String getContentType() {
                return "application/octet-stream"; // generic binary; adjust if the receiver needs a specific type
            }

            @Override
            public boolean isEmpty() {
                return payload.length == 0;
            }

            @Override
            public long getSize() {
                return payload.length;
            }

            @Override
            public byte[] getBytes() throws IOException {
                return payload;
            }

            @Override
            public InputStream getInputStream() throws IOException {
                return new ByteArrayInputStream(payload);
            }

            @Override
            public void transferTo(File dest) throws IOException, IllegalStateException {
                try (FileOutputStream fos = new FileOutputStream(dest)) {
                    fos.write(payload);
                }
            }
        };
    }
}
相关推荐
小小年纪不学好11 分钟前
【60.组合总和】
java·算法·面试
Miku1612 分钟前
基于SpringAI实现简易聊天对话
java·ai编程
凤年徐14 分钟前
【C/C++】深入理解指针(六)
c语言·开发语言·c++·经验分享·笔记·指针
24k小善17 分钟前
FlinkJobmanager深度解析
java·大数据·flink·云计算
forestsea19 分钟前
Maven多模块工程版本管理:flatten-maven-plugin扁平化POM
java·maven
CodeFox20 分钟前
线上 nacos 挂了 !cp 模式下,naming server down 掉问题深度解析!
java·后端·架构
VvUppppp23 分钟前
动态代理与反射
java·后端
学了就忘23 分钟前
JavaScript 异步编程与请求取消全指南
开发语言·javascript·ecmascript
通南北27 分钟前
使用python实现自动化拉取压缩包并处理流程
开发语言·python·自动化
十五年专注C++开发34 分钟前
Qt中的全局函数讲解集合(全)
开发语言·c++·qt·算法