Java实现将富文本内容插入已有Word文档并下载(docx4j+jsoup)
需求描述:
最近公司项目需要开发一个功能:将前端保存的富文本内容和目录插入到已有的Word文档模板里,并提供下载功能。参考了很多方法,也踩了一些坑,最后使用docx4j+jsoup实现了。因为图片在富文本里保存的是相对路径,而docx4j无法直接把HTML中以路径引用的图片转换到Word里,所以需要先使用jsoup将富文本的标签解析出来并处理:把图片下载下来,并转换成base64编码格式。
引用依赖:
此处依赖是针对JDK8的,其实也写了一个JDK11的,提交代码的时候发现编译不通过,才想起公司运行的JDK版本是JDK1.8的。(一定要注意依赖版本)
xml
<!-- docx4j XHTML importer; its transitive jaxb-impl / jaxb-api are excluded and declared explicitly further down -->
<!-- NOTE(review): both docx4j-JAXB-Internal and docx4j-JAXB-ReferenceImpl are declared in this list; the docx4j 8.x README appears to ask for exactly one docx4j-JAXB-* artifact - confirm which one is intended -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-ImportXHTML</artifactId>
<version>8.3.10</version>
<exclusions>
<exclusion>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
</exclusion>
<exclusion>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-JAXB-Internal</artifactId>
<version>8.3.10</version>
<exclusions>
<exclusion>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Explicitly pinned newer JAXB dependencies -->
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.3.1</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>2.3.8</version>
</dependency>
<dependency>
<groupId>javax.activation</groupId>
<artifactId>activation</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-JAXB-ReferenceImpl</artifactId>
<version>8.3.10</version>
</dependency>
<!-- Other utilities: jsoup is used to parse the rich-text HTML -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
代码实现
java
// Process-wide cache keyed by the resolved image URL; each value is the replacement
// "src" string computed for that URL. Nothing in the visible code evicts entries,
// so the map grows with every distinct image URL (NOTE(review): consider a bound).
private static final Map<String, String> IMAGE_CACHE = new ConcurrentHashMap<>();
// Shared fixed-size pool (2 x available processors) on which the per-image tasks run.
// NOTE(review): no shutdown of this pool is visible in this part of the file.
private static final ExecutorService IMAGE_EXECUTOR = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2);
/**
 * Builds a Word document from the catalog tree and rich-text contents of one version,
 * inserts it into the {@code templates/sow_V2.0.docx} template, generates a table of
 * contents and hands the resulting bytes to {@code buildResponse}.
 *
 * @param fileName file name passed on to {@code buildResponse}
 * @param param    request parameters; {@code versionId} is read from it and falls back
 *                 to {@code "test"} when the key (or the whole map) is missing
 * @return the value returned by {@code buildResponse} for the generated document
 * @throws Exception if loading the template, converting the HTML, generating the TOC,
 *                   saving the package or uploading the result fails
 */
public String exportSowToWord(String fileName, HashMap<String, String> param)
        throws Exception {
    // 1. Load the catalog tree for the requested version.
    String versionId = param == null ? "test" : param.getOrDefault("versionId", "test");
    List<CatalogTressDTO> catalogList = zentaoProSowCatalogMapper.queryTreeMode(versionId);

    // Fetch the content rows of all catalogs in a single query.
    List<String> catalogIds = catalogList.stream()
            .map(CatalogTressDTO::getId)
            .collect(Collectors.toList());
    Map<String, ZentaoProSowContent> contentMap = new HashMap<>();
    if (!catalogIds.isEmpty()) {
        // Only query when there is at least one id: an IN condition built from an
        // empty collection does not produce a usable WHERE clause.
        contentMap = zentaoProSowContentMapper.selectList(
                        new LambdaQueryWrapper<ZentaoProSowContent>()
                                .in(ZentaoProSowContent::getCatalogId, catalogIds))
                .stream()
                // Keep the first row per catalog id instead of failing the whole
                // export with IllegalStateException when duplicates exist.
                .collect(Collectors.toMap(
                        ZentaoProSowContent::getCatalogId,
                        Function.identity(),
                        (first, second) -> first));
    }

    // 2. Concatenate heading + content for every catalog entry, in tree order.
    StringBuilder contentHtml = new StringBuilder();
    for (CatalogTressDTO catalog : catalogList) {
        if (StringUtils.isNotBlank(catalog.getIndentedTitle())) {
            contentHtml.append(buildHeadingTag(catalog));
        }
        ZentaoProSowContent content = contentMap.get(catalog.getId());
        if (content != null && StringUtils.isNotBlank(content.getContent())) {
            contentHtml.append(content.getContent());
        }
    }

    // 3. Wrap into a full HTML document and inline the images.
    String fullHtml = "<!DOCTYPE html><html><head><meta charset='UTF-8'></head><body>"
            + contentHtml + "</body></html>";
    String processedHtml = processHtmlWithImages(fullHtml);

    // 4. Load the template; the stream is closed as soon as the package is in memory.
    ClassPathResource templateResource = new ClassPathResource("templates/sow_V2.0.docx");
    WordprocessingMLPackage wordPackage;
    try (InputStream templateStream = templateResource.getInputStream()) {
        wordPackage = WordprocessingMLPackage.load(templateStream);
    }
    MainDocumentPart mainDoc = wordPackage.getMainDocumentPart();

    // Locate (and remove) the placeholder paragraph.
    int insertIndex = findInsertPosition(mainDoc);

    // The HTML declares charset UTF-8, so the bytes must be UTF-8 as well rather than
    // whatever the platform default charset happens to be.
    byte[] htmlBytes = processedHtml.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    mainDoc.addAltChunk(AltChunkType.Html, htmlBytes, mainDoc, insertIndex);
    mainDoc.convertAltChunks();

    // The TOC is generated after the headings have been converted into the document.
    generateTableOfContents(wordPackage, insertIndex);

    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    wordPackage.save(outputStream);
    return buildResponse(fileName, outputStream.toByteArray());
}
/**
 * Renders the catalog title as an HTML heading ({@code <h1>}..{@code <h6>}).
 *
 * <p>The level is clamped to the range 1-6; a missing or non-numeric level falls back
 * to 1 so that a single bad row cannot abort the export or emit an invalid tag such as
 * {@code <h0>}.
 *
 * <p>NOTE(review): the title is inserted verbatim; if titles can contain {@code <} or
 * {@code &} they probably need escaping - confirm what {@code getIndentedTitle()} holds.
 *
 * @param catalog catalog entry supplying the level and the indented title
 * @return the heading element as an HTML string
 */
private String buildHeadingTag(CatalogTressDTO catalog) {
    int level = 1;
    String rawLevel = catalog.getLevel();
    if (rawLevel != null) {
        try {
            int parsed = Integer.parseInt(rawLevel.trim());
            level = Math.max(1, Math.min(parsed, 6));
        } catch (NumberFormatException ignored) {
            // Not a number: keep the default top-level heading.
        }
    }
    return String.format("<h%d style='mso-style-name:标题%d'>%s</h%d>",
            level, level, catalog.getIndentedTitle(), level);
}
/**
 * Finds the first paragraph whose text contains the placeholder {@code 插入的内容},
 * removes it from the document body and returns the index at which the generated
 * content should be inserted.
 *
 * <p>The returned index is the slot after the element that followed the placeholder
 * (the caller places the table of contents one slot before it). It is capped at the
 * current list size so that a placeholder sitting at the very end of the body still
 * yields a valid insertion index instead of one past the end.
 *
 * @param mainDoc main document part of the template
 * @return insertion index, or the body size (append) when no placeholder exists
 */
private int findInsertPosition(MainDocumentPart mainDoc) {
    List<Object> content = mainDoc.getContent();
    for (int i = 0; i < content.size(); i++) {
        Object element = content.get(i);
        if (!(element instanceof P)) {
            continue;
        }
        String text = TextUtils.getText((P) element);
        if (text != null && text.contains("插入的内容")) {
            content.remove(i); // drop the placeholder paragraph
            // i + 1 may exceed the shrunken list when the placeholder was last.
            return Math.min(i + 1, content.size());
        }
    }
    return content.size(); // no placeholder: append at the end of the body
}
private void generateTableOfContents(WordprocessingMLPackage wordPackage, int insertIndex) throws Exception {
TocGenerator tocGenerator = new TocGenerator(wordPackage);
Toc.setTocHeadingText("目录");
tocGenerator.generateToc(insertIndex - 1, "TOC \\o \"1-3\" \\h \\z \\u ", true);
}
/**
 * Parses the HTML with jsoup, runs {@code processImageTag} for every {@code <img>}
 * element on {@code IMAGE_EXECUTOR}, waits for all of them and returns the
 * re-serialised document.
 *
 * @param html complete HTML document
 * @return the document HTML after all image tags have been processed
 */
private String processHtmlWithImages(String html) {
    Document doc = Jsoup.parse(html);

    // One asynchronous task per image element.
    CompletableFuture<?>[] imageTasks = doc.select("img").stream()
            .map(img -> CompletableFuture.runAsync(() -> processImageTag(img), IMAGE_EXECUTOR))
            .toArray(CompletableFuture[]::new);

    // Block until every image has been handled.
    CompletableFuture.allOf(imageTasks).join();

    return doc.html();
}
/**
 * Replaces the {@code src} of one {@code <img>} element with an inline base64 data
 * URI and sets {@code width}/{@code height} to 90% of the image's pixel size.
 *
 * <p>Differences from a naive implementation, all deliberate:
 * <ul>
 *   <li>No nested task is submitted to {@code IMAGE_EXECUTOR}. This method already
 *       runs on that fixed pool; blocking here on another task of the same pool
 *       starves it as soon as there are as many images as threads.</li>
 *   <li>The image is downloaded once; its dimensions are read from the bytes that were
 *       already fetched instead of from a second HTTP request.</li>
 *   <li>Only successful downloads are cached, so a transient failure is retried on
 *       the next export instead of being remembered as {@code #error}.</li>
 *   <li>A {@code src} that already is a data URI is left untouched.</li>
 * </ul>
 *
 * <p>On any failure the {@code src} is set to {@code #error}.
 *
 * @param img the image element to rewrite in place
 */
private void processImageTag(Element img) {
    try {
        String src = img.attr("src");
        if (StringUtils.isBlank(src)) {
            return;
        }
        if (src.startsWith("data:")) {
            return; // already embedded, nothing to download
        }

        String networkUrl = convertToNetworkUrl(src);
        String dataUri = IMAGE_CACHE.get(networkUrl);
        if (dataUri == null) {
            // Fetch outside of computeIfAbsent so slow network I/O never runs inside
            // the map's compute section; a rare duplicate download is acceptable.
            dataUri = fetchImageBase64(networkUrl);
            if (dataUri != null && dataUri.startsWith("data:")) {
                IMAGE_CACHE.put(networkUrl, dataUri);
            }
        }
        if (dataUri == null || !dataUri.startsWith("data:")) {
            img.attr("src", "#error");
            return;
        }

        // Dimensions are optional: when decoding fails the image is still embedded.
        BufferedImage image = null;
        try {
            String payload = dataUri.substring(dataUri.indexOf(',') + 1);
            byte[] imageBytes = Base64.getDecoder().decode(payload);
            image = ImageIO.read(new ByteArrayInputStream(imageBytes));
        } catch (IOException | IllegalArgumentException ignored) {
            // Unreadable image data: skip the explicit width/height.
        }
        if (image != null) {
            int scaledWidth = (int) (image.getWidth() * 0.9);
            int scaledHeight = (int) (image.getHeight() * 0.9);
            img.attr("width", String.valueOf(scaledWidth))
                    .attr("height", String.valueOf(scaledHeight));
        }
        img.attr("src", dataUri);
    } catch (Exception e) {
        // Best effort: one broken image must not fail the whole export.
        img.attr("src", "#error");
    }
}
/**
 * Downloads an image and returns it as a {@code data:<mime>;base64,...} URI.
 *
 * <p>Connect and read timeouts are set explicitly; without them an unresponsive
 * server would block the calling thread indefinitely.
 *
 * @param imageUrl absolute URL of the image
 * @return the data URI, or {@code "#error"} when the download fails or is empty
 */
private String fetchImageBase64(String imageUrl) {
    final int connectTimeoutMillis = 3_000;
    final int readTimeoutMillis = 10_000;
    try {
        java.net.URLConnection connection = new URL(imageUrl).openConnection();
        connection.setConnectTimeout(connectTimeoutMillis);
        connection.setReadTimeout(readTimeoutMillis);
        try (InputStream in = connection.getInputStream()) {
            byte[] bytes = IOUtils.toByteArray(in);
            if (bytes.length == 0) {
                return "#error"; // an empty body is not a usable image
            }
            String mimeType = getMimeType(imageUrl);
            return "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(bytes);
        }
    } catch (Exception e) {
        // Callers treat "#error" as the failure marker.
        return "#error";
    }
}
// The utility methods below are kept unchanged from the original implementation.
/**
 * Turns the relative image path stored in the rich text into an absolute URL by
 * dropping any leading {@code ../} segments and prefixing the fixed server address.
 *
 * <p>NOTE(review): the host is hard-coded; presumably it should come from
 * configuration - confirm.
 *
 * @param relativePath image path as stored in the rich text
 * @return absolute URL on the image server
 */
private String convertToNetworkUrl(String relativePath) {
    final String serverRoot = "http://10.80.88.93:8090/";
    // Only leading "../" segments are removed; everything after them is kept as is.
    final String pathFromRoot = relativePath.replaceFirst("^(?:\\.\\./)+", "");
    return serverRoot.concat(pathFromRoot);
}
/**
 * Guesses the MIME type of an image from the file extension in its URL.
 *
 * <p>The match ignores case and any query string or fragment, so
 * {@code photo.PNG} and {@code photo.jpg?v=3} are recognised.
 *
 * @param url image URL or path; may be {@code null}
 * @return the image MIME type, or {@code application/octet-stream} when the
 *         extension is unknown or {@code url} is {@code null}
 */
private String getMimeType(String url) {
    if (url == null) {
        return "application/octet-stream";
    }
    // Cut off "?query" and "#fragment" before looking at the extension.
    int end = url.length();
    int queryStart = url.indexOf('?');
    if (queryStart >= 0) {
        end = queryStart;
    }
    int fragmentStart = url.indexOf('#');
    if (fragmentStart >= 0 && fragmentStart < end) {
        end = fragmentStart;
    }
    String path = url.substring(0, end).toLowerCase(java.util.Locale.ROOT);

    if (path.endsWith(".png")) {
        return "image/png";
    }
    if (path.endsWith(".jpg") || path.endsWith(".jpeg")) {
        return "image/jpeg";
    }
    if (path.endsWith(".gif")) {
        return "image/gif";
    }
    if (path.endsWith(".bmp")) {
        return "image/bmp";
    }
    if (path.endsWith(".webp")) {
        return "image/webp";
    }
    return "application/octet-stream";
}
/**
 * Uploads the generated document through {@code fileFeign} and returns the id found
 * in the upload result.
 *
 * <p>An alternative (not used here) is to return the bytes directly to the client as
 * an attachment response instead of uploading them.
 *
 * @param fileName original file name of the uploaded document
 * @param content  document bytes
 * @return the {@code id} entry of the upload result, as a string
 * @throws UnsupportedEncodingException kept for signature compatibility
 * @throws IllegalStateException if the upload result carries no data map or no id
 */
private String buildResponse(String fileName, byte[] content) throws UnsupportedEncodingException {
    // Upload to the file storage service.
    MultipartFile multipartFile = convertByteArrayToMultipartFile(content, fileName);
    Result result = fileFeign.addFileByInfo(multipartFile);

    // Validate the response shape explicitly so a failed upload surfaces with a
    // readable message rather than a NullPointerException or ClassCastException.
    Object data = result == null ? null : result.getData();
    if (!(data instanceof Map)) {
        throw new IllegalStateException("File upload returned no data for file: " + fileName);
    }
    Object id = ((Map<?, ?>) data).get("id");
    if (id == null) {
        throw new IllegalStateException("File upload result contains no id for file: " + fileName);
    }
    return id.toString();
}
/**
 * Wraps a byte array in an in-memory {@link MultipartFile} with the form field name
 * {@code file} and content type {@code application/octet-stream}.
 *
 * <p>A {@code null} array is treated as an empty file so that every accessor behaves
 * consistently with {@code isEmpty()} instead of throwing a NullPointerException.
 *
 * @param fileBytes file content; {@code null} is treated as empty
 * @param filename  value reported by {@code getOriginalFilename()}
 * @return a {@link MultipartFile} view of the bytes
 */
public MultipartFile convertByteArrayToMultipartFile(byte[] fileBytes, String filename) {
    final byte[] payload = fileBytes == null ? new byte[0] : fileBytes;
    return new MultipartFile() {
        @Override
        public String getName() {
            return "file"; // form field name
        }

        @Override
        public String getOriginalFilename() {
            return filename;
        }

        @Override
        public String getContentType() {
            return "application/octet-stream"; // generic binary stream
        }

        @Override
        public boolean isEmpty() {
            return payload.length == 0;
        }

        @Override
        public long getSize() {
            return payload.length;
        }

        @Override
        public byte[] getBytes() throws IOException {
            return payload;
        }

        @Override
        public InputStream getInputStream() throws IOException {
            return new ByteArrayInputStream(payload);
        }

        @Override
        public void transferTo(File dest) throws IOException, IllegalStateException {
            try (FileOutputStream fos = new FileOutputStream(dest)) {
                fos.write(payload);
            }
        }
    };
}
}