Html to Word(docx) 自定义样式解析,可以的话点一点赞哦!😯
ok,多了不说,少了不唠,忙坏了。遇到了一个界面样式1:1复刻导出到docx的需求,寻求无果只能jsoup+poi手搓。
1. 引用的依赖(只关注核心依赖)
implementation("org.jsoup:jsoup:1.15.+")
implementation("org.apache.poi:poi-ooxml:5.+")
具体版本如图:

2. 思路:使用 jsoup 解析 HTML,选出需要处理的标签,再根据标签类型分别转换成 Word 中对应的内容。当当当当,直接看代码:
title, p, img, h1, h2, h3, h4, h5, h6, br, table (因为目前只用了这些,多了不想写)
2.1 转换入口方法
java
/**
 * Converts the submitted HTML into a .docx document and returns it as a file download.
 *
 * <p>Side effect: when the request has no usable title, the title is set to "未命名".
 *
 * @param htmlWord export request carrying the HTML content, an optional file title and
 *                 the footer flag
 * @return a response whose body is the generated document, sent as an attachment
 */
public Mono<ResponseEntity<Resource>> htmlToWord(ExportHtmlWord htmlWord) {
    XWPFDocument document = new XWPFDocument();
    // Page margins are given in centimetres and converted by pageMarConvert.
    HTWConverter.pageMar(document,
            HTWConverter.pageMarConvert(2.8),
            HTWConverter.pageMarConvert(2.6),
            HTWConverter.pageMarConvert(3.7),
            HTWConverter.pageMarConvert(3.5));
    // Parse the HTML content and write it into the document.
    HTWConverter.processHtmlContent(document, htmlWord.getContent());
    if (Boolean.TRUE.equals(htmlWord.getIsFooter())) {
        // Page-number footer rendered as "- N -".
        HTWConverter.setFooter(document, "宋体", 14, null, false, "- ", " -");
    }
    // A whitespace-only title is treated like a missing one.
    if (!StringUtils.hasText(htmlWord.getTitle())) {
        htmlWord.setTitle("未命名");
    }
    String title = htmlWord.getTitle();
    String suffix = ".docx";
    // Only a real (case-insensitive) suffix counts; ".docx" in the middle of the name does not.
    boolean hasSuffix = title.regionMatches(true, title.length() - suffix.length(),
            suffix, 0, suffix.length());
    String fileName = hasSuffix ? title : title + suffix;
    // URLEncoder produces form encoding, where a space becomes '+'. Clients that
    // percent-decode the header would show a literal '+', so emit %20 instead
    // (a real '+' in the name is already encoded as %2B).
    String encodedName = URLEncoder.encode(fileName, StandardCharsets.UTF_8).replace("+", "%20");
    return Mono.just(ResponseEntity.ok().contentType(MediaType.APPLICATION_OCTET_STREAM)
            .header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + encodedName + "\"")
            .body(HTWConverter.readDoc(document)));
}
2.1.1 接参类
java
/**
 * Request payload for exporting HTML content as a Word document.
 *
 * @author <a href="https://github.com/motcs">motcs</a>
 * @since 2026-04-03 (Friday)
 */
@Data
@Schema(title = "html转word|ppt")
public class ExportHtmlWord implements Serializable {
// File name of the exported document.
@Schema(description = "导出后文件名")
private String title;
// HTML content to export; validation rejects blank values.
@NotBlank(message = "导出的内容[content]不能为空!")
@Schema(description = "需要导出的内容")
private String content;
// Whether a footer should be added to the document; may be null.
@Schema(description = "是否设置页脚")
private Boolean isFooter;
}
2.2 HTML 解析入口方法
java
/**
 * Parses HTML and appends every supported element to the document, in document order.
 *
 * <p>Supported tags: title, p, img, h1-h6, br, table, ul, ol. Tables and lists are rendered
 * as a whole from their root element, so text elements nested inside them are skipped here;
 * otherwise their text would appear twice. Images are always rendered as their own
 * paragraph, including images nested in tables or lists.
 *
 * @param document    document to append to
 * @param htmlContent HTML to parse; null or empty content adds nothing
 */
public static void processHtmlContent(XWPFDocument document, String htmlContent) {
    if (!StringUtils.hasLength(htmlContent)) {
        return;
    }
    Document doc = Jsoup.parse(htmlContent);
    // ul/ol must be part of the selector, otherwise the list branch can never run.
    Elements elements = doc.select("title, p, img, h1, h2, h3, h4, h5, h6, br, table, ul, ol");
    for (Element element : elements) {
        String tag = element.tagName();
        Element parent = element.parent();
        boolean nestedInContainer = parent != null && parent.closest("table, ul, ol") != null;
        if (nestedInContainer && !tag.equals("img")) {
            // Already covered by the enclosing table/list renderer.
            continue;
        }
        switch (tag) {
            case "title" -> addParagraphTitleLabel(document, element);
            case "br" -> {
                // A line break becomes an empty paragraph.
                XWPFParagraph paragraph = document.createParagraph();
                XWPFRun run = paragraph.createRun();
                run.setText("");
            }
            case "p" -> addParagraphPLabel(document, element);
            case "img" -> {
                String src = element.attr("src");
                String dataPrefix = "data:image/";
                // A data URI is recognised by its prefix, not by a substring anywhere in the URL.
                boolean dataUri = src.trim().regionMatches(true, 0, dataPrefix, 0, dataPrefix.length());
                if (dataUri) {
                    try {
                        addImageBase64ToDocument(document, ImageUrl.builder()
                                .style(element.attr("style"))
                                .url(src).build());
                    } catch (Exception e) {
                        // Best effort: a broken inline image must not abort the whole export.
                        log.warn("解析不了图片:{}", e.getMessage(), e);
                    }
                } else {
                    addImageToDocument(document, ImageUrl.builder()
                            .style(element.attr("style"))
                            .url(src).build());
                }
            }
            case "table" -> addParagraphTableToDocument(document, element);
            case "ul", "ol" -> addParagraphUlToDocument(document, tag, element);
            default -> {
                if (isHeadingTag(tag)) {
                    addParagraphH1ToDocument(document, tag, element.text());
                } else {
                    addParagraphPLabel3(document, element);
                }
            }
        }
    }
}
2.3 每个标签使用的解析方法,一行代码都没删除,直接就可以用,不过需要根据自己业务内容进行调测
java
/**
 * Appends one paragraph per {@code li} found under the given list element.
 *
 * <p>Items of an unordered list ({@code ul}) are prefixed with "○ ", items of an ordered
 * list ({@code ol}) with their 1-based position ("1. ", "2. ", ...), and items of any other
 * tag with "● ". Every {@code li} descendant is rendered, including those of nested lists,
 * and numbering runs continuously over all of them.
 *
 * @param document document to append to
 * @param tagName  tag name of the list element ("ul" or "ol")
 * @param element  the list element whose items are rendered
 */
public static void addParagraphUlToDocument(XWPFDocument document, String tagName, Element element) {
    boolean unordered = tagName.equals("ul");
    boolean ordered = tagName.equals("ol");
    int position = 0;
    for (Element li : element.select("li")) {
        position++;
        String marker;
        if (unordered) {
            marker = "○ ";
        } else if (ordered) {
            // Ordered lists show the item number instead of a bullet.
            marker = position + ". ";
        } else {
            marker = "● ";
        }
        XWPFParagraph paragraph = document.createParagraph();
        paragraph.setStyle("ListParagraph");
        XWPFRun run = paragraph.createRun();
        run.setText(marker + li.text());
        run.setFontFamily("黑体");
        run.setFontSize(FontSizeEnum.z5.getFontSize());
    }
}
/**
 * Tells whether a tag name is an HTML heading tag, i.e. exactly {@code h1} to {@code h6}
 * (case-insensitive).
 *
 * @param tagName tag name to test; null is treated as "not a heading"
 * @return true only for h1, h2, h3, h4, h5 or h6
 */
public static boolean isHeadingTag(String tagName) {
    if (tagName == null || tagName.length() != 2) {
        return false;
    }
    // Plain character checks: no regex compilation per call and no locale-dependent casing.
    char prefix = tagName.charAt(0);
    char level = tagName.charAt(1);
    return (prefix == 'h' || prefix == 'H') && level >= '1' && level <= '6';
}
/**
 * Appends a body paragraph containing plain text, using font 仿宋GB2312 at size
 * {@code FontSizeEnum.z3} and a first-line indent derived from the font size.
 *
 * @param document document to append to
 * @param text     body text
 */
public static void addParagraphPLabel(XWPFDocument document, String text) {
    XWPFParagraph bodyParagraph = document.createParagraph();
    XWPFRun bodyRun = bodyParagraph.createRun();
    bodyRun.setText(text);
    bodyRun.setFontFamily("仿宋GB2312");
    bodyRun.setFontSize(FontSizeEnum.z3.getFontSize());
    // First-line indent: font size * 2 * 20, falling back to 420 when no size is available.
    Double sizeInPoints = bodyRun.getFontSizeAsDouble();
    int firstLineIndent;
    if (ObjectUtils.isEmpty(sizeInPoints)) {
        firstLineIndent = 420;
    } else {
        firstLineIndent = sizeInPoints.intValue() * 2 * 20;
    }
    bodyParagraph.setIndentationFirstLine(firstLineIndent);
}
/**
 * Appends the document title as a centered paragraph in font 小标宋 at size
 * {@code FontSizeEnum.z2}.
 *
 * @param document document to append to
 * @param text     element whose text becomes the title
 */
public static void addParagraphTitleLabel(XWPFDocument document, Element text) {
    XWPFParagraph titleParagraph = document.createParagraph();
    titleParagraph.setAlignment(CENTER);
    XWPFRun titleRun = titleParagraph.createRun();
    // The title uses a fixed font and size; inline styles of the element are not read.
    titleRun.setFontFamily("小标宋");
    titleRun.setFontSize(FontSizeEnum.z2.getFontSize());
    String titleText = text.text();
    titleRun.setText(titleText);
}
/**
 * Appends a paragraph for a {@code p} element. The paragraph is always created; it is
 * filled only when the element has text, with {@code FontSizeEnum.z2} as the default size.
 *
 * @param document document to append to
 * @param text     element to render
 */
public static void addParagraphPLabel(XWPFDocument document, Element text) {
    XWPFParagraph target = document.createParagraph();
    boolean nothingToWrite = ObjectUtils.isEmpty(text) || !StringUtils.hasLength(text.text());
    if (nothingToWrite) {
        // The empty paragraph stays in the document.
        return;
    }
    setTextElement(target, text, FontSizeEnum.z2.getFontSize());
}
/**
 * Appends a paragraph for an element, like the {@code p} variant but with
 * {@code FontSizeEnum.z3} as the default size. The paragraph is always created and is
 * filled only when the element has text.
 *
 * @param document document to append to
 * @param text     element to render
 */
public static void addParagraphPLabel3(XWPFDocument document, Element text) {
    XWPFParagraph target = document.createParagraph();
    boolean nothingToWrite = ObjectUtils.isEmpty(text) || !StringUtils.hasLength(text.text());
    if (nothingToWrite) {
        // The empty paragraph stays in the document.
        return;
    }
    setTextElement(target, text, FontSizeEnum.z3.getFontSize());
}
/** Extracts the heading level (1-6) from a tag name such as {@code h2}; compiled once. */
private static final Pattern HEADING_LEVEL_PATTERN = Pattern.compile("h([1-6])");

/**
 * Appends a heading paragraph in font 黑体. Levels 1 to 6 map to
 * {@code FontSizeEnum.z1} to {@code FontSizeEnum.z6}.
 *
 * @param document document to append to
 * @param tagName  heading tag name (h1 to h6); when no level can be read from it,
 *                 {@code FontSizeEnum.z3} is used
 * @param text     heading text
 */
public static void addParagraphH1ToDocument(XWPFDocument document, String tagName, String text) {
    XWPFParagraph paragraph = document.createParagraph();
    XWPFRun run = paragraph.createRun();
    run.setText(text);
    Matcher matcher = HEADING_LEVEL_PATTERN.matcher(tagName);
    // Level 3 is the fallback size for tag names without a recognisable level.
    int level = matcher.find() ? Integer.parseInt(matcher.group(1)) : 3;
    switch (level) {
        case 1 -> run.setFontSize(FontSizeEnum.z1.getFontSize());
        case 2 -> run.setFontSize(FontSizeEnum.z2.getFontSize());
        case 4 -> run.setFontSize(FontSizeEnum.z4.getFontSize());
        case 5 -> run.setFontSize(FontSizeEnum.z5.getFontSize());
        case 6 -> run.setFontSize(FontSizeEnum.z6.getFontSize());
        default -> run.setFontSize(FontSizeEnum.z3.getFontSize());
    }
    run.setFontFamily("黑体");
}
/**
 * Appends an inline (data URI) image as a centered paragraph.
 *
 * <p>The picture type is taken from the media type declared in the data URI
 * (png, jpeg, gif, bmp). Only when that is missing or unknown is it guessed from the
 * decoded image. If the picture cannot be inserted, the paragraph shows "图片加载失败".
 *
 * @param document document to append to
 * @param image1   image information; its URL must be a base64 data URI
 * @throws IllegalArgumentException if the URL has no ',' separator or the payload is not
 *                                  valid base64
 */
public static void addImageBase64ToDocument(XWPFDocument document, ImageUrl image1) {
    String dataUri = image1.getUrl();
    int comma = dataUri == null ? -1 : dataUri.indexOf(',');
    if (comma < 0) {
        throw new IllegalArgumentException("Malformed data URI: no ',' before the base64 payload");
    }
    String header = dataUri.substring(0, comma);
    byte[] imageData = Base64.getDecoder().decode(dataUri.substring(comma + 1).trim());
    XWPFParagraph paragraph = document.createParagraph();
    paragraph.setAlignment(ParagraphAlignment.CENTER);
    XWPFRun run = paragraph.createRun();
    try (ByteArrayInputStream bis = new ByteArrayInputStream(imageData)) {
        PictureType type = pictureTypeFromDataUriHeader(header);
        if (type == null) {
            // Fallback heuristic; PNG is kept when the image cannot be decoded at all.
            type = PictureType.PNG;
            try {
                BufferedImage bufferedImage = ImageIO.read(new ByteArrayInputStream(imageData));
                type = getImageType(bufferedImage);
            } catch (Exception e) {
                // Deliberate best effort: an undecodable image is still embedded as PNG.
                log.debug("Could not decode inline image to detect its type: {}", e.getMessage());
            }
        }
        // NOTE(review): the cast applies to getWidth()/getHeight() before the multiplication
        // by 50; the unit of those values is not visible here - confirm the intended scaling.
        run.addPicture(bis, type, "base64-image",
                Units.pixelToEMU((int) image1.getWidth() * 50),
                Units.pixelToEMU((int) image1.getHeight() * 50));
    } catch (Exception e) {
        log.error("Base64图片插入失败", e);
        run.setText("图片加载失败");
    }
}

/**
 * Maps the media type of a data URI header such as {@code data:image/png;base64} to a
 * picture type.
 *
 * @param header the part of the data URI before the first ','
 * @return the matching picture type, or null when the subtype is missing or not recognised
 */
private static PictureType pictureTypeFromDataUriHeader(String header) {
    int slash = header.indexOf('/');
    if (slash < 0) {
        return null;
    }
    int end = header.indexOf(';', slash);
    String subtype = (end < 0 ? header.substring(slash + 1) : header.substring(slash + 1, end)).trim();
    if (subtype.equalsIgnoreCase("png")) {
        return PictureType.PNG;
    }
    if (subtype.equalsIgnoreCase("jpeg") || subtype.equalsIgnoreCase("jpg")) {
        return PictureType.JPEG;
    }
    if (subtype.equalsIgnoreCase("gif")) {
        return PictureType.GIF;
    }
    if (subtype.equalsIgnoreCase("bmp")) {
        return PictureType.BMP;
    }
    return null;
}
/**
 * Downloads an image from an http(s) URL and appends it as a centered paragraph.
 *
 * <p>The picture type is detected from the file signature (PNG, JPEG, GIF, BMP), with a
 * fallback guess from the decoded image. On any failure a paragraph with
 * "图片加载失败:" followed by the URL is appended instead.
 *
 * <p>SECURITY: the URL comes from the HTML being converted. Only http and https are
 * accepted so that schemes such as {@code file:} cannot be used to embed local files.
 * NOTE(review): requests to internal hosts are still possible; add an allow-list if the
 * HTML is untrusted.
 *
 * @param document document to append to
 * @param image1   image information; its URL is the download location
 */
public static void addImageToDocument(XWPFDocument document, ImageUrl image1) {
    String imageUrl = image1.getUrl();
    try {
        URI uri = URI.create(imageUrl.trim());
        String scheme = uri.getScheme();
        if (scheme == null || !(scheme.equalsIgnoreCase("http") || scheme.equalsIgnoreCase("https"))) {
            throw new IllegalArgumentException("Unsupported image URL scheme: " + scheme);
        }
        var connection = uri.toURL().openConnection();
        // Without timeouts an unresponsive server would block the export indefinitely.
        connection.setConnectTimeout(5_000);
        connection.setReadTimeout(15_000);
        byte[] imageBytes;
        try (InputStream urlStream = connection.getInputStream()) {
            imageBytes = urlStream.readAllBytes();
        }
        if (imageBytes.length == 0) {
            throw new IllegalStateException("Image download returned no data");
        }
        // Prefer the real file signature; the alpha-channel guess mislabels opaque PNG/GIF/BMP.
        PictureType pictureType = sniffPictureType(imageBytes);
        if (pictureType == null) {
            BufferedImage bufferedImage = ImageIO.read(new ByteArrayInputStream(imageBytes));
            pictureType = getImageType(bufferedImage);
        }
        String ext = getExtensionByType(pictureType);
        XWPFParagraph paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.CENTER);
        XWPFRun run = paragraph.createRun();
        try (InputStream is = new ByteArrayInputStream(imageBytes)) {
            // NOTE(review): the cast applies before the multiplication by 50; the unit of
            // getWidth()/getHeight() is not visible here - confirm the intended scaling.
            run.addPicture(is, pictureType,
                    "image." + ext,
                    Units.pixelToEMU((int) image1.getWidth() * 50),
                    Units.pixelToEMU((int) image1.getHeight() * 50)
            );
        }
    } catch (Exception e) {
        log.error("下载/插入图片失败 URL:{}", imageUrl, e);
        addParagraphPLabel(document, "图片加载失败:" + imageUrl);
    }
}

/**
 * Detects the picture type from the leading signature bytes.
 *
 * @param data raw image bytes
 * @return PNG, JPEG, GIF or BMP, or null when the signature is not recognised
 */
private static PictureType sniffPictureType(byte[] data) {
    if (hasMagic(data, 0x89, 0x50, 0x4E, 0x47)) {
        return PictureType.PNG;
    }
    if (hasMagic(data, 0xFF, 0xD8, 0xFF)) {
        return PictureType.JPEG;
    }
    if (hasMagic(data, 0x47, 0x49, 0x46, 0x38)) {
        return PictureType.GIF;
    }
    if (hasMagic(data, 0x42, 0x4D)) {
        return PictureType.BMP;
    }
    return null;
}

/** Returns true when {@code data} starts with the given unsigned byte values. */
private static boolean hasMagic(byte[] data, int... magic) {
    if (data.length < magic.length) {
        return false;
    }
    for (int k = 0; k < magic.length; k++) {
        if ((data[k] & 0xFF) != magic[k]) {
            return false;
        }
    }
    return true;
}
/**
 * Returns the file extension (without dot) used for a picture type: "png", "gif" or
 * "bmp" for those types, "jpg" for everything else.
 */
private static String getExtensionByType(PictureType type) {
    String extension;
    switch (type) {
        case PNG:
            extension = "png";
            break;
        case GIF:
            extension = "gif";
            break;
        case BMP:
            extension = "bmp";
            break;
        default:
            extension = "jpg";
            break;
    }
    return extension;
}
/**
 * Chooses a picture type for a decoded image: JPEG when the image exists and its colour
 * model has no alpha channel, PNG otherwise (including a null image).
 */
private static PictureType getImageType(BufferedImage image) {
    boolean opaque = image != null && !image.getColorModel().hasAlpha();
    return opaque ? PictureType.JPEG : PictureType.PNG;
}
/**
 * Renders an HTML table as a Word table of 100% width.
 *
 * <p>Each {@code tr} of this table becomes one row; {@code th} and {@code td} cells are
 * handled alike in every row. The column count is the widest row, counting
 * {@code colspan}. Rows of nested tables are not laid out separately. Row spans are not
 * supported. A table without rows or cells adds nothing.
 *
 * @param document document to append to
 * @param element  the {@code table} element to render
 */
public static void addParagraphTableToDocument(XWPFDocument document, Element element) {
    // Keep only rows of this table; select("tr") would also return rows of nested tables.
    Elements rows = new Elements();
    for (Element tr : element.select("tr")) {
        if (!isInsideNestedTable(tr, element)) {
            rows.add(tr);
        }
    }
    // The table must be as wide as its widest row, whether the cells are th or td.
    int columns = 0;
    for (Element tr : rows) {
        int width = 0;
        for (Element htmlCell : tr.children()) {
            if (isHtmlTableCell(htmlCell)) {
                width += htmlColumnSpan(htmlCell);
            }
        }
        columns = Math.max(columns, width);
    }
    if (rows.isEmpty() || columns == 0) {
        return;
    }
    XWPFTable wordTable = document.createTable(rows.size(), columns);
    wordTable.setWidth("100%");
    int rowIndex = 0;
    for (Element tr : rows) {
        XWPFTableRow row = wordTable.getRow(rowIndex);
        row.setHeight(0);
        int columnIndex = 0;
        for (Element htmlCell : tr.children()) {
            if (!isHtmlTableCell(htmlCell)) {
                continue;
            }
            XWPFTableCell cell = row.getCell(columnIndex);
            if (cell == null) {
                cell = row.createCell();
            }
            setCellStylesCentered(cell);
            // Merges the spanned cells when colspan is set and returns the next free column.
            columnIndex = horizontalMergeTable(wordTable, rowIndex, columnIndex, htmlCell);
            setTextElement(cell.getParagraphs().getFirst(), htmlCell, FontSizeEnum.z3.getFontSize());
        }
        rowIndex++;
    }
}

/** Returns true for {@code th} and {@code td} elements. */
private static boolean isHtmlTableCell(Element candidate) {
    String tag = candidate.tagName();
    return tag.equals("th") || tag.equals("td");
}

/** Returns the cell's colspan, or 1 when the attribute is absent, not a number or below 1. */
private static int htmlColumnSpan(Element htmlCell) {
    String raw = htmlCell.attr("colspan").trim();
    if (raw.isEmpty()) {
        return 1;
    }
    try {
        return Math.max(1, Integer.parseInt(raw));
    } catch (NumberFormatException e) {
        return 1;
    }
}

/** Returns true when another {@code table} lies between {@code node} and {@code root}. */
private static boolean isInsideNestedTable(Element node, Element root) {
    for (Element ancestor = node.parent(); ancestor != null && ancestor != root; ancestor = ancestor.parent()) {
        if (ancestor.tagName().equals("table")) {
            return true;
        }
    }
    return false;
}
/**
 * Applies the {@code colspan} of an HTML cell to the Word table and returns the column
 * index at which the next cell has to be written.
 *
 * <p>A missing, non-numeric or smaller-than-1 colspan counts as 1, so the returned index
 * always moves forward by at least one column.
 *
 * @param wordTable table being filled
 * @param rowIndex  index of the row being filled
 * @param i         column index of the current cell
 * @param td        the {@code td} or {@code th} element to inspect
 * @return column index for the next cell
 */
private static int horizontalMergeTable(XWPFTable wordTable, int rowIndex, int i, Element td) {
    int colspan = 1;
    String raw = td.attr("colspan").trim();
    if (!raw.isEmpty()) {
        try {
            colspan = Math.max(1, Integer.parseInt(raw));
        } catch (NumberFormatException e) {
            // An invalid colspan is treated like a normal single-column cell.
            log.debug("Ignoring invalid colspan value: {}", raw);
        }
    }
    if (colspan > 1) {
        // Merge the current cell with the following colspan - 1 cells.
        mergeCellsHorizontal(wordTable, rowIndex, i, i + colspan - 1);
    }
    return i + colspan;
}
/**
 * Adds a default footer with a centered paragraph that shows the page number.
 *
 * @param document   document to add the footer to
 * @param fontFamily font family
 * @param fontSize   font size
 * @param color      font colour
 * @param bold       whether the text is bold
 * @param prefix     text placed before the page number
 * @param suffix     text placed after the page number
 */
public static void setFooter(XWPFDocument document, String fontFamily,
        int fontSize, String color, boolean bold, String prefix, String suffix) {
    // HeaderFooterType.DEFAULT: the original author notes this footer applies to all pages.
    XWPFParagraph footerParagraph = document.createFooter(HeaderFooterType.DEFAULT).createParagraph();
    footerParagraph.setAlignment(ParagraphAlignment.CENTER);
    appendPageNumber(footerParagraph, fontFamily, fontSize, color, bold, prefix, suffix);
}
/**
 * Appends a page-number field to the paragraph, surrounded by a prefix and a suffix.
 *
 * <p>Five runs are appended in order: prefix, field begin, field instruction
 * ({@code PAGE}), field end, suffix.
 *
 * @param paragraph  paragraph to append to
 * @param fontFamily font family
 * @param fontSize   font size
 * @param color      font colour; "000000" is used when empty
 * @param bold       whether the text is bold
 * @param prefix     text placed before the page number
 * @param suffix     text placed after the page number
 */
public static void appendPageNumber(XWPFParagraph paragraph, String fontFamily, int fontSize,
        String color, boolean bold, String prefix, String suffix) {
    String effectiveColor = ObjectUtils.isEmpty(color) ? "000000" : color;

    XWPFRun prefixRun = paragraph.createRun();
    setStyle(prefixRun, fontFamily, fontSize, bold, prefix, effectiveColor);

    XWPFRun fieldBeginRun = paragraph.createRun();
    fieldBeginRun.getCTR().addNewFldChar().setFldCharType(STFldCharType.Enum.forString("begin"));

    XWPFRun numberRun = paragraph.createRun();
    CTText instruction = numberRun.getCTR().addNewInstrText();
    instruction.setStringValue("PAGE \\* MERGEFORMAT");
    instruction.setSpace(SpaceAttribute.Space.Enum.forString("preserve"));
    setStyle(numberRun, fontFamily, fontSize, bold, null, effectiveColor);

    XWPFRun fieldEndRun = paragraph.createRun();
    fieldEndRun.getCTR().addNewFldChar().setFldCharType(STFldCharType.Enum.forString("end"));

    XWPFRun suffixRun = paragraph.createRun();
    setStyle(suffixRun, fontFamily, fontSize, bold, suffix, effectiveColor);
}
/**
 * Applies font settings, and optionally text, to a run.
 *
 * @param run        run to style
 * @param fontFamily font family
 * @param fontSize   font size
 * @param bold       whether the text is bold
 * @param text       text to set; skipped when null or empty
 * @param color      font colour; "000000" is used when null or empty
 */
public static void setStyle(XWPFRun run, String fontFamily, int fontSize, boolean bold, String text, String color) {
    String effectiveColor = StringUtils.hasLength(color) ? color : "000000";
    boolean hasText = StringUtils.hasLength(text);
    run.setBold(bold);
    run.setFontFamily(fontFamily);
    run.setFontSize(fontSize);
    if (hasText) {
        run.setText(text);
    }
    run.setColor(effectiveColor);
}
/**
 * Merges a range of cells of one row horizontally.
 *
 * <p>The first cell starts the merge and every following cell continues it. Existing cell
 * properties are reused, so a cell never ends up with a second {@code tcPr} element.
 * Merging stops at the end of the row when the range is longer than the row.
 *
 * @param table    table to modify
 * @param row      index of the row to merge in
 * @param fromCell index of the first cell of the merge
 * @param toCell   index of the last cell of the merge (inclusive), e.g. from 1 to 2 merges
 *                 cell 1 with the one cell after it
 */
public static void mergeCellsHorizontal(XWPFTable table, int row, int fromCell, int toCell) {
    XWPFTableRow tableRow = table.getRow(row);
    if (tableRow == null) {
        return;
    }
    for (int cellIndex = fromCell; cellIndex <= toCell; cellIndex++) {
        XWPFTableCell cell = tableRow.getCell(cellIndex);
        if (cell == null) {
            // The row has fewer cells than the requested range.
            break;
        }
        CTTcPr tcPr = cell.getCTTc().getTcPr();
        if (tcPr == null) {
            tcPr = cell.getCTTc().addNewTcPr();
        }
        (tcPr.isSetHMerge() ? tcPr.getHMerge() : tcPr.addNewHMerge())
                .setVal(cellIndex == fromCell ? STMerge.RESTART : STMerge.CONTINUE);
    }
}
/**
 * Centers a cell's content: horizontally for its first paragraph and vertically for the
 * cell. Safe to call more than once on the same cell.
 *
 * @param cell cell to style
 */
public static void setCellStylesCentered(XWPFTableCell cell) {
    // A cell without paragraphs gets one, so there is always a first paragraph to align.
    XWPFParagraph paragraph = cell.getParagraphs().isEmpty()
            ? cell.addParagraph()
            : cell.getParagraphs().getFirst();
    paragraph.setAlignment(ParagraphAlignment.CENTER);
    CTTcPr tcPr = cell.getCTTc().getTcPr();
    if (tcPr == null) {
        tcPr = cell.getCTTc().addNewTcPr();
    }
    // Reuse an existing vAlign element; adding a second one would duplicate it.
    CTVerticalJc vAlign = tcPr.isSetVAlign() ? tcPr.getVAlign() : tcPr.addNewVAlign();
    vAlign.setVal(STVerticalJc.CENTER);
}
/**
 * Writes the content of an HTML element into a new run of the paragraph and applies the
 * inline {@code style} declarations it understands.
 *
 * <p>Handled properties (matched by exact name): {@code font-family} (first family of the
 * list), {@code font-weight} (bold, bolder or a number of at least 600), {@code font-size}
 * (px, converted at 0.75 pt per px and rounded, or pt), {@code color} (#RRGGBB, #RGB,
 * bare RRGGBB, rgb()/rgba()) and {@code text-align} (center, left, right). Values that
 * cannot be interpreted are ignored. Without a usable font family the run uses 华文仿宋;
 * without a usable font size it uses {@code defaultFontSize}.
 *
 * @param paragraph       paragraph that receives the run
 * @param text            element whose HTML content and style are rendered
 * @param defaultFontSize font size used when the style does not define a usable one
 */
public static void setTextElement(XWPFParagraph paragraph, Element text, double defaultFontSize) {
    XWPFRun run = paragraph.createRun();
    // Every space in the element's HTML is replaced by a non-breaking space. The original
    // comment describes this as widening spaces for WPS indentation and warns that Office
    // renders it differently.
    // NOTE(review): the search literal may originally have been the nbsp entity - confirm.
    String replace = text.html().replace(" ", "\u00A0");
    boolean fontFamilyBool = true;
    boolean fontSizeBool = true;
    String style = text.attr("style");
    if (StringUtils.hasLength(style)) {
        for (String declaration : style.split(";")) {
            log.debug("开始解析样式信息:{}", declaration);
            // Limit 2 keeps values that contain ':' themselves in one piece.
            String[] parts = declaration.split(":", 2);
            if (parts.length != 2) {
                continue;
            }
            String property = parts[0].trim();
            String value = parts[1];
            int important = value.indexOf('!');
            if (important >= 0) {
                // Drop a trailing "!important".
                value = value.substring(0, important);
            }
            value = value.trim();
            if (value.isEmpty()) {
                continue;
            }
            if (property.equalsIgnoreCase("font-family")) {
                int comma = value.indexOf(',');
                String family = (comma >= 0 ? value.substring(0, comma) : value)
                        .replace("\"", "").replace("'", "").trim();
                if (!family.isEmpty()) {
                    run.setFontFamily(family);
                    fontFamilyBool = false;
                }
            } else if (property.equalsIgnoreCase("font-weight")) {
                if (isBoldFontWeight(value)) {
                    run.setBold(true);
                }
            } else if (property.equalsIgnoreCase("font-size")) {
                Double points = parseFontSizeInPoints(value);
                if (points != null) {
                    run.setFontSize(points);
                    fontSizeBool = false;
                }
            } else if (property.equalsIgnoreCase("color")) {
                // Exact match: background-color and similar must not change the font colour.
                String hex = toHexColor(value);
                if (hex != null) {
                    run.setColor(hex);
                }
            } else if (property.equalsIgnoreCase("text-align")) {
                if (value.contains("center")) {
                    paragraph.setAlignment(CENTER);
                } else if (value.contains("left")) {
                    paragraph.setAlignment(LEFT);
                } else if (value.contains("right")) {
                    paragraph.setAlignment(RIGHT);
                }
            }
        }
    }
    run.setText(HtmlTagRegular.removeHtmlTag(replace));
    if (fontFamilyBool) {
        run.setFontFamily("华文仿宋");
    }
    if (fontSizeBool) {
        run.setFontSize(defaultFontSize);
    }
}

/** Returns true for the CSS font weights bold, bolder and numeric weights of 600 or more. */
private static boolean isBoldFontWeight(String value) {
    String weight = value.trim();
    if (weight.equalsIgnoreCase("bold") || weight.equalsIgnoreCase("bolder")) {
        return true;
    }
    try {
        return Integer.parseInt(weight) >= 600;
    } catch (NumberFormatException e) {
        return false;
    }
}

/**
 * Converts a CSS font size to points. Values in px (or without unit) are multiplied by
 * 0.75 and rounded; values in pt are used as they are.
 *
 * @return the size in points, or null when the value is not a positive px/pt number
 */
private static Double parseFontSizeInPoints(String value) {
    String size = value.trim();
    boolean inPoints = false;
    if (size.length() > 2) {
        String unit = size.substring(size.length() - 2);
        if (unit.equalsIgnoreCase("px")) {
            size = size.substring(0, size.length() - 2);
        } else if (unit.equalsIgnoreCase("pt")) {
            size = size.substring(0, size.length() - 2);
            inPoints = true;
        }
    }
    double number;
    try {
        number = Double.parseDouble(size.trim());
    } catch (NumberFormatException e) {
        // em, rem, %, keywords and the like: keep the default size.
        return null;
    }
    if (!Double.isFinite(number) || number <= 0) {
        return null;
    }
    double points = inPoints ? number : (double) Math.round(number * 0.75);
    return points > 0 ? points : null;
}

/**
 * Converts a CSS colour to the six-digit hex form (without '#') expected by the run.
 *
 * @return the hex colour, or null for unsupported notations such as colour names
 */
private static String toHexColor(String value) {
    String color = value.trim();
    if (color.regionMatches(true, 0, "rgb", 0, 3)) {
        int open = color.indexOf('(');
        int close = color.lastIndexOf(')');
        if (open < 0 || close <= open) {
            return null;
        }
        String[] channels = color.substring(open + 1, close).trim().split("[,\\s/]+");
        if (channels.length < 3) {
            return null;
        }
        int[] rgb = new int[3];
        try {
            for (int k = 0; k < 3; k++) {
                long channel = Math.round(Double.parseDouble(channels[k]));
                rgb[k] = (int) Math.max(0, Math.min(255, channel));
            }
        } catch (NumberFormatException e) {
            // Percentages and other notations are not supported.
            return null;
        }
        return String.format("%02X%02X%02X", rgb[0], rgb[1], rgb[2]);
    }
    String hex = color.startsWith("#") ? color.substring(1) : color;
    if (hex.length() != 3 && hex.length() != 6) {
        return null;
    }
    StringBuilder expanded = new StringBuilder(6);
    for (int k = 0; k < hex.length(); k++) {
        char c = hex.charAt(k);
        boolean hexDigit = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
        if (!hexDigit) {
            return null;
        }
        expanded.append(c);
        if (hex.length() == 3) {
            // #RGB is shorthand for #RRGGBB.
            expanded.append(c);
        }
    }
    return expanded.toString();
}
2.4 设置页边距
java
/**
 * Sets the page margins of the document. Values use the unit in which 567 equals one
 * centimetre.
 *
 * <p>Existing section properties and page margins are reused, so the method can be called
 * again, or after other section settings were written, without adding duplicate elements.
 *
 * @param document document to modify
 * @param left     left margin
 * @param right    right margin
 * @param top      top margin
 * @param bottom   bottom margin
 */
public static void pageMar(XWPFDocument document, long left, long right, long top, long bottom) {
    var body = document.getDocument().getBody();
    var sectPr = body.isSetSectPr() ? body.getSectPr() : body.addNewSectPr();
    CTPageMar pageMar = sectPr.isSetPgMar() ? sectPr.getPgMar() : sectPr.addNewPgMar();
    pageMar.setLeft(BigInteger.valueOf(left));
    pageMar.setRight(BigInteger.valueOf(right));
    pageMar.setTop(BigInteger.valueOf(top));
    pageMar.setBottom(BigInteger.valueOf(bottom));
}
2.4.1 页边距设置示例
java
XWPFDocument document = new XWPFDocument();
// Usage example: set the page margins. The arguments are given in centimetres and are
// converted by pageMarConvert before being passed on.
pageMar(document,
HTWConverter.pageMarConvert(2.8),
HTWConverter.pageMarConvert(2.6),
HTWConverter.pageMarConvert(3.7),
HTWConverter.pageMarConvert(3.5));
2.5 读取文档为Resource
java
/**
 * Serializes the document into an in-memory resource.
 *
 * <p>The document is not closed by this method.
 *
 * @param document the finished document
 * @return a resource holding the bytes of the document
 * @throws RestServerException if writing the document fails
 */
public static Resource readDoc(XWPFDocument document) {
    // A plain byte buffer is enough; there is no need to go through a reactive DataBuffer.
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        document.write(buffer);
        return new ByteArrayResource(buffer.toByteArray());
    } catch (IOException e) {
        // Logged as an error with the stack trace, since the export fails here.
        log.error("文档转换流失败,读取字节发生错误:{}", e.getMessage(), e);
        throw RestServerException.withMsg("文档转换流失败,读取字节发生错误:" + e.getMessage());
    }
}
ok,下班,回家猛攻。😯