在实际项目中,我们经常需要把 Markdown 内容(比如技术文档、API 接口文档、产品需求文档)批量导出为正式的 Word 和 PDF 文件,交给领导、客户或存档。本文基于目前最稳定、最常用的两套开源方案,结合破解版 Aspose.Words(仅供学习交流),实现了从 Markdown → 完美排版的 Word → 完美 PDF的全链路自动化,并且重点解决了中文表格乱码、字体缺失、标题加粗等问题。
一、使用 Flexmark-java 将 Markdown 转为 Word(推荐,纯 Java)
这是最直接、代码量最少且效果最好的纯 Java 方案。Flexmark 有一个专门的模块 flexmark-docx-converter,可以直接将 Markdown 语法树转换为 Word 文档,无需中间转 HTML。
1. 引入 Maven 依赖
请确保版本兼容 Java 8。Flexmark 0.62.2 是支持 Java 8 的稳定版本。
xml
<dependencies>
<!-- 核心解析库 -->
<dependency>
<groupId>com.vladsch.flexmark</groupId>
<artifactId>flexmark-all</artifactId>
<version>0.62.2</version>
</dependency>
<!-- Docx 转换模块 -->
<dependency>
<groupId>com.vladsch.flexmark</groupId>
<artifactId>flexmark-docx-converter</artifactId>
<version>0.62.2</version>
</dependency>
</dependencies>
2. Java 代码实现
java
import com.vladsch.flexmark.docx.converter.DocxRenderer;
import com.vladsch.flexmark.ext.tables.TablesExtension;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Node;
import com.vladsch.flexmark.util.data.MutableDataSet;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
/**
 * Minimal example that converts a Markdown string into a .docx file using
 * Flexmark's docx converter (which renders into a docx4j package).
 */
public class MdToDocxExample {

    /**
     * Converts a small sample document (headings, list, bold text, pipe table)
     * and writes it to {@code output.docx} in the working directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String markdown = "# 标题1\n" +
                "## 标题2\n" +
                "这是一段普通的文本。\n" +
                "- 列表项 1\n" +
                "- **加粗文本**\n" +
                "\n" +
                "| 表头1 | 表头2 |\n" +
                "| --- | --- |\n" +
                "| 内容1 | 内容2 |";
        String outputPath = "output.docx";
        convertMarkdownToDocx(markdown, outputPath);
    }

    /**
     * Parses {@code markdown} and saves the rendered Word document to {@code outputPath}.
     * A save failure is reported on stderr and not rethrown.
     *
     * @param markdown   Markdown source text
     * @param outputPath path of the .docx file to write
     */
    public static void convertMarkdownToDocx(String markdown, String outputPath) {
        // 1. Configure Flexmark. Pipe tables are an extension, not core syntax, so the
        //    tables extension has to be registered or table rows come out as plain text.
        //    The same option set is shared by parser and renderer.
        MutableDataSet options = new MutableDataSet();
        options.set(Parser.EXTENSIONS, Arrays.asList(TablesExtension.create()));

        // 2. Build the parser and the docx renderer.
        Parser parser = Parser.builder(options).build();
        DocxRenderer renderer = DocxRenderer.builder(options).build();

        // 3. Parse the Markdown into an AST.
        Node document = parser.parse(markdown);

        // 4. Render into the default template package (docx4j's WordprocessingMLPackage).
        WordprocessingMLPackage template = DocxRenderer.getDefaultTemplate();
        renderer.render(document, template);

        // 5. Save to disk.
        try {
            template.save(new File(outputPath));
            System.out.println("转换成功,文件路径:" + outputPath);
        } catch (Docx4JException e) {
            // Keep the example non-throwing, but say which file failed.
            System.err.println("转换失败,文件路径:" + outputPath);
            e.printStackTrace();
        }
    }
}
二、使用 Docx4j + Aspose.Words 实现 Word 转 PDF 并解决中文字体乱码问题
在企业级应用开发中,将 Word 文档(doc/docx)转换为 PDF 是一个非常常见的需求。虽然市面上有 OpenOffice、Aspose 等方案,但 Docx4j 作为一款开源、基于 JAXB 的 Java 库,因其无需安装外部软件且完全可控的特性,成为了很多 Java 8 项目的首选。
然而,Docx4j 在转 PDF 时最头疼的问题莫过于 中文字体支持。如果服务器缺少对应字体,生成的 PDF 往往会变成乱码或"方块字"。
1. 核心依赖
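首先,在 Maven 中引入依赖。需要说明的是:下文代码用 Docx4j 统一改写文档中的字体,真正的 PDF 渲染由 Aspose.Words 完成,因此下面列出的是 Aspose.Words 等依赖,而不是 Docx4j 的 PDF 导出扩展包。Docx4j 本身需要另行引入(第一部分的 flexmark-docx-converter 通常会传递引入;若单独使用本节代码,请显式添加 docx4j 依赖,8.x 版本适配 Java 8)。documents4j 与 thumbnailator 在本文示例代码中并未直接用到,可按需保留。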
xml
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-local</artifactId>
<version>1.1.7</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-transformer-msoffice-word</artifactId>
<version>1.1.7</version>
</dependency>
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>15.11.0</version>
</dependency>
<dependency>
<groupId>net.coobird</groupId>
<artifactId>thumbnailator</artifactId>
<version>0.4.13</version>
</dependency>
2. Word(doc/docx)转换为 PDF
java
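file:
  # Output directory; it must end with a path separator because the code builds paths as path + fileName.
  path: /data/tianjin/web/file/dzdt_admin/report/
  # Font directory handed to Aspose. C:\Windows\Fonts only exists on Windows; on a Linux server
  # point this at the directory that actually holds the Chinese fonts (for example /usr/share/fonts).
  font: C:\Windows\Fonts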
// Font directory injected from the `file.font` property; downloadPdf passes it to FontSettings.setFontsFolder.
@Value("${file.font}")
private String font;
// Base directory injected from the `file.path` property; file names are appended to it by plain string concatenation.
@Value("${file.path}")
private String path;
// Example call:
//WordToPdf("202601291817.doc", new File("\\data\\tianjin\\web\\file\\dzdt_admin\\report\\202601291817.doc"));
// Word to PDF conversion
/**
 * Converts a Word file to PDF.
 *
 * <p>The source document is first passed through {@link #wordFonts(File, String)}, which
 * writes a font-normalised copy to {@code path + fileName}; that copy is then rendered to
 * PDF by {@link #downloadPdf(OutputStream, InputStream)}.
 *
 * @param fileName file name (relative to {@code path}) of the normalised copy; the PDF gets
 *                 the same name with its {@code .doc}/{@code .docx} extension replaced by
 *                 {@code .pdf}
 * @param file     the source Word document
 * @return the target PDF file; failures are logged, not thrown, so the file may be missing
 *         or incomplete
 */
public File WordToPdf(String fileName, File file) {
    // Must run first: it produces the file at path + fileName that is opened below.
    wordFonts(file, fileName);

    File normalizedDoc = new File(path + fileName);
    File outputFile = new File(path + toPdfFileName(fileName));

    // try-with-resources guarantees both streams are released even when opening the
    // second one fails; downloadPdf also closes them, and a repeated close is harmless.
    try (InputStream docxInputStream = new FileInputStream(normalizedDoc);
         OutputStream outputStream = new FileOutputStream(outputFile)) {
        downloadPdf(outputStream, docxInputStream);
    } catch (IOException e) {
        log.error("Failed to convert " + normalizedDoc + " to " + outputFile, e);
    }
    return outputFile;
}

/**
 * Derives the PDF file name: a trailing {@code .doc}/{@code .docx} extension (any case) is
 * replaced by {@code .pdf}; any other name gets {@code .pdf} appended. Only the extension is
 * touched, so "doc" inside the base name is preserved and the result never equals the input.
 */
private static String toPdfFileName(String fileName) {
    int dot = fileName.lastIndexOf('.');
    if (dot >= 0) {
        String extension = fileName.substring(dot + 1);
        if ("doc".equalsIgnoreCase(extension) || "docx".equalsIgnoreCase(extension)) {
            return fileName.substring(0, dot) + ".pdf";
        }
    }
    return fileName + ".pdf";
}
/**
 * Loads a Word document with docx4j, forces the font of its runs via
 * {@link #setFont(RPr)} and saves the result to {@code path + fileName}.
 *
 * <p>Two passes are made: runs that sit directly in paragraphs of table cells, and runs that
 * sit directly in top-level body paragraphs. Runs without run properties are left untouched.
 * Runs in the first seven top-level body elements are additionally set to bold.
 *
 * <p>Failures to load or save are logged and not thrown; if loading fails nothing is written.
 *
 * @param file     the source document to read
 * @param fileName name, relative to {@code path}, under which the modified document is saved
 */
public void wordFonts(File file, String fileName) {
    WordprocessingMLPackage wPackage;
    try (InputStream in = new FileInputStream(file)) {
        wPackage = WordprocessingMLPackage.load(in);
    } catch (Docx4JException | IOException e) {
        // Without a loaded package there is nothing to rewrite; bail out instead of
        // dereferencing null further down.
        log.error("Failed to load Word document " + file, e);
        return;
    }

    MainDocumentPart document = wPackage.getMainDocumentPart();
    List<Object> content = document.getContent();

    // Pass 1: fonts inside tables.
    ClassFinder tableFinder = new ClassFinder(Tbl.class);
    new TraversalUtil(content, tableFinder);
    for (Object found : tableFinder.results) {
        if (found instanceof Tbl) {
            applyFontToTable((Tbl) found);
        }
    }

    // Pass 2: body paragraphs. The position counts every top-level element, not only
    // paragraphs; the first seven positions get bold runs
    // (NOTE(review): presumably the report's title block - confirm the threshold).
    int position = 0;
    for (Object element : content) {
        position++;
        if (element instanceof P) {
            applyFontToRuns((P) element, position <= 7);
        }
    }

    try {
        wPackage.save(new File(path + fileName));
    } catch (Docx4JException e) {
        log.error("Failed to save Word document " + path + fileName, e);
    }
}

/** Applies the font to runs of every paragraph that is a direct child of a cell of {@code tbl}. */
private static void applyFontToTable(Tbl tbl) {
    for (Object rowItem : tbl.getContent()) {
        Object row = unwrapJaxb(rowItem);
        if (!(row instanceof Tr)) {
            continue;
        }
        for (Object cellItem : ((Tr) row).getContent()) {
            Object cell = unwrapJaxb(cellItem);
            if (!(cell instanceof Tc)) {
                continue;
            }
            for (Object block : ((Tc) cell).getContent()) {
                // Cell content is not guaranteed to be a paragraph; skip anything else
                // instead of failing with a ClassCastException.
                if (block instanceof P) {
                    applyFontToRuns((P) block, false);
                }
            }
        }
    }
}

/** Sets the font (and optionally bold) on each direct run child of {@code paragraph} that has run properties. */
private static void applyFontToRuns(P paragraph, boolean bold) {
    for (Object child : paragraph.getContent()) {
        if (!(child instanceof R)) {
            continue;
        }
        RPr rPr = ((R) child).getRPr();
        if (rPr == null) {
            continue;
        }
        if (bold) {
            rPr.setB(new BooleanDefaultTrue());
        }
        setFont(rPr);
    }
}

/** Returns the wrapped value when {@code item} is a JAXBElement, otherwise {@code item} itself. */
private static Object unwrapJaxb(Object item) {
    return item instanceof JAXBElement ? ((JAXBElement<?>) item).getValue() : item;
}
/**
 * Replaces the run's font definition with a fresh one in which the ASCII, high-ANSI,
 * East Asian and complex-script slots all name the same typeface.
 *
 * @param rPr run properties to modify; must not be null
 */
public static void setFont(RPr rPr) {
    final String typeface = "新宋体";
    RFonts fonts = new RFonts();
    fonts.setAscii(typeface);
    fonts.setHAnsi(typeface);
    fonts.setEastAsia(typeface);
    fonts.setCs(typeface);
    rPr.setRFonts(fonts);
}
/**
 * Renders the Word document read from {@code inputStream} as PDF into {@code outputStream}
 * using Aspose.Words, with fonts resolved from the configured {@code font} directory.
 *
 * <p>Both streams are always closed before returning. Conversion errors are logged and not
 * rethrown, so the caller cannot tell from this method alone whether the PDF is complete.
 *
 * @param outputStream destination for the PDF bytes; closed by this method
 * @param inputStream  source Word document; closed by this method
 */
public void downloadPdf(OutputStream outputStream, InputStream inputStream) {
    try {
        if (!getLicense()) {
            log.error("Aspose license could not be applied; continuing without it");
        }
        FontSettings.setFontsFolder(font, false);
        FontSettings.setDefaultFontName("宋体");
        Document doc = new Document(inputStream);
        // save() writes the complete PDF. Nothing from the input stream may be copied to
        // the output afterwards: any leftover bytes would be appended after the PDF data.
        doc.save(outputStream, SaveFormat.PDF);
    } catch (Exception e) {
        // Log the throwable itself so the stack trace is kept, not only the message.
        log.error("Word to PDF conversion failed", e);
    } finally {
        // Close independently so a failure on one stream cannot leak the other.
        try {
            if (inputStream != null) {
                inputStream.close();
            }
        } catch (IOException e) {
            log.error("Failed to close the Word input stream", e);
        }
        try {
            if (outputStream != null) {
                outputStream.close();
            }
        } catch (IOException e) {
            log.error("Failed to close the PDF output stream", e);
        }
    }
}
/**
 * Applies the Aspose.Words license from the embedded license XML.
 *
 * <p>NOTE(review): the license text is hard-coded in source and the accompanying article
 * describes it as a cracked license. Before any real use this must be replaced by a properly
 * obtained license file loaded from outside the code base (file or classpath resource).
 *
 * @return {@code true} if the license was applied, {@code false} if applying it threw
 */
private static boolean getLicense() {
    // License XML
    String licenseStr =
            "<License> " +
            " <Data> " +
            " <Products> " +
            " <Product>Aspose.Total for Java</Product> " +
            " <Product>Aspose.Words for Java</Product> " +
            " </Products> " +
            " <EditionType>Enterprise</EditionType> " +
            " <SubscriptionExpiry>20991231</SubscriptionExpiry> " +
            " <LicenseExpiry>20991231</LicenseExpiry> " +
            " <SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber> " +
            " </Data> " +
            " <Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature> " +
            "</License>";
    try {
        // Charset constant instead of a charset name: no lookup by string at runtime.
        // Fully qualified because this block cannot add imports to the file.
        InputStream license = new ByteArrayInputStream(
                licenseStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        License asposeLic = new License();
        asposeLic.setLicense(license);
        return true;
    } catch (Exception e) {
        // Pass the throwable so the stack trace is logged, not only the message.
        log.error("Failed to apply Aspose license", e);
        return false;
    }
}