推荐两种方式:
方式一:使用spire.doc框架(非常简单,但需要商业授权)
- (需要商业授权,没有授权不能商用,且生成的文档开头会带有标记)
- 也有免费版本spire.doc.free(但缺少很多功能)
方式二:markdown -> html -> pdf(开源,但要自己实现转换)
- 可以先将markdown转为html,再将html转为pdf,从而实现两者的转换
方式一:spire框架实现
-
引入jar包
<!-- Spire.Doc for Java (e-iceblue); the e-iceblue Maven repository must also be declared in the pom. -->
<dependency> <groupId>e-iceblue</groupId> <artifactId>spire.doc</artifactId> <version>13.8.7</version> </dependency>
需要定义一下仓库地址
<!-- Extra Maven repository used to resolve the e-iceblue (Spire) artifacts. -->
<repositories>
<repository>
<id>com.e-iceblue</id>
<url>https://repo.e-iceblue.cn/repository/maven-public/</url>
</repository>
</repositories>
-
定义转换代码
/**
 * Converts a Markdown file to PDF with Spire.Doc.
 *
 * <p>The output file extension is kept in sync with the {@code FileFormat} passed to
 * {@code saveToFile}: the content written is a PDF, so the file is named {@code .pdf}
 * (previously it was written as PDF into a file named {@code 123.docx}).
 */
public static void main(String[] args) {
    Document document = new Document();
    document.loadFromFile("C:\\Users\\Administrator\\Desktop\\test111\\123.md", FileFormat.Markdown);
    // To produce a Word document instead, use a ".docx" name together with FileFormat.Docx.
    document.saveToFile("C:\\Users\\Administrator\\Desktop\\test111\\123.pdf", FileFormat.PDF);
}
成功,非常简单,且可以指定为FileFormat.Docx转为word,但需要商业授权
方式二:markdown -> html -> pdf
这种方式有很多框架可以实现,可以自由搭配,只要能实现markdown转为html、html转为pdf即可
搭配一:commonmark + openhtmltopdf
-
commonmark:将markdown转为html
-
openhtmltopdf:将html转为pdf
-
jsoup:用来纠正html格式(因为markdown中填写的内容不一定符合规范,比如在代码块之外直接写了标签)
<!-- CommonMark: Markdown to HTML (core + GFM tables extension) --> <dependency> <groupId>org.commonmark</groupId> <artifactId>commonmark</artifactId> <version>0.18.0</version> </dependency> <dependency> <groupId>org.commonmark</groupId> <artifactId>commonmark-ext-gfm-tables</artifactId> <version>0.18.0</version> </dependency> <!-- OpenHTMLtoPDF: HTML to PDF (the backend declared here is the PDFBox one, not iText) --> <dependency> <groupId>com.openhtmltopdf</groupId> <artifactId>openhtmltopdf-core</artifactId> <version>1.0.10</version> </dependency> <dependency> <groupId>com.openhtmltopdf</groupId> <artifactId>openhtmltopdf-pdfbox</artifactId> <version>1.0.10</version> </dependency> <!-- jsoup: used by the sample to re-serialize the HTML as XHTML --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.17.2</version> </dependency>
实现
-
将 Markdown 转换为 HTML 片段
-
将 HTML 中的本地图片嵌入为 Base64 Data URI(这个如果不需要图片的,可以不用加)
-
包装为完整的 HTML 文档(含 CSS 样式,css样式模板)
-
转换为 XHTML 兼容格式(用来保证转化的html格式正确)
-
将 XHTML 渲染为 PDF
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
import org.commonmark.ext.gfm.tables.TablesExtension;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Base64;public class TestPDF {
public static void main(String[] args) throws Exception { String inputPath = "C:\\Users\\Administrator\\Desktop\\test111\\456.md"; String outputPath = "C:\\Users\\Administrator\\Desktop\\test111\\456.pdf"; // 1. 将 Markdown 转换为 HTML 片段 String htmlFragment = convertMarkdownToHtml(inputPath); // 2. 将 HTML 中的本地图片嵌入为 Base64 Data URI String embeddedHtml = embedLocalImagesAsDataUri(htmlFragment, inputPath); // 3. 包装为完整的 HTML 文档(含 CSS 样式) String fullHtml = wrapWithCompleteHtml(embeddedHtml); // 4. 转换为 XHTML 兼容格式 String xhtmlContent = convertToXhtml(fullHtml); // 5. 将 XHTML 渲染为 PDF renderXhtmlToPdf(xhtmlContent, outputPath); } /** * 将 XHTML 内容渲染为 PDF 文件。 */ private static void renderXhtmlToPdf(String xhtmlContent, String outputPath) throws IOException { // Step 4: 使用 OpenHTMLtoPDF (PDFBox 后端) 转 PDF try (OutputStream os = new FileOutputStream(outputPath)) { PdfRendererBuilder builder = new PdfRendererBuilder(); builder.useFastMode(); // 可选:提升速度 builder.withHtmlContent(xhtmlContent, null); // 第二个参数是 baseUri,用于解析相对路径资源(如图片) builder.toStream(os); String lowerCase = System.getProperty("os.name").toLowerCase(); if (lowerCase.contains("win")) { // Windows: 尝试宋体、微软雅黑 tryRegisterFont(builder, "C:/Windows/Fonts/simsun.ttc", "SimSun"); tryRegisterFont(builder, "C:/Windows/Fonts/msyh.ttc", "Microsoft YaHei"); } else if (lowerCase.contains("nix") || lowerCase.contains("nux") || lowerCase.contains("mac")) { // Linux / macOS: 常见中文字体 // Noto Sans CJK (Google) tryRegisterFont(builder, "/usr/share/fonts/noto/NotoSansCJK-Regular.ttc", "Noto Sans CJK SC"); tryRegisterFont(builder, "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", "Noto Sans CJK SC"); // WenQuanYi Micro Hei (文泉驿微米黑) tryRegisterFont(builder, "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", "WenQuanYi Micro Hei"); tryRegisterFont(builder, "/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc", "WenQuanYi Micro Hei"); // macOS if (lowerCase.contains("mac")) { tryRegisterFont(builder, 
"/System/Library/Fonts/PingFang.ttc", "PingFang SC"); tryRegisterFont(builder, "/System/Library/Fonts/Helvetica.ttc", "Helvetica"); } } builder.run(); } System.out.println("PDF 已生成: " + outputPath); } /** * 将 HTML 字符串转换为 XHTML 兼容格式。 */ private static String convertToXhtml(String html) { Document doc = Jsoup.parse(html); doc.outputSettings() .syntax(Document.OutputSettings.Syntax.xml) // 输出 XML 语法 .escapeMode(Entities.EscapeMode.xhtml) // 实体转义(如 & → &) .prettyPrint(false); // 禁用美化,避免多余空白 return doc.html(); } /** * 读取 Markdown 文件并将其转换为 HTML 片段。 */ private static String convertMarkdownToHtml(String markdownFilePath) throws IOException { // Step 1: 读取 Markdown 文件 byte[] bytes = Files.readAllBytes(Paths.get(markdownFilePath)); String markdown = new String(bytes, "UTF-8"); // Step 2: 使用 CommonMark 解析并转为 HTML(支持表格) Parser parser = Parser.builder() .extensions(Arrays.asList(TablesExtension.create())) .build(); HtmlRenderer renderer = HtmlRenderer.builder() .extensions(Arrays.asList(TablesExtension.create())) .build(); String html = renderer.render(parser.parse(markdown)); return html; } /** * 将 HTML 片段包装为完整的 HTML 文档,包含内联 CSS 样式。 */ private static String wrapWithCompleteHtml(String htmlFragment) { // Step 3: 构建完整 HTML(含 CSS) String css = "body {\n" + " font-family: 'Noto Sans SC', SimSun, Arial, sans-serif;\n" + " margin: 40px;\n" + " line-height: 1.6;\n" + " color: #333;\n" + " font-size: 12pt;\n" + "}\n" + "h1, h2, h3, h4, h5, h6 {\n" + " color: #2c3e50;\n" + " margin-top: 1.2em;\n" + " margin-bottom: 0.6em;\n" + "}\n" + "h1 { font-size: 24pt; }\n" + "h2 { font-size: 20pt; }\n" + "h3 { font-size: 16pt; }\n" + "\n" + "p, ul, ol, pre, table {\n" + " margin: 0 0 16px 0;\n" + "}\n" + "\n" + "a {\n" + " color: #3498db;\n" + " text-decoration: none;\n" + "}\n" + "\n" + "strong { font-weight: bold; }\n" + "em { font-style: italic; }\n" + "del { text-decoration: line-through; }\n" + "\n" + "/* 表格:自适应 + 边框 */\n" + "table {\n" + " border-collapse: collapse;\n" + " width: 100%;\n" + 
" empty-cells: show;\n" + "}\n" + "th, td {\n" + " border: 1px solid #999;\n" + " padding: 8px 12px;\n" + " text-align: left;\n" + " vertical-align: top;\n" + "}\n" + "th {\n" + " background-color: #f0f0f0;\n" + " font-weight: bold;\n" + "}\n" + "\n" + "/* 代码块 */\n" + "code {\n" + " font-family: Consolas, Monaco, monospace;\n" + " background-color: #f8f8f8;\n" + " padding: 2px 4px;\n" + " border-radius: 3px;\n" + " font-size: 11pt;\n" + "}\n" + "pre {\n" + " background-color: #f8f8f8;\n" + " padding: 12px;\n" + " border-left: 4px solid #ccc;\n" + " overflow: hidden;\n" + " white-space: pre-wrap;\n" + " font-family: Consolas, Monaco, monospace;\n" + " font-size: 10pt;\n" + "}\n" + "\n" + "/* 列表 */\n" + "ul, ol {\n" + " padding-left: 24px;\n" + "}\n" + "li {\n" + " margin-bottom: 6px;\n" + "}\n" + "\n" + "/* 图片(可选) */\n" + "img {\n" + " max-width: 100%;\n" + " height: auto;\n" + "}\n"; // 可选:包装成完整 HTML(带 CSS) String fullHtml = "<!DOCTYPE html>\n" + "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" + "<head>\n" + " <meta charset=\"UTF-8\" />\n" + " <title>Markdown to PDF</title>\n" + " <style type=\"text/css\">\n" + css + // ← 直接插入 css 变量(不通过 %s) " </style>\n" + "</head>\n" + "<body>\n" + " " + htmlFragment + "\n" + // ← 直接插入 html 变量 "</body>\n" + "</html>"; return fullHtml; } /** * 尝试注册指定路径的字体到 PDF 渲染器。 */ private static void tryRegisterFont(PdfRendererBuilder builder, String fontPath, String fontFamilyName) { File fontFile = new File(fontPath); if (fontFile.exists()) { System.out.println("✅ 注册字体: " + fontFamilyName + " (" + fontPath + ")"); builder.useFont(fontFile, fontFamilyName); } else { System.out.println("⚠️ 字体未找到: " + fontPath); } } /** * 将 HTML 中引用的本地图片转换为 Base64 Data URI 并内联到 HTML 中。 */ private static String embedLocalImagesAsDataUri(String html, String markdownFilePath) throws IOException { Path markdownBasePath = Paths.get(markdownFilePath).getParent(); Document doc = Jsoup.parse(html); for (Element img : doc.select("img")) { String src = img.attr("src"); 
if (src == null || src.trim().isEmpty()) continue; if (src.startsWith("data:")) continue; // 已内联 if (src.startsWith("http://") || src.startsWith("https://")) { System.err.println("Warning: Remote image not embedded: " + src); continue; } Path imgPath; // >>>>>>>>>> 新增:支持绝对 Windows 路径(以盘符开头) <<<<<<<<<< if (src.matches("^[A-Za-z]:\\\\.*")) { // 匹配 D:\xxx 形式 // 将反斜杠替换为正斜杠(兼容性更好) src = src.replace("\\", "/"); imgPath = Paths.get(src); } else { // 相对路径:相对于 Markdown 文件所在目录 imgPath = markdownBasePath.resolve(src).normalize(); // 安全检查:防止路径穿越 if (!imgPath.startsWith(markdownBasePath)) { throw new SecurityException("Invalid relative image path: " + src); } } // >>>>>>>>>> END 新增 <<<<<<<<<< if (!Files.exists(imgPath)) { System.err.println("Image not found: " + imgPath); continue; } byte[] imageBytes = Files.readAllBytes(imgPath); String mimeType = Files.probeContentType(imgPath); if (mimeType == null || !mimeType.startsWith("image/")) { mimeType = "image/png"; } String dataUri = "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(imageBytes); img.attr("src", dataUri); } return doc.body().html(); }}
搭配二:flexmark + flying-saucer-pdf
-
flexmark:将markdown转为html
-
flying-saucer-pdf:将html转为pdf
-
jsoup:用来纠正html格式(因为markdown中填写的内容不一定符合规范,比如在代码块之外直接写了标签)
<!-- flexmark: Markdown to HTML -->
<dependency>
    <groupId>com.vladsch.flexmark</groupId>
    <artifactId>flexmark-all</artifactId>
    <version>0.50.44</version>
</dependency>
<!-- Flying Saucer: HTML to PDF -->
<dependency>
    <groupId>org.xhtmlrenderer</groupId>
    <artifactId>flying-saucer-pdf</artifactId>
    <version>9.1.22</version>
</dependency>
<!-- jsoup: the sample code imports org.jsoup directly, so it is declared explicitly
     (same version as in the CommonMark + OpenHTMLtoPDF setup) -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.17.2</version>
</dependency>
实现
-
将 Markdown 转换为 HTML 片段
-
将 HTML 中的本地图片嵌入为 Base64 Data URI(这个如果不需要图片的,可以不用加)
-
包装为完整的 HTML 文档(含 CSS 样式,css样式模板)
-
转换为 XHTML 兼容格式(用来保证转化的html格式正确)
-
将 XHTML 渲染为 PDF
import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.BaseFont;
import com.vladsch.flexmark.ext.tables.TablesExtension;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.data.MutableDataSet;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Base64;public class TestPDF {
public static void main(String[] args) throws Exception { String inputPath = "C:\\Users\\Administrator\\Desktop\\test111\\456.md"; String outputPath = "C:\\Users\\Administrator\\Desktop\\test111\\456.pdf"; // 1. 将 Markdown 转换为 HTML 片段 String htmlFragment = convertMarkdownToHtml(inputPath); // 2. 将 HTML 中的本地图片嵌入为 Base64 Data URI String embeddedHtml = embedLocalImagesAsDataUri(htmlFragment, inputPath); // 3. 包装为完整的 HTML 文档(含 CSS 样式) String fullHtml = wrapWithCompleteHtml(embeddedHtml); // 4. 转换为 XHTML 兼容格式 String xhtmlContent = convertToXhtml(fullHtml); // 5. 将 XHTML 渲染为 PDF renderXhtmlToPdf(xhtmlContent, outputPath); } /** * 将 XHTML 内容渲染为 PDF 文件。 */ private static void renderXhtmlToPdf(String xhtmlContent, String outputPath) throws IOException { // Step 4: 使用 OpenHTMLtoPDF (PDFBox 后端) 转 PDF try (OutputStream out = new FileOutputStream(outputPath)) { ITextRenderer pdfRenderer = new ITextRenderer(); // 👇 关键:注册中文字体 ITextFontResolver fontResolver = pdfRenderer.getFontResolver(); // Windows 系统路径(宋体) String osName = System.getProperty("os.name").toLowerCase(); if (osName.contains("win")) { // Windows: 宋体、微软雅黑(.ttc 支持良好) fontResolver.addFont("C:/Windows/Fonts/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); fontResolver.addFont("C:/Windows/Fonts/msyh.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); } else if (osName.contains("nix") || osName.contains("nux")) { // Linux fontResolver.addFont("/usr/share/fonts/noto/NotoSansCJK-Regular.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); fontResolver.addFont("/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); } else if (osName.contains("mac")) { // macOS fontResolver.addFont("/System/Library/Fonts/PingFang.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); } pdfRenderer.setDocumentFromString(xhtmlContent); pdfRenderer.layout(); pdfRenderer.createPDF(out); } catch (DocumentException e) { throw new RuntimeException(e); } System.out.println("PDF 已生成: " + outputPath); } /** * 将 
HTML 字符串转换为 XHTML 兼容格式。 */ private static String convertToXhtml(String html) { Document doc = Jsoup.parse(html); doc.outputSettings() .syntax(Document.OutputSettings.Syntax.xml) // 输出 XML 语法 .escapeMode(Entities.EscapeMode.xhtml) // 实体转义(如 & → &) .prettyPrint(false); // 禁用美化,避免多余空白 return doc.html(); } /** * 读取 Markdown 文件并将其转换为 HTML 片段。 */ private static String convertMarkdownToHtml(String markdownFilePath) throws IOException { // Step 1: 读取 Markdown 文件 byte[] bytes = Files.readAllBytes(Paths.get(markdownFilePath)); String markdown = new String(bytes, "utf-8"); // Step 1: Markdown -> HTML MutableDataSet options = new MutableDataSet(); options.set(Parser.EXTENSIONS, Arrays.asList(TablesExtension.create())); // 注册 TablesExtension Parser parser = Parser.builder(options).build(); HtmlRenderer renderer = HtmlRenderer.builder(options).build(); String html = renderer.render(parser.parse(markdown)); return html; } /** * 将 HTML 片段包装为完整的 HTML 文档,包含内联 CSS 样式。 */ private static String wrapWithCompleteHtml(String htmlFragment) { // Step 3: 构建完整 HTML(含 CSS) String css = "body {\n" + " font-family: 'Noto Sans SC', SimSun, Arial, sans-serif;\n" + " margin: 40px;\n" + " line-height: 1.6;\n" + " color: #333;\n" + " font-size: 12pt;\n" + "}\n" + "h1, h2, h3, h4, h5, h6 {\n" + " color: #2c3e50;\n" + " margin-top: 1.2em;\n" + " margin-bottom: 0.6em;\n" + "}\n" + "h1 { font-size: 24pt; }\n" + "h2 { font-size: 20pt; }\n" + "h3 { font-size: 16pt; }\n" + "\n" + "p, ul, ol, pre, table {\n" + " margin: 0 0 16px 0;\n" + "}\n" + "\n" + "a {\n" + " color: #3498db;\n" + " text-decoration: none;\n" + "}\n" + "\n" + "strong { font-weight: bold; }\n" + "em { font-style: italic; }\n" + "del { text-decoration: line-through; }\n" + "\n" + "/* 表格:自适应 + 边框 */\n" + "table {\n" + " border-collapse: collapse;\n" + " width: 100%;\n" + " empty-cells: show;\n" + "}\n" + "th, td {\n" + " border: 1px solid #999;\n" + " padding: 8px 12px;\n" + " text-align: left;\n" + " vertical-align: top;\n" + "}\n" + "th 
{\n" + " background-color: #f0f0f0;\n" + " font-weight: bold;\n" + "}\n" + "\n" + "/* 代码块 */\n" + "code {\n" + " font-family: 'Noto Sans SC', SimSun, Consolas, Monaco, monospace !important;\n" + " background-color: #f8f8f8;\n" + " padding: 2px 4px;\n" + " border-radius: 3px;\n" + " font-size: 11pt;\n" + "}\n" + "pre {\n" + " font-family: 'Noto Sans SC', SimSun, Consolas, Monaco, monospace !important;\n" + " background-color: #f8f8f8;\n" + " padding: 12px;\n" + " border-left: 4px solid #ccc;\n" + " overflow: hidden;\n" + " white-space: pre-wrap;\n" + " font-size: 10pt;\n" + "}\n" + "\n" + "/* 列表 */\n" + "ul, ol {\n" + " padding-left: 24px;\n" + "}\n" + "li {\n" + " margin-bottom: 6px;\n" + "}\n" + "\n" + "/* 图片(可选) */\n" + "img {\n" + " max-width: 100%;\n" + " height: auto;\n" + "}"; // 可选:包装成完整 HTML(带 CSS) String fullHtml = "<!DOCTYPE html>\n" + "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" + "<head>\n" + " <meta charset=\"UTF-8\" />\n" + " <title>Markdown to PDF</title>\n" + " <style type=\"text/css\">\n" + css + // ← 直接插入 css 变量(不通过 %s) " </style>\n" + "</head>\n" + "<body>\n" + " " + htmlFragment + "\n" + // ← 直接插入 html 变量 "</body>\n" + "</html>"; return fullHtml; } /** * 将 HTML 中引用的本地图片转换为 Base64 Data URI 并内联到 HTML 中。 */ private static String embedLocalImagesAsDataUri(String html, String markdownFilePath) throws IOException { Path markdownBasePath = Paths.get(markdownFilePath).getParent(); Document doc = Jsoup.parse(html); for (Element img : doc.select("img")) { String src = img.attr("src"); if (src == null || src.trim().isEmpty()) continue; if (src.startsWith("data:")) continue; // 已内联 if (src.startsWith("http://") || src.startsWith("https://")) { System.err.println("Warning: Remote image not embedded: " + src); continue; } Path imgPath; // >>>>>>>>>> 新增:支持绝对 Windows 路径(以盘符开头) <<<<<<<<<< if (src.matches("^[A-Za-z]:\\\\.*")) { // 匹配 D:\xxx 形式 // 将反斜杠替换为正斜杠(兼容性更好) src = src.replace("\\", "/"); imgPath = Paths.get(src); } else { // 相对路径:相对于 Markdown 文件所在目录 
imgPath = markdownBasePath.resolve(src).normalize(); // 安全检查:防止路径穿越 if (!imgPath.startsWith(markdownBasePath)) { throw new SecurityException("Invalid relative image path: " + src); } } // >>>>>>>>>> END 新增 <<<<<<<<<< if (!Files.exists(imgPath)) { System.err.println("Image not found: " + imgPath); continue; } byte[] imageBytes = Files.readAllBytes(imgPath); String mimeType = Files.probeContentType(imgPath); if (mimeType == null || !mimeType.startsWith("image/")) { mimeType = "image/png"; } String dataUri = "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(imageBytes); img.attr("src", dataUri); } return doc.body().html(); }}