java中实现markdown转为pdf

推荐两种方式:

方式一:使用spire.doc框架(非常简单,但需要商业授权)

  • 需要商业授权:没有授权不能商用,且生成的文档开头会带有标记
  • 也有免费版本spire.doc.free(但缺少很多功能)

方式二:markdown -》 html -》pdf(开源,但要自己实现转换)

  • 可以先将markdown转为html,再将html转为pdf,从而实现两者的转换

方式一:spire框架实现

  • 引入jar包

    复制代码
          <dependency>
              <groupId>e-iceblue</groupId>
              <artifactId>spire.doc</artifactId>
              <version>13.8.7</version>
          </dependency>

需要定义一下仓库地址

复制代码
    <repositories>
        <repository>
            <id>com.e-iceblue</id>
            <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
        </repository>
    </repositories>
  • 定义转换代码

    复制代码
      /**
       * Converts a Markdown file to PDF with Spire.Doc.
       *
       * @param args unused
       */
      public static void main(String[] args) {
          Document document = new Document();

          // Load the Markdown source.
          document.loadFromFile("C:\\Users\\Administrator\\Desktop\\test111\\123.md", FileFormat.Markdown);

          // The content is written in PDF format, so the file gets a .pdf extension
          // (the previous sample stored PDF data in a file named 123.docx).
          document.saveToFile("C:\\Users\\Administrator\\Desktop\\test111\\123.pdf", FileFormat.PDF);
      }

成功,非常简单,且可以指定为FileFormat.Docx转为word,但需要商业授权

方式二:markdown -》 html -》pdf

这种方式有很多框架可以实现,可以自由搭配,只要能实现markdown转为html、html转为pdf即可

搭配一:commonmark + openhtmltopdf

  • commonmark:将markdown转为html

  • openhtmltopdf:将html转为pdf

  • jsoup:用来纠正html格式(因为markdown中填写的内容转换后不一定是格式合法的html,比如在代码块之外直接写了标签)

    复制代码
          <!-- CommonMark -->
          <dependency>
              <groupId>org.commonmark</groupId>
              <artifactId>commonmark</artifactId>
              <version>0.18.0</version>
          </dependency>
          <dependency>
              <groupId>org.commonmark</groupId>
              <artifactId>commonmark-ext-gfm-tables</artifactId>
              <version>0.18.0</version>
          </dependency>
    
          <!-- OpenHTMLtoPDF (基于 Flying Saucer,使用 PDFBox 输出 PDF) -->
          <dependency>
              <groupId>com.openhtmltopdf</groupId>
              <artifactId>openhtmltopdf-core</artifactId>
              <version>1.0.10</version>
          </dependency>
          <dependency>
              <groupId>com.openhtmltopdf</groupId>
              <artifactId>openhtmltopdf-pdfbox</artifactId>
              <version>1.0.10</version>
          </dependency>
    
          <dependency>
              <groupId>org.jsoup</groupId>
              <artifactId>jsoup</artifactId>
              <version>1.17.2</version>
          </dependency>

实现

  1. 将 Markdown 转换为 HTML 片段

  2. 将 HTML 中的本地图片嵌入为 Base64 Data URI(这个如果不需要图片的,可以不用加)

  3. 包装为完整的 HTML 文档(含 CSS 样式,css样式模板)

  4. 转换为 XHTML 兼容格式(用来保证转化的html格式正确)

  5. 将 XHTML 渲染为 PDF

    import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
    import org.commonmark.ext.gfm.tables.TablesExtension;
    import org.commonmark.parser.Parser;
    import org.commonmark.renderer.html.HtmlRenderer;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.nodes.Entities;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.Arrays;
    import java.util.Base64;

    public class TestPDF {

    复制代码
     /**
      * Converts a Markdown file to a PDF file.
      *
      * @param args optional: {@code args[0]} is the Markdown input path and {@code args[1]}
      *             the PDF output path; the sample paths are used for any missing argument
      * @throws Exception if reading, converting or writing fails
      */
     public static void main(String[] args) throws Exception {
         String inputPath = (args != null && args.length > 0)
                 ? args[0]
                 : "C:\\Users\\Administrator\\Desktop\\test111\\456.md";
         String outputPath = (args != null && args.length > 1)
                 ? args[1]
                 : "C:\\Users\\Administrator\\Desktop\\test111\\456.pdf";

         // 1. Convert the Markdown into an HTML fragment.
         String htmlFragment = convertMarkdownToHtml(inputPath);

         // 2. Inline local images as Base64 data URIs.
         String embeddedHtml = embedLocalImagesAsDataUri(htmlFragment, inputPath);

         // 3. Wrap the fragment in a complete HTML document (with CSS).
         String fullHtml = wrapWithCompleteHtml(embeddedHtml);

         // 4. Convert to XHTML-compatible markup.
         String xhtmlContent = convertToXhtml(fullHtml);

         // 5. Render the XHTML to PDF.
         renderXhtmlToPdf(xhtmlContent, outputPath);
     }
    
     /**
      * Renders XHTML content into a PDF file using OpenHTMLtoPDF (PDFBox backend).
      *
      * @param xhtmlContent the XHTML document to render
      * @param outputPath   path of the PDF file to write
      * @throws IOException if the output file cannot be written or rendering fails
      */
     private static void renderXhtmlToPdf(String xhtmlContent, String outputPath) throws IOException {
         // Candidate fonts as {path, family name}; each is registered only if the file exists.
         final String[][] windowsFonts = {
                 {"C:/Windows/Fonts/simsun.ttc", "SimSun"},
                 {"C:/Windows/Fonts/msyh.ttc", "Microsoft YaHei"},
         };
         final String[][] unixFonts = {
                 {"/usr/share/fonts/noto/NotoSansCJK-Regular.ttc", "Noto Sans CJK SC"},
                 {"/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", "Noto Sans CJK SC"},
                 {"/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", "WenQuanYi Micro Hei"},
                 {"/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc", "WenQuanYi Micro Hei"},
         };
         final String[][] macFonts = {
                 {"/System/Library/Fonts/PingFang.ttc", "PingFang SC"},
                 {"/System/Library/Fonts/Helvetica.ttc", "Helvetica"},
         };

         try (OutputStream pdfStream = new FileOutputStream(outputPath)) {
             PdfRendererBuilder builder = new PdfRendererBuilder();
             builder.useFastMode();
             // The second argument is the base URI used to resolve relative resources; none is given.
             builder.withHtmlContent(xhtmlContent, null);
             builder.toStream(pdfStream);

             String osName = System.getProperty("os.name").toLowerCase();
             boolean onMac = osName.contains("mac");

             if (osName.contains("win")) {
                 for (String[] font : windowsFonts) {
                     tryRegisterFont(builder, font[0], font[1]);
                 }
             } else if (osName.contains("nix") || osName.contains("nux") || onMac) {
                 for (String[] font : unixFonts) {
                     tryRegisterFont(builder, font[0], font[1]);
                 }
                 if (onMac) {
                     for (String[] font : macFonts) {
                         tryRegisterFont(builder, font[0], font[1]);
                     }
                 }
             }

             builder.run();
         }

         System.out.println("PDF 已生成: " + outputPath);
     }
    
     /**
      * Re-serializes an HTML string as XHTML-compatible markup.
      *
      * @param html the HTML document to convert
      * @return the document serialized with XML syntax and XHTML entity escaping
      */
     private static String convertToXhtml(String html) {
         Document parsed = Jsoup.parse(html);

         Document.OutputSettings settings = parsed.outputSettings();
         settings.syntax(Document.OutputSettings.Syntax.xml);   // emit XML syntax
         settings.escapeMode(Entities.EscapeMode.xhtml);        // XHTML entity escaping
         settings.prettyPrint(false);                           // no pretty-printing whitespace

         return parsed.html();
     }
    
    
     /**
      * Reads a Markdown file (UTF-8) and converts it to an HTML fragment with GFM table support.
      *
      * @param markdownFilePath path of the Markdown file to read
      * @return the rendered HTML fragment
      * @throws IOException if the file cannot be read
      */
     private static String convertMarkdownToHtml(String markdownFilePath) throws IOException {
         // Read the whole file and decode it as UTF-8.
         byte[] bytes = Files.readAllBytes(Paths.get(markdownFilePath));
         String markdown = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);

         // Drop a leading byte-order mark (often written by Windows editors); left in place it
         // sits in front of the first line and can keep it from parsing as a heading.
         if (!markdown.isEmpty() && markdown.charAt(0) == '\uFEFF') {
             markdown = markdown.substring(1);
         }

         // Parse and render with CommonMark, with the tables extension on both sides.
         Parser parser = Parser.builder()
                 .extensions(Arrays.asList(TablesExtension.create()))
                 .build();

         HtmlRenderer renderer = HtmlRenderer.builder()
                 .extensions(Arrays.asList(TablesExtension.create()))
                 .build();

         return renderer.render(parser.parse(markdown));
     }
    
    
     /**
      * Wraps an HTML fragment in a complete HTML document with an embedded style sheet.
      *
      * <p>The font-family lists name every CJK family that the PDF renderer in this class
      * registers (SimSun, Microsoft YaHei, Noto Sans CJK SC, WenQuanYi Micro Hei, PingFang SC),
      * so Chinese text can resolve to a registered font on Windows, Linux and macOS. The
      * {@code code}/{@code pre} rules also list them, after the monospace fonts, so that
      * Chinese characters inside code have a font that contains their glyphs.
      *
      * @param htmlFragment the HTML to place inside {@code <body>}
      * @return the complete HTML document
      */
     private static String wrapWithCompleteHtml(String htmlFragment) {
         // Family names must match the names used when the fonts are registered with the renderer.
         String cjkFonts = "'Noto Sans SC', SimSun, 'Noto Sans CJK SC', 'Microsoft YaHei', "
                 + "'WenQuanYi Micro Hei', 'PingFang SC'";
         String bodyFonts = cjkFonts + ", Arial, sans-serif";
         String monoFonts = "Consolas, Monaco, monospace, " + cjkFonts;

         String css = String.join("\n",
                 "body {",
                 "    font-family: " + bodyFonts + ";",
                 "    margin: 40px;",
                 "    line-height: 1.6;",
                 "    color: #333;",
                 "    font-size: 12pt;",
                 "}",
                 "h1, h2, h3, h4, h5, h6 {",
                 "    color: #2c3e50;",
                 "    margin-top: 1.2em;",
                 "    margin-bottom: 0.6em;",
                 "}",
                 "h1 { font-size: 24pt; }",
                 "h2 { font-size: 20pt; }",
                 "h3 { font-size: 16pt; }",
                 "",
                 "p, ul, ol, pre, table {",
                 "    margin: 0 0 16px 0;",
                 "}",
                 "",
                 "a {",
                 "    color: #3498db;",
                 "    text-decoration: none;",
                 "}",
                 "",
                 "strong { font-weight: bold; }",
                 "em { font-style: italic; }",
                 "del { text-decoration: line-through; }",
                 "",
                 "/* 表格:自适应 + 边框 */",
                 "table {",
                 "    border-collapse: collapse;",
                 "    width: 100%;",
                 "    empty-cells: show;",
                 "}",
                 "th, td {",
                 "    border: 1px solid #999;",
                 "    padding: 8px 12px;",
                 "    text-align: left;",
                 "    vertical-align: top;",
                 "}",
                 "th {",
                 "    background-color: #f0f0f0;",
                 "    font-weight: bold;",
                 "}",
                 "",
                 "/* 代码块 */",
                 "code {",
                 "    font-family: " + monoFonts + ";",
                 "    background-color: #f8f8f8;",
                 "    padding: 2px 4px;",
                 "    border-radius: 3px;",
                 "    font-size: 11pt;",
                 "}",
                 "pre {",
                 "    background-color: #f8f8f8;",
                 "    padding: 12px;",
                 "    border-left: 4px solid #ccc;",
                 "    overflow: hidden;",
                 "    white-space: pre-wrap;",
                 "    font-family: " + monoFonts + ";",
                 "    font-size: 10pt;",
                 "}",
                 "",
                 "/* 列表 */",
                 "ul, ol {",
                 "    padding-left: 24px;",
                 "}",
                 "li {",
                 "    margin-bottom: 6px;",
                 "}",
                 "",
                 "/* 图片(可选) */",
                 "img {",
                 "    max-width: 100%;",
                 "    height: auto;",
                 "}") + "\n";

         // Assemble the document; the style sheet and the fragment are inserted verbatim.
         return "<!DOCTYPE html>\n"
                 + "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"
                 + "<head>\n"
                 + "    <meta charset=\"UTF-8\" />\n"
                 + "    <title>Markdown to PDF</title>\n"
                 + "    <style type=\"text/css\">\n"
                 + css
                 + "    </style>\n"
                 + "</head>\n"
                 + "<body>\n"
                 + "    " + htmlFragment + "\n"
                 + "</body>\n"
                 + "</html>";
     }
    
    
     /**
      * Registers a font file with the PDF renderer when the file exists; otherwise logs that
      * the font was not found and leaves the builder untouched.
      *
      * @param builder        the renderer builder to register the font with
      * @param fontPath       path of the font file
      * @param fontFamilyName family name under which the font is registered
      */
     private static void tryRegisterFont(PdfRendererBuilder builder, String fontPath, String fontFamilyName) {
         File candidate = new File(fontPath);

         // Guard clause: nothing to register when the file is missing.
         if (!candidate.exists()) {
             System.out.println("⚠️ 字体未找到: " + fontPath);
             return;
         }

         System.out.println("✅ 注册字体: " + fontFamilyName + " (" + fontPath + ")");
         builder.useFont(candidate, fontFamilyName);
     }
    
    
    
     /**
      * Replaces local image references in the HTML with inline Base64 data URIs.
      *
      * <p>Images that are already data URIs, remote (http/https) images and images whose file
      * cannot be found are left unchanged. Relative paths are resolved against the directory
      * of the Markdown file and must stay inside it.
      *
      * @param html             the HTML fragment to process
      * @param markdownFilePath path of the Markdown file the HTML was generated from
      * @return the body HTML with local images inlined
      * @throws IOException       if an image file cannot be read
      * @throws SecurityException if a relative image path escapes the Markdown directory
      */
     private static String embedLocalImagesAsDataUri(String html, String markdownFilePath) throws IOException {
         // Make the path absolute and normalized first: a bare file name such as "456.md" has
         // no parent otherwise (null -> NullPointerException below), and the traversal check
         // compares against normalized image paths.
         Path markdownBasePath = Paths.get(markdownFilePath).toAbsolutePath().normalize().getParent();
         Document doc = Jsoup.parse(html);
         for (Element img : doc.select("img")) {
             String src = img.attr("src");
             if (src == null || src.trim().isEmpty()) {
                 continue;
             }
             if (src.startsWith("data:")) {
                 continue; // already inlined
             }
             if (src.startsWith("http://") || src.startsWith("https://")) {
                 System.err.println("Warning: Remote image not embedded: " + src);
                 continue;
             }

             Path imgPath;
             // Absolute Windows path with a drive letter, written with either separator
             // (D:\img\a.png or D:/img/a.png).
             if (src.matches("^[A-Za-z]:[\\\\/].*")) {
                 imgPath = Paths.get(src.replace("\\", "/"));
             } else {
                 // Relative path: resolve against the Markdown file's directory.
                 imgPath = markdownBasePath.resolve(src).normalize();
                 // Reject paths that escape that directory.
                 if (!imgPath.startsWith(markdownBasePath)) {
                     throw new SecurityException("Invalid relative image path: " + src);
                 }
             }

             // Require a regular file: a directory exists but cannot be read as image bytes.
             if (!Files.isRegularFile(imgPath)) {
                 System.err.println("Image not found: " + imgPath);
                 continue;
             }

             byte[] imageBytes = Files.readAllBytes(imgPath);
             String mimeType = Files.probeContentType(imgPath);
             if (mimeType == null || !mimeType.startsWith("image/")) {
                 mimeType = "image/png"; // fallback when the type cannot be determined
             }
             String dataUri = "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(imageBytes);
             img.attr("src", dataUri);
         }
         return doc.body().html();
     }

    }

搭配二:flexmark + flying-saucer-pdf

  • flexmark :将markdown转为html

  • flying-saucer-pdf:将html转为pdf

  • jsoup:用来纠正html格式(因为markdown中填写的内容转换后不一定是格式合法的html,比如在代码块之外直接写了标签)

    复制代码
          <dependency>
              <groupId>com.vladsch.flexmark</groupId>
              <artifactId>flexmark-all</artifactId>
              <version>0.50.44</version>
          </dependency>
    
          <!-- Flying Saucer: HTML to PDF -->
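          <dependency>
              <groupId>org.xhtmlrenderer</groupId>
              <artifactId>flying-saucer-pdf</artifactId>
              <version>9.1.22</version>
          </dependency>

          <!-- jsoup: 纠正html格式(下面的代码中用到了 org.jsoup) -->
          <dependency>
              <groupId>org.jsoup</groupId>
              <artifactId>jsoup</artifactId>
              <version>1.17.2</version>
          </dependency>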

实现

  1. 将 Markdown 转换为 HTML 片段

  2. 将 HTML 中的本地图片嵌入为 Base64 Data URI(这个如果不需要图片的,可以不用加)

  3. 包装为完整的 HTML 文档(含 CSS 样式,css样式模板)

  4. 转换为 XHTML 兼容格式(用来保证转化的html格式正确)

  5. 将 XHTML 渲染为 PDF

    import com.lowagie.text.DocumentException;
    import com.lowagie.text.pdf.BaseFont;
    import com.vladsch.flexmark.ext.tables.TablesExtension;
    import com.vladsch.flexmark.html.HtmlRenderer;
    import com.vladsch.flexmark.parser.Parser;
    import com.vladsch.flexmark.util.data.MutableDataSet;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.nodes.Entities;
    import org.xhtmlrenderer.pdf.ITextFontResolver;
    import org.xhtmlrenderer.pdf.ITextRenderer;

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.Arrays;
    import java.util.Base64;

    public class TestPDF {

    复制代码
     /**
      * Converts a Markdown file to a PDF file.
      *
      * @param args optional: {@code args[0]} is the Markdown input path and {@code args[1]}
      *             the PDF output path; the sample paths are used for any missing argument
      * @throws Exception if reading, converting or writing fails
      */
     public static void main(String[] args) throws Exception {
         String inputPath = (args != null && args.length > 0)
                 ? args[0]
                 : "C:\\Users\\Administrator\\Desktop\\test111\\456.md";
         String outputPath = (args != null && args.length > 1)
                 ? args[1]
                 : "C:\\Users\\Administrator\\Desktop\\test111\\456.pdf";

         // 1. Convert the Markdown into an HTML fragment.
         String htmlFragment = convertMarkdownToHtml(inputPath);

         // 2. Inline local images as Base64 data URIs.
         String embeddedHtml = embedLocalImagesAsDataUri(htmlFragment, inputPath);

         // 3. Wrap the fragment in a complete HTML document (with CSS).
         String fullHtml = wrapWithCompleteHtml(embeddedHtml);

         // 4. Convert to XHTML-compatible markup.
         String xhtmlContent = convertToXhtml(fullHtml);

         // 5. Render the XHTML to PDF.
         renderXhtmlToPdf(xhtmlContent, outputPath);
     }
    
     /**
      * Renders XHTML content into a PDF file using Flying Saucer ({@code ITextRenderer}).
      *
      * <p>Chinese fonts are registered from well-known system locations for the current
      * operating system. A font file that is not present is skipped rather than aborting
      * the conversion.
      *
      * @param xhtmlContent the XHTML document to render
      * @param outputPath   path of the PDF file to write
      * @throws IOException      if the output file or a font file cannot be accessed
      * @throws RuntimeException wrapping a {@code DocumentException} raised while loading
      *                          fonts or creating the PDF
      */
     private static void renderXhtmlToPdf(String xhtmlContent, String outputPath) throws IOException {
         try (OutputStream out = new FileOutputStream(outputPath)) {
             ITextRenderer pdfRenderer = new ITextRenderer();
             // Register Chinese fonts with the renderer.
             ITextFontResolver fontResolver = pdfRenderer.getFontResolver();

             String osName = System.getProperty("os.name").toLowerCase();
             if (osName.contains("win")) {
                 // Windows: SimSun and Microsoft YaHei.
                 addFontIfPresent(fontResolver, "C:/Windows/Fonts/simsun.ttc");
                 addFontIfPresent(fontResolver, "C:/Windows/Fonts/msyh.ttc");
             } else if (osName.contains("nix") || osName.contains("nux")) {
                 // Linux: Noto Sans CJK and WenQuanYi Micro Hei.
                 addFontIfPresent(fontResolver, "/usr/share/fonts/noto/NotoSansCJK-Regular.ttc");
                 addFontIfPresent(fontResolver, "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc");
             } else if (osName.contains("mac")) {
                 // macOS: PingFang.
                 addFontIfPresent(fontResolver, "/System/Library/Fonts/PingFang.ttc");
             }

             pdfRenderer.setDocumentFromString(xhtmlContent);
             pdfRenderer.layout();
             pdfRenderer.createPDF(out);
         } catch (DocumentException e) {
             throw new RuntimeException(e);
         }

         System.out.println("PDF 已生成: " + outputPath);
     }

     /**
      * Adds a font file to the resolver if it exists; a missing file is reported and skipped.
      */
     private static void addFontIfPresent(ITextFontResolver fontResolver, String fontPath)
             throws IOException, DocumentException {
         if (!Files.isRegularFile(Paths.get(fontPath))) {
             System.out.println("⚠️ 字体未找到: " + fontPath);
             return;
         }
         fontResolver.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
     }
    
     /**
      * Re-serializes an HTML string as XHTML-compatible markup.
      *
      * @param html the HTML document to convert
      * @return the document serialized with XML syntax and XHTML entity escaping
      */
     private static String convertToXhtml(String html) {
         Document parsed = Jsoup.parse(html);

         Document.OutputSettings settings = parsed.outputSettings();
         settings.syntax(Document.OutputSettings.Syntax.xml);   // emit XML syntax
         settings.escapeMode(Entities.EscapeMode.xhtml);        // XHTML entity escaping
         settings.prettyPrint(false);                           // no pretty-printing whitespace

         return parsed.html();
     }
    
    
     /**
      * Reads a Markdown file (UTF-8) and converts it to an HTML fragment with table support.
      *
      * @param markdownFilePath path of the Markdown file to read
      * @return the rendered HTML fragment
      * @throws IOException if the file cannot be read
      */
     private static String convertMarkdownToHtml(String markdownFilePath) throws IOException {
         // Read the whole file and decode it as UTF-8.
         byte[] bytes = Files.readAllBytes(Paths.get(markdownFilePath));
         String markdown = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);

         // Drop a leading byte-order mark (often written by Windows editors); left in place it
         // sits in front of the first line and can keep it from parsing as a heading.
         if (!markdown.isEmpty() && markdown.charAt(0) == '\uFEFF') {
             markdown = markdown.substring(1);
         }

         // Markdown -> HTML with flexmark; the tables extension is shared through the options.
         MutableDataSet options = new MutableDataSet();
         options.set(Parser.EXTENSIONS, Arrays.asList(TablesExtension.create()));

         Parser parser = Parser.builder(options).build();
         HtmlRenderer renderer = HtmlRenderer.builder(options).build();
         return renderer.render(parser.parse(markdown));
     }
    
    
     /**
      * Wraps an HTML fragment in a complete HTML document with an embedded style sheet.
      *
      * <p>The font-family lists name the CJK families of the font files that the PDF renderer
      * in this class loads (SimSun, Microsoft YaHei, Noto Sans CJK SC, WenQuanYi Micro Hei,
      * PingFang SC), so Chinese text can resolve to a loaded font on Windows, Linux and macOS.
      * In {@code code}/{@code pre} the CJK families come first, as in the original style sheet.
      *
      * @param htmlFragment the HTML to place inside {@code <body>}
      * @return the complete HTML document
      */
     private static String wrapWithCompleteHtml(String htmlFragment) {
         // Family names must match the family names of the fonts added to the font resolver.
         String cjkFonts = "'Noto Sans SC', SimSun, 'Noto Sans CJK SC', 'Microsoft YaHei', "
                 + "'WenQuanYi Micro Hei', 'PingFang SC'";
         String bodyFonts = cjkFonts + ", Arial, sans-serif";
         String monoFonts = cjkFonts + ", Consolas, Monaco, monospace";

         String css = String.join("\n",
                 "body {",
                 "    font-family: " + bodyFonts + ";",
                 "    margin: 40px;",
                 "    line-height: 1.6;",
                 "    color: #333;",
                 "    font-size: 12pt;",
                 "}",
                 "h1, h2, h3, h4, h5, h6 {",
                 "    color: #2c3e50;",
                 "    margin-top: 1.2em;",
                 "    margin-bottom: 0.6em;",
                 "}",
                 "h1 { font-size: 24pt; }",
                 "h2 { font-size: 20pt; }",
                 "h3 { font-size: 16pt; }",
                 "",
                 "p, ul, ol, pre, table {",
                 "    margin: 0 0 16px 0;",
                 "}",
                 "",
                 "a {",
                 "    color: #3498db;",
                 "    text-decoration: none;",
                 "}",
                 "",
                 "strong { font-weight: bold; }",
                 "em { font-style: italic; }",
                 "del { text-decoration: line-through; }",
                 "",
                 "/* 表格:自适应 + 边框 */",
                 "table {",
                 "    border-collapse: collapse;",
                 "    width: 100%;",
                 "    empty-cells: show;",
                 "}",
                 "th, td {",
                 "    border: 1px solid #999;",
                 "    padding: 8px 12px;",
                 "    text-align: left;",
                 "    vertical-align: top;",
                 "}",
                 "th {",
                 "    background-color: #f0f0f0;",
                 "    font-weight: bold;",
                 "}",
                 "",
                 "/* 代码块 */",
                 "code {",
                 "    font-family: " + monoFonts + " !important;",
                 "    background-color: #f8f8f8;",
                 "    padding: 2px 4px;",
                 "    border-radius: 3px;",
                 "    font-size: 11pt;",
                 "}",
                 "pre {",
                 "    font-family: " + monoFonts + " !important;",
                 "    background-color: #f8f8f8;",
                 "    padding: 12px;",
                 "    border-left: 4px solid #ccc;",
                 "    overflow: hidden;",
                 "    white-space: pre-wrap;",
                 "    font-size: 10pt;",
                 "}",
                 "",
                 "/* 列表 */",
                 "ul, ol {",
                 "    padding-left: 24px;",
                 "}",
                 "li {",
                 "    margin-bottom: 6px;",
                 "}",
                 "",
                 "/* 图片(可选) */",
                 "img {",
                 "    max-width: 100%;",
                 "    height: auto;",
                 "}") + "\n";

         // Assemble the document; the style sheet and the fragment are inserted verbatim.
         return "<!DOCTYPE html>\n"
                 + "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"
                 + "<head>\n"
                 + "    <meta charset=\"UTF-8\" />\n"
                 + "    <title>Markdown to PDF</title>\n"
                 + "    <style type=\"text/css\">\n"
                 + css
                 + "    </style>\n"
                 + "</head>\n"
                 + "<body>\n"
                 + "    " + htmlFragment + "\n"
                 + "</body>\n"
                 + "</html>";
     }
    
    
     /**
      * Replaces local image references in the HTML with inline Base64 data URIs.
      *
      * <p>Images that are already data URIs, remote (http/https) images and images whose file
      * cannot be found are left unchanged. Relative paths are resolved against the directory
      * of the Markdown file and must stay inside it.
      *
      * @param html             the HTML fragment to process
      * @param markdownFilePath path of the Markdown file the HTML was generated from
      * @return the body HTML with local images inlined
      * @throws IOException       if an image file cannot be read
      * @throws SecurityException if a relative image path escapes the Markdown directory
      */
     private static String embedLocalImagesAsDataUri(String html, String markdownFilePath) throws IOException {
         // Make the path absolute and normalized first: a bare file name such as "456.md" has
         // no parent otherwise (null -> NullPointerException below), and the traversal check
         // compares against normalized image paths.
         Path markdownBasePath = Paths.get(markdownFilePath).toAbsolutePath().normalize().getParent();
         Document doc = Jsoup.parse(html);
         for (Element img : doc.select("img")) {
             String src = img.attr("src");
             if (src == null || src.trim().isEmpty()) {
                 continue;
             }
             if (src.startsWith("data:")) {
                 continue; // already inlined
             }
             if (src.startsWith("http://") || src.startsWith("https://")) {
                 System.err.println("Warning: Remote image not embedded: " + src);
                 continue;
             }

             Path imgPath;
             // Absolute Windows path with a drive letter, written with either separator
             // (D:\img\a.png or D:/img/a.png).
             if (src.matches("^[A-Za-z]:[\\\\/].*")) {
                 imgPath = Paths.get(src.replace("\\", "/"));
             } else {
                 // Relative path: resolve against the Markdown file's directory.
                 imgPath = markdownBasePath.resolve(src).normalize();
                 // Reject paths that escape that directory.
                 if (!imgPath.startsWith(markdownBasePath)) {
                     throw new SecurityException("Invalid relative image path: " + src);
                 }
             }

             // Require a regular file: a directory exists but cannot be read as image bytes.
             if (!Files.isRegularFile(imgPath)) {
                 System.err.println("Image not found: " + imgPath);
                 continue;
             }

             byte[] imageBytes = Files.readAllBytes(imgPath);
             String mimeType = Files.probeContentType(imgPath);
             if (mimeType == null || !mimeType.startsWith("image/")) {
                 mimeType = "image/png"; // fallback when the type cannot be determined
             }
             String dataUri = "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(imageBytes);
             img.attr("src", dataUri);
         }
         return doc.body().html();
     }

    }

相关推荐
柏林以东_2 小时前
线程安全的数据集合
java·开发语言·安全
sunnyday04262 小时前
Nginx与Spring Cloud Gateway QPS统计全攻略
java·spring boot·后端·nginx
海南java第二人2 小时前
Spring Boot全局异常处理终极指南:打造优雅的API错误响应体系
java·spring boot·后端
南朝雨2 小时前
Spring Boot Admin日志监控坑点:远程配置的logging.file.name为何生效又失效?
java·spring boot·spring cloud·微服务·logback
sanggou2 小时前
Spring Cloud Gateway 转发 SSE 的那些坑
java
それども2 小时前
理解 Java21 虚拟线程
java
毕设源码-赖学姐2 小时前
【开题答辩全过程】以 基于JAVA的宠物医院管理系统的设计为例,包含答辩的问题和答案
java·开发语言
开开心心_Every2 小时前
安卓语音转文字工具:免费支持实时转换视频
python·游戏·微信·django·pdf·excel·语音识别
Kratzdisteln2 小时前
【1902】0121-1 Dify工作流节点详细配置(方案B最终版)
java·前端·javascript