实现富文本(即 HTML)到 Markdown 的转换,要求展示效果尽可能一致,允许有少许误差。另外,目前只实现了 HTML 中 body 部分到 Markdown 的转换,head 等其他标签暂未实现。
大致思路是:通过 jsoup 工具解析 HTML 并获取节点,再按标签逐一穷举替换为对应的 Markdown 语法。前提是熟悉 HTML 以及 Markdown 语法。
依赖如下:
XML
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.16.1</version>
</dependency>
代码如下:
java
import java.util.ArrayList;
import java.util.List;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
/**
 * Converts the {@code <body>} part of an HTML (rich text) fragment to Markdown.
 *
 * <p>The HTML is parsed with jsoup and the resulting node tree is walked in document order, so
 * text that is interleaved with inline elements keeps its original position. Only a fixed set of
 * tags is mapped to Markdown syntax (headings, paragraphs, {@code div}, lists, links, bold,
 * italic, block quotes, images and line breaks); any other tag contributes only the rendering of
 * its content. Markdown special characters in the text are not escaped.
 */
public class Html2MarkdownUtil {

    /** The first {@code n} characters form the Markdown marker of an {@code <hn>} heading. */
    private static final String HEADING_MARKS = "######";

    /**
     * Small demo: converts a bold paragraph with a nested {@code <font>} and prints the result
     * ({@code **Helloworld**}).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String html = "<p style=\"padding: 0; margin: 10px 0; line-height: 1.5; font-size: 16px;\"><b style=\"padding: 0; margin: 0;\">Hello<font color=\"#c24f4a\" style=\"padding: 0; margin: 0;\">world</font></b></p>";
        System.out.println(parseHtml2Markdown(html));
    }

    /**
     * Parses an HTML string and converts the content of its body to Markdown.
     *
     * <p>Text nodes placed directly under the body are converted as well. Every block-level
     * element (see {@link HtmlElement#isNewline()}) is terminated by a line feed.
     *
     * @param html HTML source; may be {@code null} or empty
     * @return the Markdown text, or an empty string when {@code html} is {@code null} or empty
     */
    public static String parseHtml2Markdown(String html) {
        if (html == null || html.isEmpty()) {
            // Jsoup.parse rejects null; an absent document simply has no Markdown.
            return "";
        }
        Document doc = Jsoup.parse(html);
        return renderChildren(doc.body());
    }

    /**
     * Renders a single element, including all of its descendants, as Markdown.
     *
     * <p>The result does not contain the trailing line feed that separates block-level elements;
     * that terminator is added by the caller ({@link HtmlElement#getMarkdownText()} or the
     * internal tree walk). List output is the exception: every list item ends with a line feed.
     *
     * @param element element to convert; must not be {@code null}
     * @return the Markdown text of the element
     */
    public static String toMarkdownText(Element element) {
        String tagName = element.normalName();
        switch (tagName) {
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
                // The digit after 'h' is the heading level, i.e. the number of '#' characters.
                return HEADING_MARKS.substring(0, tagName.charAt(1) - '0')
                        + " " + renderChildren(element).trim();
            case "p":
            case "div":
                return renderChildren(element).trim();
            case "ul":
                return renderList(element, false);
            case "ol":
                return renderList(element, true);
            case "a":
                return "[" + renderChildren(element).trim() + "](" + element.attr("href") + ")";
            case "strong":
            case "b":
                return emphasize("**", renderChildren(element));
            case "em":
            case "i":
                return emphasize("_", renderChildren(element));
            case "blockquote":
                // Every line of the quoted content needs its own marker.
                return "> " + renderChildren(element).trim().replace("\n", "\n> ");
            case "img":
                // attr() returns an empty string for a missing attribute.
                return "![" + element.attr("alt") + "](" + element.attr("src") + ")";
            case "br":
                // Two trailing spaces followed by a line feed form a Markdown hard line break.
                return "  \n";
            default:
                // font, span and every unsupported tag: keep the content, drop the tag itself.
                return renderChildren(element);
        }
    }

    /** Tells whether a tag is rendered as a block, i.e. on its own line(s). */
    private static boolean isBlockTag(String tagName) {
        return StringUtils.equalsAny(tagName,
                "h1", "h2", "h3", "h4", "h5", "h6", "p", "ul", "ol", "blockquote", "div");
    }

    /** Tells whether a node is an element that is rendered as a block. */
    private static boolean isBlockNode(Node node) {
        return node instanceof Element && isBlockTag(((Element) node).normalName());
    }

    /** Renders the text and element children of {@code parent} in document order. */
    private static String renderChildren(Element parent) {
        StringBuilder sb = new StringBuilder();
        for (Node node : parent.childNodes()) {
            if (node instanceof TextNode) {
                appendText(sb, (TextNode) node);
            } else if (node instanceof Element) {
                appendElement(sb, (Element) node);
            }
            // Other node kinds (comments, script/style data) have no Markdown representation.
        }
        return sb.toString();
    }

    /** Appends a text node, dropping whitespace that only exists to format the HTML source. */
    private static void appendText(StringBuilder sb, TextNode textNode) {
        if (textNode.isBlank()
                && (isBlockNode(textNode.previousSibling()) || isBlockNode(textNode.nextSibling()))) {
            // Indentation / line feeds between block elements are not content.
            return;
        }
        String text = textNode.text();
        if (sb.length() > 0 && sb.charAt(sb.length() - 1) == '\n') {
            // Avoid starting a fresh output line with the whitespace that followed a block.
            int start = 0;
            while (start < text.length() && text.charAt(start) <= ' ') {
                start++;
            }
            text = text.substring(start);
        }
        sb.append(text);
    }

    /** Appends a child element; block elements start on a new line and end with a line feed. */
    private static void appendElement(StringBuilder sb, Element child) {
        if (isBlockTag(child.normalName())) {
            startNewLine(sb);
            sb.append(toMarkdownText(child)).append("\n");
        } else {
            sb.append(toMarkdownText(child));
        }
    }

    /** Makes sure the next appended text starts at the beginning of a line. */
    private static void startNewLine(StringBuilder sb) {
        int end = sb.length();
        // Spaces right before the break carry no meaning; a hard break ("  \n") already ends
        // with a line feed and is therefore left untouched.
        while (end > 0 && sb.charAt(end - 1) == ' ') {
            end--;
        }
        sb.setLength(end);
        if (end > 0 && sb.charAt(end - 1) != '\n') {
            sb.append('\n');
        }
    }

    /**
     * Wraps {@code inner} in an emphasis marker. Surrounding whitespace is kept outside the
     * markers and blank content is returned unchanged, because a marker next to whitespace is
     * not recognised as emphasis by Markdown renderers.
     */
    private static String emphasize(String marker, String inner) {
        int start = 0;
        while (start < inner.length() && inner.charAt(start) <= ' ') {
            start++;
        }
        int end = inner.length();
        while (end > start && inner.charAt(end - 1) <= ' ') {
            end--;
        }
        if (start == end) {
            return inner;
        }
        return inner.substring(0, start) + marker + inner.substring(start, end) + marker
                + inner.substring(end);
    }

    /**
     * Renders the children of a {@code <ul>} / {@code <ol>} as list items, one per child element.
     * Inline formatting inside an item is preserved; further lines of an item (for example a
     * nested list) are indented to the width of the item marker so they stay part of the item.
     */
    private static String renderList(Element list, boolean ordered) {
        StringBuilder sb = new StringBuilder();
        int index = 1;
        for (Element child : list.children()) {
            String marker = ordered ? index + ". " : "* ";
            String content = "li".equals(child.normalName())
                    ? renderChildren(child)
                    : toMarkdownText(child);
            String[] lines = content.trim().split("\n", -1);
            String indent = StringUtils.repeat(" ", marker.length());
            sb.append(marker).append(lines[0]).append("\n");
            for (int i = 1; i < lines.length; i++) {
                if (!lines[i].isEmpty()) {
                    sb.append(indent).append(lines[i]);
                }
                sb.append("\n");
            }
            index++;
        }
        return sb.toString();
    }

    /**
     * Wrapper around a jsoup {@link Element} that exposes its tag name, own text, block flag and
     * wrapped child elements, and renders the element as Markdown.
     *
     * <p>Accessors are generated by Lombok's {@code @Data}.
     */
    @Data
    public static class HtmlElement {
        /** The wrapped jsoup element. */
        private Element element;
        /** Lower-case tag name of the element. */
        private String tagName;
        /** Text owned directly by the element, excluding text of child elements. */
        private String ownText;
        /** Whether a line feed is appended after the element's Markdown. */
        private boolean isNewline;
        /**
         * Wrapped child elements; {@code null} when the element has no child elements or is a
         * list ({@code ul} / {@code ol}). Rendering does not depend on this list.
         */
        private List<HtmlElement> children;

        /**
         * Wraps {@code element} and, except for lists, recursively wraps its child elements.
         *
         * @param element element to wrap; must not be {@code null}
         */
        public HtmlElement(Element element) {
            this.element = element;
            this.tagName = element.normalName();
            this.ownText = element.ownText();
            this.isNewline = isNewline();
            if (!StringUtils.equalsAny(this.tagName, "ul", "ol")) {
                Elements children = element.children();
                if (!children.isEmpty()) {
                    this.children = new ArrayList<>();
                    for (Element child : children) {
                        this.children.add(new HtmlElement(child));
                    }
                }
            }
        }

        /**
         * Tells whether the tag is a block-level tag (headings, {@code p}, {@code div},
         * {@code ul}, {@code ol}, {@code blockquote}) whose Markdown is followed by a line feed.
         *
         * @return {@code true} for block-level tags
         */
        public boolean isNewline() {
            return isBlockTag(tagName);
        }

        /**
         * Renders the wrapped element and all of its descendants as Markdown.
         *
         * @return the Markdown text, followed by a line feed when the newline flag is set
         */
        public String getMarkdownText() {
            String markdown = toMarkdownText(element);
            return isNewline ? markdown + "\n" : markdown;
        }
    }
}