Maven 依赖(添加到 pom.xml):
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.3</version>
</dependency>
import org.apache.poi.xwpf.usermodel.*;
import java.io.*;
/**
 * Converts a Word document into a minimal HTML page using Apache POI's XWPF model.
 *
 * <p>Only paragraphs and tables are rendered. All paragraphs returned by
 * {@code XWPFDocument.getParagraphs()} are emitted first (each as a {@code <p>}), followed by
 * all tables returned by {@code XWPFDocument.getTables()} (each as a
 * {@code <table border='1'>}); the relative order of paragraphs and tables in the source
 * document is therefore not preserved. Run formatting (bold, fonts, colors, ...) is ignored.
 *
 * <p>All text taken from the document is HTML-escaped before being written to the output.
 */
public class WordToHtmlConverter {

    /**
     * Converts the Word file at the given path to an HTML string.
     *
     * @param filePath path of the document to read
     * @return an HTML page containing the document's paragraphs and tables
     * @throws IOException if the file cannot be opened or read
     */
    public static String convertWordToHtml(String filePath) throws IOException {
        // Delegate to the stream overload so both entry points share one implementation.
        try (FileInputStream fis = new FileInputStream(filePath)) {
            return convertWordToHtml(fis);
        }
    }

    /**
     * Converts the Word document read from the given stream to an HTML string.
     *
     * @param inputStream stream positioned at the start of the document
     * @return an HTML page containing the document's paragraphs and tables
     * @throws IOException if the document cannot be read
     */
    public static String convertWordToHtml(InputStream inputStream) throws IOException {
        StringBuilder htmlContent = new StringBuilder("<html><body>");
        try (XWPFDocument document = new XWPFDocument(inputStream)) {
            // Paragraphs first.
            for (XWPFParagraph paragraph : document.getParagraphs()) {
                htmlContent.append("<p>")
                        .append(escapeHtml(getParagraphText(paragraph)))
                        .append("</p>");
            }
            // Then tables.
            for (XWPFTable table : document.getTables()) {
                appendTable(htmlContent, table);
            }
        }
        htmlContent.append("</body></html>");
        return htmlContent.toString();
    }

    /** Appends one table as {@code <table border='1'>} with a {@code <tr>}/{@code <td>} per row/cell. */
    private static void appendTable(StringBuilder htmlContent, XWPFTable table) {
        htmlContent.append("<table border='1'>");
        for (XWPFTableRow row : table.getRows()) {
            htmlContent.append("<tr>");
            for (XWPFTableCell cell : row.getTableCells()) {
                htmlContent.append("<td>");
                // A cell may hold several paragraphs; their text is concatenated as-is.
                for (XWPFParagraph paragraph : cell.getParagraphs()) {
                    htmlContent.append(escapeHtml(getParagraphText(paragraph)));
                }
                htmlContent.append("</td>");
            }
            htmlContent.append("</tr>");
        }
        htmlContent.append("</table>");
    }

    /**
     * Returns the concatenated raw (unescaped) text of all runs in the paragraph.
     *
     * @param paragraph the paragraph to read
     * @return the paragraph text; empty if no run carries text
     */
    private static String getParagraphText(XWPFParagraph paragraph) {
        StringBuilder text = new StringBuilder();
        for (XWPFRun run : paragraph.getRuns()) {
            String runText = run.getText(0);
            // getText(0) yields null for runs without text; skip them so the
            // literal string "null" never ends up in the output.
            if (runText != null) {
                text.append(runText);
            }
        }
        return text.toString();
    }

    /** Escapes the characters that are significant in HTML text and attribute values. */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Demo entry point: converts the file named by the first argument (or {@code "xxx.doc"}
     * when no argument is given) and prints the resulting HTML to standard output.
     *
     * <p>NOTE(review): the XWPF classes read the OOXML ({@code .docx}) format; a legacy
     * binary {@code .doc} file is expected to be rejected — confirm against the POI docs.
     *
     * @param args optional: {@code args[0]} is the path of the document to convert
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "xxx.doc";
        try {
            String html = convertWordToHtml(path);
            System.out.println(html);
        } catch (IOException e) {
            System.err.println("Failed to convert " + path);
            e.printStackTrace();
        }
    }
}
word文档转html(只支持段落和表格)
李李李李李李等待,李李李李李李哒哒 · 2024-08-31 10:07
相关推荐
她的男孩8 小时前
Spring Boot 接 Flowable 工作流:用 3 个注解搭一个请假审批流程荣码10 小时前
LLM结构化输出:让AI返回JSON而不是废话,我踩了4个坑plainGeekDev11 小时前
Gson → kotlinx.serialization小bo波20 小时前
Java Swing 图形用户界面实验 —— 从算术练习到游戏开发的完整实践咖啡八杯21 小时前
GoF设计模式——备忘录模式SamDeepThinking1 天前
裁掉那个差程序员后,给你看团队里高手的代码:这个习惯,希望你有朕瞧着你甚好1 天前
技术雷达 & Java 集成评估报告 — Apache Tika 3.3.1MacroZheng1 天前
短短几天,暴涨2.8万Star!又一款编程神器开源!