使用Apache POI将Word转HTML

java 复制代码
package com.shengun.utils;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

public class WordToHtmlTable {


    public static void main(String[] args) throws Exception {
        String path = "C:/Users/28916/Desktop/aaa/";
        try {
            convertToHtml(path + "a.doc", path + "sample.html");
            System.out.println("Word to HTML conversion successful!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * 使用Apache POI将Word转HTML
     * @param docFilePath
     * @param htmlFilePath
     * @throws Exception
     */
    public static void convertToHtml(String docFilePath, String htmlFilePath) throws Exception {
        InputStream input = new FileInputStream(new File(docFilePath));
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty("encoding", "UTF-8");
        serializer.setOutputProperty("indent", "yes");
        serializer.setOutputProperty("method", "html");
        serializer.transform(domSource, streamResult);

        out.close();

        OutputStream outputStream = new FileOutputStream(htmlFilePath);
        outputStream.write(out.toByteArray());
        outputStream.close();
    }




}
相关推荐
Metaphor6922 天前
使用 Python 将 PDF 转换为 HTML
python·pdf·html
a1117762 天前
“黑夜流星“个人引导页 网页html
java·前端·html
小小龙学IT2 天前
Apache Airflow 2.x 深度指南:用 Python 编排一切的现代化工作流引擎
开发语言·python·apache
JieE2122 天前
手把手带你用纯 CSS 实现一个 3D 旋转魔方,这些前端基础你能打几分?
前端·css·html
Shepherd06192 天前
【IT 运维】Apache 使用 mod_remoteip 恢复 Cloudflare 后的真实访客 IP
运维·tcp/ip·apache
YHL2 天前
🧊 CSS 3D 硬核解析:四个属性手写旋转立方体
前端·css·html
a1117762 天前
无限森林漫游(简约几何版 html
前端·html
qq_546937272 天前
Excel批量转PDF_Word_图片,支持自动合并报表,效率翻倍。
pdf·word·excel
isyangli_blog2 天前
SDN 基本应用实践 —— 使用命令行实现简易防火墙功能实验报告
服务器·php·apache
LaughingZhu2 天前
Product Hunt 每日热榜 | 2026-06-16
前端·人工智能·经验分享·chatgpt·html