使用Apache POI将Word转HTML

java 复制代码
package com.shengun.utils;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

public class WordToHtmlTable {


    public static void main(String[] args) throws Exception {
        String path = "C:/Users/28916/Desktop/aaa/";
        try {
            convertToHtml(path + "a.doc", path + "sample.html");
            System.out.println("Word to HTML conversion successful!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * 使用Apache POI将Word转HTML
     * @param docFilePath
     * @param htmlFilePath
     * @throws Exception
     */
    public static void convertToHtml(String docFilePath, String htmlFilePath) throws Exception {
        InputStream input = new FileInputStream(new File(docFilePath));
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty("encoding", "UTF-8");
        serializer.setOutputProperty("indent", "yes");
        serializer.setOutputProperty("method", "html");
        serializer.transform(domSource, streamResult);

        out.close();

        OutputStream outputStream = new FileOutputStream(htmlFilePath);
        outputStream.write(out.toByteArray());
        outputStream.close();
    }




}
相关推荐
塔塔开!.3 小时前
HTML中的padding和margin
前端·html
儒雅的烤地瓜4 小时前
CSS | 面试题:你知道几种移动端适配方案?
css·html·css3·vw·媒体查询·移动端适配·vh
有盐、在见4 小时前
CSS画出三角形的做法
前端·css·html
等什么君!4 小时前
复习HTML(基础)
前端·html
喻师傅4 小时前
Apache Iceberg 与 Spark整合-使用教程(Iceberg 官方文档解析)
大数据·spark·apache·iceberg·数据湖
武子康10 小时前
大数据-152 Apache Druid 集群模式 配置启动【下篇】 超详细!
java·大数据·clickhouse·flink·apache
灵感__idea12 小时前
HTML并不简单:自带交互的元素
前端·html
倩倩_ICE_王王13 小时前
PHP安装后Apache无法运行的问题
开发语言·php·apache·httpd.conf
智慧的牛17 小时前
java往word中添加水印,往excel中添加图片
java·pdf·word·excel·水印
Jason-河山18 小时前
Java中的HTTP请求:使用Apache HttpClient
java·http·apache