使用Apache POI将Word转HTML

java 复制代码
package com.shengun.utils;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

public class WordToHtmlTable {


    public static void main(String[] args) throws Exception {
        String path = "C:/Users/28916/Desktop/aaa/";
        try {
            convertToHtml(path + "a.doc", path + "sample.html");
            System.out.println("Word to HTML conversion successful!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * 使用Apache POI将Word转HTML
     * @param docFilePath
     * @param htmlFilePath
     * @throws Exception
     */
    public static void convertToHtml(String docFilePath, String htmlFilePath) throws Exception {
        InputStream input = new FileInputStream(new File(docFilePath));
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty("encoding", "UTF-8");
        serializer.setOutputProperty("indent", "yes");
        serializer.setOutputProperty("method", "html");
        serializer.transform(domSource, streamResult);

        out.close();

        OutputStream outputStream = new FileOutputStream(htmlFilePath);
        outputStream.write(out.toByteArray());
        outputStream.close();
    }




}
相关推荐
封印师请假去地球钓鱼8 小时前
问题解决|Word大纲层级混乱?在word导航窗格中调整目录缩进
word
汽车芯猿8 小时前
Word频繁崩溃?一步解决 Office 加载项冲突
word
东小黑8 小时前
一些论文word格式
word
T0uken9 小时前
【Python】docxnote:优雅的 Word 批注
开发语言·python·word
小J听不清21 小时前
CSS 外边距(margin)全解析:取值规则 + 实战用法
前端·javascript·css·html·css3
小J听不清1 天前
CSS 边框(border)全解析:样式 / 宽度 / 颜色 / 方向取值
前端·javascript·css·html·css3
Suchadar1 天前
源码编译Apache
apache
热爱生活的五柒1 天前
word中如何一键修改英文字母数字为新罗马字体Times New Roman
word·西文·times new roman
赵锦川1 天前
大屏比例缩放
前端·javascript·html
我命由我123451 天前
Element Plus 2.2.27 的单选框 Radio 组件,选中一个选项后,全部选项都变为选中状态
开发语言·前端·javascript·html·ecmascript·html5·js