使用Apache POI将Word转HTML

java 复制代码
package com.shengun.utils;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

public class WordToHtmlTable {


    public static void main(String[] args) throws Exception {
        String path = "C:/Users/28916/Desktop/aaa/";
        try {
            convertToHtml(path + "a.doc", path + "sample.html");
            System.out.println("Word to HTML conversion successful!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * 使用Apache POI将Word转HTML
     * @param docFilePath
     * @param htmlFilePath
     * @throws Exception
     */
    public static void convertToHtml(String docFilePath, String htmlFilePath) throws Exception {
        InputStream input = new FileInputStream(new File(docFilePath));
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty("encoding", "UTF-8");
        serializer.setOutputProperty("indent", "yes");
        serializer.setOutputProperty("method", "html");
        serializer.transform(domSource, streamResult);

        out.close();

        OutputStream outputStream = new FileOutputStream(htmlFilePath);
        outputStream.write(out.toByteArray());
        outputStream.close();
    }




}
相关推荐
强化生物科研小助手19 小时前
CAS:47623-98-3,DiSBAC2(3)一种慢反应的膜电位敏感探针
django·html·pygame
木木黄木木21 小时前
HTML5图片裁剪工具实现详解
前端·html·html5
FanetheDivine1 天前
正确使用flex-1
css·html
漂洋过海cv1 天前
Word 插入无页眉页码的空白页(即插入奇数页)
word
response_L1 天前
国产系统统信uos和麒麟v10在线打开word给表格赋值
java·c#·word·信创·在线编辑
木木黄木木1 天前
HTML5重力球动画实现详解
前端·html·html5
hello_simon1 天前
在线小白工具,PPT转PDF支持多种热门工具,支持批量转换,操作简单,高效适合各种需求
pdf·html·powerpoint·excel·pdf转html·excel转pdf格式
zhougl9961 天前
html处理Base文件流
linux·前端·html
木木黄木木1 天前
html5炫酷图片悬停效果实现详解
前端·html·html5
ZhangChuChu_92481 天前
Word在生成PDF后,PDF左侧导航书签目录错误显示的解决方法
pdf·word