java
package com.shengun.utils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
/**
 * Small command-line utility that converts a binary Word document ({@code .doc})
 * into an HTML file using Apache POI's {@link WordToHtmlConverter}.
 */
public class WordToHtmlTable {

    /** Directory used for the input and output files when no arguments are supplied. */
    private static final String DEFAULT_DIR = "C:/Users/28916/Desktop/aaa/";

    /**
     * Converts a Word document to HTML and reports the outcome on the console.
     *
     * <p>Failures are reported by printing the stack trace; they are not rethrown.
     *
     * @param args optional paths: {@code args[0]} is the input {@code .doc} file and
     *             {@code args[1]} is the output HTML file; any path that is not supplied
     *             falls back to {@code a.doc} / {@code sample.html} inside the default directory
     * @throws Exception declared for backward compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        String docFilePath = (args != null && args.length > 0) ? args[0] : DEFAULT_DIR + "a.doc";
        String htmlFilePath = (args != null && args.length > 1) ? args[1] : DEFAULT_DIR + "sample.html";
        try {
            convertToHtml(docFilePath, htmlFilePath);
            System.out.println("Word to HTML conversion successful!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts a Word document to an HTML file using Apache POI.
     *
     * <p>The HTML is serialized into memory first, so the output file is only opened
     * once the conversion itself has succeeded.
     *
     * @param docFilePath  path of the Word document to read
     * @param htmlFilePath path of the HTML file to write
     * @throws Exception if the document cannot be read or converted, or the HTML file
     *                   cannot be written
     */
    public static void convertToHtml(String docFilePath, String htmlFilePath) throws Exception {
        Document htmlDocument;
        // try-with-resources guarantees the input file handle is released even when
        // parsing or conversion fails.
        try (InputStream input = new FileInputStream(new File(docFilePath))) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.processDocument(wordDocument);
            htmlDocument = wordToHtmlConverter.getDocument();
        }

        // Serialize the DOM tree as indented UTF-8 HTML into an in-memory buffer.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty("encoding", "UTF-8");
        serializer.setOutputProperty("indent", "yes");
        serializer.setOutputProperty("method", "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(buffer));

        // Close the output file even if the write fails; writeTo avoids the extra
        // array copy that toByteArray() would make.
        try (OutputStream outputStream = new FileOutputStream(htmlFilePath)) {
            buffer.writeTo(outputStream);
        }
    }
}