doc转html并图片转base64

复制代码
package com.lly.demo.util;

//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//

import com.lly.demo.bean.DocUser;
import com.spire.doc.FileFormat;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.List;

import static java.lang.System.out;


/**
 * Converts legacy Word ({@code .doc}) documents into a single, self-contained HTML file.
 *
 * <p>Pictures embedded in the document are inlined into the generated HTML as Base64
 * {@code data:} URIs, so no image files are written next to the output.
 */
public class docTohtml {

    /** Sample input document converted by {@link #docxToHtml()}. */
    private static final String SAMPLE_SOURCE =
            "D:\\zpdtolly\\工作总结文档\\zpd使用文档\\uos+ql飞腾\\uos+麒麟飞腾使用手册-网络版.doc";

    /** Output location used by {@link #docxToHtml()}. */
    private static final String SAMPLE_TARGET =
            "D:\\zpdtolly\\工作总结文档\\zpd使用文档\\uos+ql飞腾\\uos+麒麟飞腾使用手册-网络版.html";

    /**
     * Command-line entry point; runs the sample conversion.
     *
     * @param args ignored
     * @throws Exception if the sample conversion fails
     */
    public static void main(String[] args) throws Exception {
        docxToHtml();
    }

    /**
     * Converts the {@code .doc} file named by {@code docUser.getDocname()} into the HTML file
     * named by {@code docUser.getHtmlname()}.
     *
     * <p>Conversion failures are reported through the return value rather than thrown: the
     * stack trace is printed and {@code false} is returned.
     *
     * @param docUser carries the source document path and the target HTML path
     * @return {@code true} only if the HTML file was written; {@code false} if either path is
     *         blank, the source is not an existing {@code .doc} file, or the conversion failed
     * @throws Exception declared for backward compatibility with existing callers
     */
    public Boolean docTohtml(DocUser docUser) throws Exception {
        if (docUser == null) {
            return false;
        }
        String sourceFileName = docUser.getDocname();
        String targetFileName = docUser.getHtmlname();
        // Validate before touching the file system so bad input yields false, not an exception.
        if (StringUtils.isBlank(sourceFileName) || StringUtils.isBlank(targetFileName)) {
            return false;
        }
        if (hasExtension(sourceFileName, ".docx")) {
            out.println("上传文件为docx类型");
            return false;
        }
        if (!hasExtension(sourceFileName, ".doc")) {
            out.println("上传文件为其他类型文件");
            return false;
        }
        if (!new File(sourceFileName).isFile()) {
            out.println("文件不存在");
            return false;
        }

        try {
            convert(sourceFileName, targetFileName);
            return true;
        } catch (IOException | IllegalArgumentException e) {
            // Raised while reading/parsing the source, e.g. content that is not a binary
            // Word document despite its ".doc" suffix.
            out.println("请检查文件类型或后缀是否正确!");
            e.printStackTrace();
            return false;
        } catch (Exception e) {
            // Callers rely on a boolean outcome; any other failure must not be reported
            // as success.
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Converts the hard-coded sample {@code .doc} document to HTML and prints the system id
     * of the written file.
     *
     * <p>Despite its name, this method reads a binary {@code .doc} file, not {@code .docx}.
     *
     * @throws Exception if the document cannot be read, converted, or written
     */
    public static void docxToHtml() throws Exception {
        String systemId = convert(SAMPLE_SOURCE, SAMPLE_TARGET);
        out.println("doc转换完毕!" + systemId);
    }

    /**
     * Reads a binary Word document and serializes it as indented UTF-8 HTML with all
     * pictures inlined as Base64 data URIs.
     *
     * @param sourceFileName path of the {@code .doc} file to read
     * @param targetFileName path of the HTML file to write
     * @return the system id of the written result
     */
    private static String convert(String sourceFileName, String targetFileName)
            throws IOException, ParserConfigurationException, TransformerException {
        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Return a data URI instead of a file path so the HTML needs no external images.
        // Base64 output is pure ASCII, so decode it with an explicit charset.
        converter.setPicturesManager((content, pictureType, suggestedName, widthInches, heightInches) ->
                "data:" + pictureType.getMime() + ";base64,"
                        + new String(Base64.encodeBase64(content), StandardCharsets.US_ASCII));

        // try-with-resources releases the file handle even when parsing fails.
        try (FileInputStream input = new FileInputStream(sourceFileName)) {
            converter.processDocument(new HWPFDocument(input));
        }

        StreamResult result = new StreamResult(new File(targetFileName));
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(converter.getDocument()), result);
        return result.getSystemId();
    }

    /** Returns whether {@code fileName} ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String fileName, String extension) {
        int offset = fileName.length() - extension.length();
        return offset >= 0
                && fileName.regionMatches(true, offset, extension, 0, extension.length());
    }

}

依赖:

复制代码
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>demo</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>demo</name>
    <description>demo</description>

    <!-- Build-wide settings. spring-boot.version is consumed by the BOM import in dependencyManagement. -->
    <properties>
        <java.version>1.8</java.version>
        <!-- Read and report sources as UTF-8 regardless of the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <spring-boot.version>2.3.7.RELEASE</spring-boot.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Declared once; a second identical lombok entry makes Maven warn about duplicate dependencies. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.16.10</version>
        </dependency>
        <!-- Swagger API documentation -->
        <dependency>
            <groupId>io.springfox</groupId>
            <artifactId>springfox-swagger2</artifactId>
            <version>2.9.2</version>
        </dependency>

        <dependency>
            <groupId>io.springfox</groupId>
            <artifactId>springfox-swagger-ui</artifactId>
            <version>2.9.2</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Apache POI: poi-scratchpad provides HWPF (.doc) and WordToHtmlConverter -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-examples</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-excelant</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.9</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>xdocreport</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>2.6.0</version>
        </dependency>

        <!-- Imported directly by the converter code (StringUtils, Base64), so declared explicitly
             instead of relying on transitive resolution; versions come from the Spring Boot BOM. -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
        </dependency>
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
        </dependency>
    </dependencies>

    <!-- Imports the Spring Boot BOM (instead of inheriting a parent POM) so that
         Spring-managed dependencies can be declared without an explicit version. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>${spring-boot.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <plugins>
            <!-- Compiler level follows the java.version property so it is defined in one place. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <!-- Plugin version is tied to the imported Spring Boot BOM to keep the two in sync. -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>${spring-boot.version}</version>
                <configuration>
                    <mainClass>com.lly.demo.DemoApplication</mainClass>
                </configuration>
                <executions>
                    <execution>
                        <id>repackage</id>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>

导航样式:

<style type="text/css">

/* Page background; text is centered at page level and reset to left inside divs. */
body{background:#e6d8d8;text-align:center;}

div{width:100%;margin:0 auto;background:#ecdfdf;text-align:left;}

/* Global reset: no default spacing, and widths include padding and border. */
* {

padding: 0;

margin: 0;

box-sizing: border-box;

}

/* Side navigation panel: fixed to the viewport and parked off-screen by its own width (left: -240px). */
.aside {

width: 240px;

height: 100%;

position: fixed;

left: -240px;

top: 0px;

border-right: 1px solid #ccc;

-ms-transition: all 0.3s linear;

-moz-transition: all 0.3s linear;

-webkit-transition: all 0.3s linear;

transition: all 0.3s linear;

}

/* Hovering the panel (or its protruding .nav-mark tab) slides it into view. */
.aside:hover{

left: 0;

}

/* While the panel is open, pad an immediately following .article sibling so the panel does not cover it. */
.aside:hover + .article{

padding-left: 260px;

}

/* Scrollable container for the navigation entries. */
.nav-list{

width: 100%;

height: 100%;

overflow: auto;

padding: 10px 0px;

}

/* Tab that sticks out 20px past the panel's right edge, vertically centered (top: 50% with margin-top: -40px for an 80px height). */
.nav-mark{

position: absolute;

right: -20px;

top: 50%;

z-index: 2;

height: 80px;

width: 20px;

margin-top: -40px;

background-color: #44a7ff;

box-shadow: 2px 0px 3px #eee;

border-radius: 0 40px 40px 0;

font-size: 12px;

text-align: center;

line-height: 24px;

padding-top: 16px;

color: #fff;

}

/* A single navigation entry rendered as a full-width block. */
.nav {

display: block;

width: 100%;

height: 32px;

line-height: 32px;

font-size: 16px;

color: #333;

text-decoration: none;

padding-left: 20px;

}

/* Highlight the entry under the pointer. */
.nav:hover {

background-color: #44a7ff;

color: #fff;

}

/* Indentation for second-level entries. */
.grade2 {

text-indent: 1em;

}

/* Indentation for third-level entries. */
.grade3 {

text-indent: 2em;

}

</style>

<body class="b1 b2" lang=ZH-CN link=blue vlink=purple style='tab-interval:21.0pt;text-justify-trim:punctuation'>

<aside class="aside">

<div class="nav-list">

<p class="p8">

<span> </span><span class="s3">第1章 前言 </span><a href="#_Toc6535"><span class="s3">4</span></a>

</p>

<p class="p9">

<span>1.1 前言 </span><a href="#_Toc22214"><span>4</span></a>

</p>

<p class="p9">

<span>1.2 配置说明 </span><a href="#_Toc28992"><span>4</span></a>

</p>

<p class="p9">

<span>1.3 约定 </span><a href="#_Toc4833"><span>4</span></a>

</p>

<p class="p10">

<span>第2章 系统功能概述 </span><a href="#_Toc25542"><span>4</span></a>

</p>

<p class="p9">

<span>2.1 范围 </span><a href="#_Toc29936"><span>4</span></a>

</p>

<p class="p9">

<span>2.2 功能概述 </span><a href="#_Toc1621"><span>4</span></a>

</p>

<p class="p9">

<span>2.3 系统启动 </span><a href="#_Toc21033"><span>4</span></a>

</p>

<p class="p9">

<span>2.4 系统的右键菜单功能 </span><a href="#_Toc17040"><span>5</span></a>

</p>

<p class="p11">

<span>2.4.1 针对非标签文件 </span><a href="#_Toc14818"><span>5</span></a>

</p>

<p class="p11">

<span>2.4.2 针对标签文件右键菜单 </span><a href="#_Toc28107"><span>5</span></a>

</p>

<p class="p10">

<span>第3章 系统菜单功能详解 </span><a href="#_Toc27175"><span>6</span></a>

</p>

<p class="p9">

<span>3.1 查看标签 </span><a href="#_Toc15292"><span>6</span></a>

</p>

<p class="p11">

<span>3.1.1 查看方法 </span><a href="#_Toc1580"><span>7</span></a>

</p>

<p class="p9">

<span>3.2 设置标签 </span><a href="#_Toc28607"><span>7</span></a>

</p>

<p class="p11">

<span>3.2.1 自动设置标签 </span><a href="#_Toc1292"><span>7</span></a>

</p>

<p class="p11">

<span>3.2.2 主动设置标签 </span><a href="#_Toc2308"><span>7</span></a>

</p>

<p class="p9">

<span>3.3 修改标签 </span><a href="#_Toc17328"><span>9</span></a>

</p>

<p class="p11">

<span>3.3.1 修改方法 </span><a href="#_Toc18786"><span>9</span></a>

</p>

<p class="p9">

<span>3.4 申请分离 </span><a href="#_Toc15068"><span>9</span></a>

</p>

<p class="p11">

<span>3.4.1 分离方法 </span><a href="#_Toc13228"><span>10</span></a>

</p>

<p class="p9">

<span>3.5 标签恢复 </span><a href="#_Toc8881"><span>12</span></a>

</p>

<p class="p11">

<span>3.5.1 恢复方法 </span><a href="#_Toc17845"><span>13</span></a>

</p>

<p class="p9">

<span>3.5 辅助定密 </span><a href="#_Toc22306"><span>14</span></a>

</p>

<p class="p10">

<span>第4章 标签管理工具 </span><a href="#_Toc4965"><span>15</span></a>

</p>

<p class="p9">

<span>4.1 功能概述和启动方法 </span><a href="#_Toc11565"><span>15</span></a>

</p>

<p class="p11">

<span>4.1.1 功能概述 </span><a href="#_Toc21392"><span>15</span></a>

</p>

<p class="p11">

<span>4.1.2 启动方法 </span><a href="#_Toc9953"><span>15</span></a>

</p>

<p class="p11">

<span>4.1.3 标签初始化 </span><a href="#_Toc13424"><span>15</span></a>

</p>

<p class="p9">

<span>4.2 基本信息 </span><a href="#_Toc9325"><span>16</span></a>

</p>

<p class="p11">

<span>4.2.1 基本信息概述 </span><a href="#_Toc15593"><span>16</span></a>

</p>

<p class="p9">

<span>4.3 标签信息 </span><a href="#_Toc25293"><span>17</span></a>

</p>

<p class="p11">

<span>4.3.1 文件列表 </span><a href="#_Toc26886"><span>17</span></a>

</p>

<p class="p11">

<span>4.3.2 操作日志 </span><a href="#_Toc23770"><span>17</span></a>

</p>

<p class="p9">

<span>4.4 标签分离 </span><a href="#_Toc25427"><span>18</span></a>

</p>

<p class="p11">

<span>4.4.1 分离申请 </span><a href="#_Toc2491"><span>18</span></a>

</p>

<p class="p9">

<span>4.5 系统信息 </span><a href="#_Toc2552"><span>18</span></a>

</p>

<p class="p11">

<span>4.5.1 系统日志 </span><a href="#_Toc3090"><span>18</span></a>

</p>

<p class="p11">

<span>4.5.2 策略管理 </span><a href="#_Toc9669"><span>18</span></a>

</p>

<p class="p9">

<span>4.6 系统管理员 </span><a href="#_Toc16215"><span>19</span></a>

</p>

<p class="p11">

<span>4.6.1 分离审批 </span><a href="#_Toc10899"><span>19</span></a>

</p>

<p class="p11">

<span>4.6.2 用户管理 </span><a href="#_Toc15675"><span>20</span></a>

</p>

<p class="p11">

<span>4.6.3 用户切换 </span><a href="#_Toc21405"><span>21</span></a>

</p>

</div>

<div class="nav-mark">导航</div>

</aside>

</body>

相关推荐
子燕若水1 分钟前
Unreal Engine 5 (UE5) Metahuman 的头部材质
前端·ue5·材质
武昌库里写JAVA2 分钟前
React方向:react中5种Dom的操作方式
java·开发语言·spring boot·学习·课程设计
ThetaarSofVenice32 分钟前
一个个顺序挨着来 - 责任链模式(Chain of Responsibility Pattern)
java·设计模式·责任链模式
数据小小爬虫1 小时前
利用Java爬虫获取义乌购店铺所有商品列表:技术探索与实践
java·开发语言·爬虫
互联网-小阿宇1 小时前
【HTML+CSS+JS+VUE】web前端教程-31-css3新特性
前端·javascript·css
Dong雨1 小时前
Java的Stream流和Option类
java·新特性
!!!5251 小时前
maven的生命周期
java·数据库·maven
han_2 小时前
为实现前端截图功能,我的dom-to-image踩坑之旅!
前端·javascript
Hello Dam2 小时前
基于 FastExcel 与消息队列高效生成及导入机构用户数据
java·数据库·spring boot·excel·easyexcel·fastexcel
ShyTan2 小时前
java项目启动时,执行某方法
java·开发语言