Spring Boot 集成阿里云文档格式转换,实现 PDF 转换为 Word、Excel

一、前置条件

1.1 创建accessKey

如何申请:https://help.aliyun.com/zh/ram/user-guide/create-an-accesskey-pair

1.2 开通服务

官方地址:https://docmind.console.aliyun.com/doc-overview

如果尚未开通服务,需要先点击“开通”按钮,开通之后才能调用相关 API。

二、代码实现

2.1 引入依赖

xml 复制代码
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>tea-openapi</artifactId>
    <version>0.2.5</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>docmind_api20220711</artifactId>
    <version>2.0.3</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>2.0.50</version>
</dependency>

2.2 pdf转换word

官方文档:https://help.aliyun.com/zh/document-mind/developer-reference/convertpdftoword

java 复制代码
package net.lab1024.sa.admin.util;

import com.aliyun.docmind_api20220711.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.docmind_api20220711.Client;
import com.aliyun.teautil.models.RuntimeOptions;

import java.io.FileInputStream;
import java.util.List;

/**
 * Helper for converting a PDF file to Word through the Alibaba Cloud
 * document conversion (docmind) API.
 *
 * <p>The conversion is asynchronous: {@link #submitPdfToWord(String)} uploads the file and
 * returns a job id, and {@link #queryPdfToWord(String)} fetches the result for that id.
 */
public class PdfConvertUtil {

    /** Response code the service returns for a successful call. */
    private static final String OK = "200";

    // Placeholder credentials: replace before running, and never commit real keys to source control.
    private static final String ACCESS_KEY_ID = "xxx";

    private static final String ACCESS_KEY_SECRET = "xxx";

    /**
     * Demo entry point: submits a conversion job, waits a fixed 10 seconds, then queries
     * the result once and prints the URL of each returned item.
     *
     * @param args unused
     * @throws Exception if submitting or querying the job fails
     */
    public static void main(String[] args) throws Exception {
        String id = submitPdfToWord("C:\\Users\\admin\\Desktop\\example.pdf");

        // Query after 10 seconds to give the service some time to process the job.
        Thread.sleep(10000);

        List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> data = queryPdfToWord(id);
        if (data != null) {
            for (GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData item : data) {
                System.out.println(item.getUrl());
            }
        }
    }

    /**
     * Builds an API client from the configured access key pair.
     *
     * @return a new client bound to the cn-hangzhou endpoint
     * @throws Exception if the client cannot be created
     */
    private static Client getClient() throws Exception {
        Config config = new Config();
        config.setAccessKeyId(ACCESS_KEY_ID);
        config.setAccessKeySecret(ACCESS_KEY_SECRET);
        // Service endpoint; IPv4 and IPv6 are both supported. For IPv6 use
        // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
        config.setEndpoint("docmind-api.cn-hangzhou.aliyuncs.com");
        return new Client(config);
    }

    /**
     * Submits a PDF-to-Word conversion job for a local file.
     *
     * @param filePath path of the local PDF file to upload
     * @return the id of the submitted job, to be passed to {@link #queryPdfToWord(String)}
     * @throws RuntimeException if the service answers with a code other than {@code "200"}
     * @throws Exception        if the file cannot be opened or the request fails
     */
    public static String submitPdfToWord(String filePath) throws Exception {
        Client client = getClient();

        // try-with-resources guarantees the file handle is released even when the upload fails.
        try (FileInputStream pdfStream = new FileInputStream(filePath)) {
            // Request parameters
            SubmitConvertPdfToWordJobAdvanceRequest advanceRequest = new SubmitConvertPdfToWordJobAdvanceRequest();
            advanceRequest.setFileUrlObject(pdfStream);
            // Send the real file name (with extension) instead of a fixed "example.pdf".
            advanceRequest.setFileName(new java.io.File(filePath).getName());

            // Runtime options
            RuntimeOptions runtime = new RuntimeOptions();

            // Send the request
            SubmitConvertPdfToWordJobResponse response = client.submitConvertPdfToWordJobAdvance(advanceRequest, runtime);

            // Handle the result
            SubmitConvertPdfToWordJobResponseBody body = response.getBody();
            if (!OK.equals(body.getCode())) {
                throw new RuntimeException("pdf转换word任务提交失败");
            }
            return body.getData().getId();
        }
    }

    /**
     * Queries the result of a PDF-to-Word conversion job once.
     *
     * <p>TODO: this is simplified handling. Real usage should poll every 10 seconds,
     * for up to 120 minutes, instead of failing when the job is not finished yet.
     *
     * @param id job id returned by {@link #submitPdfToWord(String)}
     * @return the result data of the finished job
     * @throws RuntimeException if the query fails, the job is not completed yet,
     *                          or the job finished with a status other than {@code "Success"}
     * @throws Exception        if the request itself fails
     */
    public static List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> queryPdfToWord(String id) throws Exception {
        Client client = getClient();

        // Request parameters
        GetDocumentConvertResultRequest resultRequest = new GetDocumentConvertResultRequest();
        resultRequest.setId(id);

        GetDocumentConvertResultResponse response = client.getDocumentConvertResult(resultRequest);
        GetDocumentConvertResultResponseBody body = response.getBody();
        if (!OK.equals(body.getCode())) {
            throw new RuntimeException("pdf转换word任务查询失败");
        }

        // Null-safe check: a missing "completed" flag is treated as "not completed"
        // instead of triggering a NullPointerException on auto-unboxing.
        if (!Boolean.TRUE.equals(body.getCompleted())) {
            throw new RuntimeException("pdf转换word任务未完成");
        }

        String status = body.getStatus();
        if (!"Success".equals(status)) {
            throw new RuntimeException("pdf转换word任务转换失败");
        }
        return body.getData();
    }

}

2.3 pdf转换excel

官方文档:https://help.aliyun.com/zh/document-mind/developer-reference/convertpdftoexcel

java 复制代码
package net.lab1024.sa.admin.util;

import com.aliyun.docmind_api20220711.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.docmind_api20220711.Client;
import com.aliyun.teautil.models.RuntimeOptions;

import java.io.FileInputStream;
import java.util.List;

/**
 * Helper for converting a PDF file to Excel through the Alibaba Cloud
 * document conversion (docmind) API.
 *
 * <p>The conversion is asynchronous: {@link #submitPdfToExcel(String)} uploads the file and
 * returns a job id, and {@link #queryPdfToExcel(String)} fetches the result for that id.
 */
public class PdfConvertUtil {

    /** Response code the service returns for a successful call. */
    private static final String OK = "200";

    // Placeholder credentials: replace before running, and never commit real keys to source control.
    private static final String ACCESS_KEY_ID = "xxx";

    private static final String ACCESS_KEY_SECRET = "xxx";

    /**
     * Demo entry point: submits a conversion job, waits a fixed 10 seconds, then queries
     * the result once and prints the URL of each returned item.
     *
     * @param args unused
     * @throws Exception if submitting or querying the job fails
     */
    public static void main(String[] args) throws Exception {
        String id = submitPdfToExcel("C:\\Users\\admin\\Desktop\\example.pdf");

        // Query after 10 seconds to give the service some time to process the job.
        Thread.sleep(10000);

        List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> data = queryPdfToExcel(id);
        if (data != null) {
            for (GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData item : data) {
                System.out.println(item.getUrl());
            }
        }
    }

    /**
     * Builds an API client from the configured access key pair.
     *
     * @return a new client bound to the cn-hangzhou endpoint
     * @throws Exception if the client cannot be created
     */
    private static Client getClient() throws Exception {
        Config config = new Config();
        config.setAccessKeyId(ACCESS_KEY_ID);
        config.setAccessKeySecret(ACCESS_KEY_SECRET);
        // Service endpoint; IPv4 and IPv6 are both supported. For IPv6 use
        // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
        config.setEndpoint("docmind-api.cn-hangzhou.aliyuncs.com");
        return new Client(config);
    }

    /**
     * Submits a PDF-to-Excel conversion job for a local file.
     *
     * @param filePath path of the local PDF file to upload
     * @return the id of the submitted job, to be passed to {@link #queryPdfToExcel(String)}
     * @throws RuntimeException if the service answers with a code other than {@code "200"}
     * @throws Exception        if the file cannot be opened or the request fails
     */
    public static String submitPdfToExcel(String filePath) throws Exception {
        Client client = getClient();

        // try-with-resources guarantees the file handle is released even when the upload fails.
        try (FileInputStream pdfStream = new FileInputStream(filePath)) {
            // Request parameters
            SubmitConvertPdfToExcelJobAdvanceRequest advanceRequest = new SubmitConvertPdfToExcelJobAdvanceRequest();
            advanceRequest.setFileUrlObject(pdfStream);
            // Send the real file name (with extension) instead of a fixed "example.pdf".
            advanceRequest.setFileName(new java.io.File(filePath).getName());
            // Merge the output into a single sheet
            advanceRequest.setForceMergeExcel(true);

            // Runtime options
            RuntimeOptions runtime = new RuntimeOptions();

            // Send the request
            SubmitConvertPdfToExcelJobResponse response = client.submitConvertPdfToExcelJobAdvance(advanceRequest, runtime);

            // Handle the result
            SubmitConvertPdfToExcelJobResponseBody body = response.getBody();
            if (!OK.equals(body.getCode())) {
                throw new RuntimeException("pdf转换excel任务提交失败");
            }
            return body.getData().getId();
        }
    }

    /**
     * Queries the result of a PDF-to-Excel conversion job once.
     *
     * <p>TODO: this is simplified handling. Real usage should poll every 10 seconds,
     * for up to 120 minutes, instead of failing when the job is not finished yet.
     *
     * @param id job id returned by {@link #submitPdfToExcel(String)}
     * @return the result data of the finished job
     * @throws RuntimeException if the query fails, the job is not completed yet,
     *                          or the job finished with a status other than {@code "Success"}
     * @throws Exception        if the request itself fails
     */
    public static List<GetDocumentConvertResultResponseBody.GetDocumentConvertResultResponseBodyData> queryPdfToExcel(String id) throws Exception {
        Client client = getClient();

        // Request parameters
        GetDocumentConvertResultRequest resultRequest = new GetDocumentConvertResultRequest();
        resultRequest.setId(id);

        GetDocumentConvertResultResponse response = client.getDocumentConvertResult(resultRequest);
        GetDocumentConvertResultResponseBody body = response.getBody();
        if (!OK.equals(body.getCode())) {
            throw new RuntimeException("pdf转换excel任务查询失败");
        }

        // Null-safe check: a missing "completed" flag is treated as "not completed"
        // instead of triggering a NullPointerException on auto-unboxing.
        if (!Boolean.TRUE.equals(body.getCompleted())) {
            throw new RuntimeException("pdf转换excel任务未完成");
        }

        String status = body.getStatus();
        if (!"Success".equals(status)) {
            throw new RuntimeException("pdf转换excel任务转换失败");
        }
        return body.getData();
    }

}
相关推荐
TG_yunshuguoji6 分钟前
阿里云代理商:阿里云部署 WordPress的3 种方案
人工智能·阿里云·云计算·wordpress·ai智能体
张小凡vip9 分钟前
Spring Boot集成Kafka完整版
spring boot·kafka·linq
摇滚侠25 分钟前
SpringBoot 升级,依赖冲突如何解决
java·spring boot·spring
小江的记录本34 分钟前
【JVM虚拟机】类加载机制:类加载器、双亲委派模型、好处、破坏双亲委派的场景(附《思维导图》+《面试高频考点清单》)
java·jvm·spring boot·后端·python·spring·面试
Devin~Y1 小时前
智慧物流+AIGC客服Java大厂面试:Spring Boot、Kafka、Redis、JVM与RAG Agent实战
java·jvm·spring boot·redis·spring cloud·kafka·rag
闪电悠米1 小时前
黑马点评-分布式锁-02_simple_redis_lock_setnx
java·数据库·spring boot·redis·分布式·缓存·wpf
Database_Cool_1 小时前
阿里云 AnalyticDB MySQL 免运维实践:分析型数据库不需要专人运维
数据库·数据仓库·mysql·阿里云
李白的天不白2 小时前
spring boot + vue3项目部署须知
java·spring boot·后端
小马爱打代码2 小时前
SpringBoot + Redis Stream + 消费组:替代 Kafka 轻量级消息队列,低延迟高吞吐
spring boot·redis