Java调用百度云语音识别【音频转写】

复制代码
百度云文档  ttps://ai.baidu.com/ai-doc/SPEECH/Bk5difx01
复制代码
依赖:
<!--JSONObject-->
<dependency>
	<groupId>org.json</groupId>
	<artifactId>json</artifactId>
	<version>20210307</version>
</dependency>
复制代码
示例代码: 
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.json.JSONObject;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;

/**
 * 语音识别
 */
@Slf4j
@Component
public class AasrUtils {


    public static final String API_KEY = "U92RRV****ag9xZv";
    public static final String SECRET_KEY = "SU05xD****0ziDkM";


    static final OkHttpClient HTTP_CLIENT = new OkHttpClient.Builder()
            .connectTimeout(60, TimeUnit.SECONDS) // Set the connection timeout
            .readTimeout(300, TimeUnit.SECONDS)    // Set the read timeout
            .build();


    /**
     * 音频转写
     *
     * @param speechUrl  音频url, 音频大小不超过500MB
     * @return
     * @throws IOException
     */
    public static String createAasr(String speechUrl) throws IOException {
        MediaType mediaType = MediaType.parse("application/json");
        RequestBody body = RequestBody.create(mediaType, "{\n" +
                "    \"speech_url\": \"" + speechUrl + "\",\n" +
                "    \"format\": \"mp3\",\n" +
                "    \"pid\": 80001,\n" +
                "    \"rate\": 16000\n" +
                "}");

        Request request = new Request.Builder()
                .url("https://aip.baidubce.com/rpc/2.0/aasr/v1/create?access_token=" + getAccessToken())
                .post(body)
                .addHeader("Content-Type", "application/json")
                .addHeader("Accept", "application/json")
                .build();

        try (Response response = HTTP_CLIENT.newCall(request).execute()) {
            if (response.isSuccessful()) {
                String responseBody = response.body().string();
                log.info("创建AACR任务响应: {}", responseBody);

                String taskId = JSON.parseObject(responseBody).getString("task_id");
                return taskId;
            } else {
                log.info("创建AACR任务失败: {}", response.code());
                return null;
            }
        }
    }


    public static String checkASRStatus(String taskId) throws IOException {
        if (StringUtils.isEmpty(taskId)) {
            return "";
        }

        String taskStatus = "";

        while (!("Success".equals(taskStatus) || "Failed".equals(taskStatus))) {
            // Add a delay before the next query
            try {
                Thread.sleep(1000); // Sleep for 1 seconds (adjust as needed)
            } catch (InterruptedException e) {
                e.printStackTrace();
            }

            HashMap<String, Object> map = new HashMap<>();
            ArrayList<Object> list = new ArrayList<>();
            list.add(taskId);
            map.put("task_ids", list);

            JSONObject jsonObject = new JSONObject(map);
            String toJSON = jsonObject.toString();

            MediaType mediaType = MediaType.parse("application/json");
            RequestBody body = RequestBody.create(mediaType, toJSON);
            Request request = new Request.Builder()
                    .url("https://aip.baidubce.com/rpc/2.0/aasr/v1/query?access_token=" + getAccessToken())
                    .method("POST", body)
                    .addHeader("Content-Type", "application/json")
                    .addHeader("Accept", "application/json")
                    .build();

            try (Response response = HTTP_CLIENT.newCall(request).execute()) {
                if (response.isSuccessful()) {
                    String responseBody = response.body().string();
                    log.info("创建AACR任务响应: {}", responseBody);

                    JSONArray tasksInfo = JSON.parseObject(responseBody).getJSONArray("tasks_info");

                    if (tasksInfo.size() > 0) {
                        com.alibaba.fastjson.JSONObject taskInfo = tasksInfo.getJSONObject(0);
                        taskStatus = taskInfo.getString("task_status");

                        if ("Success".equals(taskStatus)) {
                            String resultText = taskInfo.getJSONObject("task_result").getString("result");
                            // 使用正则表达式替换[""]
                            resultText = resultText.replaceAll("\\[\"|\"\\]", "");
                            log.info("语音转文字: {}", resultText);
                            return resultText;
                        } else if ("Failed".equals(taskStatus)) {
                            log.info("任务失败: {}", taskStatus);
                        }
                    } else {
                        log.info("未找到任务信息");
                    }
                } else {
                    log.info("检查任务状态失败: {}", response.code());
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return "";
    }


    /**
     * 从用户的AK,SK生成鉴权签名(Access Token)
     *
     * @return 鉴权签名(Access Token)
     * @throws IOException IO异常
     */
    static String getAccessToken() throws IOException {
        MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
        RequestBody body = RequestBody.create(mediaType, "grant_type=client_credentials&client_id=" + API_KEY
                + "&client_secret=" + SECRET_KEY);
        Request request = new Request.Builder()
                .url("https://aip.baidubce.com/oauth/2.0/token")
                .method("POST", body)
                .addHeader("Content-Type", "application/x-www-form-urlencoded")
                .build();
        Response response = HTTP_CLIENT.newCall(request).execute();
        String access_token = new JSONObject(response.body().string()).getString("access_token");
        return access_token;
    }


    public static void main(String[] args) throws IOException {
        checkASRStatus(createAasr("https://openai-demo-1319322874.cos.na-siliconvalley.myqcloud.com/audio/2023-12-05/1701764504763878.mp3"));
        checkASRStatus(createAasr("https://aipe-speech.bj.bcebos.com/text_to_speech/2024-01-04/65966e5370172800014d6524/speech/0.mp3?authorization=bce-auth-v1%2F8a6ca9b78c124d89bb6bca18c6fc5944%2F2024-01-04T08%3A38%3A04Z%2F259200%2F%2F7ca69d814bacad356114a0ff7573734c5ceb20b363dee6c2e971b16617787ac4"));
    }

}
相关推荐
AI浩16 分钟前
【面试总结】FFN(前馈神经网络)在Transformer模型中先升维再降维的原因
人工智能·深度学习·计算机视觉·transformer
可为测控25 分钟前
图像处理基础(4):高斯滤波器详解
人工智能·算法·计算机视觉
一水鉴天1 小时前
为AI聊天工具添加一个知识系统 之63 详细设计 之4:AI操作系统 之2 智能合约
开发语言·人工智能·python
倔强的石头1061 小时前
解锁辅助驾驶新境界:基于昇腾 AI 异构计算架构 CANN 的应用探秘
人工智能·架构
佛州小李哥2 小时前
Agent群舞,在亚马逊云科技搭建数字营销多代理(Multi-Agent)(下篇)
人工智能·科技·ai·语言模型·云计算·aws·亚马逊云科技
说私域2 小时前
社群裂变+2+1链动新纪元:S2B2C小程序如何重塑企业客户管理版图?
大数据·人工智能·小程序·开源
程序猿阿伟3 小时前
《探秘鸿蒙Next:如何保障AI模型轻量化后多设备协同功能一致》
人工智能·华为·harmonyos
2401_897579653 小时前
AI赋能Flutter开发:ScriptEcho助你高效构建跨端应用
前端·人工智能·flutter
CM莫问3 小时前
python实战(十五)——中文手写体数字图像CNN分类
人工智能·python·深度学习·算法·cnn·图像分类·手写体识别
程序猿阿伟3 小时前
《探秘鸿蒙Next:人工智能助力元宇宙高效渲染新征程》
人工智能·华为·harmonyos