Java调用百度云语音识别【音频转写】

复制代码
百度云文档  https://ai.baidu.com/ai-doc/SPEECH/Bk5difx01
复制代码
依赖(下方仅列出 org.json;示例代码还用到 okhttp3、fastjson、lombok/slf4j 与 spring,需另行引入):
<!--JSONObject-->
<dependency>
	<groupId>org.json</groupId>
	<artifactId>json</artifactId>
	<version>20210307</version>
</dependency>
复制代码
示例代码: 
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.json.JSONObject;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;

/**
 * 语音识别
 */
@Slf4j
@Component
public class AasrUtils {


    public static final String API_KEY = "U92RRV****ag9xZv";
    public static final String SECRET_KEY = "SU05xD****0ziDkM";


    static final OkHttpClient HTTP_CLIENT = new OkHttpClient.Builder()
            .connectTimeout(60, TimeUnit.SECONDS) // Set the connection timeout
            .readTimeout(300, TimeUnit.SECONDS)    // Set the read timeout
            .build();


    /**
     * 音频转写
     *
     * @param speechUrl  音频url, 音频大小不超过500MB
     * @return
     * @throws IOException
     */
    public static String createAasr(String speechUrl) throws IOException {
        MediaType mediaType = MediaType.parse("application/json");
        RequestBody body = RequestBody.create(mediaType, "{\n" +
                "    \"speech_url\": \"" + speechUrl + "\",\n" +
                "    \"format\": \"mp3\",\n" +
                "    \"pid\": 80001,\n" +
                "    \"rate\": 16000\n" +
                "}");

        Request request = new Request.Builder()
                .url("https://aip.baidubce.com/rpc/2.0/aasr/v1/create?access_token=" + getAccessToken())
                .post(body)
                .addHeader("Content-Type", "application/json")
                .addHeader("Accept", "application/json")
                .build();

        try (Response response = HTTP_CLIENT.newCall(request).execute()) {
            if (response.isSuccessful()) {
                String responseBody = response.body().string();
                log.info("创建AACR任务响应: {}", responseBody);

                String taskId = JSON.parseObject(responseBody).getString("task_id");
                return taskId;
            } else {
                log.info("创建AACR任务失败: {}", response.code());
                return null;
            }
        }
    }


    public static String checkASRStatus(String taskId) throws IOException {
        if (StringUtils.isEmpty(taskId)) {
            return "";
        }

        String taskStatus = "";

        while (!("Success".equals(taskStatus) || "Failed".equals(taskStatus))) {
            // Add a delay before the next query
            try {
                Thread.sleep(1000); // Sleep for 1 seconds (adjust as needed)
            } catch (InterruptedException e) {
                e.printStackTrace();
            }

            HashMap<String, Object> map = new HashMap<>();
            ArrayList<Object> list = new ArrayList<>();
            list.add(taskId);
            map.put("task_ids", list);

            JSONObject jsonObject = new JSONObject(map);
            String toJSON = jsonObject.toString();

            MediaType mediaType = MediaType.parse("application/json");
            RequestBody body = RequestBody.create(mediaType, toJSON);
            Request request = new Request.Builder()
                    .url("https://aip.baidubce.com/rpc/2.0/aasr/v1/query?access_token=" + getAccessToken())
                    .method("POST", body)
                    .addHeader("Content-Type", "application/json")
                    .addHeader("Accept", "application/json")
                    .build();

            try (Response response = HTTP_CLIENT.newCall(request).execute()) {
                if (response.isSuccessful()) {
                    String responseBody = response.body().string();
                    log.info("创建AACR任务响应: {}", responseBody);

                    JSONArray tasksInfo = JSON.parseObject(responseBody).getJSONArray("tasks_info");

                    if (tasksInfo.size() > 0) {
                        com.alibaba.fastjson.JSONObject taskInfo = tasksInfo.getJSONObject(0);
                        taskStatus = taskInfo.getString("task_status");

                        if ("Success".equals(taskStatus)) {
                            String resultText = taskInfo.getJSONObject("task_result").getString("result");
                            // 使用正则表达式替换[""]
                            resultText = resultText.replaceAll("\\[\"|\"\\]", "");
                            log.info("语音转文字: {}", resultText);
                            return resultText;
                        } else if ("Failed".equals(taskStatus)) {
                            log.info("任务失败: {}", taskStatus);
                        }
                    } else {
                        log.info("未找到任务信息");
                    }
                } else {
                    log.info("检查任务状态失败: {}", response.code());
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return "";
    }


    /**
     * 从用户的AK,SK生成鉴权签名(Access Token)
     *
     * @return 鉴权签名(Access Token)
     * @throws IOException IO异常
     */
    static String getAccessToken() throws IOException {
        MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
        RequestBody body = RequestBody.create(mediaType, "grant_type=client_credentials&client_id=" + API_KEY
                + "&client_secret=" + SECRET_KEY);
        Request request = new Request.Builder()
                .url("https://aip.baidubce.com/oauth/2.0/token")
                .method("POST", body)
                .addHeader("Content-Type", "application/x-www-form-urlencoded")
                .build();
        Response response = HTTP_CLIENT.newCall(request).execute();
        String access_token = new JSONObject(response.body().string()).getString("access_token");
        return access_token;
    }


    public static void main(String[] args) throws IOException {
        checkASRStatus(createAasr("https://openai-demo-1319322874.cos.na-siliconvalley.myqcloud.com/audio/2023-12-05/1701764504763878.mp3"));
        checkASRStatus(createAasr("https://aipe-speech.bj.bcebos.com/text_to_speech/2024-01-04/65966e5370172800014d6524/speech/0.mp3?authorization=bce-auth-v1%2F8a6ca9b78c124d89bb6bca18c6fc5944%2F2024-01-04T08%3A38%3A04Z%2F259200%2F%2F7ca69d814bacad356114a0ff7573734c5ceb20b363dee6c2e971b16617787ac4"));
    }

}
相关推荐
边缘计算社区26 分钟前
首个!艾灵参编的工业边缘计算国家标准正式发布
大数据·人工智能·边缘计算
游客52037 分钟前
opencv中的各种滤波器简介
图像处理·人工智能·python·opencv·计算机视觉
一位小说男主37 分钟前
编码器与解码器:从‘乱码’到‘通话’
人工智能·深度学习
深圳南柯电子1 小时前
深圳南柯电子|电子设备EMC测试整改:常见问题与解决方案
人工智能
Kai HVZ1 小时前
《OpenCV计算机视觉》--介绍及基础操作
人工智能·opencv·计算机视觉
biter00881 小时前
opencv(15) OpenCV背景减除器(Background Subtractors)学习
人工智能·opencv·学习
吃个糖糖1 小时前
35 Opencv 亚像素角点检测
人工智能·opencv·计算机视觉
IT古董2 小时前
【漫话机器学习系列】017.大O算法(Big-O Notation)
人工智能·机器学习
凯哥是个大帅比2 小时前
人工智能ACA(五)--深度学习基础
人工智能·深度学习
m0_748232922 小时前
DALL-M:基于大语言模型的上下文感知临床数据增强方法 ,补充
人工智能·语言模型·自然语言处理