微软云语音识别ASR示例Demo

对象存储服务 OSS 对应 Azure Blob Storage

语音识别 ASR 对应 Azure Speech-to-Text

语音合成 TTS 对应 Azure Text-to-Speech

上传 .mp3 文件或者提供 OSS 地址,返回音频对应文字的示例 Demo

依赖

复制代码
<!-- Maven dependencies for the ASR demo application. -->
<dependencies>
        <!-- Reactive web stack.
             NOTE(review): the controller shown in this article only uses Spring MVC types;
             confirm whether webflux is actually needed alongside starter-web. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-webflux</artifactId>
        </dependency>

        <!-- Servlet-based Spring MVC stack (presumably the source of @RestController and MultipartFile). -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Microsoft Azure Speech SDK (speech-to-text / ASR client), pinned to 1.43.0 -->
        <dependency>
            <groupId>com.microsoft.cognitiveservices.speech</groupId>
            <artifactId>client-sdk</artifactId>
            <version>1.43.0</version>
        </dependency>
        <!-- Lombok, declared optional so it is not passed on to dependents. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <!-- Test-scope only: Spring Boot test support. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Test-scope only: Project Reactor test utilities. -->
        <dependency>
            <groupId>io.projectreactor</groupId>
            <artifactId>reactor-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

代码:在 application.properties 或 application.yaml 中配置 azure.speech.key 和 azure.speech.endpoint

复制代码
package com.example.microsoftasr.controller;

import com.microsoft.cognitiveservices.speech.*;
import com.microsoft.cognitiveservices.speech.audio.AudioConfig;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * REST controller that turns an audio file into text with Azure Speech-to-Text.
 *
 * <p>The audio is either uploaded as a multipart file or downloaded from an http/https URL.
 * Non-WAV input is converted to a 16 kHz mono WAV with an external ffmpeg executable before
 * it is handed to the Speech SDK.
 *
 * <p>Configuration properties:
 * <ul>
 *   <li>{@code azure.speech.key} – subscription key (required)</li>
 *   <li>{@code azure.speech.endpoint} – service endpoint URI (required)</li>
 *   <li>{@code ffmpeg.path} – ffmpeg executable (optional; falls back to
 *       {@link #DEFAULT_FFMPEG_PATH} when absent or blank)</li>
 * </ul>
 */
@RestController
@RequestMapping("/asr")
public class TestController {

    private static final Logger LOG = Logger.getLogger(TestController.class.getName());

    /** ffmpeg location used when {@code ffmpeg.path} is not configured. */
    private static final String DEFAULT_FFMPEG_PATH =
            "F:\\ffmpeg-7.1.1-full_build\\ffmpeg-7.1.1-full_build\\bin\\ffmpeg.exe";

    /** Temp-file extension used when no trustworthy extension can be derived. */
    private static final String DEFAULT_SUFFIX = "tmp";

    /** Longest extension accepted from client-controlled names. */
    private static final int MAX_SUFFIX_LENGTH = 10;

    @Value("${azure.speech.key}")
    private String speechKey;

    @Value("${azure.speech.endpoint}")
    private String speechEndpoint;

    // Empty default keeps the property optional; see resolveFfmpeg().
    @Value("${ffmpeg.path:}")
    private String ffmpegPath;

    /** Liveness probe; always returns a fixed greeting. */
    @GetMapping("/hello")
    public String test() {
        return "Hello World";
    }

    /**
     * Recognizes speech in the supplied audio and returns the text.
     *
     * <p>When both parameters are supplied the uploaded file wins. All failures are reported
     * as a plain-text message in the response body rather than as an exception.
     *
     * @param file   uploaded audio file; may be {@code null} or empty
     * @param ossUrl http/https address of the audio; may be {@code null} or blank
     * @return the recognized text, or a human-readable error message
     */
    @PostMapping("/recognize")
    public String recognize(@RequestParam(value = "file", required = false) MultipartFile file,
                            @RequestParam(value = "url", required = false) String ossUrl) {
        boolean hasFile = file != null && !file.isEmpty();
        boolean hasUrl = ossUrl != null && !ossUrl.isBlank();
        if (!hasFile && !hasUrl) {
            return "未提供音频文件或音频地址";
        }

        File tempInput = null;
        File tempWav = null;

        try {
            // 1. Store the original audio in a temp file.
            String suffix;
            if (hasFile) {
                suffix = getSuffix(file.getOriginalFilename());
                tempInput = File.createTempFile("audio-input-", "." + suffix);
                file.transferTo(tempInput);
            } else {
                URL remote = new URL(ossUrl);
                // The URL comes straight from the request: refuse file:, jar:, ftp: etc. so the
                // endpoint cannot be used to read local files. Internal http hosts are still
                // reachable; add an allow-list if this is exposed publicly.
                if (!isHttpUrl(remote)) {
                    return "仅支持 http/https 音频地址";
                }
                suffix = getSuffix(ossUrl);
                tempInput = File.createTempFile("audio-input-", "." + suffix);
                download(remote, tempInput);
            }

            // 2. Produce a WAV file (16 kHz, mono) for the recognizer.
            tempWav = File.createTempFile("audio-output-", ".wav");
            if ("wav".equalsIgnoreCase(suffix)) {
                Files.copy(tempInput.toPath(), tempWav.toPath(), StandardCopyOption.REPLACE_EXISTING);
            } else {
                int exitCode = convertToWav(tempInput, tempWav);
                if (exitCode != 0) {
                    return "ffmpeg 转换失败,exitCode=" + exitCode;
                }
            }

            // 3. Run Azure speech recognition.
            return transcribe(tempWav);

        } catch (InterruptedException e) {
            // Restore the flag so the container can see that this thread was interrupted.
            Thread.currentThread().interrupt();
            LOG.log(Level.WARNING, "Speech recognition interrupted", e);
            return "识别异常: " + e.getMessage();
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Speech recognition failed", e);
            return "识别异常: " + e.getMessage();
        } finally {
            // Delete independently so a failure on one file does not leak the other.
            deleteQuietly(tempInput);
            deleteQuietly(tempWav);
        }
    }

    /** Returns {@code true} when the URL uses the http or https scheme. */
    private boolean isHttpUrl(URL url) {
        String protocol = url.getProtocol();
        return "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
    }

    /** Copies the content behind {@code source} into {@code target}, replacing it. */
    private void download(URL source, File target) throws Exception {
        try (InputStream in = source.openStream()) {
            Files.copy(in, target.toPath(), StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Runs ffmpeg to convert {@code input} into a 16 kHz mono WAV at {@code output}.
     *
     * @return the ffmpeg exit code (0 on success)
     * @throws InterruptedException if the waiting thread is interrupted; the ffmpeg process is
     *                              killed before the exception propagates
     */
    private int convertToWav(File input, File output) throws Exception {
        ProcessBuilder builder = new ProcessBuilder(
                resolveFfmpeg(), "-y",
                "-i", input.getAbsolutePath(),
                "-ar", "16000",
                "-ac", "1",
                output.getAbsolutePath()
        );
        Process process = builder.inheritIO().start();
        try {
            return process.waitFor();
        } catch (InterruptedException e) {
            // Do not leave an orphaned converter running after the request is abandoned.
            process.destroyForcibly();
            throw e;
        }
    }

    /** Returns the configured ffmpeg executable, or the built-in default when none is set. */
    private String resolveFfmpeg() {
        return (ffmpegPath == null || ffmpegPath.isBlank()) ? DEFAULT_FFMPEG_PATH : ffmpegPath;
    }

    /**
     * Sends the WAV file to Azure Speech-to-Text (language {@code zh-CN}) and returns the text.
     *
     * <p>NOTE(review): {@code recognizeOnceAsync} is the SDK's single-shot mode, so presumably
     * only the first utterance of a long recording is returned — confirm whether continuous
     * recognition is needed.
     */
    private String transcribe(File wav) throws Exception {
        // SpeechConfig and the result hold native SDK handles, so they are closed explicitly
        // together with the audio config and the recognizer.
        try (SpeechConfig speechConfig = SpeechConfig.fromEndpoint(new URI(speechEndpoint), speechKey)) {
            speechConfig.setSpeechRecognitionLanguage("zh-CN");

            try (AudioConfig audioConfig = AudioConfig.fromWavFileInput(wav.getAbsolutePath());
                 SpeechRecognizer recognizer = new SpeechRecognizer(speechConfig, audioConfig);
                 SpeechRecognitionResult result = recognizer.recognizeOnceAsync().get()) {
                if (result.getReason() == ResultReason.RecognizedSpeech) {
                    return result.getText();
                }
                return "识别失败: " + result.getReason();
            }
        }
    }

    /** Best-effort deletion of a temp file; failures are logged, never thrown. */
    private void deleteQuietly(File file) {
        if (file == null) {
            return;
        }
        try {
            Files.deleteIfExists(file.toPath());
        } catch (Exception ex) {
            LOG.log(Level.WARNING, "Could not delete temp file " + file, ex);
        }
    }

    /**
     * Derives a safe file extension (without the dot) from a file name or URL.
     *
     * <p>The value is client-controlled and ends up in a temp-file name, so only the last path
     * segment is inspected, a URL's query string and fragment are ignored, and anything other
     * than a short ASCII alphanumeric extension is replaced by {@code "tmp"}.
     *
     * @param filenameOrUrl file name or URL; may be {@code null}
     * @return the extension, or {@code "tmp"} when none can be determined safely
     */
    private String getSuffix(String filenameOrUrl) {
        if (filenameOrUrl == null) {
            return DEFAULT_SUFFIX;
        }

        String name = filenameOrUrl;
        if (name.contains("://")) {
            // Only URLs carry a query/fragment; '#' and '?' may be legitimate in file names.
            int query = name.indexOf('?');
            if (query >= 0) {
                name = name.substring(0, query);
            }
            int fragment = name.indexOf('#');
            if (fragment >= 0) {
                name = name.substring(0, fragment);
            }
        }

        // Keep only the last path segment so dots in host or directory names are ignored.
        int separator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
        name = name.substring(separator + 1);

        int dot = name.lastIndexOf('.');
        if (dot < 0 || dot == name.length() - 1) {
            return DEFAULT_SUFFIX;
        }

        String extension = name.substring(dot + 1);
        if (extension.length() > MAX_SUFFIX_LENGTH) {
            return DEFAULT_SUFFIX;
        }
        for (int i = 0; i < extension.length(); i++) {
            char c = extension.charAt(i);
            boolean asciiAlnum = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
            if (!asciiAlnum) {
                return DEFAULT_SUFFIX;
            }
        }
        return extension;
    }

}
相关推荐
Pocker_Spades_A4 分钟前
AI 对话高效输入指令攻略(四):AI+Apache ECharts:生成各种专业图表
人工智能·echarts
拓端研究室21 分钟前
专题:2025生命科学与生物制药全景报告:产业图谱、投资方向及策略洞察|附130+份报告PDF、原数据表汇总下载
大数据·人工智能
正在走向自律21 分钟前
豆包新模型与 PromptPilot 实操体验测评,AI 辅助创作的新范式探索
人工智能·火山引擎·promptpilot·豆包新模型·seed-1.6-think·火山方舟·ai产品体验
绿炮火39 分钟前
【MATLAB】(八)矩阵
人工智能·matlab·矩阵
十年一梦实验室1 小时前
工业机器人控制系统 IP-PP-EXEC 流水线
网络·人工智能·网络协议·tcp/ip·机器人
爆改模型1 小时前
【arXiv2025】计算机视觉|即插即用|LWGA:即插即用!LWGA模块,视觉性能炸裂!
人工智能·计算机视觉
崔庆才丨静觅1 小时前
Veo Videos Generation API 对接说明
人工智能·api
Maynor9961 小时前
一次完整的 Docker 启动失败排错之旅:从 `start-limit` 到 `network not found`
人工智能
zzywxc7872 小时前
深入探讨AI在测试领域的三大核心应用:自动化测试框架、智能缺陷检测和A/B测试优化,并通过代码示例、流程图和图表详细解析其实现原理和应用场景。
运维·人工智能·低代码·架构·自动化·流程图·ai编程
zskj_zhyl2 小时前
七彩喜智慧康养:用“适老化设计”让科技成为老人的“温柔拐杖”
大数据·人工智能·科技·机器人·生活