对象存储服务 OSS 对应 Azure Blob Storage
语音识别 ASR 对应 Azure Speech-to-Text
语音合成 TTS 对应 Azure Text-to-Speech
上传 .mp3 文件或者提供 OSS 地址,返回音频对应文字的示例 demo
依赖
<!-- Maven dependencies for the speech-recognition demo. -->
<dependencies>
<!-- Spring Boot WebFlux starter. -->
<!-- NOTE(review): declared together with spring-boot-starter-web below; confirm both stacks are really needed. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webflux</artifactId>
</dependency>
<!-- Spring Boot Web starter. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Microsoft Azure Speech SDK (used for speech recognition / ASR) -->
<dependency>
<groupId>com.microsoft.cognitiveservices.speech</groupId>
<artifactId>client-sdk</artifactId>
<version>1.43.0</version>
</dependency>
<!-- Lombok, declared optional. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Test-scoped dependencies. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.projectreactor</groupId>
<artifactId>reactor-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
代码(需在 application.properties 或 application.yaml 中配置 azure.speech.key 和 azure.speech.endpoint)
package com.example.microsoftasr.controller;
import com.microsoft.cognitiveservices.speech.*;
import com.microsoft.cognitiveservices.speech.audio.AudioConfig;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.net.URI;
import java.nio.file.Files;
/**
 * Demo REST controller that transcribes audio with Azure Speech-to-Text.
 *
 * <p>The audio comes either from a multipart upload or from an HTTP(S) URL. Input that is
 * not already a {@code .wav} file is converted to 16 kHz mono WAV with ffmpeg before it is
 * passed to the recognizer.
 */
@RestController
@RequestMapping("/asr")
public class TestController {

    private static final System.Logger LOG = System.getLogger(TestController.class.getName());

    /** ffmpeg executable used when the optional {@code ffmpeg.path} property is not set. */
    private static final String DEFAULT_FFMPEG_PATH =
            "F:\\ffmpeg-7.1.1-full_build\\ffmpeg-7.1.1-full_build\\bin\\ffmpeg.exe";

    @Value("${azure.speech.key}")
    private String speechKey;

    @Value("${azure.speech.endpoint}")
    private String speechEndpoint;

    /** Optional override for the ffmpeg executable; empty means {@link #DEFAULT_FFMPEG_PATH}. */
    @Value("${ffmpeg.path:}")
    private String ffmpegPath;

    /** Simple liveness endpoint. */
    @GetMapping("/hello")
    public String test() {
        return "Hello World";
    }

    /**
     * Transcribes the supplied audio to text.
     *
     * @param file   uploaded audio file; takes precedence over {@code ossUrl} when non-empty
     * @param ossUrl HTTP(S) address of an audio file, used when no file is uploaded
     * @return the recognized text, or a human-readable error message
     */
    @PostMapping("/recognize")
    public String recognize(@RequestParam(value = "file", required = false) MultipartFile file,
                            @RequestParam(value = "url", required = false) String ossUrl) {
        boolean hasFile = file != null && !file.isEmpty();
        boolean hasUrl = ossUrl != null && !ossUrl.isBlank();
        if (!hasFile && !hasUrl) {
            return "未提供音频文件或音频地址";
        }

        File tempInput = null;
        File tempWav = null;
        try {
            // 1. Store the original audio in a temporary file.
            if (hasFile) {
                tempInput = File.createTempFile("audio-input-", "." + getSuffix(file.getOriginalFilename()));
                file.transferTo(tempInput);
            } else {
                java.net.URL source = new java.net.URL(ossUrl);
                // The URL is caller-controlled: refuse file:, jar:, ftp: and similar schemes.
                // NOTE(review): http(s) URLs can still target internal hosts; add an
                // allow-list if this endpoint is exposed to untrusted callers.
                String protocol = source.getProtocol();
                if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
                    return "仅支持 http 或 https 音频地址";
                }
                // Derive the suffix from the path only, so a query string cannot leak into it.
                tempInput = File.createTempFile("audio-input-", "." + getSuffix(source.getPath()));
                try (var in = source.openStream()) {
                    Files.copy(in, tempInput.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
                }
            }

            // 2. Produce a WAV file (16 kHz, mono) for the recognizer.
            tempWav = File.createTempFile("audio-output-", ".wav");
            if (getSuffix(tempInput.getName()).equalsIgnoreCase("wav")) {
                Files.copy(tempInput.toPath(), tempWav.toPath(),
                        java.nio.file.StandardCopyOption.REPLACE_EXISTING);
            } else {
                int exitCode = convertToWav(tempInput, tempWav);
                if (exitCode != 0) {
                    return "ffmpeg 转换失败,exitCode=" + exitCode;
                }
            }

            // 3. Run Azure speech recognition.
            return transcribe(tempWav);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the container thread can observe it.
            Thread.currentThread().interrupt();
            LOG.log(System.Logger.Level.ERROR, "Speech recognition was interrupted", e);
            return "识别异常: " + e.getMessage();
        } catch (Exception e) {
            LOG.log(System.Logger.Level.ERROR, "Speech recognition failed", e);
            return "识别异常: " + e.getMessage();
        } finally {
            // Delete each file independently so one failure does not leak the other.
            deleteQuietly(tempInput);
            deleteQuietly(tempWav);
        }
    }

    /**
     * Converts {@code input} to a 16 kHz mono WAV file at {@code output} using ffmpeg.
     *
     * @return the ffmpeg process exit code ({@code 0} on success)
     */
    private int convertToWav(File input, File output) throws java.io.IOException, InterruptedException {
        String executable = (ffmpegPath == null || ffmpegPath.isBlank()) ? DEFAULT_FFMPEG_PATH : ffmpegPath;
        Process process = new ProcessBuilder(
                executable, "-y",
                "-i", input.getAbsolutePath(),
                "-ar", "16000",
                "-ac", "1",
                output.getAbsolutePath())
                .inheritIO()
                .start();
        return process.waitFor();
    }

    /**
     * Recognizes zh-CN speech in the given WAV file.
     *
     * <p>NOTE(review): {@code recognizeOnceAsync} is a single-shot recognition call; confirm
     * against the Speech SDK documentation whether longer recordings need continuous
     * recognition instead.
     *
     * @return the recognized text, or a failure message containing the result reason
     */
    private String transcribe(File wav) throws Exception {
        // SpeechConfig is closed here as well; previously it was never released.
        try (SpeechConfig speechConfig = SpeechConfig.fromEndpoint(new URI(speechEndpoint), speechKey)) {
            speechConfig.setSpeechRecognitionLanguage("zh-CN");
            try (AudioConfig audioConfig = AudioConfig.fromWavFileInput(wav.getAbsolutePath());
                 SpeechRecognizer recognizer = new SpeechRecognizer(speechConfig, audioConfig)) {
                SpeechRecognitionResult result = recognizer.recognizeOnceAsync().get();
                if (result.getReason() == ResultReason.RecognizedSpeech) {
                    return result.getText();
                }
                return "识别失败: " + result.getReason();
            }
        }
    }

    /** Deletes a temporary file if present, logging instead of propagating any failure. */
    private static void deleteQuietly(File file) {
        if (file == null) {
            return;
        }
        try {
            Files.deleteIfExists(file.toPath());
        } catch (Exception ex) {
            LOG.log(System.Logger.Level.WARNING, "Could not delete temporary file " + file, ex);
        }
    }

    /**
     * Extracts the extension of the last path segment of a file name or URL path.
     *
     * @return the extension without the dot, or {@code "tmp"} when there is none or when it
     *         contains characters other than letters and digits (unsafe in a temp-file name)
     */
    private String getSuffix(String filenameOrUrl) {
        if (filenameOrUrl == null) {
            return "tmp";
        }
        int lastSeparator = Math.max(filenameOrUrl.lastIndexOf('/'), filenameOrUrl.lastIndexOf('\\'));
        String name = filenameOrUrl.substring(lastSeparator + 1);
        int dot = name.lastIndexOf('.');
        if (dot < 0 || dot == name.length() - 1) {
            return "tmp";
        }
        String suffix = name.substring(dot + 1);
        return suffix.chars().allMatch(Character::isLetterOrDigit) ? suffix : "tmp";
    }
}
