csharp
在 Unity 中录制声音,调用百度智能云的语音转文字(短语音识别标准版)接口,获取转换后的文字内容。
调用示例:
// Start recording when the "start voice" button is clicked.
BtnStartVoice.onClick.AddListener(() => MicrophoneRecorderManager.Instance.StartRecording());
// Stop recording when the "end voice" button is clicked; the recognized text
// delivered to the callback is written into the chat input field.
BtnEndVoice.onClick.AddListener(() =>
    MicrophoneRecorderManager.Instance.StopRecording(strContent => InputChat.text = strContent));
百度云-语音识别API及代码示例地址:
百度智能云-语音识别
以下为相关代码实现:
csharp
using System;
using System.Collections;
using System.Collections.Generic;
using QFramework;
using UnityEngine;
//麦克风录音管理器
/// <summary>
/// Microphone recording manager. Records from the first available microphone,
/// trims the clip to the part that was actually recorded, converts it to
/// 16-bit PCM and hands it to <c>BDSmartManager.SpeechToText</c>.
/// </summary>
public class MicrophoneRecorderManager : MonoSingleton<MicrophoneRecorderManager>
{
    public override void OnSingletonInit()
    {
        Init();
        base.OnSingletonInit();
    }

    private AudioClip currentRecording;
    private string _selectedDevice;
    private AudioSource _audioSource; // Plays the recording back; only used to test that recording worked.
    private int sampleRate = 16000; // Fixed sample rate in Hz (matches the "rate" sent to the recognition API).
    private int maxRecordingTime = 60; // Maximum recording length in seconds.
    // Whether a recording started by StartRecording is still pending a StopRecording call.
    private bool isRecording;

    /// <summary>
    /// Selects the first microphone device; logs an error and leaves the
    /// selection empty when no device is present.
    /// </summary>
    void Init()
    {
        // Enumerate all microphone devices.
        string[] devices = Microphone.devices;
        if (devices.Length == 0)
        {
            Debug.LogError("没有检测到麦克风设备");
            return;
        }

        foreach (string device in devices)
        {
            Debug.Log($"devices:{device}");
        }

        // Use the first available device.
        _selectedDevice = devices[0];
        Debug.Log($"已选择麦克风设备: {_selectedDevice}");
        /*
        // Add and configure an AudioSource, used to test that recording worked.
        _audioSource = gameObject.AddComponent<AudioSource>();
        _audioSource.loop = false; // Do not loop playback.
        */
    }

    /// <summary>
    /// Starts a non-looping recording of at most <c>maxRecordingTime</c> seconds
    /// at <c>sampleRate</c> Hz. Does nothing if no device was selected or the
    /// device is already recording.
    /// </summary>
    public void StartRecording()
    {
        if (string.IsNullOrEmpty(_selectedDevice))
        {
            Debug.LogError("没有可用的麦克风设备");
            return;
        }

        // Already recording: ignore the request.
        if (Microphone.IsRecording(_selectedDevice))
        {
            return;
        }

        /*
        // Stop any audio that may still be playing; used when testing recording.
        if (_audioSource.isPlaying)
        {
            _audioSource.Stop();
        }
        */

        // Record up to maxRecordingTime seconds at sampleRate Hz, without looping.
        currentRecording = Microphone.Start(_selectedDevice, false, maxRecordingTime, sampleRate);
        if (currentRecording == null)
        {
            // Microphone.Start returns null when the recording could not be started;
            // do not pretend a recording is in progress.
            isRecording = false;
            Debug.LogError("录音启动失败");
            return;
        }

        isRecording = true;
        Debug.Log("录音已开始");
    }

    /// <summary>
    /// Stops the current recording, trims it to the recorded length and sends it
    /// to speech recognition.
    /// </summary>
    /// <param name="callBack">Invoked with the recognized text when recognition succeeds.</param>
    public void StopRecording(Action<string> callBack)
    {
        if (!isRecording)
        {
            return;
        }

        // Both values must be sampled before Microphone.End.
        bool deviceStillRecording = Microphone.IsRecording(_selectedDevice);
        int recordingLength = Microphone.GetPosition(_selectedDevice);

        Microphone.End(_selectedDevice);
        isRecording = false;
        Debug.Log("录音已停止");

        AudioClip fullClip = currentRecording;
        if (fullClip == null)
        {
            Debug.LogError("录音失败或时长太短");
            return;
        }

        // A non-looping recording that reached maxRecordingTime has already stopped
        // on its own, in which case the reported position is 0 even though the clip
        // is completely filled. Treat that case as "the whole clip was recorded".
        // NOTE(review): a device that stopped for another reason is indistinguishable here.
        if (!deviceStillRecording && recordingLength <= 0)
        {
            recordingLength = fullClip.samples;
        }

        recordingLength = Mathf.Min(recordingLength, fullClip.samples);
        if (recordingLength <= 0)
        {
            Debug.LogError("录音失败或时长太短");
            return;
        }

        // Build a new clip that contains only the recorded part.
        AudioClip recordedClip = AudioClip.Create(
            "Recording",
            recordingLength,
            fullClip.channels,
            sampleRate,
            false
        );
        float[] samples = new float[recordingLength * fullClip.channels];
        fullClip.GetData(samples, 0);
        recordedClip.SetData(samples, 0);

        // Keep the trimmed clip and release the full-length microphone buffer,
        // which is no longer referenced anywhere.
        currentRecording = recordedClip;
        Destroy(fullClip);

        LogUtilits.LogWarning($"采样率:{currentRecording.frequency}");
        // RecordAudioUtility is the PCM converter that ships with this manager.
        byte[] audioData = RecordAudioUtility.ConvertAudioClipToPCM(currentRecording);
        StartCoroutine(BDSmartManager.Instance.SpeechToText(audioData, callBack, null));
        //PlayRecording();
        Debug.Log("播放录音中...");
    }

    /// <summary>
    /// Plays the last recording; used to test that recording succeeded.
    /// </summary>
    private void PlayRecording()
    {
        if (currentRecording == null)
        {
            Debug.LogWarning("没有可播放的录音");
            return;
        }

        // The AudioSource is only created when the test code in Init is enabled.
        if (_audioSource == null)
        {
            Debug.LogWarning("没有可用的AudioSource,无法播放录音");
            return;
        }

        // Assign the clip and play it.
        _audioSource.clip = currentRecording;
        _audioSource.Play();
    }

    // Stop recording and playback when the component is destroyed.
    private void OnDestroy()
    {
        if (Microphone.IsRecording(_selectedDevice))
        {
            Microphone.End(_selectedDevice);
        }

        if (_audioSource != null && _audioSource.isPlaying)
        {
            _audioSource.Stop();
        }
    }
}
csharp
using Newtonsoft.Json;
using QFramework;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.NetworkInformation;
using Unity.Burst.Intrinsics;
using UnityEngine;
using UnityEngine.Networking;
//百度智能语音
/// <summary>
/// Baidu smart-speech client: obtains an access token and sends recorded audio
/// to the standard short speech-recognition endpoint.
/// </summary>
public class BDSmartManager : MonoSingleton<BDSmartManager>
{
    // err_no reported by the recognition service when the token is rejected.
    // NOTE(review): value taken from the Baidu error-code table — confirm against the current docs.
    private const int AuthFailedErrNo = 3302;

    private string accessToken; // Token sent with every recognition request.
    private bool isTokenValid = false; // Whether accessToken is believed to be usable.

    #region 获取token
    /// <summary>
    /// Requests an access token using the API key and secret key from
    /// <c>BDInterfaceDefine</c>. On success <c>accessToken</c> is stored and
    /// <c>isTokenValid</c> becomes true; on any failure it is set to false.
    /// </summary>
    /// <returns>Coroutine enumerator; the token itself is stored in a field.</returns>
    public IEnumerator GetAccessToken()
    {
        string apiKey = BDInterfaceDefine.ApiKey;
        string secretKey = BDInterfaceDefine.SecretKey;
        string url = $"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={apiKey}&client_secret={secretKey}";
        using UnityWebRequest request = UnityWebRequest.Post(url, new WWWForm());
        request.timeout = -1;
        yield return request.SendWebRequest();

        if (request.result != UnityWebRequest.Result.Success)
        {
            isTokenValid = false;
            Debug.LogError($"获取Token失败: {request.error}");
            yield break;
        }

        string responseContent = request.downloadHandler.text;
        Debug.Log($"responseContent:{responseContent}");

        BaiduTokenResponse result = null;
        try
        {
            result = JsonConvert.DeserializeObject<BaiduTokenResponse>(responseContent);
        }
        catch (JsonException e)
        {
            Debug.LogError($"获取Token失败: {e.Message}");
        }

        // A response without an access_token must not be treated as a valid token.
        if (result == null || string.IsNullOrEmpty(result.access_token))
        {
            isTokenValid = false;
            Debug.LogError($"获取Token失败: {responseContent}");
            yield break;
        }

        accessToken = result.access_token;
        isTokenValid = true;
        Debug.Log($"获取Token成功: {accessToken}");
    }
    #endregion

    #region 语音识别-标准版
    /// <summary>
    /// Speech recognition (standard edition). Sends 16 kHz mono PCM audio to the
    /// recognition endpoint and reports the first recognition result.
    /// </summary>
    /// <param name="audioData">Raw PCM bytes to recognize.</param>
    /// <param name="onSuccess">Invoked with the recognized text on success; may be null.</param>
    /// <param name="onError">Invoked with an error description on failure; may be null.</param>
    public IEnumerator SpeechToText(byte[] audioData, Action<string> onSuccess, Action<string> onError)
    {
        if (audioData == null || audioData.Length == 0)
        {
            ReportError(onError, "语音数据为空");
            yield break;
        }

        if (!isTokenValid)
        {
            Debug.LogWarning("Token无效,正在重新获取...");
            yield return StartCoroutine(GetAccessToken());

            // Without a token the request is guaranteed to fail; stop here.
            if (!isTokenValid)
            {
                ReportError(onError, "获取Token失败,无法进行语音识别");
                yield break;
            }
        }

        // Build the request payload.
        var requestData = new Dictionary<string, object>
        {
            { "format", "pcm" },       // Audio format: pcm/wav/amr/m4a, case-insensitive; pcm recommended.
            { "rate", 16000 },         // Sample rate: 16000 or 8000.
            { "channel", 1 },          // Channel count: mono only, fixed value 1.
            { "token", accessToken },
            { "cuid", SystemInfo.deviceUniqueIdentifier }, // Unique user/device id, at most 60 characters.
            { "len", audioData.Length },                   // Size of the raw audio in bytes (before base64).
            { "speech", Convert.ToBase64String(audioData) } // Base64-encoded audio; used together with len.
        };
        string jsonData = JsonConvert.SerializeObject(requestData);
        // Log a summary only: the full payload contains the token and the whole base64 audio.
        LogUtilits.LogMsg($"requestData: format=pcm, rate=16000, channel=1, len={audioData.Length}");
        byte[] postData = System.Text.Encoding.UTF8.GetBytes(jsonData);

        // Send the request. The request is built directly so that exactly one upload
        // handler exists, and disposed when the coroutine finishes.
        //string url = "https://vop.baidu.com/server_api";
        string url = BDInterfaceDefine.SpeechRecognition_Standard;
        using UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST);
        request.uploadHandler = new UploadHandlerRaw(postData);
        request.downloadHandler = new DownloadHandlerBuffer();
        request.timeout = -1;
        request.SetRequestHeader("Content-Type", "application/json");
        request.SetRequestHeader("Accept", "application/json");
        yield return request.SendWebRequest();

        if (request.result != UnityWebRequest.Result.Success)
        {
            ReportError(onError, $"网络请求失败: {request.error}");
            yield break;
        }

        string responseText = request.downloadHandler.text;
        LogUtilits.LogMsg($"识别结果:{responseText}");

        BaiduASRResponse response = null;
        try
        {
            response = JsonConvert.DeserializeObject<BaiduASRResponse>(responseText);
        }
        catch (JsonException e)
        {
            Debug.LogError($"识别结果解析失败: {e.Message}");
        }

        if (response == null)
        {
            ReportError(onError, $"识别结果解析失败: {responseText}");
            yield break;
        }

        if (response.err_no == 0 && response.result != null && response.result.Length > 0)
        {
            string recognizedText = response.result[0];
            onSuccess?.Invoke(recognizedText);
            Debug.Log($"识别成功: {recognizedText}");
            yield break;
        }

        // Force a token refresh on the next call when the server rejected the token.
        if (response.err_no == AuthFailedErrNo)
        {
            isTokenValid = false;
        }

        ReportError(onError, $"err_msg识别失败: {response.err_msg} (err_no错误码: {response.err_no})");
    }

    // Notifies the caller first, then logs the same message as an error.
    private static void ReportError(Action<string> onError, string errorMsg)
    {
        onError?.Invoke(errorMsg);
        Debug.LogError(errorMsg);
    }
    #endregion
}
csharp
// Baidu Cloud interface definitions: credentials and endpoint URLs used by BDSmartManager.
public static class BDInterfaceDefine
{
// API key sent as client_id when requesting an access token. Placeholder: replace with your own key.
public const string ApiKey = "替换为自己的ApiKey";
// Secret key sent as client_secret when requesting an access token. Placeholder: replace with your own key.
public const string SecretKey = "替换为自己的SecretKey";
/// <summary>
/// Endpoint of the short speech recognition API, standard edition.
/// </summary>
public const string SpeechRecognition_Standard = "https://vop.baidu.com/server_api";
}
// Response data structure of the Baidu speech-recognition API; deserialized from the JSON response body.
[System.Serializable]
public class BaiduASRResponse
{
public string corpus_no;
// Error description; included in the failure message when recognition fails.
public string err_msg;
// Error code; 0 is treated as success by the caller.
public int err_no;
// Recognition results; the caller uses the first element as the recognized text.
public string[] result;
public string sn;
}
// Response of the access-token request; deserialized from the JSON response body.
[System.Serializable]
public class BaiduTokenResponse
{
public string refresh_token;
// presumably the token lifetime in seconds — TODO confirm against the API documentation
public int expires_in;
public string session_key;
// The token that is sent with subsequent speech-recognition requests.
public string access_token;
public string scope;
public string session_secret;
}
csharp
using System;
using UnityEngine;
//录音数据转换工具类
/// <summary>
/// Helpers that convert recorded audio into raw 16-bit little-endian mono PCM.
/// </summary>
public static class RecordAudioUtility
{
    // Size of one 16-bit PCM sample in bytes.
    private const int BytesPerSample = 2;

    /// <summary>
    /// Converts an <see cref="AudioClip"/> into 16-bit little-endian PCM bytes.
    /// Clips with more than one channel are downmixed to mono by averaging the
    /// channels of each frame.
    /// </summary>
    /// <param name="clip">The clip to convert.</param>
    /// <returns>Mono PCM bytes; an empty array when the clip contains no samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="clip"/> is null.</exception>
    public static byte[] ConvertAudioClipToPCM(AudioClip clip)
    {
        if (clip == null)
        {
            throw new ArgumentNullException(nameof(clip));
        }

        int channels = clip.channels;
        float[] samples = new float[clip.samples * channels];
        if (samples.Length == 0)
        {
            return Array.Empty<byte>();
        }

        // Read the interleaved float samples.
        clip.GetData(samples, 0);

        // Convert to 16-bit PCM.
        byte[] pcmData = ConvertFloatToPCM16(samples);

        // Downmix any multi-channel layout (not only stereo) to mono.
        if (channels > 1)
        {
            pcmData = DownmixToMono(pcmData, channels);
        }

        return pcmData;
    }

    // Averages the channels of every interleaved 16-bit little-endian frame into one mono sample.
    private static byte[] DownmixToMono(byte[] interleaved, int channels)
    {
        int frameSize = BytesPerSample * channels;
        int frameCount = interleaved.Length / frameSize;
        byte[] monoData = new byte[frameCount * BytesPerSample];

        for (int frame = 0; frame < frameCount; frame++)
        {
            int source = frame * frameSize;
            int sum = 0;
            for (int channel = 0; channel < channels; channel++)
            {
                // Read explicitly as little-endian so it always matches ConvertFloatToPCM16,
                // regardless of the platform's byte order.
                sum += (short)(interleaved[source] | (interleaved[source + 1] << 8));
                source += BytesPerSample;
            }

            // Integer average; for two channels this equals (left + right) / 2.
            short mono = (short)(sum / channels);

            int target = frame * BytesPerSample;
            monoData[target] = (byte)(mono & 0xFF);
            monoData[target + 1] = (byte)((mono >> 8) & 0xFF);
        }

        return monoData;
    }

    // Converts float samples to 16-bit signed integers, written in little-endian byte order.
    private static byte[] ConvertFloatToPCM16(float[] samples)
    {
        byte[] pcmBytes = new byte[samples.Length * BytesPerSample];
        int position = 0;
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp to [-1, 1] so the scaled value fits into a short.
            float sample = Mathf.Clamp(samples[i], -1f, 1f);
            short intSample = (short)(sample * 32767f);
            // Little-endian: low byte first.
            pcmBytes[position++] = (byte)(intSample & 0xFF);
            pcmBytes[position++] = (byte)((intSample >> 8) & 0xFF);
        }

        return pcmBytes;
    }
}