实现Unity录音、百度云语音转文字

csharp 复制代码
在 Unity 中通过麦克风录制声音,调用百度智能云"短语音识别标准版"接口,将录音转换为文字并返回识别结果。

调用示例:

            // Usage example: wire two UI controls to the recorder singleton.
            // NOTE(review): BtnStartVoice, BtnEndVoice and InputChat are declared outside this snippet.
            BtnStartVoice.onClick.AddListener(() =>
            {
	            // Start recording
                MicrophoneRecorderManager.Instance.StartRecording();
            });          
            BtnEndVoice.onClick.AddListener(() =>
            {
	            // Stop recording; the callback receives the recognized text
                MicrophoneRecorderManager.Instance.StopRecording(strContent =>
                {
                    InputChat.text = strContent;
                });
            });

百度云-语音识别API及代码示例地址:
百度智能云-语音识别

以下为相关代码实现:

csharp 复制代码
using System;
using System.Collections;
using System.Collections.Generic;
using QFramework;
using UnityEngine;


/// <summary>
/// Microphone recording manager. Records from the first detected microphone and,
/// when recording is stopped, converts the captured audio to 16-bit PCM and hands it
/// to <see cref="BDSmartManager.SpeechToText"/> for recognition.
/// </summary>
public class MicrophoneRecorderManager : MonoSingleton<MicrophoneRecorderManager>
{
    public override void OnSingletonInit()
    {
        Init();
        base.OnSingletonInit();
    }

    private AudioClip currentRecording;
    private string _selectedDevice;

    private AudioSource _audioSource; // Plays the recording back; only used to verify a recording manually

    private int sampleRate = 16000; // Fixed sample rate in Hz (BDSmartManager sends "rate" = 16000)
    private int maxRecordingTime = 60; // Maximum recording length in seconds

    // True between a successful StartRecording and the next StopRecording.
    private bool isRecording;


    /// <summary>
    /// Selects the first microphone reported by Unity; logs an error when none exists.
    /// </summary>
    void Init()
    {
        string[] devices = Microphone.devices;
        if (devices.Length == 0)
        {
            Debug.LogError("没有检测到麦克风设备");
            return;
        }

        foreach (string device in devices)
        {
            Debug.Log($"devices:{device}");
        }

        // Use the first available device.
        _selectedDevice = devices[0];
        Debug.Log($"已选择麦克风设备: {_selectedDevice}");
    }

    /// <summary>
    /// Starts a non-looping recording of at most <c>maxRecordingTime</c> seconds at
    /// <c>sampleRate</c> Hz. Does nothing when no device is available or the device
    /// is already recording.
    /// </summary>
    public void StartRecording()
    {
        if (string.IsNullOrEmpty(_selectedDevice))
        {
            Debug.LogError("没有可用的麦克风设备");
            return;
        }
        // Already recording: keep the running capture.
        if (Microphone.IsRecording(_selectedDevice))
        {
            return;
        }

        // Stop a test playback that may still be running (the source only exists after PlayRecording was used).
        if (_audioSource != null && _audioSource.isPlaying)
        {
            _audioSource.Stop();
        }

        AudioClip clip = Microphone.Start(_selectedDevice, false, maxRecordingTime, sampleRate);
        if (clip == null)
        {
            // Microphone.Start returns null when the recording could not be started.
            isRecording = false;
            Debug.LogError("录音启动失败");
            return;
        }

        currentRecording = clip;
        isRecording = true;
        Debug.Log("录音已开始");
    }

    /// <summary>
    /// Stops the current recording, trims it to the recorded length and submits it for
    /// speech recognition.
    /// </summary>
    /// <param name="callBack">Invoked with the recognized text on success.</param>
    /// <param name="onError">
    /// Optional. Invoked with an error description when the recording is unusable or
    /// recognition fails.
    /// </param>
    public void StopRecording(Action<string> callBack, Action<string> onError = null)
    {
        if (!isRecording) return;

        // Sample both values before End(): the position is not available afterwards.
        bool deviceStillRecording = Microphone.IsRecording(_selectedDevice);
        int recordingLength = Microphone.GetPosition(_selectedDevice);

        Microphone.End(_selectedDevice);
        isRecording = false;
        Debug.Log("录音已停止");

        if (currentRecording != null)
        {
            // A non-looping recording stops by itself once the buffer is full; the reported
            // position is then no longer usable, so fall back to the whole clip.
            // NOTE(review): this also applies when the device stopped for another reason
            // (e.g. it was unplugged); the clip is then sent at full length.
            if (!deviceStillRecording && recordingLength <= 0)
            {
                recordingLength = currentRecording.samples;
            }
            recordingLength = Mathf.Min(recordingLength, currentRecording.samples);
        }

        if (currentRecording == null || recordingLength <= 0)
        {
            const string errorMsg = "录音失败或时长太短";
            Debug.LogError(errorMsg);
            onError?.Invoke(errorMsg);
            return;
        }

        // Build a new clip that contains only the part that was actually recorded.
        int channels = currentRecording.channels;
        AudioClip recordedClip = AudioClip.Create(
            "Recording",
            recordingLength,
            channels,
            sampleRate,
            false
        );

        float[] samples = new float[recordingLength * channels];
        currentRecording.GetData(samples, 0);
        recordedClip.SetData(samples, 0);

        // Keep the trimmed clip so PlayRecording can replay it.
        currentRecording = recordedClip;

        LogUtilits.LogWarning($"采样率:{currentRecording.frequency}");
        byte[] audioData = RecordAudioUtility.ConvertAudioClipToPCM(currentRecording);
        StartCoroutine(BDSmartManager.Instance.SpeechToText(audioData, callBack, onError));

        //PlayRecording();
        Debug.Log("录音已提交语音识别");
    }

    /// <summary>
    /// Plays the last recording; intended for manually checking that recording works.
    /// </summary>
    private void PlayRecording()
    {
        if (currentRecording == null)
        {
            Debug.LogWarning("没有可播放的录音");
            return;
        }

        // Create the playback source on first use.
        if (_audioSource == null)
        {
            _audioSource = gameObject.AddComponent<AudioSource>();
            _audioSource.loop = false;
        }

        _audioSource.clip = currentRecording;
        _audioSource.Play();
    }

    // Stop recording and playback when the component is destroyed.
    private void OnDestroy()
    {
        if (!string.IsNullOrEmpty(_selectedDevice) && Microphone.IsRecording(_selectedDevice))
        {
            Microphone.End(_selectedDevice);
        }

        if (_audioSource != null && _audioSource.isPlaying)
        {
            _audioSource.Stop();
        }
    }
}
csharp 复制代码
using Newtonsoft.Json;
using QFramework;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.NetworkInformation;
using Unity.Burst.Intrinsics;
using UnityEngine;
using UnityEngine.Networking;


/// <summary>
/// Baidu speech client. Obtains an access token from the API key / secret key in
/// <see cref="BDInterfaceDefine"/> and posts PCM audio to the speech-recognition endpoint.
/// </summary>
public class BDSmartManager : MonoSingleton<BDSmartManager>
{
    // UnityWebRequest.timeout is expressed in seconds; bounded values make sure the
    // caller's callbacks eventually fire when the connection stalls.
    private const int TokenRequestTimeoutSeconds = 15;
    private const int RecognitionRequestTimeoutSeconds = 30;

    // Refresh the token slightly before the lifetime reported by the server runs out.
    private const int TokenExpirySafetyMarginSeconds = 60;

    private string accessToken; // Token sent with every recognition request
    private bool isTokenValid = false; // True once a token was obtained successfully
    private DateTime tokenExpiresAtUtc = DateTime.MinValue; // UTC time after which the token is refreshed

    #region Access token

    /// <summary>
    /// Requests an access token using the API key and secret key.
    /// On success the token is stored for later requests; on failure an error is logged
    /// and the previously stored state is left untouched.
    /// </summary>
    /// <returns>Coroutine enumerator; yields until the web request has completed.</returns>
    public IEnumerator GetAccessToken()
    {
        // Escape the credentials so characters such as '&' or '+' cannot break the query string.
        string apiKey = Uri.EscapeDataString(BDInterfaceDefine.ApiKey);
        string secretKey = Uri.EscapeDataString(BDInterfaceDefine.SecretKey);

        string url = $"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={apiKey}&client_secret={secretKey}";
        using UnityWebRequest request = UnityWebRequest.Post(url, new WWWForm());
        request.timeout = TokenRequestTimeoutSeconds;
        yield return request.SendWebRequest();

        if (request.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError($"获取Token失败: {request.error}");
            yield break;
        }

        // The raw response contains the token, so it is deliberately not logged.
        BaiduTokenResponse result = null;
        try
        {
            result = JsonConvert.DeserializeObject<BaiduTokenResponse>(request.downloadHandler.text);
        }
        catch (JsonException ex)
        {
            Debug.LogError($"获取Token失败: {ex.Message}");
        }

        if (result == null || string.IsNullOrEmpty(result.access_token))
        {
            Debug.LogError("获取Token失败: 响应中没有access_token");
            yield break;
        }

        accessToken = result.access_token;
        // When the server reports no lifetime, keep the token until a new one is requested.
        // NOTE(review): expires_in is assumed to be in seconds — confirm against the API docs.
        tokenExpiresAtUtc = result.expires_in > 0
            ? DateTime.UtcNow.AddSeconds(Math.Max(0, result.expires_in - TokenExpirySafetyMarginSeconds))
            : DateTime.MaxValue;
        isTokenValid = true;
        Debug.Log($"获取Token成功, expires_in: {result.expires_in}");
    }

    // True when a token is stored and its recorded lifetime has not run out yet.
    private bool HasUsableToken()
    {
        return isTokenValid
            && !string.IsNullOrEmpty(accessToken)
            && DateTime.UtcNow < tokenExpiresAtUtc;
    }

    #endregion


    #region Speech recognition (standard)

    /// <summary>
    /// Sends 16 kHz mono PCM audio to the standard speech-recognition endpoint.
    /// </summary>
    /// <param name="audioData">Raw PCM bytes; sent base64-encoded together with their length.</param>
    /// <param name="onSuccess">Invoked with the first recognition result.</param>
    /// <param name="onError">Invoked with an error description when anything fails.</param>
    /// <returns>Coroutine enumerator; yields while the token and recognition requests run.</returns>
    public IEnumerator SpeechToText(byte[] audioData, Action<string> onSuccess, Action<string> onError)
    {
        if (audioData == null || audioData.Length == 0)
        {
            ReportError(onError, "识别失败: 音频数据为空");
            yield break;
        }

        if (!HasUsableToken())
        {
            Debug.LogWarning("Token无效,正在重新获取...");
            yield return StartCoroutine(GetAccessToken());

            // Do not send a request that is known to be rejected.
            if (!HasUsableToken())
            {
                ReportError(onError, "获取Token失败,无法进行语音识别");
                yield break;
            }
        }

        var requestData = new Dictionary<string, object>
        {
            { "format", "pcm" }, // Audio format
            { "rate", 16000 }, // Sample rate in Hz
            { "channel", 1 }, // Channel count (mono)
            { "token", accessToken },
            { "cuid", SystemInfo.deviceUniqueIdentifier }, // Identifier used to tell clients apart
            { "len", audioData.Length }, // Size of the unencoded audio in bytes
            { "speech", Convert.ToBase64String(audioData) } // Base64-encoded audio; belongs together with "len"
        };
        string jsonData = JsonConvert.SerializeObject(requestData);
        // Log only the size: the payload contains the token and the complete audio.
        LogUtilits.LogMsg($"requestData: format=pcm, rate=16000, channel=1, len={audioData.Length}");
        byte[] postData = System.Text.Encoding.UTF8.GetBytes(jsonData);

        string url = BDInterfaceDefine.SpeechRecognition_Standard;

        // Build the request directly so no throw-away upload handler is created, and
        // dispose it (together with its handlers) when the coroutine finishes.
        using UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST);
        request.uploadHandler = new UploadHandlerRaw(postData);
        request.downloadHandler = new DownloadHandlerBuffer();
        request.timeout = RecognitionRequestTimeoutSeconds;
        request.SetRequestHeader("Content-Type", "application/json");
        request.SetRequestHeader("Accept", "application/json");

        yield return request.SendWebRequest();

        if (request.result != UnityWebRequest.Result.Success)
        {
            ReportError(onError, $"网络请求失败: {request.error}");
            yield break;
        }

        string responseText = request.downloadHandler.text;
        LogUtilits.LogMsg($"识别结果:{responseText}");

        BaiduASRResponse response = null;
        try
        {
            response = JsonConvert.DeserializeObject<BaiduASRResponse>(responseText);
        }
        catch (JsonException ex)
        {
            Debug.LogError($"识别结果解析失败: {ex.Message}");
        }

        if (response == null)
        {
            ReportError(onError, "识别失败: 无法解析服务器响应");
            yield break;
        }

        if (response.err_no == 0 && response.result != null && response.result.Length > 0)
        {
            string recognizedText = response.result[0];
            onSuccess?.Invoke(recognizedText);
            Debug.Log($"识别成功: {recognizedText}");
        }
        else
        {
            ReportError(onError, $"err_msg识别失败: {response.err_msg} (err_no错误码: {response.err_no})");
        }
    }

    // Notifies the caller first, then logs the same message as an error.
    private static void ReportError(Action<string> onError, string message)
    {
        onError?.Invoke(message);
        Debug.LogError(message);
    }

    #endregion
}
csharp 复制代码
// Baidu cloud interface definitions: credentials and endpoint URLs used by BDSmartManager.
// NOTE(review): the two key constants are placeholders; avoid committing real credentials to source control.
public static class BDInterfaceDefine 
{

    // API key; inserted as client_id into the token request URL.
    public const string ApiKey = "替换为自己的ApiKey";
    // Secret key; inserted as client_secret into the token request URL.
    public const string SecretKey = "替换为自己的SecretKey";


    /// <summary>
    /// Short speech recognition, standard edition: URL the recognition request is posted to.
    /// </summary>
    public const string SpeechRecognition_Standard = "https://vop.baidu.com/server_api";
}



// Response data structure of the Baidu speech-recognition API.
// Deserialized from the response JSON in BDSmartManager.SpeechToText; field names must match the JSON keys.
[System.Serializable]
public class BaiduASRResponse
{
    // Not read by the visible code. NOTE(review): meaning not shown here — confirm against the API docs.
    public string corpus_no;
    // Error description; included in the message reported when recognition fails.
    public string err_msg;
    // Error code; SpeechToText treats 0 as success.
    public int err_no;
    // Recognition results; SpeechToText uses the first element.
    public string[] result;
    // Not read by the visible code. NOTE(review): presumably a request serial number — confirm.
    public string sn;
}

// Response of the access-token request.
// Deserialized from the response JSON in BDSmartManager.GetAccessToken; field names must match the JSON keys.
[System.Serializable]
public class BaiduTokenResponse
{
    public string refresh_token;
    // NOTE(review): presumably the token lifetime in seconds — confirm against the API docs.
    public int expires_in;
    public string session_key;
    // The token BDSmartManager stores and sends as "token" in recognition requests.
    public string access_token;
    public string scope;
    public string session_secret;
}
csharp 复制代码
using System;
using UnityEngine;

/// <summary>
/// Helpers that convert recorded Unity audio into raw 16-bit little-endian mono PCM.
/// </summary>
public static class RecordAudioUtility
{
    /// <summary>
    /// Converts an <see cref="AudioClip"/> to 16-bit little-endian PCM. Clips with more
    /// than one channel are downmixed to mono by averaging the channels of each frame.
    /// </summary>
    /// <param name="clip">Clip to convert.</param>
    /// <returns>Mono PCM bytes (2 bytes per sample); empty when the clip has no samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="clip"/> is null.</exception>
    public static byte[] ConvertAudioClipToPCM(AudioClip clip)
    {
        if (clip == null)
        {
            throw new ArgumentNullException(nameof(clip));
        }

        int channels = clip.channels;
        float[] samples = new float[clip.samples * channels];
        if (samples.Length == 0)
        {
            return Array.Empty<byte>();
        }

        // GetData fills the array with interleaved samples (frame by frame).
        clip.GetData(samples, 0);

        byte[] pcmData = ConvertFloatToPCM16(samples);
        return channels > 1 ? DownmixToMono(pcmData, channels) : pcmData;
    }

    /// <summary>
    /// Averages the channels of every frame of interleaved 16-bit little-endian PCM.
    /// For two channels the result is (left + right) / 2 per frame.
    /// </summary>
    private static byte[] DownmixToMono(byte[] interleavedData, int channels)
    {
        int bytesPerFrame = channels * 2; // 16-bit PCM: 2 bytes per sample
        int frameCount = interleavedData.Length / bytesPerFrame;
        byte[] monoData = new byte[frameCount * 2];

        for (int frame = 0; frame < frameCount; frame++)
        {
            int readPos = frame * bytesPerFrame;
            int sum = 0;

            for (int channel = 0; channel < channels; channel++)
            {
                // Read little-endian explicitly: matches ConvertFloatToPCM16 on every
                // platform and avoids a temporary array per sample.
                sum += (short)(interleavedData[readPos] | (interleavedData[readPos + 1] << 8));
                readPos += 2;
            }

            short mono = (short)(sum / channels);

            int writePos = frame * 2;
            monoData[writePos] = (byte)(mono & 0xFF);
            monoData[writePos + 1] = (byte)((mono >> 8) & 0xFF);
        }

        return monoData;
    }

    /// <summary>
    /// Converts float samples to 16-bit little-endian PCM, clamping each sample to [-1, 1] first.
    /// </summary>
    private static byte[] ConvertFloatToPCM16(float[] samples)
    {
        byte[] pcmBytes = new byte[samples.Length * 2];
        int position = 0;

        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp so out-of-range samples cannot wrap around in the cast below.
            float sample = Mathf.Clamp(samples[i], -1f, 1f);

            short intSample = (short)(sample * 32767f);

            // Little-endian: low byte first.
            pcmBytes[position++] = (byte)(intSample & 0xFF);
            pcmBytes[position++] = (byte)((intSample >> 8) & 0xFF);
        }
        return pcmBytes;
    }
}
相关推荐
CreasyChan2 小时前
unity 对象池实测可用
unity·c#
weixin_424294672 小时前
Unity项目的Artifacts文件夹过大怎么解决?
unity·游戏引擎
没事写写笔记10 小时前
Unity HDRP14.0.12 Volume 配置参数
unity
红黑色的圣西罗12 小时前
手游手动异形屏适配方案,类“明日方舟”
unity
广州灵眸科技有限公司18 小时前
瑞芯微(EASY EAI)RV1126B 语音识别
人工智能·语音识别
开开心心_Every1 天前
手机端课程表管理工具:支持课程导入自定义
python·游戏·微信·django·pdf·excel·语音识别
agicall.com1 天前
信创电话助手录音模式说明:单轨混音 vs 双轨立体声
人工智能·语音识别·自动录音·电话录音盒·固话座机·统信uos电话录音
开开心心_Every1 天前
视频无损压缩工具:大幅减小体积并保持画质
游戏·微信·pdf·excel·音视频·语音识别·tornado
开开心心_Every1 天前
安卓语音转文字工具:免费支持实时转换视频
python·游戏·微信·django·pdf·excel·语音识别