Unity使用豆包语音模型

环境

使用日期:2026.3.23

OS:Windows10

引擎:Unity2021.3.45

文档

语音合成模型V3

https://www.volcengine.com/docs/6561/1598757?lang=zh#_2-http-chunked格式接口说明

必须参数

  1. baseUrl:
    https://openspeech.bytedance.com/api/v3/tts/unidirectional
  2. XApiAppId
    语音模型/Doubao-语音合成-立即使用/ APP ID
  3. XApiAccessKey
    语音模型/Doubao-语音合成-立即使用/ Access Token
  4. XApiResourceId
    设置语音模型。例如:seed-tts-2.0
  5. speaker
    发言者。例如:zh_female_vv_uranus_bigtts

示例

发送请求,将文本合成为语音(wav);

接收响应,将响应信息解析后合成为AudioClip。

将wav格式的字节数组转换为AudioClip

csharp 复制代码
/// <summary>
/// Converts in-memory 16-bit PCM WAV data into a Unity <see cref="AudioClip"/>.
/// </summary>
public static class WavUtility
{
    // A RIFF file starts with "RIFF", a 4-byte size and "WAVE"; chunks follow from byte 12.
    const int RiffHeaderSize = 12;
    // Every chunk begins with a 4-byte ASCII id followed by a 4-byte little-endian body size.
    const int ChunkHeaderSize = 8;
    // Minimum size of a "fmt " chunk body (format, channels, rate, byte rate, block align, bits).
    const int MinFormatChunkSize = 16;
    const int FormatPcm = 1;
    const int FormatExtensible = 0xFFFE;
    const int SupportedBitsPerSample = 16;

    /// <summary>
    /// Builds an <see cref="AudioClip"/> from a complete WAV byte array.
    /// </summary>
    /// <param name="wavBytes">RIFF/WAVE bytes containing 16-bit PCM samples.</param>
    /// <param name="clipName">Name given to the created clip.</param>
    /// <returns>A non-streaming clip holding the decoded samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="wavBytes"/> is null.</exception>
    /// <exception cref="ArgumentException">The bytes are not a usable RIFF/WAVE stream.</exception>
    /// <exception cref="NotSupportedException">The audio is not 16-bit PCM.</exception>
    public static AudioClip ToAudioClip(byte[] wavBytes, string clipName)
    {
        if (wavBytes == null)
        {
            throw new ArgumentNullException(nameof(wavBytes));
        }
        if (wavBytes.Length < RiffHeaderSize
            || !HasTag(wavBytes, 0, "RIFF")
            || !HasTag(wavBytes, 8, "WAVE"))
        {
            throw new ArgumentException("Not a RIFF/WAVE byte stream.", nameof(wavBytes));
        }

        int audioFormat = 0;
        int channels = 0;
        long sampleRate = 0;
        int bitsPerSample = 0;
        int dataStartIndex = -1;
        int dataLength = 0;

        // Walk the chunk list instead of assuming a fixed 44-byte header, so extra
        // chunks placed before "data" are skipped rather than played as audio.
        long position = RiffHeaderSize;
        while (position + ChunkHeaderSize <= wavBytes.Length)
        {
            int header = (int)position;
            long declaredSize = ReadUInt32(wavBytes, header + 4);
            int body = header + ChunkHeaderSize;
            long available = wavBytes.Length - body;

            if (HasTag(wavBytes, header, "fmt "))
            {
                if (declaredSize < MinFormatChunkSize || available < MinFormatChunkSize)
                {
                    throw new ArgumentException("WAV fmt chunk is truncated.", nameof(wavBytes));
                }
                audioFormat = ReadUInt16(wavBytes, body);
                channels = ReadUInt16(wavBytes, body + 2);
                sampleRate = ReadUInt32(wavBytes, body + 4);
                bitsPerSample = ReadUInt16(wavBytes, body + 14);
            }
            else if (HasTag(wavBytes, header, "data"))
            {
                dataStartIndex = body;
                // Streamed WAV headers are written before the length is known, so the
                // declared size may be 0 or larger than the payload; use what is present.
                bool sizeUnknown = declaredSize == 0 || declaredSize > available;
                dataLength = (int)(sizeUnknown ? available : declaredSize);
                break;
            }

            // Chunk bodies are padded to an even number of bytes.
            position = body + declaredSize + (declaredSize & 1);
        }

        if (dataStartIndex < 0)
        {
            throw new ArgumentException("WAV data chunk not found.", nameof(wavBytes));
        }
        if (channels <= 0 || sampleRate <= 0 || sampleRate > int.MaxValue)
        {
            throw new ArgumentException("WAV fmt chunk is missing or invalid.", nameof(wavBytes));
        }
        bool isPcm = audioFormat == FormatPcm || audioFormat == FormatExtensible;
        if (!isPcm || bitsPerSample != SupportedBitsPerSample)
        {
            throw new NotSupportedException("Only 16-bit PCM WAV data is supported.");
        }

        // Keep whole frames only so every channel gets the same number of samples.
        int bytesPerFrame = channels * 2;
        int frameCount = dataLength / bytesPerFrame;
        if (frameCount == 0)
        {
            throw new ArgumentException("WAV data chunk contains no audio frames.", nameof(wavBytes));
        }

        // Convert signed 16-bit little-endian samples to floats in [-1, 1).
        float[] samples = new float[frameCount * channels];
        for (int i = 0; i < samples.Length; i++)
        {
            int offset = dataStartIndex + i * 2;
            short sample = (short)(wavBytes[offset] | (wavBytes[offset + 1] << 8));
            samples[i] = sample / 32768f;
        }

        AudioClip clip = AudioClip.Create(clipName, frameCount, channels, (int)sampleRate, false);
        clip.SetData(samples, 0);
        return clip;
    }

    // Returns true when the four bytes at offset equal the ASCII tag; caller guarantees bounds.
    static bool HasTag(byte[] bytes, int offset, string tag)
    {
        for (int i = 0; i < 4; i++)
        {
            if (bytes[offset + i] != (byte)tag[i])
            {
                return false;
            }
        }
        return true;
    }

    // Reads an unsigned 16-bit little-endian value regardless of host endianness.
    static int ReadUInt16(byte[] bytes, int offset)
    {
        return bytes[offset] | (bytes[offset + 1] << 8);
    }

    // Reads an unsigned 32-bit little-endian value into a long to avoid sign issues.
    static long ReadUInt32(byte[] bytes, int offset)
    {
        return bytes[offset]
            | ((long)bytes[offset + 1] << 8)
            | ((long)bytes[offset + 2] << 16)
            | ((long)bytes[offset + 3] << 24);
    }
}

数据类

csharp 复制代码
/// <summary>
/// Root object of the synthesis request body. DoubaoAudioAPI.RequestTextToAudio fills it
/// and converts it to JSON with JsonUtility.ToJson, so field names are the JSON keys.
/// </summary>
[Serializable]
public class RequestAudio
{
    /// <summary>User section of the request; see <see cref="AudioUser"/>.</summary>
    public AudioUser user;
    /// <summary>Text, speaker and audio settings; see <see cref="RequestAudioParams"/>.</summary>
    public RequestAudioParams req_params;
}
/// <summary>
/// User section of the request body, nested under <c>RequestAudio.user</c>.
/// </summary>
[Serializable]
public class AudioUser
{
    /// <summary>User id; DoubaoAudioAPI sets it to a newly generated GUID string per request.</summary>
    public string uid;
}
/// <summary>
/// Synthesis parameters of the request body, nested under <c>RequestAudio.req_params</c>.
/// </summary>
[Serializable]
public class RequestAudioParams
{
    /// <summary>Text to be synthesized into speech.</summary>
    public string text;
    /// <summary>Speaker (voice) identifier, taken from the DoubaoAudioAPI inspector field.</summary>
    public string speaker;
    /// <summary>Output audio settings; see <see cref="AudioParams"/>.</summary>
    public AudioParams audio_params;
}
/// <summary>
/// Output audio settings, nested under <c>RequestAudioParams.audio_params</c>.
/// </summary>
[Serializable]
public class AudioParams
{
    /// <summary>Audio container/encoding name; DoubaoAudioAPI sends "wav".</summary>
    public string format;
    /// <summary>Requested sample rate; DoubaoAudioAPI sends 44100.</summary>
    public int sample_rate;
}
/// <summary>
/// One JSON object of the streamed response, parsed with JsonUtility.FromJson in
/// DoubaoAudioAPI.OnObjectReceive.
/// </summary>
[Serializable]
public class ResponeAudio
{
    /// <summary>Status code; the consumer only accepts audio from objects whose code is 0.</summary>
    public int code;
    /// <summary>Status text returned with the code (not read by the consumer in this file).</summary>
    public string message;
    /// <summary>Base64-encoded audio bytes; may be null or empty on objects that carry no audio.</summary>
    public string data;
}

自定义下载脚本

解析json的脚本:从分块返回的响应数据中逐个切分出完整的json对象,并通过事件回调返回。

csharp 复制代码
/// <summary>
/// Download handler that splits a streamed response body into top-level JSON objects
/// and raises <see cref="OnObjectReceived"/> once for every complete object.
/// Text outside of a top-level <c>{ ... }</c> pair is discarded.
/// </summary>
public class TTSDownload : DownloadHandlerScript
{
    // Text of the JSON object currently being assembled; empty between objects.
    private readonly StringBuilder buffer = new StringBuilder();
    // Stateful decoder: keeps the leading bytes of a multi-byte UTF-8 character that
    // was cut at a chunk boundary until the remaining bytes arrive.
    private readonly Decoder decoder = Encoding.UTF8.GetDecoder();
    // Scratch space for decoded characters, grown on demand and reused between calls.
    private char[] decoded = new char[0];
    // Nesting depth of '{' for the object being assembled; 0 means "between objects".
    private int braceCount;
    // True while the scanner is inside a JSON string literal, where braces are plain text.
    private bool inString;
    // True when the previous character inside a string was an unconsumed backslash.
    private bool escaped;

    /// <summary>Raised with the full text of each complete top-level JSON object.</summary>
    public event Action<string> OnObjectReceived;
    /// <summary>Raised from <see cref="CompleteContent"/>.</summary>
    public event Action OnComplete;

    public TTSDownload() : base(new byte[4096]) { }

    /// <summary>
    /// Decodes the received bytes as UTF-8 and feeds them to the object scanner.
    /// </summary>
    /// <param name="data">Buffer holding the received bytes.</param>
    /// <param name="dataLength">Number of valid bytes at the start of <paramref name="data"/>.</param>
    /// <returns>Always true.</returns>
    protected override bool ReceiveData(byte[] data, int dataLength)
    {
        if (data == null || dataLength <= 0)
        {
            return true;
        }

        int needed = decoder.GetCharCount(data, 0, dataLength, false);
        if (decoded.Length < needed)
        {
            decoded = new char[needed];
        }
        int count = decoder.GetChars(data, 0, dataLength, decoded, 0, false);
        ProcessBuffer(decoded, count);
        return true;
    }

    // Scans newly decoded characters exactly once, carrying the parse state between calls
    // so an object spanning many network chunks is never re-scanned from its start.
    private void ProcessBuffer(char[] chars, int count)
    {
        for (int i = 0; i < count; i++)
        {
            char c = chars[i];

            if (braceCount == 0)
            {
                // Between objects: ignore everything until the next object opens.
                if (c != '{')
                {
                    continue;
                }
                buffer.Clear();
                buffer.Append(c);
                braceCount = 1;
                continue;
            }

            buffer.Append(c);

            if (inString)
            {
                if (escaped)
                {
                    escaped = false;
                }
                else if (c == '\\')
                {
                    escaped = true;
                }
                else if (c == '"')
                {
                    inString = false;
                }
                continue;
            }

            if (c == '"')
            {
                inString = true;
            }
            else if (c == '{')
            {
                braceCount++;
            }
            else if (c == '}')
            {
                braceCount--;
                if (braceCount == 0)
                {
                    string obj = buffer.ToString();
                    // Reset before notifying so a throwing subscriber cannot leave a
                    // stale object in the buffer.
                    buffer.Clear();
                    OnObjectReceived?.Invoke(obj);
                }
            }
        }
    }

    /// <summary>Notifies subscribers through <see cref="OnComplete"/>.</summary>
    protected override void CompleteContent()
    {
        OnComplete?.Invoke();
    }
}

请求文本合成为音频

csharp 复制代码
using System;
using System.Collections;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// Sends text to the speech-synthesis HTTP endpoint, collects the Base64 audio chunks of
/// the streamed JSON response and raises <see cref="OnGetAudio"/> with the resulting clip.
/// Each call to <see cref="RequestTextToAudio"/> uses its own buffer, so overlapping
/// requests do not affect each other.
/// </summary>
public class DoubaoAudioAPI : MonoBehaviour
{
    // Audio settings sent with every request.
    const string AudioFormat = "wav";
    const int SampleRate = 44100;

    [Header("接口")]
    [SerializeField] string baseUrl;
    [SerializeField] string XApiAppId;
    [SerializeField] string XApiAccessKey;
    [SerializeField] string XApiResourceId;
    [Header("发音对象")]
    [SerializeField] string speaker;
    [Header("存放音频")]
    [SerializeField] bool save;

    // Requests that have been sent but not finished. A coroutine stopped by object
    // destruction never reaches the end of its using block, so OnDestroy releases these.
    readonly System.Collections.Generic.List<UnityWebRequest> activeRequests =
        new System.Collections.Generic.List<UnityWebRequest>();

    /// <summary>Raised with the synthesized clip after a request completes successfully.</summary>
    public event Action<AudioClip> OnGetAudio;

    /// <summary>
    /// Starts an asynchronous synthesis request for <paramref name="text"/>.
    /// The result is delivered through <see cref="OnGetAudio"/>.
    /// </summary>
    /// <param name="text">Text to synthesize; null or empty text is rejected with a warning.</param>
    public void RequestTextToAudio(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            Debug.LogWarning("RequestTextToAudio was called with empty text; no request sent.", this);
            return;
        }

        var requestAudio = new RequestAudio
        {
            user = new AudioUser { uid = Guid.NewGuid().ToString() },
            req_params = new RequestAudioParams
            {
                speaker = speaker,
                text = text,
                audio_params = new AudioParams { format = AudioFormat, sample_rate = SampleRate }
            }
        };

        StartCoroutine(PostTextToAudio(JsonUtility.ToJson(requestAudio)));
    }

    // Decodes one response object and appends its audio bytes to the request's buffer.
    // Objects with a non-zero code or without data carry no audio and are skipped.
    void AppendAudioChunk(MemoryStream target, string content)
    {
        try
        {
            var result = JsonUtility.FromJson<ResponeAudio>(content);
            if (result != null && result.code == 0 && !string.IsNullOrEmpty(result.data))
            {
                byte[] chunk = Convert.FromBase64String(result.data);
                target.Write(chunk, 0, chunk.Length);
            }
        }
        catch (Exception e)
        {
            // A malformed object must not abort the rest of the stream.
            Debug.LogException(e, this);
        }
    }

    // Converts the collected bytes to a clip, notifies subscribers and optionally saves the bytes.
    void DeliverAudio(byte[] audioData)
    {
        if (audioData.Length == 0)
        {
            Debug.LogWarning("The response contained no audio data.", this);
            return;
        }

        AudioClip clip;
        try
        {
            clip = WavUtility.ToAudioClip(audioData, "tts");
        }
        catch (Exception e)
        {
            Debug.LogException(e, this);
            return;
        }

        OnGetAudio?.Invoke(clip);

        if (save)
        {
            try
            {
                // Relative path: the file is written to the process working directory.
                File.WriteAllBytes(Guid.NewGuid() + ".wav", audioData);
            }
            catch (Exception e)
            {
                Debug.LogException(e, this);
            }
        }
    }

    // Posts the JSON body, accumulates the streamed audio in a buffer owned by this
    // request only, and delivers the audio once the request has finished successfully.
    IEnumerator PostTextToAudio(string jsonData)
    {
        using (var audioBuffer = new MemoryStream())
        using (var request = new UnityWebRequest(baseUrl, UnityWebRequest.kHttpVerbPOST))
        {
            request.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(jsonData));

            var audioDownload = new TTSDownload();
            request.downloadHandler = audioDownload;
            Action<string> onObject = content => AppendAudioChunk(audioBuffer, content);
            audioDownload.OnObjectReceived += onObject;

            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("X-Api-App-Id", XApiAppId);
            request.SetRequestHeader("X-Api-Access-Key", XApiAccessKey);
            request.SetRequestHeader("X-Api-Resource-Id", XApiResourceId);

            activeRequests.Add(request);
            yield return request.SendWebRequest();
            activeRequests.Remove(request);
            audioDownload.OnObjectReceived -= onObject;

            if (request.result != UnityWebRequest.Result.Success)
            {
                // Do not build a clip from a failed (possibly partial) response.
                Debug.LogError("请求失败:" + request.error);
                yield break;
            }

            DeliverAudio(audioBuffer.ToArray());
        }
    }

    // Aborts and releases requests whose coroutines will never resume.
    void OnDestroy()
    {
        foreach (UnityWebRequest request in activeRequests)
        {
            request.Abort();
            request.Dispose();
        }
        activeRequests.Clear();
    }
}

测试

设置转换文本,启用程序后发送请求,注册语音合成回调;

按下空格键,播放转换后的音频。

合成速度很快。

csharp 复制代码
using UnityEngine;
/// <summary>
/// Demo component: requests speech for the configured text on Awake, plays the clip as
/// soon as it arrives, and replays it whenever the space key is pressed.
/// </summary>
public class TestAudio : MonoBehaviour
{
    [SerializeField] DoubaoAudioAPI doubaoAudioAPI;
    [SerializeField] AudioSource audioSource;

    [Header("转换文本")]
    [SerializeField] string text = "你好,豆包";

    // Subscribe first so the result of the request started right after is not missed.
    void Awake()
    {
        doubaoAudioAPI.OnGetAudio += PlaySynthesizedClip;
        doubaoAudioAPI.RequestTextToAudio(text);
    }

    // Replays the current clip on space; does nothing until a clip has been assigned.
    void Update()
    {
        if (audioSource.clip == null)
        {
            return;
        }

        if (!Input.GetKeyDown(KeyCode.Space))
        {
            return;
        }

        audioSource.Play();
    }

    // Stops listening for results when this component goes away.
    void OnDestroy() => doubaoAudioAPI.OnGetAudio -= PlaySynthesizedClip;

    // Stores the received clip on the audio source and starts playback immediately.
    void PlaySynthesizedClip(AudioClip audioClip)
    {
        audioSource.clip = audioClip;
        audioSource.Play();
    }
}
相关推荐
张老师带你学2 小时前
unity资源:星际飞船 陨石 虫族 星球
科技·游戏·unity·模型·游戏美术
心前阳光2 小时前
Unity使用豆包语言模型
unity·语言模型
魔士于安2 小时前
unity宇宙飞船
游戏·unity·游戏引擎·贴图·模型
RReality2 小时前
【Unity Shader】高级光照与阴影总结:渲染路径、多光源、透明阴影
unity·游戏引擎
浪客川2 小时前
godot-rust入门案例
rust·游戏引擎·godot
RPGMZ3 小时前
RPGMakerMZ游戏引擎 地图角色顶部显示称号
javascript·游戏引擎·rpgmz·rpgmakermz
harrain20 小时前
拟合模型与虚幻引擎
游戏引擎·数字孪生·虚幻
努力长头发的程序猿1 天前
在Unity2d中,根据Y轴决定渲染顺序(URP项目适用)
unity
DaLiangChen1 天前
Unity 精准 Mesh 点击检测:穿透遮挡 + 单击双击识别
unity·游戏引擎