Unity使用豆包语音模型

环境

使用日期:2026.3.23

OS:Windows10

引擎:Unity2021.3.45

文档

语音合成模型V3

https://www.volcengine.com/docs/6561/1598757?lang=zh#_2-http-chunked格式接口说明

必须参数

  1. baseUrl:
    https://openspeech.bytedance.com/api/v3/tts/unidirectional
  2. XApiAppId
    语音模型/Doubao-语音合成-立即使用/ APP ID
  3. XApiAccessKey
    语音模型/Doubao-语音合成-立即使用/ Access Token
  4. XApiResourceId
    设置语音模型。例如:seed-tts-2.0
  5. speaker
    发言者。例如:zh_female_vv_uranus_bigtts

示例

发送请求,将文本合成为语音(wav);

接收响应,将响应信息解析后合成为AudioClip。

将wav格式的字节数组转换为AudioClip

csharp 复制代码
/// <summary>
/// Converts an in-memory 16-bit PCM WAV file into a Unity <see cref="AudioClip"/>.
/// </summary>
public static class WavUtility
{
    // Size of the canonical RIFF/WAVE header ("RIFF" + "fmt " (16 bytes) + "data" headers).
    private const int CanonicalHeaderSize = 44;

    /// <summary>
    /// Builds an <see cref="AudioClip"/> from the bytes of a 16-bit PCM WAV file.
    /// </summary>
    /// <param name="wavBytes">Complete WAV file contents (header followed by sample data).</param>
    /// <param name="clipName">Name given to the created clip.</param>
    /// <returns>A non-streaming clip holding the decoded samples, normalized to [-1, 1).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="wavBytes"/> is null.</exception>
    /// <exception cref="ArgumentException">The buffer is too short, has an invalid header, or holds no complete frame.</exception>
    /// <exception cref="NotSupportedException">The header declares a bit depth other than 16.</exception>
    public static AudioClip ToAudioClip(byte[] wavBytes, string clipName)
    {
        if (wavBytes == null)
            throw new ArgumentNullException(nameof(wavBytes));
        if (wavBytes.Length < CanonicalHeaderSize)
            throw new ArgumentException("WAV data is shorter than the 44-byte header.", nameof(wavBytes));

        int channels, sampleRate, bitsPerSample, dataStartIndex, dataLength;
        if (!TryReadLayout(wavBytes, out channels, out sampleRate, out bitsPerSample,
                           out dataStartIndex, out dataLength))
        {
            // No walkable RIFF chunk list: fall back to the canonical fixed offsets.
            channels = BitConverter.ToUInt16(wavBytes, 22);
            sampleRate = BitConverter.ToInt32(wavBytes, 24);
            bitsPerSample = BitConverter.ToUInt16(wavBytes, 34);
            dataStartIndex = CanonicalHeaderSize;
            dataLength = wavBytes.Length - CanonicalHeaderSize;
        }

        if (channels <= 0 || sampleRate <= 0)
            throw new ArgumentException("WAV header declares an invalid channel count or sample rate.", nameof(wavBytes));
        if (bitsPerSample != 16)
            throw new NotSupportedException("Only 16-bit PCM WAV is supported, got " + bitsPerSample + "-bit.");

        // Only whole frames (one sample per channel) are decoded; a trailing partial frame is dropped.
        int frameCount = dataLength / 2 / channels;
        if (frameCount <= 0)
            throw new ArgumentException("WAV data contains no complete audio frame.", nameof(wavBytes));

        // Convert 16-bit PCM to float in [-1, 1).
        float[] samples = new float[frameCount * channels];
        for (int i = 0; i < samples.Length; i++)
        {
            short sample = BitConverter.ToInt16(wavBytes, dataStartIndex + i * 2);
            samples[i] = sample / 32768f;
        }

        AudioClip clip = AudioClip.Create(clipName, frameCount, channels, sampleRate, false);
        clip.SetData(samples, 0);
        return clip;
    }

    // Walks the RIFF chunk list to locate the "fmt " fields and the "data" payload.
    // Returns false when the buffer is not a walkable RIFF/WAVE file.
    private static bool TryReadLayout(byte[] wav, out int channels, out int sampleRate,
                                      out int bitsPerSample, out int dataStart, out int dataLength)
    {
        channels = 0;
        sampleRate = 0;
        bitsPerSample = 0;
        dataStart = 0;
        dataLength = 0;

        if (!HasTag(wav, 0, "RIFF") || !HasTag(wav, 8, "WAVE"))
            return false;

        bool hasFormat = false;
        int pos = 12;
        while (pos + 8 <= wav.Length)
        {
            uint declared = BitConverter.ToUInt32(wav, pos + 4);
            int body = pos + 8;
            int remaining = wav.Length - body;

            if (HasTag(wav, pos, "fmt "))
            {
                if (declared < 16 || remaining < 16)
                    return false;
                channels = BitConverter.ToUInt16(wav, body + 2);
                sampleRate = BitConverter.ToInt32(wav, body + 4);
                bitsPerSample = BitConverter.ToUInt16(wav, body + 14);
                hasFormat = true;
            }
            else if (HasTag(wav, pos, "data"))
            {
                if (!hasFormat)
                    return false;
                dataStart = body;
                // A declared size of 0 or one larger than the buffer is treated as a placeholder
                // (the total length may be unknown when the header is written): use everything that follows.
                dataLength = (declared == 0 || declared > (uint)remaining) ? remaining : (int)declared;
                return true;
            }

            // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad byte.
            long next = (long)body + declared + (declared & 1);
            if (next > wav.Length)
                return false;
            pos = (int)next;
        }
        return false;
    }

    // True when the four ASCII characters of tag are found at offset.
    private static bool HasTag(byte[] bytes, int offset, string tag)
    {
        if (offset < 0 || offset + 4 > bytes.Length)
            return false;
        for (int i = 0; i < 4; i++)
        {
            if (bytes[offset + i] != (byte)tag[i])
                return false;
        }
        return true;
    }
}

数据类

csharp 复制代码
/// <summary>
/// Root object of the TTS request body; serialized to JSON with JsonUtility.
/// Field names are part of the JSON payload and must not be renamed.
/// </summary>
[Serializable]
public class RequestAudio
{
    /// <summary>Identifies the caller of the request.</summary>
    public AudioUser user;
    /// <summary>Synthesis parameters (text, speaker, audio format).</summary>
    public RequestAudioParams req_params;
}
/// <summary>
/// "user" section of the TTS request body.
/// </summary>
[Serializable]
public class AudioUser
{
    /// <summary>Caller-supplied user id string.</summary>
    public string uid;
}
/// <summary>
/// "req_params" section of the TTS request body.
/// </summary>
[Serializable]
public class RequestAudioParams
{
    /// <summary>Text to synthesize.</summary>
    public string text;
    /// <summary>Voice identifier, e.g. zh_female_vv_uranus_bigtts.</summary>
    public string speaker;
    /// <summary>Requested output audio format.</summary>
    public AudioParams audio_params;
}
/// <summary>
/// "audio_params" section of the TTS request body.
/// </summary>
[Serializable]
public class AudioParams
{
    /// <summary>Audio container/encoding name, e.g. "wav".</summary>
    public string format;
    /// <summary>Sample rate of the synthesized audio, e.g. 44100.</summary>
    public int sample_rate;
}
/// <summary>
/// One JSON object of the chunked TTS response; deserialized with JsonUtility.
/// </summary>
[Serializable]
public class ResponeAudio
{
    /// <summary>Status code; the consumer in this file treats 0 as "audio chunk present".</summary>
    public int code;
    /// <summary>Status message accompanying the code.</summary>
    public string message;
    /// <summary>Base64-encoded audio bytes for this chunk (may be null or empty).</summary>
    public string data;
}

自定义下载脚本

自定义DownloadHandlerScript:从流式响应中逐个切分出完整的json对象,每切分出一个就通过OnObjectReceived事件返回。

csharp 复制代码
/// <summary>
/// Download handler that splits a streamed response body into top-level JSON objects
/// and raises <see cref="OnObjectReceived"/> once per complete object.
/// </summary>
public class TTSDownload : DownloadHandlerScript
{
    // Stateful decoder: keeps the leading bytes of a multi-byte UTF-8 character
    // that is split across two network chunks instead of corrupting it.
    private readonly Decoder decoder = Encoding.UTF8.GetDecoder();

    // Characters of the JSON object currently being assembled.
    private readonly StringBuilder buffer = new StringBuilder();

    // Scanner state carried across chunks so each character is examined exactly once.
    private int braceDepth;
    private bool insideString;
    private bool escapeNext;

    /// <summary>Raised with the text of each complete top-level JSON object.</summary>
    public event Action<string> OnObjectReceived;

    /// <summary>Raised when the download handler is told the content is complete.</summary>
    public event Action OnComplete;

    public TTSDownload() : base(new byte[4096]) { }

    /// <summary>
    /// Decodes the received bytes and feeds them to the incremental JSON object scanner.
    /// </summary>
    /// <param name="data">Buffer holding the received bytes.</param>
    /// <param name="dataLength">Number of valid bytes at the start of <paramref name="data"/>.</param>
    /// <returns>Always true, so the download continues.</returns>
    protected override bool ReceiveData(byte[] data, int dataLength)
    {
        if (data == null || dataLength <= 0)
            return true;

        // GetChars must run even when it yields no characters: it is what stores
        // an incomplete trailing byte sequence inside the decoder for the next chunk.
        char[] chars = new char[decoder.GetCharCount(data, 0, dataLength)];
        int produced = decoder.GetChars(data, 0, dataLength, chars, 0);

        for (int i = 0; i < produced; i++)
            Consume(chars[i]);

        return true;
    }

    // Advances the scanner by one character and emits an object when its closing brace is reached.
    private void Consume(char c)
    {
        if (braceDepth == 0)
        {
            // Outside any object: skip separators until the next object starts.
            if (c != '{')
                return;
            buffer.Length = 0;
            buffer.Append(c);
            braceDepth = 1;
            return;
        }

        buffer.Append(c);

        if (insideString)
        {
            // Braces inside string literals are not structural; honor backslash escapes.
            if (escapeNext)
                escapeNext = false;
            else if (c == '\\')
                escapeNext = true;
            else if (c == '"')
                insideString = false;
            return;
        }

        if (c == '"')
        {
            insideString = true;
        }
        else if (c == '{')
        {
            braceDepth++;
        }
        else if (c == '}')
        {
            braceDepth--;
            if (braceDepth == 0)
            {
                // Reset before invoking so a throwing subscriber cannot leave stale state behind.
                string obj = buffer.ToString();
                buffer.Length = 0;
                OnObjectReceived?.Invoke(obj);
            }
        }
    }

    /// <summary>Notifies subscribers that the content is complete.</summary>
    protected override void CompleteContent()
    {
        OnComplete?.Invoke();
    }
}

请求文本合成为音频

csharp 复制代码
using System;
using System.Collections;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// Sends text to the Doubao TTS HTTP endpoint, collects the streamed Base64 audio
/// chunks, and raises <see cref="OnGetAudio"/> with the resulting clip.
/// Only one request is active at a time: starting a new one cancels the previous one.
/// </summary>
public class DoubaoAudioAPI : MonoBehaviour
{
    // Status code of the final "synthesis finished" message.
    // NOTE(review): value taken from the Volcengine TTS V3 documentation — confirm against the current docs.
    const int StreamFinishedCode = 20000000;

    [Header("接口")]
    [SerializeField] string baseUrl;
    [SerializeField] string XApiAppId;
    [SerializeField] string XApiAccessKey;
    [SerializeField] string XApiResourceId;
    [Header("发音对象")]
    [SerializeField] string speaker;
    [Header("存放音频")]
    [SerializeField] bool save;

    // Accumulates the decoded audio bytes of the active request.
    MemoryStream memoryStream;

    // Handles of the in-flight request, kept so it can be cancelled and disposed.
    UnityWebRequest activeRequest;
    TTSDownload activeDownload;
    Coroutine activeCoroutine;

    /// <summary>Raised with the synthesized clip once a request has completed with audio data.</summary>
    public event Action<AudioClip> OnGetAudio;

    /// <summary>
    /// Starts synthesizing <paramref name="text"/>. Any request still in flight is cancelled first.
    /// </summary>
    /// <param name="text">Text to synthesize; null or empty text is ignored with a warning.</param>
    public void RequestTextToAudio(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            Debug.LogWarning("RequestTextToAudio: text is null or empty, request skipped.");
            return;
        }

        var requestAudio = new RequestAudio
        {
            user = new AudioUser { uid = Guid.NewGuid().ToString() },
            req_params = new RequestAudioParams
            {
                speaker = speaker,
                text = text,
                audio_params = new AudioParams { format = "wav", sample_rate = 44100 }
            }
        };
        string message = JsonUtility.ToJson(requestAudio);

        // Without this, chunks of the previous request would be written into the new stream.
        CancelActiveRequest();
        memoryStream = new MemoryStream();

        activeCoroutine = StartCoroutine(PostTextToAudio(message, OnObjectReceive, OnCompleted));
    }

    // Handles one JSON object of the streamed response: appends its Base64 audio payload.
    void OnObjectReceive(string content)
    {
        try
        {
            var result = JsonUtility.FromJson<ResponeAudio>(content);
            if (result == null)
                return;

            if (result.code != 0)
            {
                // Surface server-side errors instead of silently producing no audio.
                if (result.code != StreamFinishedCode)
                    Debug.LogWarning("TTS response code " + result.code + ": " + result.message);
                return;
            }

            if (memoryStream != null && !string.IsNullOrEmpty(result.data))
            {
                byte[] chunk = Convert.FromBase64String(result.data);
                memoryStream.Write(chunk, 0, chunk.Length);
            }
        }
        catch (Exception e)
        {
            Debug.LogError(e.Message);
        }
    }

    // Called when the download handler reports completion: builds the clip and optionally saves the WAV.
    void OnCompleted()
    {
        // The transfer is over, so stop tracking it as cancellable. This lets an OnGetAudio
        // subscriber start the next request without aborting the request whose callback is running.
        activeRequest = null;
        activeDownload = null;
        activeCoroutine = null;

        byte[] audioData = null;
        if (memoryStream != null)
        {
            audioData = memoryStream.ToArray();
            memoryStream.Dispose();
            memoryStream = null;
        }
        if (audioData != null && audioData.Length > 0)
        {
            OnGetAudio?.Invoke(WavUtility.ToAudioClip(audioData, "tts"));
            if (save)
                File.WriteAllBytes(Guid.NewGuid() + ".wav", audioData); // relative path: resolved against the working directory
        }
    }

    // Posts the JSON body and streams the response through a TTSDownload handler.
    IEnumerator PostTextToAudio(string jsonData, Action<string> OnObjectReceive, Action OnComplete)
    {
        var request = new UnityWebRequest(baseUrl, UnityWebRequest.kHttpVerbPOST);
        var audioDownload = new TTSDownload();
        activeRequest = request;
        activeDownload = audioDownload;
        try
        {
            byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonData);
            request.uploadHandler = new UploadHandlerRaw(bodyRaw);

            request.downloadHandler = audioDownload;
            audioDownload.OnComplete += OnComplete;
            audioDownload.OnObjectReceived += OnObjectReceive;

            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("X-Api-App-Id", XApiAppId);
            request.SetRequestHeader("X-Api-Access-Key", XApiAccessKey);
            request.SetRequestHeader("X-Api-Resource-Id", XApiResourceId);

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
                Debug.LogError("请求失败:" + request.error);
        }
        finally
        {
            audioDownload.OnComplete -= OnComplete;
            audioDownload.OnObjectReceived -= OnObjectReceive;
            request.Dispose();

            // Clear the tracking fields only if they still refer to this request.
            if (activeRequest == request)
            {
                activeRequest = null;
                activeDownload = null;
                activeCoroutine = null;
            }
        }
    }

    // Stops the in-flight request (if any) and releases everything it owns.
    void CancelActiveRequest()
    {
        if (activeCoroutine != null)
        {
            StopCoroutine(activeCoroutine);
            activeCoroutine = null;
        }
        if (activeDownload != null)
        {
            // Detach first so aborting cannot deliver callbacks for a cancelled request.
            activeDownload.OnComplete -= OnCompleted;
            activeDownload.OnObjectReceived -= OnObjectReceive;
            activeDownload = null;
        }
        if (activeRequest != null)
        {
            // A stopped coroutine never reaches its finally block, so dispose here.
            activeRequest.Abort();
            activeRequest.Dispose();
            activeRequest = null;
        }
        if (memoryStream != null)
        {
            memoryStream.Dispose();
            memoryStream = null;
        }
    }

    void OnDestroy()
    {
        CancelActiveRequest();
    }
}

测试

设置转换文本,启用程序后发送请求,注册语音合成回调;

按下空格键,播放转换后的音频。

合成速度很快。

csharp 复制代码
using UnityEngine;
/// <summary>
/// Demo component: requests speech for the configured text on Awake, plays the clip
/// when it arrives, and replays it whenever the space key is pressed.
/// </summary>
public class TestAudio : MonoBehaviour
{
    [SerializeField] DoubaoAudioAPI doubaoAudioAPI;
    [SerializeField] AudioSource audioSource;

    [Header("转换文本")]
    [SerializeField] string text = "你好,豆包";

    // Subscribe before sending so the result callback cannot be missed.
    private void Awake()
    {
        doubaoAudioAPI.OnGetAudio += HandleClipReady;
        doubaoAudioAPI.RequestTextToAudio(text);
    }

    private void OnDestroy() => doubaoAudioAPI.OnGetAudio -= HandleClipReady;

    // Replays the last received clip on space; does nothing until a clip exists.
    private void Update()
    {
        if (!audioSource.clip)
            return;
        if (!Input.GetKeyDown(KeyCode.Space))
            return;

        audioSource.Play();
    }

    // Stores the synthesized clip on the audio source and plays it immediately.
    private void HandleClipReady(AudioClip clip)
    {
        audioSource.clip = clip;
        audioSource.Play();
    }
}
相关推荐
呆呆敲代码的小Y8 分钟前
【Unity工具篇】| 使用YooAsset接入自己的游戏项目,实现完整热更新流程
游戏·unity·游戏引擎·热更新·yooasset·资源热更新
张老师带你学12 分钟前
Unity 低多边形 赛博朋克城市 拼装 模型 道路 建筑 buildin
科技·游戏·unity·游戏引擎·模型
PassionY12 分钟前
Unity NGO 系列教程(四):多人抓取的权限争夺
unity·xr·network·ngo·multiplayer·ownership·多人竞态权
ฅ^•ﻌ•^ฅ114 分钟前
Unity mcp并使用claude code制作游戏
游戏·unity·游戏引擎
程序员正茂15 分钟前
Unity3d使用SRDebugger屏幕输出调试信息
unity·srdebugger
张老师带你学16 分钟前
unity资源 buildin 低多边形 小镇村
科技·游戏·unity·游戏引擎·模型
PassionY21 分钟前
Unity NGO 系列教程(五):如何构建多人联机区域触发系统
unity·rpc·ngo·网络触发器·serverrpc·networkvariable·authority
RReality1 小时前
【Unity UGUI】InputField 输入框全解
unity·游戏引擎
南無忘码至尊1 小时前
Unity学习90天-第3天-认识触屏输入(手游基础)并完成手机点击屏幕,物体向点击位置移动
学习·unity·c#·游戏引擎·游戏开发
南無忘码至尊1 小时前
Unity学习90天-第3天-认识C# 集合与常用类并实现生成随机位置的 10 个立方体
学习·unity·c#