环境
使用日期:2026.3.23
OS:Windows10
引擎:Unity2021.3.45
文档
语音合成模型V3
https://www.volcengine.com/docs/6561/1598757?lang=zh#_2-http-chunked格式接口说明
必须参数
- baseUrl:https://openspeech.bytedance.com/api/v3/tts/unidirectional
- XApiAppId:语音模型/Doubao-语音合成-立即使用/ APP ID
- XApiAccessKey:语音模型/Doubao-语音合成-立即使用/ Access Token
- XApiResourceId:设置语音模型。例如:seed-tts-2.0
- speaker:发言者。例如:zh_female_vv_uranus_bigtts
示例
发送请求,将文本合成为语音(wav);
接收响应,将响应信息解析后合成为AudioClip。
将 wav 格式的字节数组转换为 AudioClip
csharp
/// <summary>
/// Converts an in-memory 16-bit PCM WAV file into a Unity <see cref="AudioClip"/>.
/// </summary>
public static class WavUtility
{
    /// <summary>
    /// Builds an <see cref="AudioClip"/> from the bytes of a RIFF/WAVE file.
    /// </summary>
    /// <param name="wavBytes">Complete WAV file contents (header followed by 16-bit PCM samples).</param>
    /// <param name="clipName">Name given to the created clip.</param>
    /// <returns>A non-streaming clip holding the decoded samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="wavBytes"/> is null.</exception>
    /// <exception cref="ArgumentException">The bytes are not a usable RIFF/WAVE stream.</exception>
    /// <exception cref="NotSupportedException">The audio is not 16 bits per sample.</exception>
    public static AudioClip ToAudioClip(byte[] wavBytes, string clipName)
    {
        if (wavBytes == null)
        {
            throw new ArgumentNullException(nameof(wavBytes));
        }
        if (wavBytes.Length < 12 || !HasTag(wavBytes, 0, "RIFF") || !HasTag(wavBytes, 8, "WAVE"))
        {
            throw new ArgumentException("Not a RIFF/WAVE byte stream.", nameof(wavBytes));
        }

        int channels = 0;
        long sampleRate = 0;
        int bitsPerSample = 0;
        int dataStart = -1;
        int dataLength = 0;

        // Walk the chunk list instead of assuming a fixed 44-byte header: encoders may emit
        // a longer "fmt " chunk or extra chunks (e.g. "LIST") before the sample data.
        int pos = 12;
        while (pos + 8 <= wavBytes.Length)
        {
            uint declaredSize = ReadUInt32LE(wavBytes, pos + 4);
            int bodyStart = pos + 8;
            int available = wavBytes.Length - bodyStart;

            if (HasTag(wavBytes, pos, "fmt "))
            {
                if (available < 16)
                {
                    break;
                }
                channels = ReadUInt16LE(wavBytes, bodyStart + 2);
                sampleRate = ReadUInt32LE(wavBytes, bodyStart + 4);
                bitsPerSample = ReadUInt16LE(wavBytes, bodyStart + 14);
            }
            else if (HasTag(wavBytes, pos, "data"))
            {
                dataStart = bodyStart;
                // Streaming encoders cannot know the final size up front and commonly write
                // 0 or 0xFFFFFFFF; in that case (or when the size overruns the buffer) use
                // everything that actually arrived.
                bool sizeUsable = declaredSize != 0 && declaredSize <= (uint)available;
                dataLength = sizeUsable ? (int)declaredSize : available;
                break;
            }

            // Chunk bodies are padded to an even length.
            long next = (long)bodyStart + declaredSize + (declaredSize & 1);
            if (next > wavBytes.Length)
            {
                break;
            }
            pos = (int)next;
        }

        if (channels <= 0 || sampleRate <= 0 || sampleRate > int.MaxValue)
        {
            throw new ArgumentException("WAV data has no valid 'fmt ' chunk before the sample data.", nameof(wavBytes));
        }
        if (bitsPerSample != 16)
        {
            throw new NotSupportedException("Only 16-bit PCM WAV is supported, got " + bitsPerSample + " bits per sample.");
        }
        if (dataStart < 0)
        {
            throw new ArgumentException("WAV data has no 'data' chunk.", nameof(wavBytes));
        }

        // Only whole frames are kept so a truncated trailing sample cannot shift the channels.
        int frameCount = dataLength / 2 / channels;
        if (frameCount == 0)
        {
            throw new ArgumentException("WAV data contains no complete audio frame.", nameof(wavBytes));
        }

        // Convert 16-bit PCM to floats in [-1, 1).
        float[] samples = new float[frameCount * channels];
        for (int i = 0; i < samples.Length; i++)
        {
            int offset = dataStart + i * 2;
            short sample = unchecked((short)(wavBytes[offset] | (wavBytes[offset + 1] << 8)));
            samples[i] = sample / 32768f;
        }

        AudioClip clip = AudioClip.Create(clipName, frameCount, channels, (int)sampleRate, false);
        clip.SetData(samples, 0);
        return clip;
    }

    // True when the four bytes at offset spell the given ASCII chunk tag.
    private static bool HasTag(byte[] bytes, int offset, string tag)
    {
        if (offset + 4 > bytes.Length)
        {
            return false;
        }
        for (int i = 0; i < 4; i++)
        {
            if (bytes[offset + i] != (byte)tag[i])
            {
                return false;
            }
        }
        return true;
    }

    // Reads a little-endian unsigned 16-bit value regardless of host byte order.
    private static int ReadUInt16LE(byte[] bytes, int offset)
    {
        return bytes[offset] | (bytes[offset + 1] << 8);
    }

    // Reads a little-endian unsigned 32-bit value regardless of host byte order.
    private static uint ReadUInt32LE(byte[] bytes, int offset)
    {
        return unchecked((uint)(bytes[offset]
            | (bytes[offset + 1] << 8)
            | (bytes[offset + 2] << 16)
            | (bytes[offset + 3] << 24)));
    }
}
数据类
csharp
/// <summary>
/// Top-level request payload. The field names are the JSON keys, so they must not be renamed.
/// </summary>
[Serializable]
public class RequestAudio
{
// Caller identity sub-object.
public AudioUser user;
// Synthesis parameters sub-object (text, speaker, audio format).
public RequestAudioParams req_params;
}
/// <summary>
/// "user" section of the request payload.
/// </summary>
[Serializable]
public class AudioUser
{
// Identifier sent as the caller's user id.
public string uid;
}
/// <summary>
/// "req_params" section of the request payload: what to say, with which voice, in which audio format.
/// </summary>
[Serializable]
public class RequestAudioParams
{
// Text to be synthesized.
public string text;
// Voice identifier.
public string speaker;
// Output audio settings.
public AudioParams audio_params;
}
/// <summary>
/// "audio_params" section of the request payload describing the requested audio output.
/// </summary>
[Serializable]
public class AudioParams
{
// Container/encoding name of the requested audio.
public string format;
// Requested sample rate; presumably in Hz - confirm against the API reference.
public int sample_rate;
}
/// <summary>
/// One JSON object of the streamed response.
/// </summary>
[Serializable]
public class ResponeAudio
{
// Status code of this response object.
public int code;
// Text accompanying the code; presumably a human-readable status - confirm against the API reference.
public string message;
// Payload as text; may be null or empty.
public string data;
}
自定义下载脚本
从分块(chunked)响应流中解析 JSON 的下载脚本:每收到一个完整的 JSON 对象,就通过回调返回该对象。
csharp
/// <summary>
/// Streaming download handler that splits a chunked HTTP response into its top-level JSON
/// objects and raises <see cref="OnObjectReceived"/> once for each complete object.
/// </summary>
public class TTSDownload : DownloadHandlerScript
{
    // Text of the JSON object currently being assembled; empty between objects.
    private readonly StringBuilder buffer = new StringBuilder();

    // Stateful decoder: it holds back the leading bytes of a multi-byte UTF-8 character that
    // was split across two network chunks instead of decoding them as replacement characters.
    private readonly Decoder decoder = Encoding.UTF8.GetDecoder();

    // Scratch space for decoded characters, grown on demand.
    private char[] decoded = new char[4096];

    // Scanner state carried across ReceiveData calls so each character is examined once.
    private int braceCount;
    private bool inString;
    private bool escaped;

    /// <summary>Raised with the text of each complete top-level JSON object.</summary>
    public event Action<string> OnObjectReceived;

    /// <summary>Raised when the download handler is told the content is complete.</summary>
    public event Action OnComplete;

    public TTSDownload() : base(new byte[4096]) { }

    /// <summary>
    /// Decodes the received bytes and feeds them to the JSON object scanner.
    /// </summary>
    /// <param name="data">Buffer holding the received bytes.</param>
    /// <param name="dataLength">Number of valid bytes at the start of <paramref name="data"/>.</param>
    /// <returns>Always true, so the download continues.</returns>
    protected override bool ReceiveData(byte[] data, int dataLength)
    {
        if (data == null || dataLength <= 0)
        {
            return true;
        }

        // Bytes pending inside the decoder can make the output longer than the input,
        // so ask for the exact size rather than assuming dataLength is enough.
        int required = decoder.GetCharCount(data, 0, dataLength, false);
        if (decoded.Length < required)
        {
            decoded = new char[required];
        }

        int charCount = decoder.GetChars(data, 0, dataLength, decoded, 0, false);
        ProcessChars(decoded, charCount);
        return true;
    }

    // Advances the brace-matching scanner over newly decoded characters and emits every
    // top-level object that closes. Braces inside JSON string literals are not counted.
    private void ProcessChars(char[] chars, int count)
    {
        for (int i = 0; i < count; i++)
        {
            char c = chars[i];

            if (braceCount == 0)
            {
                // Between objects: ignore separators (and any stray '}') until one opens.
                if (c == '{')
                {
                    buffer.Append(c);
                    braceCount = 1;
                }
                continue;
            }

            buffer.Append(c);

            if (inString)
            {
                if (escaped)
                {
                    escaped = false;
                }
                else if (c == '\\')
                {
                    escaped = true;
                }
                else if (c == '"')
                {
                    inString = false;
                }
                continue;
            }

            if (c == '"')
            {
                inString = true;
            }
            else if (c == '{')
            {
                braceCount++;
            }
            else if (c == '}')
            {
                braceCount--;
                if (braceCount == 0)
                {
                    // Reset before invoking so a throwing subscriber cannot leave a stale object behind.
                    string obj = buffer.ToString();
                    buffer.Clear();
                    OnObjectReceived?.Invoke(obj);
                }
            }
        }
    }

    /// <summary>Notifies subscribers that the content is complete.</summary>
    protected override void CompleteContent()
    {
        OnComplete?.Invoke();
    }
}
请求文本合成为音频
csharp
using System;
using System.Collections;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;
/// <summary>
/// Sends text to the configured TTS endpoint, collects the streamed Base64 WAV chunks and
/// raises <see cref="OnGetAudio"/> with the resulting <see cref="AudioClip"/>.
/// Each request owns its own buffer, so overlapping requests do not mix their audio.
/// </summary>
public class DoubaoAudioAPI : MonoBehaviour
{
    // Code of the terminating "synthesis finished" response object, which is not an error.
    // NOTE(review): value taken from the Volcengine V3 HTTP chunked API docs - confirm against the current reference.
    const int SynthesisFinishedCode = 20000000;

    [Header("接口")]
    [SerializeField] string baseUrl;
    [SerializeField] string XApiAppId;
    [SerializeField] string XApiAccessKey;
    [SerializeField] string XApiResourceId;
    [Header("发音对象")]
    [SerializeField] string speaker;
    [Header("音频参数")]
    [SerializeField] int sampleRate = 44100;
    [Header("存放音频")]
    [SerializeField] bool save;

    /// <summary>Raised with the synthesized clip after a request finishes successfully.</summary>
    public event Action<AudioClip> OnGetAudio;

    /// <summary>
    /// Starts an asynchronous request that synthesizes <paramref name="text"/> as WAV audio.
    /// The result is delivered through <see cref="OnGetAudio"/>.
    /// </summary>
    /// <param name="text">Text to synthesize; null or empty text is rejected with a warning.</param>
    public void RequestTextToAudio(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            Debug.LogWarning("RequestTextToAudio: text is empty, request skipped.", this);
            return;
        }
        if (string.IsNullOrEmpty(baseUrl))
        {
            Debug.LogError("RequestTextToAudio: baseUrl is not configured.", this);
            return;
        }

        var requestAudio = new RequestAudio
        {
            user = new AudioUser { uid = Guid.NewGuid().ToString() },
            req_params = new RequestAudioParams
            {
                text = text,
                speaker = speaker,
                audio_params = new AudioParams
                {
                    // WAV is required: the response is decoded with WavUtility.
                    format = "wav",
                    sample_rate = sampleRate
                }
            }
        };

        StartCoroutine(PostTextToAudio(JsonUtility.ToJson(requestAudio)));
    }

    // Posts the JSON body, accumulates audio chunks while the response streams in, and
    // delivers the clip once the request has finished successfully.
    IEnumerator PostTextToAudio(string jsonData)
    {
        using (var audioStream = new MemoryStream())
        using (var request = new UnityWebRequest(baseUrl, UnityWebRequest.kHttpVerbPOST))
        {
            byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonData);
            request.uploadHandler = new UploadHandlerRaw(bodyRaw);

            var audioDownload = new TTSDownload();
            request.downloadHandler = audioDownload;

            // The closure binds this request's own stream, so concurrent requests stay separate.
            Action<string> onObject = content => AppendAudioChunk(audioStream, content);
            audioDownload.OnObjectReceived += onObject;

            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("X-Api-App-Id", XApiAppId);
            request.SetRequestHeader("X-Api-Access-Key", XApiAccessKey);
            request.SetRequestHeader("X-Api-Resource-Id", XApiResourceId);

            yield return request.SendWebRequest();

            audioDownload.OnObjectReceived -= onObject;

            if (request.result != UnityWebRequest.Result.Success)
            {
                // Do not hand out a clip built from a partial or failed response.
                Debug.LogError("请求失败:" + request.error);
                yield break;
            }

            DeliverAudio(audioStream.ToArray());
        }
    }

    // Parses one streamed JSON object and appends its Base64 audio payload, if any, to target.
    void AppendAudioChunk(MemoryStream target, string content)
    {
        try
        {
            var result = JsonUtility.FromJson<ResponeAudio>(content);
            if (result == null)
            {
                return;
            }

            if (result.code == 0)
            {
                if (!string.IsNullOrEmpty(result.data))
                {
                    byte[] chunk = Convert.FromBase64String(result.data);
                    target.Write(chunk, 0, chunk.Length);
                }
            }
            else if (result.code != SynthesisFinishedCode)
            {
                // Surface service-side failures instead of silently producing no audio.
                Debug.LogWarning("TTS response code " + result.code + ": " + result.message, this);
            }
        }
        catch (Exception e)
        {
            // Runs inside the download callback: one malformed object must not abort the stream.
            Debug.LogException(e, this);
        }
    }

    // Converts the collected WAV bytes to a clip, notifies subscribers and optionally saves the file.
    void DeliverAudio(byte[] audioData)
    {
        if (audioData == null || audioData.Length == 0)
        {
            Debug.LogWarning("TTS request finished without any audio data.", this);
            return;
        }

        AudioClip clip = null;
        try
        {
            clip = WavUtility.ToAudioClip(audioData, "tts");
        }
        catch (Exception e)
        {
            Debug.LogException(e, this);
        }

        if (clip != null)
        {
            OnGetAudio?.Invoke(clip);
        }

        if (save)
        {
            SaveWav(audioData);
        }
    }

    // Writes the WAV bytes to a uniquely named file in the current working directory.
    void SaveWav(byte[] audioData)
    {
        try
        {
            File.WriteAllBytes(Guid.NewGuid() + ".wav", audioData);
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
        {
            // Saving is a debugging aid; a read-only location must not break playback.
            Debug.LogException(e, this);
        }
    }
}
测试
设置转换文本,启用程序后发送请求,注册语音合成回调;
按下空格键,播放转换后的音频。
合成速度很快。
csharp
using UnityEngine;
/// <summary>
/// Demo component: requests speech for <c>text</c> on Awake, plays the clip as soon as it
/// arrives, and replays it whenever the space key is pressed.
/// </summary>
public class TestAudio : MonoBehaviour
{
    [SerializeField] DoubaoAudioAPI doubaoAudioAPI;
    [SerializeField] AudioSource audioSource;
    [Header("转换文本")]
    [SerializeField] string text = "你好,豆包";

    // Subscribe before sending so the result callback cannot be missed.
    void Awake()
    {
        doubaoAudioAPI.OnGetAudio += HandleAudioReady;
        doubaoAudioAPI.RequestTextToAudio(text);
    }

    // Replays the current clip on space; does nothing until a clip has been assigned.
    void Update()
    {
        bool hasClip = audioSource.clip;
        if (!hasClip)
        {
            return;
        }

        if (Input.GetKeyDown(KeyCode.Space))
        {
            audioSource.Play();
        }
    }

    // Drop the subscription so the API object no longer references this component.
    void OnDestroy()
    {
        doubaoAudioAPI.OnGetAudio -= HandleAudioReady;
    }

    // Stores the synthesized clip on the audio source and starts playback immediately.
    void HandleAudioReady(AudioClip audioClip)
    {
        audioSource.clip = audioClip;
        audioSource.Play();
    }
}