准备
安装NativeWebSocket插件
流式语音合成流程
1.连接TTS
2.建立会话
3.发送文本
4.关闭会话
5.TTS结束连接
第1步(连接TTS)完成后,可重复执行第2、3、4步,进行多次语音合成。
注意:
每次建立会话需要创建一个id
一次会话,可多次发送文本。
请求文字转语音
使用前需要设置以下参数:XApiAppId、XApiAccessKey、XApiResourceId
csharp
using UnityEngine;
using NativeWebSocket;
using System;
using System.Collections.Generic;
/// <summary>
/// Text-to-speech client. Opens a WebSocket to the configured TTS endpoint,
/// sends connection/session/task events encoded with
/// <see cref="Speech.Protocols.Message"/>, and raises events for the server's
/// replies (connection started, session started/finished, sentence boundaries,
/// audio payloads).
/// </summary>
public class TextToSpeech : MonoBehaviour
{
    // Endpoint and credentials. The credential fields must be filled in before connecting.
    string wss = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
    string XApiAppId = "";
    string XApiAccessKey = "";
    string XApiResourceId = "seed-tts-2.0";

    [Header("发音人")]
    [SerializeField] string speaker = "zh_female_vv_uranus_bigtts";
    [Header("采样率")]
    [SerializeField] int sampleRate = 24000;

    WebSocket webSocket;
    // Id of the session created by the most recent BeginSession call.
    string currentSessionId = null;

    /// <summary>Raised when the WebSocket itself has opened.</summary>
    public event Action OnConnectWeb;
    /// <summary>Raised when the server reports ConnectionStarted.</summary>
    public event Action OnConnectTTSSucess;
    /// <summary>Raised when the server reports SessionStarted.</summary>
    public event Action OnCreateSessionSucess;
    /// <summary>Raised when the server reports SessionFinished.</summary>
    public event Action OnSessionFinished;
    /// <summary>Raised when the server reports TTSSentenceStart.</summary>
    public event Action OnSentenceStart;
    /// <summary>Raised when the server reports TTSSentenceEnd.</summary>
    public event Action OnSentenceEnd;
    /// <summary>Raised with the payload of every non-empty audio-only server message.</summary>
    public event Action<byte[]> OnReceiveAuido;

    /// <summary>
    /// Pumps the socket's incoming message queue. On non-WebGL platforms
    /// NativeWebSocket only delivers OnMessage callbacks when
    /// DispatchMessageQueue is called, so it has to be driven every frame.
    /// </summary>
    void Update()
    {
#if !UNITY_WEBGL || UNITY_EDITOR
        webSocket?.DispatchMessageQueue();
#endif
    }

    /// <summary>
    /// Closes the socket when the component goes away so the connection is not leaked.
    /// </summary>
    void OnDestroy()
    {
        WebSocketDisconnect();
    }

    /// <summary>
    /// Creates the WebSocket with the authentication headers, registers the
    /// callbacks and starts connecting. Any socket left over from an earlier
    /// call is detached and closed first so its handlers do not fire twice.
    /// </summary>
    public async void WebSocketConnect()
    {
        if (webSocket != null)
        {
            WebSocketDisconnect();
        }

        // Request headers; a fresh connect id is generated for every connection.
        var headers = new Dictionary<string, string>
        {
            { "X-Api-App-Key", XApiAppId },
            { "X-Api-Access-Key", XApiAccessKey },
            { "X-Api-Resource-Id", XApiResourceId },
            { "X-Api-Connect-Id", Guid.NewGuid().ToString() },
        };

        var socket = new WebSocket(wss, headers);
        socket.OnError += OnWebSocketError;
        socket.OnClose += OnWebSocketClose;
        socket.OnOpen += OnWebSocketOpen;
        socket.OnMessage += OnReceiveWebSocketMessage;
        webSocket = socket;

        try
        {
            await socket.Connect();
        }
        catch (Exception e)
        {
            // async void: an escaping exception could not be observed by the caller.
            Debug.LogError(e.Message);
        }
    }

    /// <summary>
    /// Unregisters the callbacks and closes the current socket, if any.
    /// </summary>
    public async void WebSocketDisconnect()
    {
        // Take ownership and clear the field before awaiting, so a socket
        // created while the close is in flight is not overwritten with null.
        var socket = webSocket;
        webSocket = null;
        if (socket == null)
        {
            return;
        }

        socket.OnError -= OnWebSocketError;
        socket.OnClose -= OnWebSocketClose;
        socket.OnOpen -= OnWebSocketOpen;
        socket.OnMessage -= OnReceiveWebSocketMessage;
        try
        {
            await socket.Close();
        }
        catch (Exception e)
        {
            Debug.LogError(e.Message);
        }
    }

    /// <summary>Sends the StartConnection event with an empty JSON payload.</summary>
    public void BeginConnectTTS()
    {
        SendEvent(Speech.Protocols.EventType.StartConnection, null, EmptyJsonPayload());
    }

    /// <summary>
    /// Generates a new session id and sends StartSession with the configured
    /// speaker and PCM audio parameters.
    /// </summary>
    public void BeginSession()
    {
        currentSessionId = Guid.NewGuid().ToString();
        var payload = new SessionPayload
        {
            user = new UserInfo { uid = Guid.NewGuid().ToString() },
            @event = (int)Speech.Protocols.EventType.StartSession,
            req_params = new ReqParams
            {
                speaker = speaker,
                text = "",
                audio_params = CreateAudioParams(),
            }
        };
        SendEvent(Speech.Protocols.EventType.StartSession, currentSessionId, ToJsonBytes(payload));
    }

    /// <summary>
    /// Sends a TaskRequest carrying <paramref name="info"/> as the text to
    /// synthesize within the current session.
    /// </summary>
    /// <param name="info">Text to synthesize.</param>
    public void Session(string info)
    {
        var taskRequest = new SessionPayload
        {
            user = new UserInfo { uid = Guid.NewGuid().ToString() },
            @event = (int)Speech.Protocols.EventType.TaskRequest,
            req_params = new ReqParams
            {
                text = info,
                // Same audio parameters as BeginSession (previously a hard-coded 24000 here).
                audio_params = CreateAudioParams(),
                addtions = new Addtions { disable_markdown_filter = true },
            }
        };
        SendEvent(Speech.Protocols.EventType.TaskRequest, currentSessionId, ToJsonBytes(taskRequest));
    }

    /// <summary>Sends FinishSession for the current session.</summary>
    public void EndSession()
    {
        SendEvent(Speech.Protocols.EventType.FinishSession, currentSessionId, EmptyJsonPayload());
    }

    /// <summary>Sends the FinishConnection event with an empty JSON payload.</summary>
    public void EndConnectTTS()
    {
        SendEvent(Speech.Protocols.EventType.FinishConnection, null, EmptyJsonPayload());
    }

    void OnWebSocketError(string info)
    {
        Debug.LogError("错误信息:" + info);
    }

    void OnWebSocketClose(WebSocketCloseCode closeCode)
    {
        Debug.Log("关闭:" + closeCode);
    }

    void OnWebSocketOpen()
    {
        OnConnectWeb?.Invoke();
    }

    /// <summary>
    /// Decodes one server frame, forwards audio payloads and maps the event
    /// type to the matching public event. Decoding and handler exceptions are
    /// logged rather than propagated into the socket callback.
    /// </summary>
    void OnReceiveWebSocketMessage(byte[] data)
    {
        try
        {
            var msg = Speech.Protocols.Message.FromBytes(data);

            // Audio data
            if (msg.MsgType == Speech.Protocols.MsgType.AudioOnlyServer
                && msg.Payload != null && msg.Payload.Length > 0)
            {
                OnReceiveAuido?.Invoke(msg.Payload);
            }

            switch (msg.EventType)
            {
                case Speech.Protocols.EventType.ConnectionStarted:
                    // Connection established; StartSession may be sent now.
                    OnConnectTTSSucess?.Invoke();
                    break;
                case Speech.Protocols.EventType.SessionStarted:
                    // Session started; TaskRequest may be sent now.
                    OnCreateSessionSucess?.Invoke();
                    break;
                case Speech.Protocols.EventType.SessionFinished:
                    OnSessionFinished?.Invoke();
                    break;
                case Speech.Protocols.EventType.TTSSentenceStart:
                    OnSentenceStart?.Invoke();
                    break;
                case Speech.Protocols.EventType.TTSSentenceEnd:
                    OnSentenceEnd?.Invoke();
                    break;
                case Speech.Protocols.EventType.ConnectionFailed:
                case Speech.Protocols.EventType.SessionFailed:
                    string reason = msg.Payload != null && msg.Payload.Length > 0
                        ? System.Text.Encoding.UTF8.GetString(msg.Payload)
                        : "未知错误";
                    Debug.LogError($"{msg.EventType}: {reason}");
                    break;
            }
        }
        catch (Exception e)
        {
            Debug.LogError("接受消息:" + e.Message);
        }
    }

    /// <summary>
    /// Builds a FullClientRequest/WithEvent message and sends it.
    /// </summary>
    void SendEvent(Speech.Protocols.EventType eventType, string sessionId, byte[] payload)
    {
        var msg = new Speech.Protocols.Message
        {
            MsgType = Speech.Protocols.MsgType.FullClientRequest,
            MsgTypeFlag = Speech.Protocols.MsgTypeFlagBits.WithEvent,
            EventType = eventType,
            SessionId = sessionId,
            Payload = payload
        };
        Send(msg.Marshal());
    }

    /// <summary>Audio parameters shared by StartSession and TaskRequest.</summary>
    AudioParams CreateAudioParams()
    {
        return new AudioParams
        {
            format = "pcm",
            sample_rate = sampleRate
        };
    }

    static byte[] ToJsonBytes(SessionPayload payload)
    {
        return System.Text.Encoding.UTF8.GetBytes(JsonUtility.ToJson(payload));
    }

    static byte[] EmptyJsonPayload()
    {
        return System.Text.Encoding.UTF8.GetBytes("{}");
    }

    /// <summary>
    /// Sends raw bytes if the socket exists and is open; otherwise the message
    /// is dropped with a warning instead of throwing.
    /// </summary>
    async void Send(byte[] bytes)
    {
        var socket = webSocket;
        if (socket == null || socket.State != WebSocketState.Open)
        {
            Debug.LogWarning("WebSocket未连接,消息未发送");
            return;
        }

        try
        {
            await socket.Send(bytes);
        }
        catch (Exception e)
        {
            // async void: log instead of letting the exception escape unobserved.
            Debug.LogError(e.Message);
        }
    }

    // JSON payload types. Field names are serialized verbatim by JsonUtility,
    // so they are part of the wire format and must not be renamed casually.
    [System.Serializable]
    public class SessionPayload
    {
        public UserInfo user;
        public int @event;
        public ReqParams req_params;
    }

    [System.Serializable]
    public class UserInfo
    {
        public string uid;
    }

    [System.Serializable]
    public class ReqParams
    {
        public string speaker;
        public string text;
        public AudioParams audio_params;
        // NOTE(review): serialized as the JSON key "addtions"; this looks like a
        // misspelling of "additions" - confirm the expected key against the service docs.
        public Addtions addtions;
    }

    [System.Serializable]
    public class AudioParams
    {
        public string format;
        public int sample_rate;
    }

    [System.Serializable]
    public class Addtions
    {
        public bool disable_markdown_filter = true;
    }
}
协议类
来源于官方C#版本的Demo,并移除了未使用的部分
csharp
using System;
using System.Buffers.Binary;
using System.IO;
using System.Text;
namespace Speech.Protocols
{
    /// <summary>
    /// Defines the event type which determines the event of the message.
    /// </summary>
    public enum EventType : int
    {
        // Default event, applicable for scenarios not using events or not requiring event transmission,
        // or for scenarios using events, non-zero values can be used to validate event legitimacy
        None = 0,
        // 1 ~ 49 for upstream Connection events
        StartConnection = 1,
        StartTask = 1, // Alias of "StartConnection"
        FinishConnection = 2,
        FinishTask = 2, // Alias of "FinishConnection"
        // 50 ~ 99 for downstream Connection events
        // Connection established successfully
        ConnectionStarted = 50,
        TaskStarted = 50, // Alias of "ConnectionStarted"
        // Connection failed (possibly due to authentication failure)
        ConnectionFailed = 51,
        TaskFailed = 51, // Alias of "ConnectionFailed"
        // Connection ended
        ConnectionFinished = 52,
        TaskFinished = 52, // Alias of "ConnectionFinished"
        // 100 ~ 149 for upstream Session events
        StartSession = 100,
        CancelSession = 101,
        FinishSession = 102,
        // 150 ~ 199 for downstream Session events
        SessionStarted = 150,
        SessionCanceled = 151,
        SessionFinished = 152,
        SessionFailed = 153,
        // Usage events
        UsageResponse = 154,
        ChargeData = 154, // Alias of "UsageResponse"
        // 200 ~ 249 for upstream general events
        TaskRequest = 200,
        UpdateConfig = 201,
        // 250 ~ 299 for downstream general events
        AudioMuted = 250,
        // 300 ~ 349 for upstream TTS events
        SayHello = 300,
        // 350 ~ 399 for downstream TTS events
        TTSSentenceStart = 350,
        TTSSentenceEnd = 351,
        TTSResponse = 352,
        TTSEnded = 359,
        PodcastRoundStart = 360,
        PodcastRoundResponse = 361,
        PodcastRoundEnd = 362,
        // 450 ~ 499 for downstream ASR events
        ASRInfo = 450,
        ASRResponse = 451,
        ASREnded = 459,
        // 500 ~ 549 for upstream dialogue events
        // (Ground-Truth-Alignment) text for speech synthesis
        ChatTTSText = 500,
        // 550 ~ 599 for downstream dialogue events
        ChatResponse = 550,
        ChatEnded = 559,
        // 650 ~ 699 for downstream dialogue events
        // Events for source (original) language subtitle
        SourceSubtitleStart = 650,
        SourceSubtitleResponse = 651,
        SourceSubtitleEnd = 652,
        // Events for target (translation) language subtitle
        TranslationSubtitleStart = 653,
        TranslationSubtitleResponse = 654,
        TranslationSubtitleEnd = 655
    }

    /// <summary>
    /// Message type flags which determines how the message will be serialized with the protocol
    /// </summary>
    [Flags]
    public enum MsgTypeFlagBits : byte
    {
        NoSeq = 0, // Non-terminal packet with no sequence
        PositiveSeq = 0b1, // Non-terminal packet with sequence > 0
        LastNoSeq = 0b10, // last packet with no sequence
        NegativeSeq = 0b11, // last packet with sequence < 0
        WithEvent = 0b100 // Payload contains event number (int32)
    }

    /// <summary>
    /// Version bits defines the 4-bit version type
    /// </summary>
    public enum VersionBits : byte
    {
        Version1 = 1,
        Version2 = 2,
        Version3 = 3,
        Version4 = 4
    }

    /// <summary>
    /// Header size bits defines the 4-bit header-size type
    /// (the header occupies 4 * value bytes).
    /// </summary>
    public enum HeaderSizeBits : byte
    {
        HeaderSize4 = 1,
        HeaderSize8 = 2,
        HeaderSize12 = 3,
        HeaderSize16 = 4
    }

    /// <summary>
    /// Serialization bits defines the 4-bit serialization method type
    /// </summary>
    public enum SerializationBits : byte
    {
        Raw = 0,
        JSON = 0b1,
        Thrift = 0b11,
        Custom = 0b1111
    }

    /// <summary>
    /// Compression bits defines the 4-bit compression method type
    /// </summary>
    public enum CompressionBits : byte
    {
        None = 0,
        Gzip = 0b1,
        Custom = 0b1111
    }

    /// <summary>
    /// Message type which determines how the message will be serialized with the protocol
    /// </summary>
    public enum MsgType : byte
    {
        Invalid = 0,
        FullClientRequest = 0b1,
        AudioOnlyClient = 0b10,
        FullServerResponse = 0b1001,
        AudioOnlyServer = 0b1011,
        FrontEndResultServer = 0b1100,
        Error = 0b1111,
        ServerACK = AudioOnlyServer
    }

    /// <summary>
    /// Message structure for protocol communication
    ///  0                 1                 2                 3
    /// | 0 1 2 3 4 5 6 7 | 0 1 2 3 4 5 6 7 | 0 1 2 3 4 5 6 7 | 0 1 2 3 4 5 6 7 |
    /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    /// | Version         | Header Size     | Msg Type        | Flags           |
    /// | (4 bits)        | (4 bits)        | (4 bits)        | (4 bits)        |
    /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    /// | Serialization   | Compression     | Reserved                          |
    /// | (4 bits)        | (4 bits)        | (8 bits)                          |
    /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    /// |                                                                       |
    /// |                      Optional Header Extensions                       |
    /// |                        (if Header Size > 1)                           |
    /// |                                                                       |
    /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    /// |                                                                       |
    /// |                               Payload                                 |
    /// |                          (variable length)                            |
    /// |                                                                       |
    /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    /// </summary>
    public class Message
    {
        public VersionBits Version { get; set; }
        public HeaderSizeBits HeaderSize { get; set; }
        public MsgType MsgType { get; set; }
        public MsgTypeFlagBits MsgTypeFlag { get; set; }
        public SerializationBits Serialization { get; set; }
        public CompressionBits Compression { get; set; }
        public EventType EventType { get; set; }
        public string SessionId { get; set; }
        public string ConnectId { get; set; }
        public int Sequence { get; set; }
        public uint ErrorCode { get; set; }
        public byte[] Payload { get; set; }

        /// <summary>
        /// Creates a new message with default values
        /// (version 1, 4-byte header, JSON serialization, no compression, empty payload).
        /// </summary>
        public Message()
        {
            Version = VersionBits.Version1;
            HeaderSize = HeaderSizeBits.HeaderSize4;
            Serialization = SerializationBits.JSON;
            Compression = CompressionBits.None;
            Payload = Array.Empty<byte>();
        }

        /// <summary>
        /// Creates a new message with specified message type and flag
        /// </summary>
        public static Message Create(MsgType msgType, MsgTypeFlagBits flag)
        {
            return new Message
            {
                MsgType = msgType,
                MsgTypeFlag = flag
            };
        }

        /// <summary>
        /// Creates a message from byte array
        /// </summary>
        /// <param name="data">One complete encoded message.</param>
        /// <exception cref="ArgumentException"><paramref name="data"/> is null or shorter than 4 bytes.</exception>
        /// <exception cref="InvalidDataException">
        /// The data is truncated, declares a length larger than the remaining bytes,
        /// has an invalid header size or unsupported message type, or has trailing bytes.
        /// </exception>
        public static Message FromBytes(byte[] data)
        {
            if (data == null || data.Length < 4)
            {
                throw new ArgumentException("Invalid data length", nameof(data));
            }
            var message = new Message();
            using var stream = new MemoryStream(data);
            message.Unmarshal(stream);
            return message;
        }

        /// <summary>
        /// Converts the message to a byte array
        /// </summary>
        public byte[] Marshal()
        {
            using var stream = new MemoryStream();
            // Write header bytes
            byte header1 = (byte)((byte)Version << 4 | (byte)HeaderSize);
            byte header2 = (byte)((byte)MsgType << 4 | (byte)MsgTypeFlag);
            byte header3 = (byte)((byte)Serialization << 4 | (byte)Compression);
            stream.WriteByte(header1);
            stream.WriteByte(header2);
            stream.WriteByte(header3);
            // Write padding for header size (reserved byte plus any extension bytes)
            int headerSize = 4 * (int)HeaderSize;
            int paddingSize = headerSize - 3;
            for (int i = 0; i < paddingSize; i++)
            {
                stream.WriteByte(0);
            }
            // Write fields in Go writers() order
            if ((MsgTypeFlag & MsgTypeFlagBits.WithEvent) != 0)
            {
                // Write event type
                var eventBytes = new byte[4];
                BinaryPrimitives.WriteInt32BigEndian(eventBytes, (int)EventType);
                stream.Write(eventBytes, 0, 4);
                // Write session ID
                WriteSessionId(stream);
            }
            // Write sequence if needed
            switch (MsgType)
            {
                case MsgType.FullClientRequest:
                case MsgType.FullServerResponse:
                case MsgType.FrontEndResultServer:
                case MsgType.AudioOnlyClient:
                case MsgType.AudioOnlyServer:
                    if (MsgTypeFlag == MsgTypeFlagBits.PositiveSeq || MsgTypeFlag == MsgTypeFlagBits.NegativeSeq)
                    {
                        var seqBytes = new byte[4];
                        BinaryPrimitives.WriteInt32BigEndian(seqBytes, Sequence);
                        stream.Write(seqBytes, 0, 4);
                    }
                    break;
                case MsgType.Error:
                    var errorBytes = new byte[4];
                    BinaryPrimitives.WriteUInt32BigEndian(errorBytes, ErrorCode);
                    stream.Write(errorBytes, 0, 4);
                    break;
            }
            // Write payload with length prefix
            WritePayload(stream);
            return stream.ToArray();
        }

        private void WriteSessionId(MemoryStream stream)
        {
            // Skip session ID for connection events
            switch (EventType)
            {
                case EventType.StartConnection:
                case EventType.FinishConnection:
                case EventType.ConnectionStarted:
                case EventType.ConnectionFailed:
                    return;
            }
            var sessionBytes = string.IsNullOrEmpty(SessionId) ? Array.Empty<byte>() : Encoding.UTF8.GetBytes(SessionId!);
            var lenBytes = new byte[4];
            BinaryPrimitives.WriteUInt32BigEndian(lenBytes, (uint)sessionBytes.Length);
            stream.Write(lenBytes, 0, 4);
            if (sessionBytes.Length > 0)
            {
                stream.Write(sessionBytes, 0, sessionBytes.Length);
            }
        }

        private void WritePayload(MemoryStream stream)
        {
            var payloadBytes = Payload ?? Array.Empty<byte>();
            var lenBytes = new byte[4];
            BinaryPrimitives.WriteUInt32BigEndian(lenBytes, (uint)payloadBytes.Length);
            stream.Write(lenBytes, 0, 4);
            if (payloadBytes.Length > 0)
            {
                stream.Write(payloadBytes, 0, payloadBytes.Length);
            }
        }

        /// <summary>
        /// Unmarshals a byte array into the message
        /// </summary>
        /// <exception cref="InvalidDataException">The stream does not hold exactly one well-formed message.</exception>
        private void Unmarshal(MemoryStream stream)
        {
            // Read header bytes
            byte[] header = ReadExactly(stream, 3, "header");
            Version = (VersionBits)(header[0] >> 4);
            HeaderSize = (HeaderSizeBits)(header[0] & 0x0F);
            MsgType = (MsgType)(header[1] >> 4);
            MsgTypeFlag = (MsgTypeFlagBits)(header[1] & 0x0F);
            Serialization = (SerializationBits)(header[2] >> 4);
            Compression = (CompressionBits)(header[2] & 0x0F);
            // A header-size nibble of 0 would give a negative padding count and
            // leave the reserved byte unread, misaligning everything after it.
            int headerSize = 4 * (int)HeaderSize;
            if (headerSize < 4)
            {
                throw new InvalidDataException($"Invalid header size: {(int)HeaderSize}");
            }
            // Skip padding bytes
            ReadExactly(stream, headerSize - 3, "header padding");
            // Read fields in Go readers() order
            // First, read sequence or error code based on message type
            switch (MsgType)
            {
                case MsgType.FullClientRequest:
                case MsgType.FullServerResponse:
                case MsgType.FrontEndResultServer:
                case MsgType.AudioOnlyClient:
                case MsgType.AudioOnlyServer:
                    if (MsgTypeFlag == MsgTypeFlagBits.PositiveSeq || MsgTypeFlag == MsgTypeFlagBits.NegativeSeq)
                    {
                        Sequence = BinaryPrimitives.ReadInt32BigEndian(ReadExactly(stream, 4, "sequence"));
                    }
                    break;
                case MsgType.Error:
                    ErrorCode = BinaryPrimitives.ReadUInt32BigEndian(ReadExactly(stream, 4, "error code"));
                    break;
                default:
                    throw new InvalidDataException($"Unsupported message type: {MsgType}");
            }
            // Then, if WithEvent flag is set, read event, session ID, and connect ID
            if ((MsgTypeFlag & MsgTypeFlagBits.WithEvent) != 0)
            {
                EventType = (EventType)BinaryPrimitives.ReadInt32BigEndian(ReadExactly(stream, 4, "event type"));
                ReadSessionId(stream);
                ReadConnectId(stream);
            }
            // Read payload with length prefix
            ReadPayload(stream);
            // Verify no unexpected data remains
            if (stream.Position < stream.Length)
            {
                throw new InvalidDataException($"Unexpected data after message: {stream.Length - stream.Position} bytes remaining");
            }
        }

        private void ReadSessionId(MemoryStream stream)
        {
            // Skip session ID for connection events
            switch (EventType)
            {
                case EventType.StartConnection:
                case EventType.FinishConnection:
                case EventType.ConnectionStarted:
                case EventType.ConnectionFailed:
                case EventType.ConnectionFinished:
                    return;
            }
            int sessionIdLength = ReadLength(stream, "session ID");
            if (sessionIdLength > 0)
            {
                SessionId = Encoding.UTF8.GetString(ReadExactly(stream, sessionIdLength, "session ID"));
            }
        }

        private void ReadConnectId(MemoryStream stream)
        {
            // Only read connect ID for specific connection events
            switch (EventType)
            {
                case EventType.ConnectionStarted:
                case EventType.ConnectionFailed:
                case EventType.ConnectionFinished:
                    break;
                default:
                    return;
            }
            int connectIdLength = ReadLength(stream, "connect ID");
            if (connectIdLength > 0)
            {
                ConnectId = Encoding.UTF8.GetString(ReadExactly(stream, connectIdLength, "connect ID"));
            }
        }

        private void ReadPayload(MemoryStream stream)
        {
            int payloadLength = ReadLength(stream, "payload");
            Payload = payloadLength > 0
                ? ReadExactly(stream, payloadLength, "payload")
                : Array.Empty<byte>();
        }

        /// <summary>
        /// Reads a big-endian uint32 length prefix and checks it against the
        /// bytes actually left in the stream, so a corrupt or hostile prefix
        /// cannot trigger a huge allocation or an overflowing int cast.
        /// </summary>
        private static int ReadLength(MemoryStream stream, string what)
        {
            uint length = BinaryPrimitives.ReadUInt32BigEndian(ReadExactly(stream, 4, what + " length"));
            long remaining = stream.Length - stream.Position;
            if (length > remaining)
            {
                throw new InvalidDataException($"Truncated message: {what} declares {length} bytes, only {remaining} remaining");
            }
            return (int)length;
        }

        /// <summary>
        /// Reads exactly <paramref name="count"/> bytes or throws; a short read
        /// is never silently treated as zero-filled data.
        /// </summary>
        private static byte[] ReadExactly(MemoryStream stream, int count, string what)
        {
            long remaining = stream.Length - stream.Position;
            if (count > remaining)
            {
                throw new InvalidDataException($"Truncated message: expected {count} bytes of {what}, only {remaining} remaining");
            }
            if (count <= 0)
            {
                return Array.Empty<byte>();
            }
            var buffer = new byte[count];
            int read = stream.Read(buffer, 0, count);
            if (read != count)
            {
                throw new InvalidDataException($"Truncated message: expected {count} bytes of {what}, read {read}");
            }
            return buffer;
        }
    }
}
TTS音频流式播放
csharp
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Streaming playback of TTS audio. Incoming 16-bit little-endian PCM bytes are
/// converted to float samples and queued; a streaming <see cref="AudioClip"/>
/// drains the queue from its PCM reader callback and outputs silence when the
/// queue is empty.
/// </summary>
public class TTSAudioStreamPlay : MonoBehaviour
{
    [Header("播放器")]
    [SerializeField] AudioSource audioSource;
    AudioClip streamingClip; // The streaming clip being played
    bool stream = true; // Whether the clip is created in streaming mode
    int LengthSamples => Mathf.FloorToInt(audioClipDuration * frequency); // Sample frames per channel
    int channels = 1; // Channel count: 1 = mono, 2 = stereo
    [Header("音频剪辑名称")]
    [SerializeField] string audioName = "TTS_Stream"; // Name given to the created clip
    [Header("音频剪辑长度(秒)")]
    [SerializeField] float audioClipDuration = 1;
    [Header("采样率(HZ)")]
    [SerializeField] int frequency = 24000;
    List<float> audioClipBuffer = new List<float>(); // Samples waiting to be played
    readonly object bufferLock = new object(); // Guards audioClipBuffer (written by WriteAudioData, read by the PCM callback)
    byte? leftoverByte; // Unpaired trailing byte carried over from the previous chunk

    /// <summary>
    /// Creates the streaming clip, assigns it to the audio source and starts
    /// looping playback. A clip left over from an earlier call is released
    /// first so it is not leaked.
    /// </summary>
    public void CreateAudioStream()
    {
        if (streamingClip)
        {
            ReleaseAudioStream();
        }

        streamingClip = AudioClip.Create(
            audioName,
            LengthSamples,
            channels,
            frequency,
            stream,
            OnPCMReaderCallback);
        audioSource.clip = streamingClip;
        audioSource.loop = true;
        audioSource.Play();
    }

    /// <summary>
    /// Stops playback, destroys the streaming clip and discards all buffered audio.
    /// </summary>
    public void ReleaseAudioStream()
    {
        if (streamingClip)
        {
            // The AudioSource may already have been destroyed during teardown.
            if (audioSource)
            {
                audioSource.Stop();
                audioSource.clip = null;
            }
            GameObject.Destroy(streamingClip);
            streamingClip = null;
        }

        lock (bufferLock)
        {
            audioClipBuffer.Clear();
        }
        leftoverByte = null;
    }

    /// <summary>
    /// Appends a chunk of 16-bit little-endian PCM to the playback buffer.
    /// Chunks may split a sample in the middle: an unpaired trailing byte is
    /// kept and combined with the first byte of the next chunk.
    /// </summary>
    /// <param name="data">Raw PCM bytes; null or empty chunks are ignored.</param>
    public void WriteAudioData(byte[] data)
    {
        if (data == null || data.Length == 0)
        {
            return;
        }

        int totalBytes = data.Length + (leftoverByte.HasValue ? 1 : 0);
        float[] samples = new float[totalBytes / 2];
        int sampleIndex = 0;
        int offset = 0;

        // Complete the sample that was split across the previous chunk boundary:
        // the carried byte is the low byte, data[0] the high byte.
        if (leftoverByte.HasValue)
        {
            short split = (short)(leftoverByte.Value | (data[0] << 8));
            samples[sampleIndex++] = split / 32768f;
            offset = 1;
            leftoverByte = null;
        }

        for (; offset + 1 < data.Length; offset += 2)
        {
            short val = (short)(data[offset] | (data[offset + 1] << 8));
            samples[sampleIndex++] = val / 32768f;
        }

        // Odd number of bytes left: keep the last one for the next chunk.
        if (offset < data.Length)
        {
            leftoverByte = data[offset];
        }

        lock (bufferLock)
        {
            audioClipBuffer.AddRange(samples);
        }
    }

    /// <summary>
    /// PCM reader callback passed to AudioClip.Create. Fills <paramref name="data"/>
    /// with queued samples and pads the remainder with silence.
    /// Per the original author's note this must not call main-thread Unity APIs,
    /// so only plain .NET calls are used here.
    /// </summary>
    void OnPCMReaderCallback(float[] data)
    {
        lock (bufferLock)
        {
            int len = System.Math.Min(audioClipBuffer.Count, data.Length);
            // Copy out and drop the consumed samples
            audioClipBuffer.CopyTo(0, data, 0, len);
            audioClipBuffer.RemoveRange(0, len);
            // Fill whatever is left with silence
            System.Array.Clear(data, len, data.Length - len);
        }
    }
}
流式语音合成并播放
结合请求文字转语音和TTS音频流式播放的功能
csharp
using System.Text;
using UnityEngine;
/// <summary>
/// Glue between <see cref="TextToSpeech"/> and <see cref="TTSAudioStreamPlay"/>:
/// connects on Awake, opens a session when needed, forwards text to the
/// session and feeds received audio into the streaming player.
/// Text that arrives while no session is usable is buffered and sent once a
/// session has started.
/// </summary>
public class TextToSpeechOut : MonoBehaviour
{
    [Header("文字合成语音")]
    [SerializeField] TextToSpeech textToSpeech;
    [Header("TTS音频流播放")]
    [SerializeField] TTSAudioStreamPlay tTSAudioStreamPlay;
    bool isTTSConnected; // ConnectionStarted has been received
    bool hasSession = false; // A session has been started
    bool isCreateSession; // StartSession has been sent, waiting for SessionStarted
    bool isEndingSession; // FinishSession has been sent, waiting for SessionFinished
    bool endRequested; // EndReadText was called before the session was ready
    // Created eagerly so ReadText is safe even if it runs before Awake.
    readonly StringBuilder sentenceBuffer = new StringBuilder();

    void Awake()
    {
        tTSAudioStreamPlay.CreateAudioStream();
        textToSpeech.OnConnectWeb += OnConnectWeb;
        textToSpeech.OnConnectTTSSucess += OnConnectTTSSucess;
        textToSpeech.OnCreateSessionSucess += OnCreateSession;
        textToSpeech.OnSessionFinished += OnSessionFinish;
        textToSpeech.OnReceiveAuido += OnReceiveData;
        textToSpeech.WebSocketConnect();
    }

    void OnDestroy()
    {
        // Referenced components may already be destroyed during scene teardown.
        if (tTSAudioStreamPlay)
        {
            tTSAudioStreamPlay.ReleaseAudioStream();
        }
        if (textToSpeech)
        {
            textToSpeech.OnConnectWeb -= OnConnectWeb;
            textToSpeech.OnConnectTTSSucess -= OnConnectTTSSucess;
            textToSpeech.OnCreateSessionSucess -= OnCreateSession;
            textToSpeech.OnSessionFinished -= OnSessionFinish;
            textToSpeech.OnReceiveAuido -= OnReceiveData;
            textToSpeech.WebSocketDisconnect();
        }
    }

    void OnConnectWeb()
    {
        Debug.Log("连接服务器成功,开始连接TTS");
        textToSpeech.BeginConnectTTS();
    }

    void OnConnectTTSSucess()
    {
        Debug.Log("连接TTS成功,建立会话");
        // A freshly started connection cannot carry a session yet, so any
        // stale session state is cleared before requesting one.
        isTTSConnected = true;
        hasSession = false;
        isCreateSession = false;
        isEndingSession = false;
        TryBeginSession();
    }

    void OnCreateSession()
    {
        Debug.Log("建立会话成功");
        hasSession = true;
        isCreateSession = false;
        // Send the buffered text
        if (sentenceBuffer.Length > 0)
        {
            var content = sentenceBuffer.ToString();
            sentenceBuffer.Clear();
            textToSpeech.Session(content);
        }
        // Honour an EndReadText that arrived before the session was ready,
        // now that the buffered text has been sent.
        if (endRequested)
        {
            endRequested = false;
            isEndingSession = true;
            textToSpeech.EndSession();
        }
    }

    void OnSessionFinish()
    {
        Debug.Log("会话结束");
        hasSession = false;
        isCreateSession = false;
        isEndingSession = false;
        // Text that arrived while the session was finishing needs a new session.
        if (sentenceBuffer.Length > 0)
        {
            TryBeginSession();
        }
    }

    void OnReceiveData(byte[] data)
    {
        tTSAudioStreamPlay.WriteAudioData(data);
    }

    /// <summary>
    /// Requests a new session unless the TTS connection is not ready yet or a
    /// session already exists, is being created, or is being finished.
    /// Not marking a session as "being created" before the connection is
    /// ready matters: the request would be dropped and the flag would then
    /// block session creation once the connection comes up.
    /// </summary>
    void TryBeginSession()
    {
        if (!isTTSConnected || hasSession || isCreateSession || isEndingSession)
        {
            return;
        }
        isCreateSession = true;
        textToSpeech.BeginSession();
    }

    /// <summary>
    /// Speaks <paramref name="text"/>. It is sent immediately when a session is
    /// active; otherwise it is buffered and a session is requested.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    public void ReadText(string text)
    {
        if (hasSession && !isEndingSession)
        {
            textToSpeech.Session(text);
            return;
        }

        // Buffer the text until a session is available
        sentenceBuffer.Append(text);
        TryBeginSession();
    }

    /// <summary>
    /// Requests the end of the current session. If the session is still being
    /// set up, the request is deferred until the buffered text has been sent.
    /// </summary>
    public void EndReadText()
    {
        if (hasSession)
        {
            if (!isEndingSession)
            {
                isEndingSession = true;
                textToSpeech.EndSession();
            }
            return;
        }

        if (isCreateSession || sentenceBuffer.Length > 0)
        {
            endRequested = true;
        }
    }
}