初始版本
优化版本
优化内容:
- 原始版本使用 NativeWebSocket 插件,无法读取握手响应头中的 X-Tt-Logid;改用 WebSocketSharp 插件后可以读取该响应头。
- 将两个插件实现的共性提取到抽象基类 BaseTextToSpeech 中,可依据需要替换具体实现。
示例
通信类
websocket通信基类
csharp
using UnityEngine;
using System;
/// <summary>
/// Abstract text-to-speech (TTS) client component.
/// Declares the connection and session operations that a concrete WebSocket
/// backend must implement, and exposes events that subclasses raise through
/// the protected <c>Invoke*</c> helpers below.
/// </summary>
public abstract class BaseTextToSpeech : MonoBehaviour
{
    // ---- Operations implemented by the concrete backend ----

    /// <summary>Opens the WebSocket connection.</summary>
    public abstract void WebSocketConnect();

    /// <summary>Closes the WebSocket connection.</summary>
    public abstract void WebSocketDisconnect();

    /// <summary>Starts the TTS connection on top of the open WebSocket.</summary>
    public abstract void BeginConnectTTS();

    /// <summary>Requests a new synthesis session.</summary>
    public abstract void BeginSession();

    /// <summary>Submits text to the current session.</summary>
    /// <param name="info">Text to synthesize.</param>
    public abstract void Session(string info);

    /// <summary>Requests the end of the current session.</summary>
    public abstract void EndSession();

    /// <summary>Ends the TTS connection.</summary>
    public abstract void EndConnectTTS();

    // ---- Notifications ----

    /// <summary>Raised by <see cref="InvokeConnectWeb"/>.</summary>
    public event Action OnConnectWeb;

    /// <summary>Raised by <see cref="InvokeConnectTTSSucess"/>.</summary>
    public event Action OnConnectTTSSucess;

    /// <summary>Raised by <see cref="InvokeCreateSessionSucess"/>.</summary>
    public event Action OnCreateSessionSucess;

    /// <summary>Raised by <see cref="InvokeSessionFinished"/>.</summary>
    public event Action OnSessionFinished;

    /// <summary>Raised by <see cref="InvokeSentenceStart"/>.</summary>
    public event Action OnSentenceStart;

    /// <summary>Raised by <see cref="InvokeSentenceEnd"/>.</summary>
    public event Action OnSentenceEnd;

    /// <summary>Raised by <see cref="InvokeReceiveAudio"/> with the received audio bytes.</summary>
    public event Action<byte[]> OnReceiveAudio;

    // ---- Raise helpers (events can only be invoked from the declaring class) ----

    /// <summary>Raises <see cref="OnConnectWeb"/> if it has subscribers.</summary>
    protected void InvokeConnectWeb() => OnConnectWeb?.Invoke();

    /// <summary>Raises <see cref="OnConnectTTSSucess"/> if it has subscribers.</summary>
    protected void InvokeConnectTTSSucess() => OnConnectTTSSucess?.Invoke();

    /// <summary>Raises <see cref="OnCreateSessionSucess"/> if it has subscribers.</summary>
    protected void InvokeCreateSessionSucess() => OnCreateSessionSucess?.Invoke();

    /// <summary>Raises <see cref="OnSessionFinished"/> if it has subscribers.</summary>
    protected void InvokeSessionFinished() => OnSessionFinished?.Invoke();

    /// <summary>Raises <see cref="OnSentenceStart"/> if it has subscribers.</summary>
    protected void InvokeSentenceStart() => OnSentenceStart?.Invoke();

    /// <summary>Raises <see cref="OnSentenceEnd"/> if it has subscribers.</summary>
    protected void InvokeSentenceEnd() => OnSentenceEnd?.Invoke();

    /// <summary>Raises <see cref="OnReceiveAudio"/> if it has subscribers.</summary>
    /// <param name="data">Audio bytes forwarded unchanged to subscribers.</param>
    protected void InvokeReceiveAudio(byte[] data) => OnReceiveAudio?.Invoke(data);
}
使用NativeWebSocket插件
csharp
using UnityEngine;
using NativeWebSocket;
using System;
using System.Collections.Generic;
/// <summary>
/// Text-to-speech client backed by the NativeWebSocket plugin.
/// Builds the binary protocol messages (<c>Speech.Protocols.Message</c>) for the
/// connection/session life cycle, sends them over the socket, and translates
/// received messages into the events declared on <see cref="BaseTextToSpeech"/>.
/// </summary>
public class TextToSpeech : BaseTextToSpeech
{
    string wss = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
    string XApiAppId = "";
    string XApiAccessKey = "";
    string XApiResourceId = "seed-tts-2.0";
    [Header("发音人")]
    [SerializeField] string speaker = "zh_female_vv_uranus_bigtts";
    [Header("采样率")]
    [SerializeField] int sampleRate = 24000;
    WebSocket webSocket;
    string currentSessionId = null;

    /// <summary>
    /// Pumps messages received by the socket so that the OnMessage callback fires.
    /// NativeWebSocket queues incoming frames and only delivers them when
    /// DispatchMessageQueue is called; the method does not exist in WebGL player
    /// builds, hence the conditional compilation (see the plugin's README).
    /// </summary>
    void Update()
    {
#if !UNITY_WEBGL || UNITY_EDITOR
        webSocket?.DispatchMessageQueue();
#endif
    }

    /// <summary>
    /// Creates a socket with the authentication headers, registers the callbacks
    /// and connects. A socket left over from a previous call is closed first so
    /// it is not leaked.
    /// </summary>
    public override async void WebSocketConnect()
    {
        if (webSocket != null)
        {
            WebSocketDisconnect();
        }

        // Handshake headers; the connect id is a fresh GUID per connection.
        var headers = new Dictionary<string, string>
        {
            { "X-Api-App-Key", XApiAppId },
            { "X-Api-Access-Key", XApiAccessKey },
            { "X-Api-Resource-Id", XApiResourceId },
            { "X-Api-Connect-Id", Guid.NewGuid().ToString() },
        };

        var socket = new WebSocket(wss, headers);
        socket.OnError += OnWebSocketError;
        socket.OnClose += OnWebSocketClose;
        socket.OnOpen += OnWebSocketOpen;
        socket.OnMessage += OnReceiveWebSocketMessage;
        webSocket = socket;

        await socket.Connect();
    }

    /// <summary>
    /// Unregisters the callbacks and closes the socket, logging any close error.
    /// The field is cleared before awaiting the close so that a socket created
    /// by a reconnect during the await is not overwritten with null afterwards.
    /// </summary>
    public override async void WebSocketDisconnect()
    {
        var socket = webSocket;
        if (socket == null)
        {
            return;
        }
        webSocket = null;

        socket.OnError -= OnWebSocketError;
        socket.OnClose -= OnWebSocketClose;
        socket.OnOpen -= OnWebSocketOpen;
        socket.OnMessage -= OnReceiveWebSocketMessage;
        try
        {
            await socket.Close();
        }
        catch (Exception e)
        {
            Debug.LogError(e.Message);
        }
    }

    /// <summary>Sends the StartConnection event with an empty JSON payload.</summary>
    public override void BeginConnectTTS()
    {
        SendConnectionEvent(Speech.Protocols.EventType.StartConnection);
    }

    /// <summary>
    /// Generates a new session id and sends StartSession with the configured
    /// speaker and PCM audio parameters.
    /// </summary>
    public override void BeginSession()
    {
        currentSessionId = Guid.NewGuid().ToString();
        var payload = new SessionPayload
        {
            user = new UserInfo { uid = Guid.NewGuid().ToString() },
            @event = (int)Speech.Protocols.EventType.StartSession,
            req_params = new ReqParams
            {
                speaker = speaker,
                text = "",
                audio_params = new AudioParams
                {
                    format = "pcm",
                    sample_rate = sampleRate
                },
            }
        };
        SendSessionEvent(Speech.Protocols.EventType.StartSession, ToJsonBytes(payload));
    }

    /// <summary>Sends a TaskRequest carrying the text to synthesize in the current session.</summary>
    /// <param name="info">Text to synthesize.</param>
    public override void Session(string info)
    {
        var taskRequest = new SessionPayload
        {
            user = new UserInfo { uid = Guid.NewGuid().ToString() },
            @event = (int)Speech.Protocols.EventType.TaskRequest,
            req_params = new ReqParams
            {
                text = info,
                audio_params = new AudioParams
                {
                    format = "pcm",
                    // Use the configured rate so the request agrees with BeginSession
                    // (this was hard-coded to 24000 before).
                    sample_rate = sampleRate
                },
                addtions = new Addtions
                {
                    disable_markdown_filter = true,
                },
            }
        };
        SendSessionEvent(Speech.Protocols.EventType.TaskRequest, ToJsonBytes(taskRequest));
    }

    /// <summary>Sends FinishSession for the current session.</summary>
    public override void EndSession()
    {
        SendSessionEvent(Speech.Protocols.EventType.FinishSession, EmptyJsonBytes());
    }

    /// <summary>Sends the FinishConnection event with an empty JSON payload.</summary>
    public override void EndConnectTTS()
    {
        SendConnectionEvent(Speech.Protocols.EventType.FinishConnection);
    }

    /// <summary>UTF-8 bytes of an empty JSON object.</summary>
    static byte[] EmptyJsonBytes()
    {
        return System.Text.Encoding.UTF8.GetBytes("{}");
    }

    /// <summary>Serializes a payload with JsonUtility and encodes it as UTF-8.</summary>
    static byte[] ToJsonBytes(SessionPayload payload)
    {
        return System.Text.Encoding.UTF8.GetBytes(JsonUtility.ToJson(payload));
    }

    /// <summary>Sends a connection-level event (no session id) with an empty JSON payload.</summary>
    void SendConnectionEvent(Speech.Protocols.EventType eventType)
    {
        var msg = new Speech.Protocols.Message
        {
            MsgType = Speech.Protocols.MsgType.FullClientRequest,
            MsgTypeFlag = Speech.Protocols.MsgTypeFlagBits.WithEvent,
            EventType = eventType,
            Payload = EmptyJsonBytes(),
        };
        Send(msg.Marshal());
    }

    /// <summary>Sends a session-level event tagged with the current session id.</summary>
    void SendSessionEvent(Speech.Protocols.EventType eventType, byte[] payload)
    {
        var msg = new Speech.Protocols.Message
        {
            MsgType = Speech.Protocols.MsgType.FullClientRequest,
            MsgTypeFlag = Speech.Protocols.MsgTypeFlagBits.WithEvent,
            EventType = eventType,
            SessionId = currentSessionId,
            Payload = payload
        };
        Send(msg.Marshal());
    }

    void OnWebSocketError(string info)
    {
        Debug.LogError("错误信息:" + info);
    }

    void OnWebSocketClose(WebSocketCloseCode closeCode)
    {
        Debug.Log("关闭:" + closeCode);
    }

    void OnWebSocketOpen()
    {
        InvokeConnectWeb();
    }

    /// <summary>
    /// Decodes a received frame, forwards audio payloads, logs server errors and
    /// raises the event matching the message's event type. Any exception is
    /// logged so a bad frame does not propagate out of the callback.
    /// </summary>
    void OnReceiveWebSocketMessage(byte[] data)
    {
        try
        {
            var msg = Speech.Protocols.Message.FromBytes(data);
            var payload = msg.Payload;
            // A frame without payload must not throw here, otherwise its event
            // would be swallowed by the catch below.
            int payloadLength = payload?.Length ?? 0;
            Debug.Log($"接收语音合成消息:{msg.MsgType},{msg.EventType},{payloadLength}");

            // Audio data
            if (msg.MsgType == Speech.Protocols.MsgType.AudioOnlyServer)
            {
                Debug.Log("接收到音频消息");
                if (payloadLength > 0)
                {
                    Debug.Log("音频数据回调");
                    InvokeReceiveAudio(payload);
                }
            }

            if (msg.MsgType == Speech.Protocols.MsgType.Error)
            {
                Debug.LogError($"{msg.EventType}: {DescribeFailure(payload)}");
            }

            switch (msg.EventType)
            {
                case Speech.Protocols.EventType.ConnectionStarted:
                    // TTS connection established.
                    InvokeConnectTTSSucess();
                    break;
                case Speech.Protocols.EventType.SessionStarted:
                    // Session started.
                    InvokeCreateSessionSucess();
                    break;
                case Speech.Protocols.EventType.SessionFinished:
                    InvokeSessionFinished();
                    break;
                case Speech.Protocols.EventType.TTSSentenceStart:
                    InvokeSentenceStart();
                    break;
                case Speech.Protocols.EventType.TTSSentenceEnd:
                    InvokeSentenceEnd();
                    break;
                case Speech.Protocols.EventType.ConnectionFailed:
                case Speech.Protocols.EventType.SessionFailed:
                    Debug.LogError($"{msg.EventType}: {DescribeFailure(payload)}");
                    break;
            }
        }
        catch (Exception e)
        {
            Debug.LogError("接受消息:" + e.Message);
        }
    }

    /// <summary>Returns the payload decoded as UTF-8 text, or a placeholder when it is missing or empty.</summary>
    static string DescribeFailure(byte[] payload)
    {
        return payload != null && payload.Length > 0
            ? System.Text.Encoding.UTF8.GetString(payload)
            : "未知错误";
    }

    /// <summary>
    /// Sends bytes if the socket exists and is open; otherwise logs a warning and
    /// drops the message. Send failures are logged instead of escaping from the
    /// async void method.
    /// </summary>
    async void Send(byte[] bytes)
    {
        var socket = webSocket;
        if (socket == null || socket.State != WebSocketState.Open)
        {
            Debug.LogWarning("WebSocket未连接,消息未发送");
            return;
        }
        try
        {
            await socket.Send(bytes);
        }
        catch (Exception e)
        {
            Debug.LogError("发送消息:" + e.Message);
        }
    }

    /// <summary>JSON payload serialized into StartSession / TaskRequest messages.</summary>
    [System.Serializable]
    public class SessionPayload
    {
        public UserInfo user;
        public int @event;
        public ReqParams req_params;
    }

    [System.Serializable]
    public class UserInfo
    {
        public string uid;
    }

    [System.Serializable]
    public class ReqParams
    {
        public string speaker;
        public string text; // Optional; may be omitted or left as an empty string.
        public AudioParams audio_params;
        // NOTE(review): the serialized key is "addtions"; confirm against the service's
        // API reference whether the expected key is "additions".
        public Addtions addtions;
    }

    [System.Serializable]
    public class AudioParams
    {
        public string format;
        public int sample_rate;
    }

    [System.Serializable]
    public class Addtions
    {
        public bool disable_markdown_filter = true;
    }
}
使用WebSocketSharp插件
csharp
using UnityEngine;
using System;
using WebSocketSharp;
/// <summary>
/// Text-to-speech client backed by the websocket-sharp plugin.
/// Behaves like the NativeWebSocket variant but additionally logs the
/// X-Tt-Logid handshake response header when the socket opens.
/// </summary>
/// <remarks>
/// NOTE(review): websocket-sharp is understood to raise OnMessage on a worker
/// thread, so subscribers of the base-class events may run off the Unity main
/// thread - confirm before touching Unity objects from those handlers.
/// </remarks>
public class TextToSpeechSharp : BaseTextToSpeech
{
    string wss = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
    string XApiAppId = "";
    string XApiAccessKey = "";
    string XApiResourceId = "seed-tts-2.0";
    [Header("发音人")]
    [SerializeField] string speaker = "zh_female_vv_uranus_bigtts";
    [Header("采样率")]
    [SerializeField] int sampleRate = 24000;
    WebSocket webSocket;
    string currentSessionId = null;

    /// <summary>
    /// Creates a socket, sets the authentication headers, registers the callbacks
    /// and connects. A socket left over from a previous call is closed first so
    /// it is not leaked.
    /// </summary>
    public override void WebSocketConnect()
    {
        if (webSocket != null)
        {
            WebSocketDisconnect();
        }

        var socket = new WebSocket(wss);

        // Handshake headers; the connect id is a fresh GUID per connection.
        socket.SetUserHeader("X-Api-App-Key", XApiAppId);
        socket.SetUserHeader("X-Api-Access-Key", XApiAccessKey);
        socket.SetUserHeader("X-Api-Resource-Id", XApiResourceId);
        socket.SetUserHeader("X-Api-Connect-Id", Guid.NewGuid().ToString());

        socket.OnError += OnWebSocketError;
        socket.OnClose += OnWebSocketClose;
        socket.OnOpen += OnWebSocketOpen;
        socket.OnMessage += OnReceiveWebSocketMessage;

        // Assign before connecting: the open callback reads this field.
        webSocket = socket;
        socket.Connect();
    }

    /// <summary>Unregisters the callbacks and closes the socket, logging any close error.</summary>
    public override void WebSocketDisconnect()
    {
        var socket = webSocket;
        if (socket == null)
        {
            return;
        }
        webSocket = null;

        socket.OnError -= OnWebSocketError;
        socket.OnClose -= OnWebSocketClose;
        socket.OnOpen -= OnWebSocketOpen;
        socket.OnMessage -= OnReceiveWebSocketMessage;
        try
        {
            socket.Close();
        }
        catch (Exception e)
        {
            Debug.LogError(e.Message);
        }
    }

    /// <summary>Sends the StartConnection event with an empty JSON payload.</summary>
    public override void BeginConnectTTS()
    {
        SendConnectionEvent(Speech.Protocols.EventType.StartConnection);
    }

    /// <summary>
    /// Generates a new session id and sends StartSession with the configured
    /// speaker and PCM audio parameters.
    /// </summary>
    public override void BeginSession()
    {
        currentSessionId = Guid.NewGuid().ToString();
        var payload = new SessionPayload
        {
            user = new UserInfo { uid = Guid.NewGuid().ToString() },
            @event = (int)Speech.Protocols.EventType.StartSession,
            req_params = new ReqParams
            {
                speaker = speaker,
                text = "",
                audio_params = new AudioParams
                {
                    format = "pcm",
                    sample_rate = sampleRate
                },
            }
        };
        SendSessionEvent(Speech.Protocols.EventType.StartSession, ToJsonBytes(payload));
    }

    /// <summary>Sends a TaskRequest carrying the text to synthesize in the current session.</summary>
    /// <param name="info">Text to synthesize.</param>
    public override void Session(string info)
    {
        var taskRequest = new SessionPayload
        {
            user = new UserInfo { uid = Guid.NewGuid().ToString() },
            @event = (int)Speech.Protocols.EventType.TaskRequest,
            req_params = new ReqParams
            {
                text = info,
                audio_params = new AudioParams
                {
                    format = "pcm",
                    // Use the configured rate so the request agrees with BeginSession
                    // (this was hard-coded to 24000 before).
                    sample_rate = sampleRate
                },
                addtions = new Addtions
                {
                    disable_markdown_filter = true,
                },
            }
        };
        SendSessionEvent(Speech.Protocols.EventType.TaskRequest, ToJsonBytes(taskRequest));
    }

    /// <summary>Sends FinishSession for the current session.</summary>
    public override void EndSession()
    {
        SendSessionEvent(Speech.Protocols.EventType.FinishSession, EmptyJsonBytes());
    }

    /// <summary>Sends the FinishConnection event with an empty JSON payload.</summary>
    public override void EndConnectTTS()
    {
        SendConnectionEvent(Speech.Protocols.EventType.FinishConnection);
    }

    /// <summary>UTF-8 bytes of an empty JSON object.</summary>
    static byte[] EmptyJsonBytes()
    {
        return System.Text.Encoding.UTF8.GetBytes("{}");
    }

    /// <summary>Serializes a payload with JsonUtility and encodes it as UTF-8.</summary>
    static byte[] ToJsonBytes(SessionPayload payload)
    {
        return System.Text.Encoding.UTF8.GetBytes(JsonUtility.ToJson(payload));
    }

    /// <summary>Sends a connection-level event (no session id) with an empty JSON payload.</summary>
    void SendConnectionEvent(Speech.Protocols.EventType eventType)
    {
        var msg = new Speech.Protocols.Message
        {
            MsgType = Speech.Protocols.MsgType.FullClientRequest,
            MsgTypeFlag = Speech.Protocols.MsgTypeFlagBits.WithEvent,
            EventType = eventType,
            Payload = EmptyJsonBytes(),
        };
        Send(msg.Marshal());
    }

    /// <summary>Sends a session-level event tagged with the current session id.</summary>
    void SendSessionEvent(Speech.Protocols.EventType eventType, byte[] payload)
    {
        var msg = new Speech.Protocols.Message
        {
            MsgType = Speech.Protocols.MsgType.FullClientRequest,
            MsgTypeFlag = Speech.Protocols.MsgTypeFlagBits.WithEvent,
            EventType = eventType,
            SessionId = currentSessionId,
            Payload = payload
        };
        Send(msg.Marshal());
    }

    void OnWebSocketError(object sender, ErrorEventArgs args)
    {
        Debug.LogError("错误信息:" + args.Message);
    }

    void OnWebSocketClose(object sender, CloseEventArgs args)
    {
        Debug.Log("关闭:" + args.Code);
    }

    /// <summary>
    /// Logs the X-Tt-Logid handshake response header and raises the connect event.
    /// The lookup is null-safe so a missing socket or header collection cannot
    /// throw inside the plugin's open callback.
    /// </summary>
    void OnWebSocketOpen(object sender, EventArgs args)
    {
        var logId = webSocket?.HandshakeResponseHeaders?["X-Tt-Logid"];
        Debug.Log(("X-Tt-Logid", logId));
        InvokeConnectWeb();
    }

    /// <summary>
    /// Decodes a received frame, forwards audio payloads, logs server errors and
    /// raises the event matching the message's event type. Any exception is
    /// logged so a bad frame does not propagate out of the callback.
    /// </summary>
    void OnReceiveWebSocketMessage(object sender, MessageEventArgs args)
    {
        try
        {
            var msg = Speech.Protocols.Message.FromBytes(args.RawData);
            var payload = msg.Payload;
            // A frame without payload must not throw here, otherwise its event
            // would be swallowed by the catch below.
            int payloadLength = payload?.Length ?? 0;
            Debug.Log($"接收语音合成消息:{msg.MsgType},{msg.EventType},{payloadLength}");

            // Audio data
            if (msg.MsgType == Speech.Protocols.MsgType.AudioOnlyServer)
            {
                Debug.Log("接收到音频消息");
                if (payloadLength > 0)
                {
                    Debug.Log("音频数据回调");
                    InvokeReceiveAudio(payload);
                }
            }

            if (msg.MsgType == Speech.Protocols.MsgType.Error)
            {
                Debug.LogError($"{msg.EventType}: {DescribeFailure(payload)}");
            }

            switch (msg.EventType)
            {
                case Speech.Protocols.EventType.ConnectionStarted:
                    // TTS connection established.
                    InvokeConnectTTSSucess();
                    break;
                case Speech.Protocols.EventType.SessionStarted:
                    // Session started.
                    InvokeCreateSessionSucess();
                    break;
                case Speech.Protocols.EventType.SessionFinished:
                    InvokeSessionFinished();
                    break;
                case Speech.Protocols.EventType.TTSSentenceStart:
                    InvokeSentenceStart();
                    break;
                case Speech.Protocols.EventType.TTSSentenceEnd:
                    InvokeSentenceEnd();
                    break;
                case Speech.Protocols.EventType.ConnectionFailed:
                case Speech.Protocols.EventType.SessionFailed:
                    Debug.LogError($"{msg.EventType}: {DescribeFailure(payload)}");
                    break;
            }
        }
        catch (Exception e)
        {
            Debug.LogError("接受消息:" + e.Message);
        }
    }

    /// <summary>Returns the payload decoded as UTF-8 text, or a placeholder when it is missing or empty.</summary>
    static string DescribeFailure(byte[] payload)
    {
        return payload != null && payload.Length > 0
            ? System.Text.Encoding.UTF8.GetString(payload)
            : "未知错误";
    }

    /// <summary>
    /// Sends bytes if the socket exists and is open; otherwise logs a warning and
    /// drops the message.
    /// </summary>
    void Send(byte[] bytes)
    {
        var socket = webSocket;
        if (socket == null || socket.ReadyState != WebSocketState.Open)
        {
            Debug.LogWarning("WebSocket未连接,消息未发送");
            return;
        }
        socket.Send(bytes);
    }

    /// <summary>JSON payload serialized into StartSession / TaskRequest messages.</summary>
    [System.Serializable]
    public class SessionPayload
    {
        public UserInfo user;
        public int @event;
        public ReqParams req_params;
    }

    [System.Serializable]
    public class UserInfo
    {
        public string uid;
    }

    [System.Serializable]
    public class ReqParams
    {
        public string speaker;
        public string text; // Optional; may be omitted or left as an empty string.
        public AudioParams audio_params;
        // NOTE(review): the serialized key is "addtions"; confirm against the service's
        // API reference whether the expected key is "additions".
        public Addtions addtions;
    }

    [System.Serializable]
    public class AudioParams
    {
        public string format;
        public int sample_rate;
    }

    [System.Serializable]
    public class Addtions
    {
        public bool disable_markdown_filter = true;
    }
}
语音合成流程类
连接 TTS 服务器成功后,不需要立即创建会话:会话有时效,等到有文本需要合成时再创建。
csharp
using System.Text;
using UnityEngine;
/// <summary>
/// Drives the speech-synthesis flow: connects the TTS client, creates a
/// session on demand when text arrives, forwards received audio to the stream
/// player, and ends the session once audio has arrived or a timeout elapses.
/// </summary>
/// <remarks>
/// NOTE(review): depending on the WebSocket backend the client's events may be
/// raised off the Unity main thread; this class does no synchronization -
/// confirm the threading of the chosen backend.
/// </remarks>
public class TextToSpeechOut : MonoBehaviour
{
    [Header("文字合成语音")]
    [SerializeField] BaseTextToSpeech textToSpeech;
    [Header("TTS音频流播放")]
    [SerializeField] TTSAudioStreamPlay tTSAudioStreamPlay;
    bool isTTSConnected = false;    // TTS connection confirmed by the client
    bool hasSession = false;        // a session currently exists
    bool isCreateSession;           // a session has been requested but not yet confirmed
    bool pendingEndSession = false; // EndReadText was called before the session existed
    StringBuilder sentenceBuffer;   // text waiting for a session
    bool receivedAudioData = false;
    bool requestEndSession = false;
    float endSessionTimer = 0;
    [Header("结束会话等待时间")]
    [SerializeField] float endSessionRequestTime = 2f;

    void Awake()
    {
        tTSAudioStreamPlay.CreateAudioStream();
        sentenceBuffer = new StringBuilder();
        textToSpeech.OnConnectWeb += OnConnectWeb;
        textToSpeech.OnConnectTTSSucess += OnConnectTTSSucess;
        textToSpeech.OnCreateSessionSucess += OnCreateSession;
        textToSpeech.OnSessionFinished += OnSessionFinish;
        textToSpeech.OnReceiveAudio += OnReceiveData;
        textToSpeech.WebSocketConnect();
    }

    void OnDestroy()
    {
        tTSAudioStreamPlay.ReleaseAudioStream();
        textToSpeech.OnConnectWeb -= OnConnectWeb;
        textToSpeech.OnConnectTTSSucess -= OnConnectTTSSucess;
        textToSpeech.OnCreateSessionSucess -= OnCreateSession;
        textToSpeech.OnSessionFinished -= OnSessionFinish;
        textToSpeech.OnReceiveAudio -= OnReceiveData;
        textToSpeech.WebSocketDisconnect();
    }

    void OnConnectWeb()
    {
        Debug.Log("连接服务器成功,开始连接TTS");
        textToSpeech.BeginConnectTTS();
    }

    /// <summary>
    /// Marks the TTS connection as ready. A session is not created ahead of time
    /// (author's note: a session expires if no text is sent shortly after it is
    /// created); it is only requested here when text was already queued while
    /// the connection was still being established.
    /// </summary>
    void OnConnectTTSSucess()
    {
        Debug.Log("连接TTS成功");
        isTTSConnected = true;
        if (sentenceBuffer.Length > 0)
        {
            TryBeginSession();
        }
    }

    /// <summary>
    /// Resets the per-session state, flushes the buffered text into the new
    /// session and applies an end request that arrived while it was being created.
    /// </summary>
    void OnCreateSession()
    {
        Debug.Log("建立会话成功");
        hasSession = true;
        isCreateSession = false;
        // Reset all per-session state.
        receivedAudioData = false;
        requestEndSession = false;
        endSessionTimer = 0f;

        // Send the buffered text.
        if (sentenceBuffer.Length > 0)
        {
            var content = sentenceBuffer.ToString();
            sentenceBuffer.Clear();
            Debug.Log("Send TaskRequest: " + content);
            textToSpeech.Session(content);
        }

        // Honour an EndReadText call made before the session existed.
        if (pendingEndSession)
        {
            pendingEndSession = false;
            requestEndSession = true;
        }
    }

    void OnSessionFinish()
    {
        Debug.Log("会话结束");
        hasSession = false;
        isCreateSession = false;
        receivedAudioData = false;
        requestEndSession = false;
        endSessionTimer = 0;
    }

    void OnReceiveData(byte[] data)
    {
        receivedAudioData = true;
        tTSAudioStreamPlay.WriteAudioData(data);
    }

    /// <summary>
    /// Requests a session unless one exists, one is already being created, or the
    /// TTS connection is not ready yet. Without the readiness check the request
    /// could be dropped by the client while <c>isCreateSession</c> stayed true,
    /// blocking every later attempt.
    /// </summary>
    void TryBeginSession()
    {
        if (!isTTSConnected || hasSession || isCreateSession)
        {
            return;
        }
        isCreateSession = true;
        textToSpeech.BeginSession();
    }

    /// <summary>
    /// Speaks the text: sends it straight away when a session exists, otherwise
    /// buffers it and requests a session (as soon as the TTS connection is ready).
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    public void ReadText(string text)
    {
        if (hasSession)
        {
            Debug.Log("Send TaskRequest: " + text);
            textToSpeech.Session(text);
            return;
        }

        // No session yet: keep the text until one is established.
        sentenceBuffer.Append(text);
        TryBeginSession();
    }

    /// <summary>
    /// Signals that no more text will follow. The session is ended from
    /// <see cref="Update"/> once audio has arrived or the wait time has elapsed.
    /// If the session is still being created, the request is remembered and
    /// applied when it becomes available.
    /// </summary>
    public void EndReadText()
    {
        if (hasSession)
        {
            endSessionTimer = 0;
            requestEndSession = true;
        }
        else if (isCreateSession || sentenceBuffer.Length > 0)
        {
            pendingEndSession = true;
        }
    }

    /// <summary>
    /// While an end request is pending, ends the session as soon as audio has been
    /// received, or after <c>endSessionRequestTime</c> seconds without any audio.
    /// </summary>
    void Update()
    {
        if (!requestEndSession)
        {
            return;
        }

        if (!receivedAudioData)
        {
            endSessionTimer += Time.deltaTime;
            if (endSessionTimer < endSessionRequestTime)
            {
                return;
            }
            endSessionTimer = 0;
        }

        requestEndSession = false;
        textToSpeech.EndSession();
    }
}