执行流程
- 输入音频文件,调用语音模型API,输出语音识别文字;
- 输入识别文字,调用语言模型API,输出回复文字(流式);
- 输入回复文字,调用语音模型API,输出音频数据(流式);
- 输入音频数据,通过流式AudioClip播放音频。
音频提问,流式语音回复
整合上述功能,实现音频提问,流式语音回复。
csharp
using System;
using System.Collections.Generic;
using System.Text;
using UnityEngine;
/// <summary>
/// Voice question pipeline: converts a recorded question to text, sends the
/// text (plus the conversation history) to the text-question component, and
/// forwards the streamed answer to text-to-speech in small batches.
/// </summary>
public class VoiceQuestion : MonoBehaviour
{
    [Header("语音转文字")]
    [SerializeField] SpeechRecognition speechRecognition;
    [Header("文字转语音")]
    [SerializeField] TextToSpeechOut textToSpeech;
    [Header("文字提问")]
    [SerializeField] TextQuestions textQuestions;

    // Conversation history passed to textQuestions.Request on every question.
    List<Message> messages;
    // Accumulates the raw (unfiltered) answer text; stored in the history on completion.
    StringBuilder thinkContentBuffer;
    // Assistant entry created per question and appended to the history on completion.
    Message assistantMessage;
    // True between a successful textQuestions.Request and its completion/error callback.
    // Guards the history against completion/error callbacks that do not belong to a
    // request started by this component, and against duplicate callbacks.
    bool answerPending;

    [Header("进行语音合成最少字符")]
    [SerializeField] int minTTSSendLength = 4;
    // Filtered answer text waiting to be sent to text-to-speech.
    StringBuilder ttsSendBuffer;

    // NOTE: the field name is kept as-is (including its spelling) because Unity
    // serializes fields by name; renaming would drop values saved in scenes/prefabs.
    [Header("使用附加提示词")]
    [SerializeField] bool addtionPrompt;
    [Header("附加提示词")]
    [SerializeField] string prompt;

    /// <summary>Raised after the complete answer text has been received and handed to text-to-speech.</summary>
    public event Action OnTextAnswerCompleted;
    /// <summary>Raised when speech recognition or the text question fails.</summary>
    public event Action OnError;

    /// <summary>Creates the buffers and subscribes to the text-question callbacks.</summary>
    void Awake()
    {
        messages = new List<Message>();
        thinkContentBuffer = new StringBuilder();
        ttsSendBuffer = new StringBuilder();

        if (!textQuestions)
        {
            // Report a missing Inspector reference clearly instead of failing
            // with a NullReferenceException on the first subscription.
            Debug.LogError("VoiceQuestion: textQuestions 未赋值");
            return;
        }

        textQuestions.onReceive += OnReceiveAnswer;
        textQuestions.onCompleted += OnReceiveAnswerCompleted;
        textQuestions.OnError += OnReceiveAnswerError;
    }

    /// <summary>
    /// Step 1: submit the question audio for speech recognition.
    /// </summary>
    /// <param name="audioClip">Recorded question audio.</param>
    /// <param name="newDialogue">When true, the conversation history is cleared first.</param>
    public void InputQuestionAudio(AudioClip audioClip, bool newDialogue = true)
    {
        if (!audioClip || !speechRecognition)
        {
            // Nothing can be recognized; report through the normal error channel
            // and leave the existing history untouched.
            Debug.LogError("语音转文字 失败");
            OnError?.Invoke();
            return;
        }

        Debug.Log("语音转文字 开始");
        if (newDialogue)
        {
            messages.Clear();
            Debug.Log("清除历史对话");
        }
        speechRecognition.Request(audioClip, OnGetAudioToText);
    }

    /// <summary>
    /// Step 2: called with the speech-recognition result; on success sends the
    /// recognized text (optionally with the extra prompt appended) as a new
    /// user message together with the history.
    /// </summary>
    /// <param name="sucess">Whether speech recognition succeeded.</param>
    /// <param name="content">Recognized text.</param>
    void OnGetAudioToText(bool sucess, string content)
    {
        Debug.Log("语音转文字 结束");
        if (!sucess)
        {
            Debug.LogError("语音转文字 失败");
            OnError?.Invoke();
            return;
        }

        Debug.Log("文字提问 开始");
        if (addtionPrompt)
            content += prompt;

        // Start every answer from clean buffers so that text left over from a
        // previous (possibly failed) answer is neither stored nor spoken.
        thinkContentBuffer.Clear();
        ttsSendBuffer.Clear();

        messages.Add(new Message()
        {
            role = "user",
            content = content,
        });
        assistantMessage = new Message()
        {
            role = "assistant",
        };

        // Set before the request so the flag is already valid if a callback
        // is delivered from inside Request.
        answerPending = true;
        textQuestions.Request(messages);
    }

    /// <summary>
    /// Step 3: receives one streamed piece of the answer. The raw text is
    /// accumulated for the history; '*' and '#' are removed from the copy that
    /// is spoken, which is sent once enough characters are buffered or when
    /// <paramref name="stop"/> is set.
    /// </summary>
    /// <param name="part">Streamed answer fragment; null or empty fragments are ignored.</param>
    /// <param name="stop">True when the sender marks this fragment as the last one.</param>
    void OnReceiveAnswer(string part, bool stop)
    {
        Debug.Log("接收回复文本");
        if (!string.IsNullOrEmpty(part))
        {
            thinkContentBuffer.Append(part);
            // Replace returns the same text when the character is absent,
            // so no Contains pre-check is needed.
            ttsSendBuffer.Append(part.Replace("*", "").Replace("#", ""));
        }

        if (ttsSendBuffer.Length >= minTTSSendLength || stop)
            FlushTtsBuffer();
    }

    /// <summary>
    /// Step 4: the answer stream has finished. Stores the answer in the
    /// history, speaks any remaining buffered text and ends the speech session.
    /// </summary>
    void OnReceiveAnswerCompleted()
    {
        Debug.Log("文本提问 结束");
        if (answerPending)
        {
            answerPending = false;
            assistantMessage.content = thinkContentBuffer.ToString();
            messages.Add(assistantMessage);
        }
        thinkContentBuffer.Clear();

        // Text shorter than minTTSSendLength may still be waiting if the final
        // fragment was not flagged with stop; speak it before ending.
        FlushTtsBuffer();
        textToSpeech.EndReadText();
        OnTextAnswerCompleted?.Invoke();
    }

    /// <summary>
    /// Called when the text question fails. Drops the partial answer and the
    /// unanswered user message so the history does not end with a question
    /// that has no reply, then reports the error.
    /// </summary>
    void OnReceiveAnswerError()
    {
        if (answerPending)
        {
            answerPending = false;
            // The last entry is the user message added in OnGetAudioToText.
            if (messages.Count > 0)
                messages.RemoveAt(messages.Count - 1);
        }
        thinkContentBuffer.Clear();
        ttsSendBuffer.Clear();
        OnError?.Invoke();
    }

    /// <summary>Sends the buffered speech text to text-to-speech, if there is any.</summary>
    void FlushTtsBuffer()
    {
        if (ttsSendBuffer.Length == 0)
            return;

        var content = ttsSendBuffer.ToString();
        ttsSendBuffer.Clear();
        textToSpeech.ReadText(content);
    }

    /// <summary>Unsubscribes from the text-question callbacks and releases buffered state.</summary>
    void OnDestroy()
    {
        if (textQuestions)
        {
            textQuestions.OnError -= OnReceiveAnswerError;
            textQuestions.onReceive -= OnReceiveAnswer;
            textQuestions.onCompleted -= OnReceiveAnswerCompleted;
        }
        messages?.Clear();
        thinkContentBuffer?.Clear();
        ttsSendBuffer?.Clear();
    }
}