ESP32 对接豆包端到端实时语音大模型:StartSession 发送失败问题记录
背景
近期尝试给小车加一个智能对话功能。现在满大街都是小智的方案,所以想尝试一下其他方案(豆包)。
当前遇到的问题
可以websocket连接成功,也可以发起StartConnection事件,但是发送StartSession失败
22:24:39.407 -> [状态检查 70]
22:24:39.407 -> WiFi: 已连接
22:24:39.407 -> WebSocket: 断开
22:24:39.407 -> 豆包激活: 否
22:24:39.449 -> Connect ID: esp32_3CDC757254DC
22:24:39.449 -> Dialog ID:
22:24:39.449 -> [DouBao] 连接断开,尝试重连...
22:24:39.449 -> [DouBao] 初始化豆包语音...
22:24:39.449 -> [DouBao] Connect ID: esp32_3CDC757254DC
22:24:39.543 -> [DouBao] 设置头部信息
22:24:39.543 -> [DouBao] 连接豆包服务器: openspeech.bytedance.com:443
22:24:39.543 -> 麦克风已初始化,跳过
22:24:39.543 -> 扬声器已初始化,跳过
22:24:40.053 -> 测试麦克风...
22:24:40.053 -> 测试 1: 读取 512 字节,平均音量: 0
22:24:40.145 -> 测试 2: 读取 512 字节,平均音量: 0
22:24:40.227 -> 测试 3: 读取 512 字节,平均音量: 0
22:24:40.360 -> 测试扬声器...
22:24:40.423 -> 扬声器测试成功
22:24:41.063 -> [DouBao] WebSocket连接成功
22:24:41.189 -> [DouBao] 发送StartConnection...
22:24:41.189 -> [DouBao] StartConnection发送: 成功
22:24:41.371 -> [DouBao] 发送StartSession...
22:24:41.371 -> [DouBao] StartSession发送: 失败
22:24:41.405 -> [DouBao] StartSession发送失败,可能是消息太长或连接问题
22:24:41.405 -> [DouBao] WebSocket断开连接
代码
最新代码见链接:https://gitee.com/likexiang/like-code/blob/master/ESP32-S3-CAM/DouBaoVoic.ino
c++
// ===========================
// 豆包端到端实时语音大模型API适配 V2.0 - 修复版
// ===========================
#include <WiFi.h>
#include <WebSocketsClient.h>
#include <WiFiClientSecure.h>
#include <ArduinoJson.h>
#include <driver/i2s.h>
// ========== Doubao endpoint configuration ==========
// Host and port passed to WebSocketsClient::beginSSL() in initDoubao().
#define DOUBAO_WSS_HOST "openspeech.bytedance.com"
#define DOUBAO_WSS_PORT 443
// Credentials sent as X-Api-* handshake headers in initDoubao().
// NOTE(review): the key/token values here are placeholders; keep real secrets out of source control.
#define DOUBAO_APP_ID "9542649884"
#define DOUBAO_API_KEY "xxx"
#define DOUBAO_ACCESS_TOKEN "xx"
#define DOUBAO_RESOURCE_ID "volc.speech.dialog"
// ========== Audio capture parameters ==========
// Duration of one captured frame in milliseconds.
#define DOUBAO_FRAME_MS 20
// Microphone sample rate in Hz (used by initMicrophone()).
#define DOUBAO_SAMPLE_RATE 16000
#define DOUBAO_BITS_PER_SAMPLE 16
#define DOUBAO_CHANNELS 1
// Bytes in one frame; evaluates to 640 with the values above.
#define DOUBAO_FRAME_BYTES (DOUBAO_SAMPLE_RATE * DOUBAO_BITS_PER_SAMPLE / 8 * DOUBAO_CHANNELS * DOUBAO_FRAME_MS / 1000)
// ========== Audio device wiring ==========
// The microphone uses I2S port 1 (RX), the speaker uses I2S port 0 (TX).
#define I2S_MIC_PORT I2S_NUM_1
#define I2S_MIC_BCLK_PIN 4
#define I2S_MIC_LRCLK_PIN 5
#define I2S_MIC_DATA_PIN 2
#define I2S_SPEAKER_PORT I2S_NUM_0
#define I2S_SPEAKER_BCLK_PIN 12
#define I2S_SPEAKER_LRCLK_PIN 15
#define I2S_SPEAKER_DATA_PIN 16
// ========== WiFi configuration ==========
// NOTE(review): WiFi credentials are hard-coded; consider moving them out of the sketch.
const char *ssid = "ChinaNet-6x8c";
const char *password = "8zeymm8c";
// ========== Global state ==========
// WebSocket client used for the Doubao connection.
WebSocketsClient doubaoWs;
// Set to true once a "session_started" response carrying a dialog_id arrives;
// cleared when the WebSocket disconnects.
bool isDoubaoActive = false;
// Master enable flag; checked by the capture loop and by the reconnect logic in loop().
bool isDoubaoEnabled = true;
// Session management
// Connect ID built from the WiFi MAC address in initDoubao().
String doubaoConnectId = "";
// Dialog ID copied from the "session_started" response.
String doubaoDialogId = "";
// Task handle of the audio capture task (NULL when no task has been created).
TaskHandle_t doubaoCaptureTask = NULL;
// Audio device state: set by initMicrophone()/initSpeaker(), cleared by cleanupAudio().
bool isMicrophoneInitialized = false;
bool isSpeakerInitialized = false;
// ========== JSON构建函数 ==========
/**
 * Builds the StartConnection message as JSON text.
 *
 * The object contains a fixed "type" of "start_connection", the WiFi MAC
 * address as "device_id", and the fixed "client_type" and "version" fields.
 *
 * @return the serialized JSON string.
 */
String buildStartConnectionJson() {
  DynamicJsonDocument message(512);
  JsonObject root = message.to<JsonObject>();

  root["type"] = "start_connection";
  root["device_id"] = WiFi.macAddress();
  root["client_type"] = "esp32";
  root["version"] = "1.0.0";

  String serialized;
  serializeJson(message, serialized);
  return serialized;
}
/**
 * Builds the StartSession message as JSON text.
 *
 * Layout: {"type":"start_session",
 *          "asr":{"extra":{"end_smooth_window_ms":1500,"enable_custom_vad":false}},
 *          "dialog":{"dialog_id":"","user_id":""}}
 *
 * @return the serialized JSON string.
 */
String buildStartSessionJson() {
  DynamicJsonDocument message(512);
  message["type"] = "start_session";

  // ASR tuning lives under asr.extra.
  JsonObject asrExtra = message.createNestedObject("asr").createNestedObject("extra");
  asrExtra["end_smooth_window_ms"] = 1500;
  asrExtra["enable_custom_vad"] = false;

  // Dialog identifiers are sent empty.
  JsonObject dialogConfig = message.createNestedObject("dialog");
  dialogConfig["dialog_id"] = "";
  dialogConfig["user_id"] = "";

  String serialized;
  serializeJson(message, serialized);
  return serialized;
}
// ========== WebSocket事件处理 ==========
void doubaoWsEvent(WStype_t type, uint8_t *payload, size_t length) {
switch (type) {
case WStype_DISCONNECTED:
Serial.println("[DouBao] WebSocket断开连接");
isDoubaoActive = false;
break;
case WStype_CONNECTED:
{
Serial.println("[DouBao] WebSocket连接成功");
// 连接成功后发送初始化消息
delay(100);
// 发送StartConnection
String startConn = buildStartConnectionJson();
Serial.println("[DouBao] 发送StartConnection...");
bool connSent = doubaoWs.sendTXT(startConn);
Serial.printf("[DouBao] StartConnection发送: %s\n", connSent ? "成功" : "失败");
delay(200);
// 发送StartSession
String startSess = buildStartSessionJson();
Serial.println("[DouBao] 发送StartSession...");
bool sessSent = doubaoWs.sendTXT(startSess);
Serial.printf("[DouBao] StartSession发送: %s\n", sessSent ? "成功" : "失败");
if (!sessSent) {
Serial.println("[DouBao] StartSession发送失败,可能是消息太长或连接问题");
}
}
break;
case WStype_TEXT:
{
String message = String((char*)payload).substring(0, min(length, (size_t)500));
Serial.printf("[DouBao] 收到消息: %s\n", message.c_str());
// 解析JSON响应
DynamicJsonDocument doc(1024);
DeserializationError error = deserializeJson(doc, message);
if (error) {
Serial.printf("[DouBao] JSON解析失败: %s\n", error.c_str());
return;
}
// 处理响应
if (doc.containsKey("type")) {
String responseType = doc["type"].as<String>();
Serial.printf("[DouBao] 响应类型: %s\n", responseType.c_str());
if (responseType == "connection_started") {
Serial.println("[DouBao] 连接建立成功");
}
else if (responseType == "session_started") {
if (doc.containsKey("dialog_id")) {
doubaoDialogId = doc["dialog_id"].as<String>();
Serial.printf("[DouBao] 会话启动成功,dialog_id: %s\n", doubaoDialogId.c_str());
isDoubaoActive = true;
// 启动音频采集任务
if (doubaoCaptureTask == NULL) {
xTaskCreatePinnedToCore([](void* param) {
Serial.println("[DouBao] 音频采集任务启动");
int16_t buffer[DOUBAO_FRAME_BYTES / 2];
size_t bytesRead;
uint32_t frameCount = 0;
while (isDoubaoEnabled && isDoubaoActive) {
if (doubaoWs.isConnected()) {
esp_err_t ret = i2s_read(I2S_MIC_PORT, buffer, DOUBAO_FRAME_BYTES, &bytesRead, portMAX_DELAY);
if (ret == ESP_OK && bytesRead == DOUBAO_FRAME_BYTES) {
// 简单的音频处理
for (int i = 0; i < DOUBAO_FRAME_BYTES / 2; i++) {
int32_t amplified = buffer[i] * 4;
buffer[i] = (int16_t)constrain(amplified, -32768, 32767);
}
frameCount++;
if (frameCount % 50 == 0) {
Serial.printf("[DouBao] 已采集 %d 帧音频\n", frameCount);
}
}
}
delay(DOUBAO_FRAME_MS);
}
Serial.println("[DouBao] 音频采集任务结束");
vTaskDelete(NULL);
}, "DouBaoCapture", 8192, NULL, 1, &doubaoCaptureTask, 1);
}
}
}
else if (responseType == "asr_response") {
if (doc.containsKey("results")) {
String text = doc["results"][0]["text"].as<String>();
bool isInterim = doc["results"][0]["is_interim"].as<bool>();
if (!isInterim && text.length() > 0) {
Serial.printf("[DouBao] 识别结果: %s\n", text.c_str());
}
}
}
else if (responseType == "error") {
if (doc.containsKey("error")) {
String errorMsg = doc["error"].as<String>();
Serial.printf("[DouBao] 错误: %s\n", errorMsg.c_str());
}
}
}
}
break;
case WStype_ERROR:
Serial.println("[DouBao] WebSocket错误");
break;
default:
break;
}
}
// ========== 音频设备初始化 ==========
/**
 * Installs and configures the I2S RX driver for the microphone.
 *
 * Does nothing when the microphone is already marked as initialized. Otherwise
 * any existing driver on I2S_MIC_PORT is uninstalled first, the driver is
 * installed as master/RX at DOUBAO_SAMPLE_RATE with 16-bit left-channel samples,
 * and the BCLK / LRCLK / data-in pins are applied.
 *
 * @return ESP_OK on success (or when already initialized); otherwise the error
 *         returned by i2s_driver_install() or i2s_set_pin().
 */
esp_err_t initMicrophone() {
  if (isMicrophoneInitialized) {
    Serial.println("麦克风已初始化,跳过");
    return ESP_OK;
  }
  Serial.println("初始化麦克风...");

  // Remove any driver left over from a previous run before installing again.
  i2s_driver_uninstall(I2S_MIC_PORT);
  delay(100);

  i2s_config_t i2s_config = {
    .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
    .sample_rate = DOUBAO_SAMPLE_RATE,
    .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
    .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
    .communication_format = I2S_COMM_FORMAT_STAND_I2S,
    .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
    .dma_buf_count = 4,
    .dma_buf_len = 256,
    .use_apll = false,
    .tx_desc_auto_clear = false,
    .fixed_mclk = 0
  };
  esp_err_t err = i2s_driver_install(I2S_MIC_PORT, &i2s_config, 0, NULL);
  if (err != ESP_OK) {
    Serial.printf("I2S麦克风驱动安装失败: %d\n", err);
    return err;
  }

  // mck_io_num must be set explicitly: left zero-initialized it selects GPIO0
  // as the MCLK output in master mode.
  // NOTE(review): this field exists in the ESP-IDF 4.4+ legacy I2S driver
  // (arduino-esp32 2.x and later); confirm the core version in use.
  i2s_pin_config_t pin_config = {
    .mck_io_num = I2S_PIN_NO_CHANGE,
    .bck_io_num = I2S_MIC_BCLK_PIN,
    .ws_io_num = I2S_MIC_LRCLK_PIN,
    .data_out_num = I2S_PIN_NO_CHANGE,
    .data_in_num = I2S_MIC_DATA_PIN
  };
  err = i2s_set_pin(I2S_MIC_PORT, &pin_config);
  if (err != ESP_OK) {
    Serial.printf("I2S麦克风引脚配置失败: %d\n", err);
    // Roll back the install so a later call starts from a clean state.
    i2s_driver_uninstall(I2S_MIC_PORT);
    return err;
  }

  isMicrophoneInitialized = true;
  Serial.println("麦克风初始化完成");
  return ESP_OK;
}
/**
 * Installs and configures the I2S TX driver for the speaker.
 *
 * Does nothing when the speaker is already marked as initialized. Otherwise any
 * existing driver on I2S_SPEAKER_PORT is uninstalled first, the driver is
 * installed as master/TX at 24000 Hz with 16-bit left-channel samples, and the
 * BCLK / LRCLK / data-out pins are applied.
 *
 * @return ESP_OK on success (or when already initialized); otherwise the error
 *         returned by i2s_driver_install() or i2s_set_pin().
 */
esp_err_t initSpeaker() {
  if (isSpeakerInitialized) {
    Serial.println("扬声器已初始化,跳过");
    return ESP_OK;
  }
  Serial.println("初始化扬声器...");

  // Remove any driver left over from a previous run before installing again.
  i2s_driver_uninstall(I2S_SPEAKER_PORT);
  delay(100);

  i2s_config_t i2sConfig = {
    .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX),
    .sample_rate = 24000,
    .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
    .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
    .communication_format = I2S_COMM_FORMAT_STAND_I2S,
    .dma_buf_count = 4,
    .dma_buf_len = 256,
    .use_apll = false,
    .tx_desc_auto_clear = true,
    .fixed_mclk = 0
  };
  esp_err_t err = i2s_driver_install(I2S_SPEAKER_PORT, &i2sConfig, 0, NULL);
  if (err != ESP_OK) {
    Serial.printf("I2S扬声器驱动安装失败: %d\n", err);
    return err;
  }

  // mck_io_num must be set explicitly: left zero-initialized it selects GPIO0
  // as the MCLK output in master mode.
  // NOTE(review): this field exists in the ESP-IDF 4.4+ legacy I2S driver
  // (arduino-esp32 2.x and later); confirm the core version in use.
  i2s_pin_config_t pin_config = {
    .mck_io_num = I2S_PIN_NO_CHANGE,
    .bck_io_num = I2S_SPEAKER_BCLK_PIN,
    .ws_io_num = I2S_SPEAKER_LRCLK_PIN,
    .data_out_num = I2S_SPEAKER_DATA_PIN,
    .data_in_num = I2S_PIN_NO_CHANGE
  };
  err = i2s_set_pin(I2S_SPEAKER_PORT, &pin_config);
  if (err != ESP_OK) {
    Serial.printf("I2S扬声器引脚配置失败: %d\n", err);
    // Roll back the install so a later call starts from a clean state.
    i2s_driver_uninstall(I2S_SPEAKER_PORT);
    return err;
  }

  isSpeakerInitialized = true;
  Serial.println("扬声器初始化完成");
  return ESP_OK;
}
// ========== 音频设备测试 ==========
/**
 * Reads three short buffers from the microphone and prints the mean absolute
 * sample value of each as a rough volume indicator.
 *
 * A read that returns an error, or fewer bytes than one whole 16-bit sample, is
 * reported as a failed attempt (this also avoids dividing by a zero sample count).
 */
void testMicrophone() {
  Serial.println("测试麦克风...");
  int16_t buffer[256];

  for (int i = 0; i < 3; i++) {
    size_t bytesRead = 0;
    esp_err_t ret = i2s_read(I2S_MIC_PORT, buffer, sizeof(buffer), &bytesRead, 100);
    size_t sampleCount = bytesRead / sizeof(buffer[0]);

    if (ret == ESP_OK && sampleCount > 0) {
      // Mean absolute amplitude over the samples actually read.
      int32_t sum = 0;
      for (size_t j = 0; j < sampleCount; j++) {
        sum += abs(buffer[j]);
      }
      int avgVolume = (int)(sum / (int32_t)sampleCount);
      Serial.printf("测试 %d: 读取 %u 字节,平均音量: %d\n", i + 1, (unsigned)bytesRead, avgVolume);
    } else {
      Serial.printf("测试 %d 失败,错误码: %d\n", i + 1, (int)ret);
    }
    delay(100);
  }
}
void testSpeaker() {
Serial.println("测试扬声器...");
// 生成1kHz正弦波测试音(100ms)
const int sampleCount = 2400; // 24000Hz * 0.1s
int16_t testTone[sampleCount];
for (int i = 0; i < sampleCount; i++) {
testTone[i] = sin(2 * 3.14159 * 1000 * i / 24000) * 5000;
}
size_t bytesWritten;
esp_err_t err = i2s_write(I2S_SPEAKER_PORT, testTone, sizeof(testTone), &bytesWritten, portMAX_DELAY);
if (err == ESP_OK && bytesWritten == sizeof(testTone)) {
Serial.println("扬声器测试成功");
} else {
Serial.printf("扬声器测试失败,错误码: %d\n", err);
}
}
// ========== 豆包初始化 ==========
void initDoubao() {
Serial.println("[DouBao] 初始化豆包语音...");
// 生成Connect ID
doubaoConnectId = "esp32_" + WiFi.macAddress();
doubaoConnectId.replace(":", "");
Serial.printf("[DouBao] Connect ID: %s\n", doubaoConnectId.c_str());
// 断开现有连接
doubaoWs.disconnect();
delay(100);
// 重新初始化WebSocket
doubaoWs.onEvent(doubaoWsEvent);
doubaoWs.setReconnectInterval(5000);
doubaoWs.enableHeartbeat(15000, 3000, 2);
// 设置头部信息 - 确保格式正确
String headers =
"X-Api-App-ID: " + String(DOUBAO_APP_ID) + "\r\n" +
"X-Api-Access-Key: " + String(DOUBAO_ACCESS_TOKEN) + "\r\n" +
"X-Api-Resource-Id: " + String(DOUBAO_RESOURCE_ID) + "\r\n" +
"X-Api-App-Key: " + String(DOUBAO_API_KEY) + "\r\n" +
"X-Api-Connect-Id: " + doubaoConnectId + "\r\n";
Serial.println("[DouBao] 设置头部信息");
doubaoWs.setExtraHeaders(headers.c_str());
// 开始SSL连接
Serial.printf("[DouBao] 连接豆包服务器: %s:%d\n", DOUBAO_WSS_HOST, DOUBAO_WSS_PORT);
doubaoWs.beginSSL(DOUBAO_WSS_HOST, DOUBAO_WSS_PORT, "/api/v3/realtime/dialogue");
// 初始化音频设备
initMicrophone();
initSpeaker();
// 测试音频设备
delay(500);
testMicrophone();
testSpeaker();
}
// ========== 清理函数 ==========
void cleanupAudio() {
Serial.println("清理音频设备...");
if (doubaoCaptureTask != NULL) {
vTaskDelete(doubaoCaptureTask);
doubaoCaptureTask = NULL;
}
if (isMicrophoneInitialized) {
i2s_driver_uninstall(I2S_MIC_PORT);
isMicrophoneInitialized = false;
Serial.println("麦克风已卸载");
}
if (isSpeakerInitialized) {
i2s_driver_uninstall(I2S_SPEAKER_PORT);
isSpeakerInitialized = false;
Serial.println("扬声器已卸载");
}
}
// ========== 主程序 ==========
/**
 * Arduino entry point: opens the serial port, connects to WiFi (up to 30 polls
 * of 500 ms each), halts forever if WiFi cannot be joined, and otherwise starts
 * the Doubao voice connection.
 */
void setup() {
  Serial.begin(115200);
  Serial.println();
  Serial.println("=== 豆包语音精简版 V2.0 ===");
  delay(1000);

  // Join WiFi with modem sleep disabled.
  Serial.println("正在连接WiFi...");
  WiFi.begin(ssid, password);
  WiFi.setSleep(false);

  for (int attempts = 0; WiFi.status() != WL_CONNECTED && attempts < 30; attempts++) {
    delay(500);
    Serial.print(".");
  }

  if (WiFi.status() != WL_CONNECTED) {
    Serial.println("\nWiFi连接失败");
    // Without WiFi nothing else can work; park here.
    for (;;) {
      delay(1000);
    }
  }

  Serial.println("\nWiFi连接成功");
  Serial.print("IP地址: ");
  Serial.println(WiFi.localIP());

  delay(2000);

  // Bring up the Doubao voice connection.
  initDoubao();

  Serial.println("=== 系统启动完成 ===");
  Serial.println("输入 'help' 查看可用命令");
}
/**
 * Arduino main loop: services the WebSocket client on every pass and, once more
 * than 10 s have elapsed since the last report, prints a status report. Inside
 * that report the free heap is printed when more than 30 s have elapsed since
 * it was last printed, and initDoubao() is called again if the socket is down
 * while Doubao is enabled.
 */
void loop() {
  static unsigned long lastStatusCheck = 0;
  static unsigned long lastMemCheck = 0;

  // Service the WebSocket client.
  doubaoWs.loop();

  const bool statusDue = (millis() - lastStatusCheck) > 10000;
  if (!statusDue) {
    delay(10);
    return;
  }

  lastStatusCheck = millis();
  bool wsConnected = doubaoWs.isConnected();

  Serial.printf("\n[状态检查 %lu]\n", millis() / 1000);
  Serial.printf("WiFi: %s\n", WiFi.status() == WL_CONNECTED ? "已连接" : "断开");
  Serial.printf("WebSocket: %s\n", wsConnected ? "已连接" : "断开");
  Serial.printf("豆包激活: %s\n", isDoubaoActive ? "是" : "否");
  Serial.printf("Connect ID: %s\n", doubaoConnectId.c_str());
  Serial.printf("Dialog ID: %s\n", doubaoDialogId.c_str());

  // Heap report, rate-limited separately from the status report.
  if (millis() - lastMemCheck > 30000) {
    lastMemCheck = millis();
    uint32_t freeHeap = ESP.getFreeHeap();
    Serial.printf("可用堆内存: %d bytes\n", freeHeap);
  }

  // Re-initialize the connection when it is down and Doubao is enabled.
  if (isDoubaoEnabled && !wsConnected) {
    Serial.println("[DouBao] 连接断开,尝试重连...");
    initDoubao();
  }

  delay(10);
}
// ========== 串口命令处理 ==========
void serialEvent() {
while (Serial.available()) {
String command = Serial.readStringUntil('\n');
command.trim();
Serial.printf("> %s\n", command.c_str());
if (command == "status") {
Serial.println("=== 当前状态 ===");
Serial.printf("WiFi: %s\n", WiFi.status() == WL_CONNECTED ? "已连接" : "断开");
Serial.printf("WebSocket: %s\n", doubaoWs.isConnected() ? "已连接" : "断开");
Serial.printf("豆包激活: %s\n", isDoubaoActive ? "是" : "否");
Serial.printf("Connect ID: %s\n", doubaoConnectId.c_str());
Serial.printf("Dialog ID: %s\n", doubaoDialogId.c_str());
Serial.printf("麦克风: %s\n", isMicrophoneInitialized ? "已初始化" : "未初始化");
Serial.printf("扬声器: %s\n", isSpeakerInitialized ? "已初始化" : "未初始化");
}
else if (command == "reconnect") {
Serial.println("重新连接豆包...");
cleanupAudio();
initDoubao();
}
else if (command == "test_mic") {
testMicrophone();
}
else if (command == "test_speaker") {
testSpeaker();
}
else if (command == "start_session") {
if (doubaoWs.isConnected()) {
String startSess = buildStartSessionJson();
Serial.println("手动发送StartSession...");
bool sent = doubaoWs.sendTXT(startSess);
Serial.printf("发送结果: %s\n", sent ? "成功" : "失败");
} else {
Serial.println("WebSocket未连接");
}
}
else if (command == "cleanup") {
cleanupAudio();
Serial.println("音频设备已清理");
}
else if (command == "restart") {
Serial.println("重启系统...");
delay(1000);
ESP.restart();
}
else if (command == "help") {
Serial.println("=== 可用命令 ===");
Serial.println("status - 查看当前状态");
Serial.println("reconnect - 重新连接豆包");
Serial.println("test_mic - 测试麦克风");
Serial.println("test_speaker - 测试扬声器");
Serial.println("start_session - 手动发送StartSession");
Serial.println("cleanup - 清理音频设备");
Serial.println("restart - 重启系统");
Serial.println("help - 显示帮助");
}
else {
Serial.println("未知命令,输入 'help' 查看可用命令");
}
}
}
参考
豆包对接链接:https://www.volcengine.com/docs/6561/1594356?lang=zh#客户端事件