四博 AI 双目智能音箱方案:把"会说话的音箱"升级成"会表达、会感知、会控制"的 AI 终端
传统智能音箱主要解决"语音问答"和"音频播放"问题,而四博 AI 双目方案更进一步:它不仅能听、能说,还能通过双目屏显示表情,通过四路触控感应识别用户操作,通过震动马达提供触觉反馈,通过三轴传感器感知姿态变化,让 AI 设备具备更完整的人机交互能力。
四博 AI 双目智能音箱可以定位为:
AI 语音入口 + 双目情绪表达 + 触控交互 + 姿态感知 + 智能家居控制中枢
该方案基于四博 ESP32-S3 AI 硬件平台,适合开发 AI 智能音箱、AI 桌宠、AI 陪伴机器人、儿童学习音箱、智能家居中控、AI 玩具、AI 手办、酒店客控语音终端等产品。
一、方案核心卖点
四博 AI 双目方案相比普通 AI 音箱,最大的差异在于"多模态交互"。
1. 双目屏:支持 0.71 寸 / 1.28 寸双目屏,显示眼神、表情、情绪动画
2. 四路触控:支持摸头、拍打、左右触摸、长按等交互
3. 震动马达:支持点击反馈、情绪反馈、闹钟提醒、唤醒提示
4. 三轴传感:支持姿态检测、拿起检测、摇晃检测、方向判断
5. 语音交互:支持唤醒、对话、打断、知识库问答、大模型接入
6. 智能控制:通过 MCP / MQTT / HTTP 控制灯具、插座、窗帘、空调
7. OTA 升级:支持固件、表情资源、提示音、参数在线更新
四博 AI-S3 双目资料中,双目方案配套主板、双屏幕、排线和外壳,主板支持 5V 供电、电池、咪头、喇叭、小智功能、2.4G Wi-Fi,并可支持 4G 模组扩展网络;屏幕支持 0.71 寸和 1.28 寸两类配置。
二、硬件架构设计
推荐硬件配置如下:
| 模块 | 推荐配置 |
|---|---|
| 主控 | ESP32-S3R8 |
| Flash | 16MB |
| PSRAM | 8MB |
| 离线语音 | VB6824 可选 |
| 语音输入 | I2S 麦克风 / Codec Mic |
| 音频输出 | I2S Codec + Class-D 功放 |
| 双目显示 | 0.71 寸 / 1.28 寸 LCD × 2 |
| 触控 | 四路触控感应 |
| 震动 | ERM / LRA 震动马达 |
| 姿态 | 三轴加速度传感器 |
| 网络 | Wi-Fi + BLE / BluFi |
| 电源 | Type-C 5V,可选锂电池 |
| 扩展 | UART / I2C / SPI / GPIO / PWM |
ESP32-S3 系列在四博乐鑫系模组选型中定位于音视频 / AI 市场,ESPS3-32 系列支持 N4、N8、N8R2、N16R2、N16R8 等规格,可匹配双屏、音频、AI 交互类产品。
整体硬件连接关系可以这样设计:
                        ┌─────────────────────────┐
                        │       ESP32-S3R8        │
                        │  Wi-Fi / BLE / AI 控制  │
                        └────────────┬────────────┘
                                     │
     ┌───────────────┬───────────────┼───────────────┬────────────────┐
     │               │               │               │                │
     ▼               ▼               ▼               ▼                ▼
  I2S Mic        I2S Codec       SPI LCD-L       SPI LCD-R      I2C 三轴传感器
     │               │               │               │                │
     ▼               ▼               ▼               ▼                ▼
  语音采集        喇叭播放        左眼动画        右眼动画      姿态/摇晃/拿起
                                     │
                                     ▼
                           四路触控 + 震动马达
三、系统软件架构
四博 AI 双目智能音箱的软件可以拆成以下核心模块(系统入口、板级配置以及各功能服务):
main/
├── app_main.c // 系统入口
├── board_config.h // 板级引脚配置
├── audio_capture.c // 麦克风采集
├── audio_player.c // TTS 播放
├── eye_display.c // 双目屏驱动与表情动画
├── touch_service.c // 四路触控检测
├── motor_service.c // 震动马达控制
├── imu_service.c // 三轴传感器姿态检测
├── ai_ws_client.c // AI WebSocket 通信
├── mcp_service.c // MCP 工具调用
├── wifi_manager.c // Wi-Fi / BluFi 配网
├── scene_service.c // 场景模式
└── ota_service.c // OTA 升级
系统状态机建议如下:
/*
 * Top-level device states for the suggested system state machine.
 *
 * NOTE(review): none of the code shown next to this enum reads or writes
 * these values, so the per-state meaning is inferred from the names only
 * (boot, Wi-Fi provisioning, idle, wake-up, listening, thinking, speaking,
 * touch reaction, shake reaction, sleep, OTA in progress, error) — confirm
 * against the real state-machine implementation.
 */
typedef enum {
AI_STATE_BOOT = 0,
AI_STATE_WIFI_CONFIG,
AI_STATE_IDLE,
AI_STATE_WAKEUP,
AI_STATE_LISTENING,
AI_STATE_THINKING,
AI_STATE_SPEAKING,
AI_STATE_TOUCHING,
AI_STATE_SHAKING,
AI_STATE_SLEEPING,
AI_STATE_OTA,
AI_STATE_ERROR
} ai_state_t;
状态和表情绑定:
/*
 * Expressions that can be shown on the dual-eye display.
 * Passed to eye_display_set_expr() to select what is drawn.
 */
typedef enum {
EYE_EXPR_NORMAL = 0,
EYE_EXPR_HAPPY,
EYE_EXPR_BLINK,
EYE_EXPR_SLEEP,
EYE_EXPR_LISTENING,
EYE_EXPR_THINKING,
EYE_EXPR_SPEAKING,
/* NOTE(review): eye_display_set_expr() has no dedicated case for ANGRY;
 * it goes through the default branch and draws the normal eyes. */
EYE_EXPR_ANGRY,
EYE_EXPR_DIZZY,
EYE_EXPR_LOVE
} eye_expr_t;
四、板级配置代码
下面是一个适合四博 AI 双目智能音箱的 board_config.h 模板。实际 GPIO 需要按原理图修改。
#pragma once
#include "driver/gpio.h"
#include "driver/spi_master.h"
#include "driver/i2c.h"
/*
 * Board-level pin map template for the dual-eye speaker.
 *
 * Every GPIO below is a placeholder and must be checked against the real
 * schematic. Constraints that apply to the ESP32-S3R8 (Octal PSRAM) part:
 *   - GPIO26..GPIO37 are wired to the in-package flash / Octal PSRAM and
 *     cannot be used as general-purpose pins.
 *   - GPIO0, GPIO3, GPIO45 and GPIO46 are strapping pins.
 *   - GPIO19 / GPIO20 are the native USB D- / D+ lines.
 */
#define BOARD_NAME "DOIT_AI_DUAL_EYE_SPEAKER"

/* I2S Audio */
#define PIN_I2S_BCLK GPIO_NUM_4
#define PIN_I2S_WS   GPIO_NUM_5
#define PIN_I2S_DIN  GPIO_NUM_6
#define PIN_I2S_DOUT GPIO_NUM_7
#define PIN_I2S_MCLK GPIO_NUM_16

/* I2C: IMU / Codec */
#define I2C_PORT_MAIN I2C_NUM_0
#define PIN_I2C_SDA   GPIO_NUM_8
#define PIN_I2C_SCL   GPIO_NUM_9

/* Left Eye LCD */
#define LCD_LEFT_HOST  SPI2_HOST
#define PIN_LCD_L_MOSI GPIO_NUM_11
#define PIN_LCD_L_SCLK GPIO_NUM_12
#define PIN_LCD_L_CS   GPIO_NUM_10
#define PIN_LCD_L_DC   GPIO_NUM_13
#define PIN_LCD_L_RST  GPIO_NUM_14
#define PIN_LCD_L_BL   GPIO_NUM_15

/*
 * Right Eye LCD
 * MOSI / SCLK / CS were previously on GPIO35..37, which the Octal PSRAM of
 * the ESP32-S3R8 occupies; they are moved to free pins here.
 */
#define LCD_RIGHT_HOST SPI3_HOST
#define PIN_LCD_R_MOSI GPIO_NUM_41
#define PIN_LCD_R_SCLK GPIO_NUM_42
#define PIN_LCD_R_CS   GPIO_NUM_47
#define PIN_LCD_R_DC   GPIO_NUM_38
#define PIN_LCD_R_RST  GPIO_NUM_39
#define PIN_LCD_R_BL   GPIO_NUM_40

/* Touch (digital inputs read with gpio_get_level; GPIO3 is a strapping pin) */
#define PIN_TOUCH_1 GPIO_NUM_1
#define PIN_TOUCH_2 GPIO_NUM_2
#define PIN_TOUCH_3 GPIO_NUM_3
#define PIN_TOUCH_4 GPIO_NUM_21

/* Vibration Motor (the LEDC_* names require "driver/ledc.h" where they are used) */
#define PIN_MOTOR          GPIO_NUM_17
#define MOTOR_LEDC_TIMER   LEDC_TIMER_0
#define MOTOR_LEDC_CHANNEL LEDC_CHANNEL_0

/* Buttons (GPIO0 is the boot strapping pin; GPIO19 is USB D- if USB is used) */
#define PIN_KEY_WAKE     GPIO_NUM_0
#define PIN_KEY_VOL_UP   GPIO_NUM_18
#define PIN_KEY_VOL_DOWN GPIO_NUM_19

/* Audio Parameters */
#define AUDIO_SAMPLE_RATE     16000
#define AUDIO_BITS_PER_SAMPLE 16
#define AUDIO_CHANNELS        1
#define AUDIO_FRAME_MS        20
/* Samples per channel in one frame: 16000 * 20 / 1000 = 320. */
#define AUDIO_FRAME_SAMPLES   (AUDIO_SAMPLE_RATE * AUDIO_FRAME_MS / 1000)
五、主程序框架
#include "esp_log.h"
#include "nvs_flash.h"
#include "esp_event.h"
#include "esp_netif.h"
#include "board_config.h"
#include "wifi_manager.h"
#include "audio_capture.h"
#include "audio_player.h"
#include "eye_display.h"
#include "touch_service.h"
#include "motor_service.h"
#include "imu_service.h"
#include "ai_ws_client.h"
#include "mcp_service.h"
#include "ota_service.h"
static const char *TAG = "AI_DUAL_EYE";
void app_main(void)
{
ESP_LOGI(TAG, "%s booting...", BOARD_NAME);
esp_err_t ret = nvs_flash_init();
if (ret == ESP_ERR_NVS_NO_FREE_PAGES ||
ret == ESP_ERR_NVS_NEW_VERSION_FOUND) {
ESP_ERROR_CHECK(nvs_flash_erase());
ESP_ERROR_CHECK(nvs_flash_init());
}
ESP_ERROR_CHECK(esp_netif_init());
ESP_ERROR_CHECK(esp_event_loop_create_default());
eye_display_init();
eye_display_set_expr(EYE_EXPR_BLINK);
motor_service_init();
touch_service_init();
imu_service_init();
audio_capture_init();
audio_player_init();
wifi_manager_init();
ai_ws_client_init();
mcp_service_init();
ota_service_init();
wifi_manager_start();
xTaskCreatePinnedToCore(audio_capture_task,
"audio_capture",
8192,
NULL,
5,
NULL,
0);
xTaskCreatePinnedToCore(audio_player_task,
"audio_player",
8192,
NULL,
5,
NULL,
1);
xTaskCreatePinnedToCore(ai_ws_client_task,
"ai_ws_client",
10240,
NULL,
6,
NULL,
1);
xTaskCreatePinnedToCore(touch_service_task,
"touch_service",
4096,
NULL,
4,
NULL,
0);
xTaskCreatePinnedToCore(imu_service_task,
"imu_service",
4096,
NULL,
4,
NULL,
0);
xTaskCreatePinnedToCore(ota_service_task,
"ota_service",
6144,
NULL,
3,
NULL,
0);
eye_display_set_expr(EYE_EXPR_NORMAL);
ESP_LOGI(TAG, "system initialized");
}
六、双目屏表情控制
双目屏是该方案最重要的"情绪表达器官"。普通音箱只能用语音反馈,而双目音箱可以用眼神表达状态。
建议设计以下表情映射:
待机:普通眼神
唤醒:睁眼 / 闪烁
倾听:眼睛聚焦
思考:眼睛转动 / 点点动画
说话:嘴型或眼神节奏变化
开心:弯眼笑
睡眠:闭眼
摸头:爱心眼
摇晃:眩晕眼
网络异常:疑惑眼
OTA:加载动画
表情控制接口:
#include "eye_display.h"
#include "esp_log.h"
static const char *TAG = "EYE_DISPLAY";
static eye_expr_t g_current_expr = EYE_EXPR_NORMAL;
/*
 * Initialise the dual-eye display. In this template the function only logs;
 * the actual panel bring-up is left to the integrator.
 */
void eye_display_init(void)
{
/*
* Initialise the left and right SPI LCDs here.
* A real project can plug in an ST7789 / GC9A01 / custom panel driver.
*/
ESP_LOGI(TAG, "dual eye display init");
}
/*
 * Record `expr` as the current expression and draw it on both eyes.
 * Expressions without a dedicated renderer (and out-of-range values) are
 * drawn as the normal eyes.
 */
void eye_display_set_expr(eye_expr_t expr)
{
    /* Renderer per expression; unlisted slots stay empty and use the fallback. */
    static void (*const renderers[])(void) = {
        [EYE_EXPR_NORMAL]    = eye_draw_normal,
        [EYE_EXPR_HAPPY]     = eye_draw_happy,
        [EYE_EXPR_BLINK]     = eye_draw_blink,
        [EYE_EXPR_SLEEP]     = eye_draw_sleep,
        [EYE_EXPR_LISTENING] = eye_draw_listening,
        [EYE_EXPR_THINKING]  = eye_draw_thinking,
        [EYE_EXPR_SPEAKING]  = eye_draw_speaking,
        [EYE_EXPR_DIZZY]     = eye_draw_dizzy,
        [EYE_EXPR_LOVE]      = eye_draw_love,
    };
    const unsigned slots = sizeof(renderers) / sizeof(renderers[0]);
    const unsigned slot = (unsigned)expr;
    void (*render)(void) = eye_draw_normal;

    g_current_expr = expr;

    if (slot < slots && renderers[slot]) {
        render = renderers[slot];
    }
    render();
}
画眼睛的简化示例:
/*
 * Neutral eyes: clear both panels, then draw the same rounded rectangle on each.
 * NOTE(review): the lcd_* argument order is presumed to be
 * (x, y, width, height, radius, colour) — confirm against the LCD driver.
 */
void eye_draw_normal(void)
{
    enum { BACKGROUND = 0x0000, FOREGROUND = 0xFFFF };
    enum { ARG0 = 40, ARG1 = 45, ARG2 = 80, ARG3 = 60, ARG4 = 20 };

    lcd_left_fill(BACKGROUND);
    lcd_right_fill(BACKGROUND);

    lcd_left_draw_round_rect(ARG0, ARG1, ARG2, ARG3, ARG4, FOREGROUND);
    lcd_right_draw_round_rect(ARG0, ARG1, ARG2, ARG3, ARG4, FOREGROUND);
}
/*
 * Happy eyes: clear both panels, then draw the same arc on each.
 * NOTE(review): the meaning of the arc arguments depends on the LCD driver
 * and is not visible here — confirm before changing them.
 */
void eye_draw_happy(void)
{
    enum { BACKGROUND = 0x0000, FOREGROUND = 0xFFFF };
    enum { ARG0 = 40, ARG1 = 40, ARG2 = 100, ARG3 = 100, ARG4 = 20, ARG5 = 160 };

    lcd_left_fill(BACKGROUND);
    lcd_right_fill(BACKGROUND);

    lcd_left_draw_arc(ARG0, ARG1, ARG2, ARG3, ARG4, ARG5, FOREGROUND);
    lcd_right_draw_arc(ARG0, ARG1, ARG2, ARG3, ARG4, ARG5, FOREGROUND);
}
/*
 * Sleeping eyes: clear both panels, then draw one line on each.
 * NOTE(review): arguments are presumed to be (x0, y0, x1, y1, colour) — confirm.
 */
void eye_draw_sleep(void)
{
    enum { BACKGROUND = 0x0000, FOREGROUND = 0xFFFF };
    enum { ARG0 = 35, ARG1 = 75, ARG2 = 115, ARG3 = 75 };

    lcd_left_fill(BACKGROUND);
    lcd_right_fill(BACKGROUND);

    lcd_left_draw_line(ARG0, ARG1, ARG2, ARG3, FOREGROUND);
    lcd_right_draw_line(ARG0, ARG1, ARG2, ARG3, FOREGROUND);
}
/*
 * Heart eyes: clear both panels, then draw a heart on each.
 * NOTE(review): 0xF800 is presumably RGB565 red, and the first two
 * arguments presumably a position — confirm against the LCD driver.
 */
void eye_draw_love(void)
{
    enum { BACKGROUND = 0x0000, HEART_COLOUR = 0xF800 };
    enum { ARG0 = 60, ARG1 = 50 };

    lcd_left_fill(BACKGROUND);
    lcd_right_fill(BACKGROUND);

    lcd_left_draw_heart(ARG0, ARG1, HEART_COLOUR);
    lcd_right_draw_heart(ARG0, ARG1, HEART_COLOUR);
}
屏幕型号适配建议:
/*
 * Supported eye panel variants.
 * The names correspond to the 0.71-inch and 1.28-inch screens mentioned in
 * the surrounding text.
 */
typedef enum {
EYE_SCREEN_071 = 0,
EYE_SCREEN_128
} eye_screen_type_t;
/*
 * Per-panel display parameters.
 * NOTE(review): no code shown here consumes this struct; units are presumed
 * to be pixels for width/height/offsets and percent for brightness_max —
 * confirm against the display driver.
 */
typedef struct {
eye_screen_type_t type;
int width;
int height;
int offset_x;
int offset_y;
int brightness_max;
} eye_screen_config_t;
/*
 * Parameters for the EYE_SCREEN_071 panel: 160 x 160, no offset.
 * NOTE(review): verify the resolution against the actual panel datasheet.
 */
static eye_screen_config_t g_eye_screen_071 = {
.type = EYE_SCREEN_071,
.width = 160,
.height = 160,
.offset_x = 0,
.offset_y = 0,
.brightness_max = 100
};
/*
 * Parameters for the EYE_SCREEN_128 panel: 240 x 240, no offset.
 * NOTE(review): the eye_draw_* examples use fixed coordinates that fit a
 * 160-wide canvas; they are not scaled by this configuration.
 */
static eye_screen_config_t g_eye_screen_128 = {
.type = EYE_SCREEN_128,
.width = 240,
.height = 240,
.offset_x = 0,
.offset_y = 0,
.brightness_max = 100
};
开发宝典中 RoPet_ESPS3_AI_EYE 快速入门部分提到,双目板存在 ES8311 方案、VB6824 方案 V1 / V2 等版本,并涉及 1.28 寸、0.71 寸等屏幕类型适配;实际开发前需要确认板子版本和屏幕类型。
七、四路触控交互设计
四路触控可以让设备从"语音交互"升级到"语音 + 触摸交互"。
建议功能定义:
| 触控通道 | 动作 | 功能 |
|---|---|---|
| TOUCH_1 | 摸头 | AI 开心 / 爱心眼 / 震动反馈 |
| TOUCH_2 | 左侧触摸 | 上一个表情 / 上一首 |
| TOUCH_3 | 右侧触摸 | 下一个表情 / 下一首 |
| TOUCH_4 | 长按 | 进入配网 / 唤醒 / 打断 |
触控事件枚举:
/*
 * Logical touch events dispatched by handle_touch_event().
 * HEAD / LEFT / RIGHT / LONG_PRESS correspond to touch channels 1..4 in the
 * table above; DOUBLE_CLICK interrupts the current voice playback.
 */
typedef enum {
TOUCH_EVENT_NONE = 0,
TOUCH_EVENT_HEAD,
TOUCH_EVENT_LEFT,
TOUCH_EVENT_RIGHT,
TOUCH_EVENT_LONG_PRESS,
TOUCH_EVENT_DOUBLE_CLICK
} touch_event_t;
触控任务:
#include "driver/gpio.h"
#include "esp_log.h"
#include "touch_service.h"
#include "eye_display.h"
#include "motor_service.h"
#include "ai_ws_client.h"
#include "audio_capture.h"
#include "audio_player.h"
static const char *TAG = "TOUCH";
/*
 * Sample one touch input.
 * Returns 1 when the pin reads low (inputs are pulled up, so low means
 * touched), otherwise 0.
 */
static int read_touch_gpio(gpio_num_t gpio)
{
    const int level = gpio_get_level(gpio);

    return !level;
}
void touch_service_init(void)
{
gpio_config_t io_conf = {
.pin_bit_mask = (1ULL << PIN_TOUCH_1) |
(1ULL << PIN_TOUCH_2) |
(1ULL << PIN_TOUCH_3) |
(1ULL << PIN_TOUCH_4),
.mode = GPIO_MODE_INPUT,
.pull_up_en = GPIO_PULLUP_ENABLE,
.pull_down_en = GPIO_PULLDOWN_DISABLE,
.intr_type = GPIO_INTR_DISABLE,
};
gpio_config(&io_conf);
ESP_LOGI(TAG, "touch service init");
}
/*
 * React to one touch event: update the eye expression, give haptic feedback
 * and/or notify the AI backend. Unknown events are ignored.
 */
static void handle_touch_event(touch_event_t event)
{
    if (event == TOUCH_EVENT_HEAD) {
        /* Head pat: heart eyes, a vibration, and an event for the backend. */
        ESP_LOGI(TAG, "摸头事件");
        eye_display_set_expr(EYE_EXPR_LOVE);
        motor_vibrate_once(80);
        ai_ws_send_control("{\"type\":\"event\",\"name\":\"touch_head\"}");
        return;
    }

    if (event == TOUCH_EVENT_LEFT) {
        ESP_LOGI(TAG, "左侧触摸");
        eye_display_set_expr(EYE_EXPR_HAPPY);
        motor_vibrate_once(40);
        return;
    }

    if (event == TOUCH_EVENT_RIGHT) {
        ESP_LOGI(TAG, "右侧触摸");
        eye_display_set_expr(EYE_EXPR_BLINK);
        motor_vibrate_once(40);
        return;
    }

    if (event == TOUCH_EVENT_LONG_PRESS) {
        /* Long press: request Wi-Fi provisioning through the control channel. */
        ESP_LOGI(TAG, "长按触摸,进入配网");
        eye_display_set_expr(EYE_EXPR_THINKING);
        ai_ws_send_control("{\"type\":\"wifi_config\"}");
        return;
    }

    if (event == TOUCH_EVENT_DOUBLE_CLICK) {
        /* Double click: stop playback, tell the backend, and listen again. */
        ESP_LOGI(TAG, "双击触摸,打断当前语音");
        audio_player_stop();
        ai_ws_send_control("{\"type\":\"interrupt\"}");
        audio_capture_start();
        eye_display_set_expr(EYE_EXPR_LISTENING);
        return;
    }
}
void touch_service_task(void *arg)
{
int last_t1 = 0;
int last_t2 = 0;
int last_t3 = 0;
int last_t4 = 0;
while (1) {
int t1 = read_touch_gpio(PIN_TOUCH_1);
int t2 = read_touch_gpio(PIN_TOUCH_2);
int t3 = read_touch_gpio(PIN_TOUCH_3);
int t4 = read_touch_gpio(PIN_TOUCH_4);
if (t1 && !last_t1) {
handle_touch_event(TOUCH_EVENT_HEAD);
}
if (t2 && !last_t2) {
handle_touch_event(TOUCH_EVENT_LEFT);
}
if (t3 && !last_t3) {
handle_touch_event(TOUCH_EVENT_RIGHT);
}
if (t4 && !last_t4) {
handle_touch_event(TOUCH_EVENT_LONG_PRESS);
}
last_t1 = t1;
last_t2 = t2;
last_t3 = t3;
last_t4 = t4;
vTaskDelay(pdMS_TO_TICKS(20));
}
}
如果使用 ESP32-S3 内置 Touch Sensor,也可以用触摸阈值方式:
#include "driver/touch_sensor.h"
#define TOUCH_THRESHOLD 30000
void touch_pad_init_simple(void)
{
touch_pad_init();
touch_pad_config(TOUCH_PAD_NUM1);
touch_pad_config(TOUCH_PAD_NUM2);
touch_pad_config(TOUCH_PAD_NUM3);
touch_pad_config(TOUCH_PAD_NUM4);
touch_pad_filter_set_config(&(touch_filter_config_t) {
.mode = TOUCH_PAD_FILTER_IIR_16,
.debounce_cnt = 1,
.noise_thr = 0,
.jitter_step = 4,
.smh_lvl = TOUCH_PAD_SMOOTH_IIR_2,
});
touch_pad_filter_enable();
}
/*
 * Report whether `pad` is currently touched.
 *
 * On ESP32-S3 the raw touch reading rises when the pad is touched (the
 * opposite of the original ESP32), so a touch is a reading ABOVE the
 * threshold. A failed read is treated as "not pressed".
 *
 * NOTE(review): TOUCH_THRESHOLD is an absolute value and depends on the
 * electrode and enclosure — calibrate it against the untouched reading.
 */
bool touch_is_pressed(touch_pad_t pad)
{
    uint32_t value = 0;

    if (touch_pad_read_raw_data(pad, &value) != ESP_OK) {
        return false;
    }
    return value > TOUCH_THRESHOLD;
}
八、震动马达控制
震动马达用于增强交互反馈。比如摸头震一下、唤醒震一下、错误提示连续震动、OTA 完成震动。
#include "driver/ledc.h"
#include "motor_service.h"
#include "board_config.h"
/*
 * Set up the LEDC PWM used to drive the vibration motor:
 * a 2 kHz, 10-bit low-speed timer and one channel on PIN_MOTOR that
 * starts with a duty of 0 (motor off).
 */
void motor_service_init(void)
{
    const ledc_timer_config_t pwm_timer = {
        .speed_mode = LEDC_LOW_SPEED_MODE,
        .duty_resolution = LEDC_TIMER_10_BIT,
        .timer_num = MOTOR_LEDC_TIMER,
        .freq_hz = 2000,
        .clk_cfg = LEDC_AUTO_CLK,
    };
    const ledc_channel_config_t pwm_channel = {
        .gpio_num = PIN_MOTOR,
        .speed_mode = LEDC_LOW_SPEED_MODE,
        .channel = MOTOR_LEDC_CHANNEL,
        .timer_sel = MOTOR_LEDC_TIMER,
        .duty = 0,
        .hpoint = 0,
    };

    /* The timer must exist before a channel is bound to it. */
    ledc_timer_config(&pwm_timer);
    ledc_channel_config(&pwm_channel);
}
/*
 * Set the motor drive level.
 * `percent` is clamped to 0..100 and mapped linearly onto the 10-bit duty
 * range (0..1023).
 */
void motor_set_strength(uint8_t percent)
{
    const uint32_t full_scale = 1023;
    const uint32_t level = (percent > 100) ? 100u : percent;
    const uint32_t duty = level * full_scale / 100;

    ledc_set_duty(LEDC_LOW_SPEED_MODE, MOTOR_LEDC_CHANNEL, duty);
    ledc_update_duty(LEDC_LOW_SPEED_MODE, MOTOR_LEDC_CHANNEL);
}
/* Turn the motor off by setting the drive strength to 0. */
void motor_stop(void)
{
motor_set_strength(0);
}
/*
 * Single short pulse: drive the motor at `strength` (percent, clamped by
 * motor_set_strength) for 80 ms, then stop.
 * Blocks the calling task for the duration of the pulse.
 */
void motor_vibrate_once(uint8_t strength)
{
motor_set_strength(strength);
vTaskDelay(pdMS_TO_TICKS(80));
motor_stop();
}
/*
 * Error pattern: three pulses at 70 % strength, 80 ms on and 80 ms off each.
 * Blocks the calling task until the pattern is finished.
 */
void motor_vibrate_pattern_error(void)
{
    int pulses_left = 3;

    while (pulses_left > 0) {
        motor_set_strength(70);
        vTaskDelay(pdMS_TO_TICKS(80));

        motor_stop();
        vTaskDelay(pdMS_TO_TICKS(80));

        pulses_left--;
    }
}
/*
 * Success pattern: one 120 ms pulse at 60 % strength.
 * Blocks the calling task for the duration of the pulse.
 */
void motor_vibrate_pattern_success(void)
{
motor_set_strength(60);
vTaskDelay(pdMS_TO_TICKS(120));
motor_stop();
}
九、三轴姿态感应设计
三轴传感器可以让双目音箱感知"拿起、放下、摇晃、倾斜、翻转"等动作。
建议交互设计:
| 姿态事件 | 功能 |
|---|---|
| 拿起设备 | 自动唤醒 |
| 轻轻摇晃 | 切换表情 / 重新回答 |
| 倾斜左侧 | 上一首 / 上一个功能 |
| 倾斜右侧 | 下一首 / 下一个功能 |
| 翻转朝下 | 静音 / 睡眠 |
| 剧烈摇晃 | 眩晕表情 + 震动反馈 |
姿态事件枚举:
/*
 * Posture / motion events derived from the accelerometer.
 * NOTE(review): imu_detect_event() as shown only ever returns SHAKE,
 * TILT_LEFT, TILT_RIGHT, FACE_DOWN or STABLE; NONE and PICK_UP are never
 * produced by it.
 */
typedef enum {
IMU_EVENT_NONE = 0,
IMU_EVENT_PICK_UP,
IMU_EVENT_SHAKE,
IMU_EVENT_TILT_LEFT,
IMU_EVENT_TILT_RIGHT,
IMU_EVENT_FACE_DOWN,
IMU_EVENT_STABLE
} imu_event_t;
I2C 初始化:
#include "driver/i2c.h"
#include "board_config.h"
/*
 * Initialise the main I2C bus (master, 400 kHz, internal pull-ups) on
 * I2C_PORT_MAIN using the pins from board_config.h.
 *
 * The board template marks this bus as the one carrying the IMU and codec,
 * so a configuration or driver-install failure is treated as fatal rather
 * than being silently ignored.
 */
void i2c_main_init(void)
{
    const i2c_config_t conf = {
        .mode = I2C_MODE_MASTER,
        .sda_io_num = PIN_I2C_SDA,
        .scl_io_num = PIN_I2C_SCL,
        .sda_pullup_en = GPIO_PULLUP_ENABLE,
        .scl_pullup_en = GPIO_PULLUP_ENABLE,
        .master.clk_speed = 400000,
    };

    ESP_ERROR_CHECK(i2c_param_config(I2C_PORT_MAIN, &conf));
    /* Master mode needs no slave RX/TX buffers and no special interrupt flags. */
    ESP_ERROR_CHECK(i2c_driver_install(I2C_PORT_MAIN, conf.mode, 0, 0, 0));
}
三轴数据结构:
/* One raw accelerometer sample: signed 16-bit counts per axis. */
typedef struct {
int16_t x;
int16_t y;
int16_t z;
} accel_raw_t;
/* One accelerometer sample converted to units of g (see read_accel_g). */
typedef struct {
float x_g;
float y_g;
float z_g;
} accel_g_t;
姿态判断逻辑:
#include <math.h>
#include "imu_service.h"
#include "eye_display.h"
#include "motor_service.h"
#include "audio_capture.h"
static accel_g_t read_accel_g(void)
{
accel_raw_t raw = imu_read_raw();
accel_g_t g = {
.x_g = raw.x / 16384.0f,
.y_g = raw.y / 16384.0f,
.z_g = raw.z / 16384.0f,
};
return g;
}
/*
 * Classify one acceleration sample, in priority order:
 *   1. any axis magnitude above 1.8 g   -> IMU_EVENT_SHAKE
 *   2. x above  0.65 g                  -> IMU_EVENT_TILT_RIGHT
 *   3. x below -0.65 g                  -> IMU_EVENT_TILT_LEFT
 *   4. z below -0.75 g                  -> IMU_EVENT_FACE_DOWN
 *   5. otherwise                        -> IMU_EVENT_STABLE
 */
static imu_event_t imu_detect_event(accel_g_t g)
{
    const float shake_limit = 1.8f;
    const float tilt_limit = 0.65f;
    const float face_down_limit = -0.75f;

    const int shaking = fabsf(g.x_g) > shake_limit ||
                        fabsf(g.y_g) > shake_limit ||
                        fabsf(g.z_g) > shake_limit;

    if (shaking) {
        return IMU_EVENT_SHAKE;
    }
    if (g.x_g > tilt_limit) {
        return IMU_EVENT_TILT_RIGHT;
    }
    if (g.x_g < -tilt_limit) {
        return IMU_EVENT_TILT_LEFT;
    }
    return (g.z_g < face_down_limit) ? IMU_EVENT_FACE_DOWN : IMU_EVENT_STABLE;
}
/*
 * React to a posture event: change the eye expression and, where relevant,
 * vibrate or stop audio capture. STABLE and unknown events do nothing.
 */
static void imu_handle_event(imu_event_t event)
{
    if (event == IMU_EVENT_SHAKE) {
        eye_display_set_expr(EYE_EXPR_DIZZY);
        motor_vibrate_once(90);
    } else if (event == IMU_EVENT_TILT_LEFT) {
        eye_display_set_expr(EYE_EXPR_BLINK);
    } else if (event == IMU_EVENT_TILT_RIGHT) {
        eye_display_set_expr(EYE_EXPR_HAPPY);
    } else if (event == IMU_EVENT_FACE_DOWN) {
        /* Face down: show sleeping eyes and stop listening. */
        eye_display_set_expr(EYE_EXPR_SLEEP);
        audio_capture_stop();
    }
}
void imu_service_task(void *arg)
{
while (1) {
accel_g_t g = read_accel_g();
imu_event_t event = imu_detect_event(g);
imu_handle_event(event);
vTaskDelay(pdMS_TO_TICKS(100));
}
}
十、AI 语音对话链路
AI 智能音箱的语音链路建议采用:
Mic → ESP32-S3 I2S → WebSocket → ASR → LLM → TTS → ESP32-S3 → Codec / Amp → Speaker
WebSocket 上线消息:
{
"type": "hello",
"device_id": "doit_dual_eye_speaker_001",
"product": "doit_ai_dual_eye_speaker",
"firmware": "1.0.0",
"features": {
"dual_eye": true,
"touch": 4,
"vibration": true,
"imu": true,
"mcp": true,
"ota": true,
"interrupt": true
},
"audio": {
"format": "pcm_s16le",
"sample_rate": 16000,
"channels": 1
}
}
AI WebSocket 事件处理:
/*
 * Handle one JSON text frame from the AI WebSocket.
 *
 * data: frame payload, not necessarily NUL-terminated.
 * len:  payload length in bytes.
 *
 * The "type" field selects the action: stt / llm / tts_start / tts_end
 * update the eye expression (and start/stop the player), tool_call is
 * forwarded to the MCP service, emotion sets an expression by name.
 * Empty, NULL or unparsable frames are dropped.
 */
static void handle_ai_json_message(const char *data, int len)
{
    /* A non-positive length would turn into a huge size in memcpy below. */
    if (data == NULL || len <= 0) {
        return;
    }

    /* cJSON_Parse needs a terminated string, so copy into a zeroed buffer. */
    char *buf = calloc(1, (size_t)len + 1);
    if (!buf) {
        return;
    }
    memcpy(buf, data, (size_t)len);

    cJSON *root = cJSON_Parse(buf);
    if (!root) {
        free(buf);
        return;
    }

    cJSON *type = cJSON_GetObjectItem(root, "type");
    if (cJSON_IsString(type)) {
        const char *kind = type->valuestring;

        if (strcmp(kind, "stt") == 0) {
            eye_display_set_expr(EYE_EXPR_LISTENING);
        } else if (strcmp(kind, "llm") == 0) {
            eye_display_set_expr(EYE_EXPR_THINKING);
        } else if (strcmp(kind, "tts_start") == 0) {
            eye_display_set_expr(EYE_EXPR_SPEAKING);
            audio_player_start();
        } else if (strcmp(kind, "tts_end") == 0) {
            eye_display_set_expr(EYE_EXPR_NORMAL);
            audio_player_stop();
        } else if (strcmp(kind, "tool_call") == 0) {
            mcp_service_handle(root);
        } else if (strcmp(kind, "emotion") == 0) {
            cJSON *emotion = cJSON_GetObjectItem(root, "value");
            if (cJSON_IsString(emotion)) {
                eye_set_emotion_by_name(emotion->valuestring);
            }
        }
    }

    cJSON_Delete(root);
    free(buf);
}
情绪映射:
void eye_set_emotion_by_name(const char *emotion)
{
if (strcmp(emotion, "happy") == 0) {
eye_display_set_expr(EYE_EXPR_HAPPY);
} else if (strcmp(emotion, "love") == 0) {
eye_display_set_expr(EYE_EXPR_LOVE);
motor_vibrate_once(60);
} else if (strcmp(emotion, "sleep") == 0) {
eye_display_set_expr(EYE_EXPR_SLEEP);
} else if (strcmp(emotion, "thinking") == 0) {
eye_display_set_expr(EYE_EXPR_THINKING);
} else {
eye_display_set_expr(EYE_EXPR_NORMAL);
}
}
十一、实时打断设计
AI 音箱必须支持打断,否则体验会很差。
打断触发来源:
1. 用户再次说唤醒词
2. 用户按键
3. 用户双击触控区
4. 用户拿起设备
5. 用户摇晃设备
打断代码:
/*
 * Interrupt the current AI reply and go back to listening:
 * stop playback, send an "interrupt" control message to the backend,
 * show the listening expression, give a short vibration, then restart
 * audio capture.
 */
void ai_interrupt_current_session(void)
{
audio_player_stop();
ai_ws_send_control("{\"type\":\"interrupt\"}");
eye_display_set_expr(EYE_EXPR_LISTENING);
motor_vibrate_once(50);
audio_capture_start();
}
按键 / 触控绑定:
/*
 * Handle a user wake-up action (key or touch).
 * While audio is playing this interrupts the current session; otherwise it
 * simply starts listening with a short haptic cue.
 */
void user_wakeup_event_handler(void)
{
    if (audio_player_is_playing()) {
        ai_interrupt_current_session();
        return;
    }

    eye_display_set_expr(EYE_EXPR_LISTENING);
    motor_vibrate_once(40);
    audio_capture_start();
}
十二、MCP 工具调用:让音箱控制设备
四博 AI 双目智能音箱不只是陪伴设备,也可以作为智能家居入口。
用户说:
打开客厅灯
把卧室灯调成暖光
关闭所有插座
启动睡眠模式
打开窗帘
AI 后端返回 MCP 工具调用:
{
"type": "tool_call",
"tool": "home.device.control",
"arguments": {
"device_id": "light_livingroom_01",
"action": "power",
"value": 1
}
}
设备端处理:
/*
 * One device-control command, serialised to JSON by home_control_device().
 * device_id and action are NUL-terminated strings of at most 31 characters.
 */
typedef struct {
char device_id[32];
char action[32];
int value;
} control_cmd_t;
/*
 * Return 1 when `s` can be placed inside a JSON string literal as-is,
 * i.e. it contains no quote, backslash or control character.
 */
static int home_json_plain_string(const char *s)
{
    for (; *s != '\0'; s++) {
        const unsigned char c = (unsigned char)*s;
        if (c < 0x20 || c == '"' || c == '\\') {
            return 0;
        }
    }
    return 1;
}

/*
 * Serialise `cmd` as JSON and POST it to the device-control endpoint.
 *
 * Returns ESP_ERR_INVALID_ARG for a NULL command or for strings that would
 * break the JSON body (the values originate from the AI backend and are
 * interpolated without escaping), ESP_ERR_INVALID_SIZE if the body does not
 * fit the buffer, otherwise the result of http_post_json().
 */
esp_err_t home_control_device(control_cmd_t *cmd)
{
    char json[256];

    if (!cmd) {
        return ESP_ERR_INVALID_ARG;
    }
    if (!home_json_plain_string(cmd->device_id) ||
        !home_json_plain_string(cmd->action)) {
        return ESP_ERR_INVALID_ARG;
    }

    const int written = snprintf(json, sizeof(json),
                                 "{"
                                 "\"device_id\":\"%s\","
                                 "\"action\":\"%s\","
                                 "\"value\":%d"
                                 "}",
                                 cmd->device_id,
                                 cmd->action,
                                 cmd->value);
    if (written < 0 || (size_t)written >= sizeof(json)) {
        return ESP_ERR_INVALID_SIZE;
    }

    return http_post_json("https://api.customer-platform.com/device/control", json);
}
/*
 * Execute a "home.device.control" tool call.
 *
 * args must contain string "device_id", string "action" and numeric "value".
 * Calls with missing or mistyped fields are ignored, and so are calls whose
 * strings do not fit control_cmd_t. Truncating an over-long device_id would
 * silently address a different device.
 */
void mcp_handle_device_control(cJSON *args)
{
    cJSON *device_id = cJSON_GetObjectItem(args, "device_id");
    cJSON *action = cJSON_GetObjectItem(args, "action");
    cJSON *value = cJSON_GetObjectItem(args, "value");

    if (!cJSON_IsString(device_id) ||
        !cJSON_IsString(action) ||
        !cJSON_IsNumber(value)) {
        return;
    }

    control_cmd_t cmd = {0};
    const size_t id_len = strlen(device_id->valuestring);
    const size_t action_len = strlen(action->valuestring);

    if (id_len >= sizeof(cmd.device_id) || action_len >= sizeof(cmd.action)) {
        return;
    }

    /* Lengths are verified above, so the copies include the terminator. */
    memcpy(cmd.device_id, device_id->valuestring, id_len + 1);
    memcpy(cmd.action, action->valuestring, action_len + 1);
    cmd.value = value->valueint;

    /* NOTE(review): the HTTP result is dropped here; report it to the
     * backend if the tool-call protocol has a response message. */
    (void)home_control_device(&cmd);
}
MCP 工具白名单:
/* Identifiers of the MCP tools this device accepts. */
typedef enum {
    MCP_TOOL_DEVICE_CONTROL = 0,
    MCP_TOOL_SCENE_RUN,
    MCP_TOOL_SET_EYE,
    MCP_TOOL_SET_MOTOR,
    MCP_TOOL_QUERY_SENSOR,
    MCP_TOOL_UNKNOWN   /* returned for any name that is not whitelisted */
} mcp_tool_id_t;

/* One whitelist entry: wire name of the tool and its identifier. */
typedef struct {
    const char *name;
    mcp_tool_id_t id;
} mcp_tool_map_t;

/* Whitelist of tool names accepted from the backend. */
static const mcp_tool_map_t g_mcp_tools[] = {
    {"home.device.control", MCP_TOOL_DEVICE_CONTROL},
    {"home.scene.run",      MCP_TOOL_SCENE_RUN},
    {"robot.eye.set",       MCP_TOOL_SET_EYE},
    {"robot.motor.vibrate", MCP_TOOL_SET_MOTOR},
    {"home.sensor.query",   MCP_TOOL_QUERY_SENSOR},
};

/*
 * Map a tool name to its identifier.
 * Returns MCP_TOOL_UNKNOWN for a NULL name or a name that is not in the
 * whitelist; matching is exact and case-sensitive.
 */
static mcp_tool_id_t mcp_get_tool_id(const char *name)
{
    if (name == NULL) {
        return MCP_TOOL_UNKNOWN;
    }

    for (size_t i = 0; i < sizeof(g_mcp_tools) / sizeof(g_mcp_tools[0]); i++) {
        if (strcmp(name, g_mcp_tools[i].name) == 0) {
            return g_mcp_tools[i].id;
        }
    }
    return MCP_TOOL_UNKNOWN;
}
十三、AI 控制双目表情和震动
MCP 不仅能控制智能家居,也能控制设备自身表情。
例如 AI 说"我很开心",后端可以返回:
{
"type": "tool_call",
"tool": "robot.eye.set",
"arguments": {
"emotion": "happy"
}
}
设备执行:
/*
 * Execute a "robot.eye.set" tool call: read the string field "emotion"
 * from args and show the matching expression. Calls without a string
 * "emotion" field are ignored.
 */
void mcp_handle_eye_set(cJSON *args)
{
cJSON *emotion = cJSON_GetObjectItem(args, "emotion");
if (!cJSON_IsString(emotion)) {
return;
}
eye_set_emotion_by_name(emotion->valuestring);
}
震动工具:
{
"type": "tool_call",
"tool": "robot.motor.vibrate",
"arguments": {
"strength": 80,
"duration_ms": 120
}
}
设备执行:
/*
 * Execute a "robot.motor.vibrate" tool call.
 *
 * args: numeric "strength" (percent) and numeric "duration_ms".
 * strength is clamped to 0..100 and duration to 0..1000 ms. The clamping is
 * done on the full int value BEFORE narrowing; previously 300 was truncated
 * to 44 and negative values wrapped around.
 * Blocks the calling task for the duration of the vibration.
 */
void mcp_handle_motor_vibrate(cJSON *args)
{
    cJSON *strength = cJSON_GetObjectItem(args, "strength");
    cJSON *duration = cJSON_GetObjectItem(args, "duration_ms");

    if (!cJSON_IsNumber(strength) || !cJSON_IsNumber(duration)) {
        return;
    }

    int percent = strength->valueint;
    int millis = duration->valueint;

    if (percent < 0) {
        percent = 0;
    } else if (percent > 100) {
        percent = 100;
    }
    if (millis < 0) {
        millis = 0;
    } else if (millis > 1000) {
        millis = 1000;
    }

    motor_set_strength((uint8_t)percent);
    vTaskDelay(pdMS_TO_TICKS((uint32_t)millis));
    motor_stop();
}
十四、Wi-Fi / BluFi 配网
四博开发宝典中说明,小智 AI 系统硬件设备开源代码可以在 VSCode 中安装 ESP-IDF 扩展和编译工具进行开发、编译和烧录;设备配网部分包含 SoftAP 配网和四博小助手 BluFi 配网。
Wi-Fi 连接代码示例:
void wifi_start_sta(const char *ssid, const char *password)
{
wifi_config_t wifi_config = {0};
strncpy((char *)wifi_config.sta.ssid,
ssid,
sizeof(wifi_config.sta.ssid));
strncpy((char *)wifi_config.sta.password,
password,
sizeof(wifi_config.sta.password));
esp_netif_create_default_wifi_sta();
wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT();
esp_wifi_init(&cfg);
esp_wifi_set_mode(WIFI_MODE_STA);
esp_wifi_set_config(WIFI_IF_STA, &wifi_config);
esp_wifi_start();
esp_wifi_connect();
}
配网状态和眼睛动画联动:
/*
 * Reflect the Wi-Fi state on the eyes and the motor:
 * provisioning -> thinking, connected -> happy + success pattern,
 * failed -> angry + error pattern, anything else -> normal.
 */
void wifi_event_to_eye(ai_wifi_state_t state)
{
    eye_expr_t expr = EYE_EXPR_NORMAL;
    void (*haptic)(void) = NULL;

    switch (state) {
    case WIFI_STATE_PROVISIONING:
        expr = EYE_EXPR_THINKING;
        break;
    case WIFI_STATE_CONNECTED:
        expr = EYE_EXPR_HAPPY;
        haptic = motor_vibrate_pattern_success;
        break;
    case WIFI_STATE_FAILED:
        expr = EYE_EXPR_ANGRY;
        haptic = motor_vibrate_pattern_error;
        break;
    default:
        break;
    }

    /* Expression first, then the (blocking) vibration pattern. */
    eye_display_set_expr(expr);
    if (haptic) {
        haptic();
    }
}
十五、OTA 升级设计
双目音箱 OTA 不只是升级固件,还可以升级表情资源、提示音、动作参数和唤醒词配置。
OTA 版本文件建议:
{
"project": "doit_ai_dual_eye_speaker",
"version": "1.0.3",
"chip": "esp32s3",
"url": "https://ota.customer-platform.com/dual_eye/1.0.3.bin",
"eye_asset_url": "https://ota.customer-platform.com/dual_eye/eyes_v3.bin",
"md5": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"force": false,
"note": "新增摸头爱心眼、摇晃眩晕眼、智能家居MCP控制"
}
OTA 代码:
#include "esp_crt_bundle.h"
#include "esp_http_client.h"
#include "esp_https_ota.h"
void ota_check_update(void)
{
esp_http_client_config_t config = {
.url = "https://ota.customer-platform.com/dual_eye/latest.bin",
.timeout_ms = 10000,
.keep_alive_enable = true,
};
esp_https_ota_config_t ota_config = {
.http_config = &config,
};
eye_display_set_expr(EYE_EXPR_THINKING);
esp_err_t ret = esp_https_ota(&ota_config);
if (ret == ESP_OK) {
eye_display_set_expr(EYE_EXPR_HAPPY);
motor_vibrate_pattern_success();
esp_restart();
} else {
eye_display_set_expr(EYE_EXPR_ANGRY);
motor_vibrate_pattern_error();
}
}
分区表示例:
# Name, Type, SubType, Offset, Size, Flags
nvs, data, nvs, 0x9000, 0x6000,
otadata, data, ota, 0xf000, 0x2000,
phy_init, data, phy, 0x11000, 0x1000,
factory, app, factory, 0x20000, 0x300000,
ota_0, app, ota_0, , 0x300000,
ota_1, app, ota_1, , 0x300000,
eyes, data, spiffs, , 0x200000,
storage, data, spiffs, , 0x100000,
十六、智能家居控制场景
四博 AI 双目音箱也可以作为家庭控制入口。
场景一:回家模式
/*
 * "Arrive home" scene: living-room light on at 80 % brightness, curtain
 * opened, air conditioner set to cooling at 26, then happy eyes and a
 * short vibration.
 */
void scene_home_mode(void)
{
    static const struct {
        const char *device;
        const char *action;
        int value;
    } steps[] = {
        {"light_livingroom_01",        "power",      1},
        {"light_livingroom_01",        "brightness", 80},
        {"curtain_livingroom_01",      "open",       100},
        {"air_conditioner_livingroom", "cooling",    26},
    };

    for (unsigned i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
        home_control_simple(steps[i].device, steps[i].action, steps[i].value);
    }

    eye_display_set_expr(EYE_EXPR_HAPPY);
    motor_vibrate_once(60);
}
场景二:睡眠模式
/*
 * "Sleep" scene: bedroom light dimmed to 10 % at colour temperature 2700,
 * bedroom curtain closed, TV socket switched off, then sleeping eyes.
 */
void scene_sleep_mode(void)
{
    static const struct {
        const char *device;
        const char *action;
        int value;
    } steps[] = {
        {"light_bedroom_01",   "brightness", 10},
        {"light_bedroom_01",   "color_temp", 2700},
        {"curtain_bedroom_01", "close",      100},
        {"socket_tv_01",       "power",      0},
    };

    for (unsigned i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
        home_control_simple(steps[i].device, steps[i].action, steps[i].value);
    }

    eye_display_set_expr(EYE_EXPR_SLEEP);
}
场景三:离家模式
/*
 * "Away" scene: switch off all lights, sockets and air conditioners,
 * then return the eyes to the normal expression.
 */
void scene_away_mode(void)
{
    static const char *const groups[] = {
        "all_lights",
        "all_sockets",
        "all_air_conditioners",
    };

    for (unsigned i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
        home_control_simple(groups[i], "power", 0);
    }

    eye_display_set_expr(EYE_EXPR_NORMAL);
}
十七、产品级功能总结
四博 AI 双目智能音箱最终可以形成以下产品能力:
1. AI 对话:支持小智、豆包、ChatGPT 等大模型接入
2. 双目表达:0.71 寸 / 1.28 寸双屏显示表情动画
3. 触控交互:四路触控,支持摸头、长按、双击、左右操作
4. 震动反馈:触摸、唤醒、错误、成功、闹钟等触觉反馈
5. 姿态感应:拿起、摇晃、倾斜、翻转等动作识别
6. 实时打断:说话过程中可按键、触控或语音打断
7. 智能家居:通过 MCP / MQTT / HTTP 控制家居设备
8. OTA 升级:固件、眼睛素材、提示音、参数在线升级
9. 二次开发:基于 ESP-IDF / RoPet_ESPS3_AI_EYE 工程扩展
10. 量产适配:支持品牌客户、方案商客户定制开发
AI-S3 双目双屏方案在四博 AI 选型表中明确定位为高端 AI 市场,可支持品牌 B 端客户或方案商客户进行定制开发,并支持小智、豆包、ChatGPT 等主流大模型。
十八、结语
四博 AI 双目智能音箱的核心价值,不是简单把 AI 大模型接到喇叭上,而是把"语音、表情、触控、震动、姿态、智能家居控制"整合成一套完整的人机交互系统。
普通音箱是"听得见、说得出"。
四博 AI 双目音箱是:
听得懂用户
看得出情绪
摸得有反馈
动得有感知
控得了设备
升得了系统
基于 ESP32-S3R8、双目屏、四路触控、震动马达、三轴传感器和 MCP 工具调用,四博 AI 双目方案可以把传统 AI 音箱升级成更具生命感和交互感的 AI 智能终端,既适合消费类 AI 陪伴产品,也适合智能家居、教育陪伴、酒店客控、桌面助手和 AI 玩具等场景。