基于 ESP32-S3 的四博 AI 双目智能音箱方案:四路触控、震动反馈、IMU 姿态识别、语音克隆与专属知识库接入
1. 项目定位
四博 AI 双目智能音箱不是传统蓝牙音箱,也不是简单的语音问答终端,而是一套面向 AI 桌宠、儿童陪伴机器人、AI 早教机、学习机、AI 台灯、智能音箱和礼品玩具 的多模态交互平台。
它的核心是把传统"语音输入 + 云端回答 + 喇叭播放"的音箱模型,升级为:
语音交互
+ 双目表情
+ 四路触控
+ 震动反馈
+ 三轴姿态感应
+ 小程序配置
+ 语音克隆
+ 专属知识库
+ 大模型对话
四博 AI 开发宝典中,AI-Speaker 开发板支持"四博小助手"小程序,并支持克隆、知识库、自建大模型和 MCP;RoPet_ESPS3_AI_EYE 双目方案支持 2.4G Wi-Fi、4G 模组扩展、0.71 寸和 1.28 寸屏幕;DOIT_ESPS3_AI_EYE_Vision 还集成摄像头、双目显示和 4 个触摸节点,适合多模态 AI 产品开发。
2. 硬件方案
2.1 推荐主控
推荐使用四博 ESPS3 系列:
主控:ESP32-S3
推荐模组:ESPS3-32 / ESPS3-32E
推荐规格:N16R2 / N16R8
应用方向:AI 音视频、双目屏、触控、摄像头、语音交互
四博模组选型资料中,ESPS3-32 系列包括 N4、N8、N8R2、N16R2、N16R8 等子型号,兼容 ESP32-S3-WROOM-1 系列模组;ESPS3-32E 则兼容 ESP32-S3-WROOM-1U 系列。
2.2 系统框图
┌──────────────────────────┐
│ 云端 AI 服务 │
│ ASR / LLM / RAG / TTS │
│ Voice Clone / MCP / Agent │
└─────────────▲────────────┘
│ WebSocket / HTTPS
│
┌──────────────┐ GPIO ┌───────┴────────┐ SPI ┌──────────────┐
│ 四路触控感应 │─────────▶│ │────────▶│ 0.71/1.28双目屏│
└──────────────┘ │ │ └──────────────┘
┌──────────────┐ I2C │ ESP32-S3 │ GPIO ┌──────────────┐
│ 三轴IMU │─────────▶│ │────────▶│ 震动马达 │
└──────────────┘ │ │ └──────────────┘
┌──────────────┐ I2S │ │ I2S ┌──────────────┐
│ 麦克风 │─────────▶│ │────────▶│ Codec/功放/喇叭│
└──────────────┘ └────────────────┘ └──────────────┘
│
Wi-Fi / 4G
3. 工程目录设计
main
├── app_main.c
├── board
│ ├── board_config.h
│ └── pin_config.h
├── core
│ ├── ai_event.c
│ ├── ai_event.h
│ ├── ai_state.c
│ └── ai_state.h
├── eye
│ ├── eye_display.c
│ ├── eye_display.h
│ ├── eye_anim.c
│ └── eye_assets.h
├── touch
│ ├── touch_input.c
│ └── touch_input.h
├── imu
│ ├── imu_sensor.c
│ ├── imu_sensor.h
│ ├── gesture_detect.c
│ └── gesture_detect.h
├── motor
│ ├── vibration.c
│ └── vibration.h
├── audio
│ ├── audio_recorder.c
│ ├── audio_player.c
│ └── tts_stream.c
├── cloud
│ ├── ai_ws_client.c
│ ├── ai_protocol.c
│ └── ai_protocol.h
└── storage
├── ai_config.c
└── ai_config.h
建议软件架构采用事件驱动:
触摸 / 姿态 / 语音 / 云端消息
↓
统一转成 AI 事件
↓
进入 AI 主状态机
↓
驱动双目屏、震动、音频、云端请求
4. ESP-IDF 工程配置
idf.py set-target esp32s3
idf.py menuconfig
建议开启:
CONFIG_SPIRAM=y
CONFIG_FREERTOS_HZ=1000
CONFIG_ESP_WIFI_ENABLED=y
CONFIG_ESP_HTTP_CLIENT_ENABLE_HTTPS=y
CONFIG_MBEDTLS_SSL_IN_CONTENT_LEN=16384
CONFIG_MBEDTLS_SSL_OUT_CONTENT_LEN=4096
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=16384
CONFIG_LWIP_TCP_WND_DEFAULT=32768
CMakeLists.txt 示例:
idf_component_register(
    SRCS
        "app_main.c"
        "core/ai_event.c"
        "core/ai_state.c"
        "eye/eye_display.c"
        "eye/eye_anim.c"
        "touch/touch_input.c"
        "imu/imu_sensor.c"
        "imu/gesture_detect.c"
        "motor/vibration.c"
        # audio_recorder.c and tts_stream.c appear in the project tree
        # but were missing from the build — added so the recorder and
        # TTS streaming paths actually link.
        "audio/audio_recorder.c"
        "audio/audio_player.c"
        "audio/tts_stream.c"
        # ai_ws_client.c is required: app_main() calls ai_ws_client_init().
        "cloud/ai_ws_client.c"
        "cloud/ai_protocol.c"
        "storage/ai_config.c"
    INCLUDE_DIRS
        "."
        "core"
        "eye"
        "touch"
        "imu"
        "motor"
        "audio"
        "cloud"
        "storage"
    REQUIRES
        driver
        esp_timer
        esp_wifi
        nvs_flash
        json
)
5. 全局事件总线
5.1 事件定义
#pragma once
/*
 * ai_event.h — global event bus for the AI speaker.
 *
 * Every input source (touch ISRs, IMU gestures, voice pipeline, cloud
 * messages) is converted into an ai_event_msg_t and pushed onto one
 * FreeRTOS queue consumed by the main state-machine task.
 */
#include "esp_err.h"
#include "freertos/FreeRTOS.h"
#include "freertos/queue.h"

/* Event identifiers carried on the bus. */
typedef enum {
    AI_EVT_NONE = 0,
    AI_EVT_BOOT,               /* system finished booting */
    AI_EVT_WIFI_CONNECTED,
    AI_EVT_WIFI_DISCONNECTED,
    AI_EVT_WAKEUP,             /* voice/gesture wake */
    AI_EVT_LISTENING,
    AI_EVT_THINKING,
    AI_EVT_TALKING,
    AI_EVT_SLEEP,
    AI_EVT_TOUCH_HEAD,         /* the four touch channels */
    AI_EVT_TOUCH_LEFT,
    AI_EVT_TOUCH_RIGHT,
    AI_EVT_TOUCH_BASE,
    AI_EVT_GESTURE_SHAKE,      /* IMU-derived gestures */
    AI_EVT_GESTURE_LEFT,
    AI_EVT_GESTURE_RIGHT,
    AI_EVT_GESTURE_PICKUP,
    AI_EVT_GESTURE_FLIP,
    AI_EVT_CLOUD_ACTION,       /* raw JSON action pushed from the cloud */
    AI_EVT_ERROR,
} ai_event_id_t;

/* One message on the bus. Fixed-size inline buffers keep queue items
 * POD (copied by value); oversized payloads are truncated by the
 * posting helpers via snprintf. */
typedef struct {
    ai_event_id_t id;
    char text[256];        /* optional descriptive text (e.g. ASR result) */
    char action_json[768]; /* raw JSON payload for AI_EVT_CLOUD_ACTION */
} ai_event_msg_t;

/* Create the queue; call once at startup before any producer runs. */
esp_err_t ai_event_init(void);
/* Post an event with optional text; task context only. */
esp_err_t ai_event_post(ai_event_id_t id, const char *text);
/* Post a cloud action payload as AI_EVT_CLOUD_ACTION. */
esp_err_t ai_event_post_action(const char *json);
/* Raw queue handle, e.g. for xQueueSendFromISR from ISRs. */
QueueHandle_t ai_event_get_queue(void);
5.2 队列实现
#include "ai_event.h"
#include <string.h>
#include <stdio.h>

/* Single process-wide event queue shared by all producers (task and
 * ISR context) and the main state-machine consumer. */
static QueueHandle_t s_ai_event_queue = NULL;

/**
 * Create the global AI event queue.
 *
 * Must run once before any ai_event_post*() call and before touch
 * ISRs that enqueue events are installed.
 *
 * @return ESP_OK on success (or if already initialized),
 *         ESP_ERR_NO_MEM if queue allocation fails.
 */
esp_err_t ai_event_init(void)
{
    /* Guard against double initialization: re-creating the queue
     * would leak the old one and strand producers holding its handle. */
    if (s_ai_event_queue) {
        return ESP_OK;
    }
    s_ai_event_queue = xQueueCreate(16, sizeof(ai_event_msg_t));
    return s_ai_event_queue ? ESP_OK : ESP_ERR_NO_MEM;
}

/**
 * @return the global event queue handle, or NULL before ai_event_init().
 */
QueueHandle_t ai_event_get_queue(void)
{
    return s_ai_event_queue;
}
/**
 * Post an event (optionally carrying descriptive text) to the AI queue.
 *
 * @param id   event identifier
 * @param text optional NUL-terminated text; truncated to fit msg.text
 * @return ESP_OK on success, ESP_ERR_INVALID_STATE before init,
 *         ESP_FAIL when the queue stays full for 50 ms.
 */
esp_err_t ai_event_post(ai_event_id_t id, const char *text)
{
    if (s_ai_event_queue == NULL) {
        return ESP_ERR_INVALID_STATE;
    }
    ai_event_msg_t msg = { .id = id };
    if (text != NULL) {
        /* snprintf guarantees NUL termination even on truncation. */
        snprintf(msg.text, sizeof(msg.text), "%s", text);
    }
    BaseType_t sent = xQueueSend(s_ai_event_queue, &msg, pdMS_TO_TICKS(50));
    return (sent == pdTRUE) ? ESP_OK : ESP_FAIL;
}
esp_err_t ai_event_post_action(const char *json)
{
if (!s_ai_event_queue || !json) {
return ESP_ERR_INVALID_ARG;
}
ai_event_msg_t msg = {
.id = AI_EVT_CLOUD_ACTION,
};
snprintf(msg.action_json, sizeof(msg.action_json), "%s", json);
return xQueueSend(s_ai_event_queue, &msg, pdMS_TO_TICKS(50)) == pdTRUE
? ESP_OK
: ESP_FAIL;
}
6. 四路触控输入
四路触控建议定义为:
Touch 1:头部触摸,唤醒 / 安抚 / 暂停
Touch 2:左侧触摸,上一条 / 左切换
Touch 3:右侧触摸,下一条 / 右切换
Touch 4:底部触摸,模式切换 / 配网
6.1 GPIO 定义
/* Touch-sense GPIO assignments (ESP32-S3).
 * NOTE(review): these are plain GPIO interrupt inputs, not the
 * ESP32-S3 touch-pad peripheral — confirm against the schematic. */
#define TOUCH_HEAD_GPIO 3   /* head: wake / soothe / pause */
#define TOUCH_LEFT_GPIO 4   /* left side: previous / switch left */
#define TOUCH_RIGHT_GPIO 5  /* right side: next / switch right */
#define TOUCH_BASE_GPIO 6   /* base: mode switch / provisioning */
/* Minimum interval between accepted touches on one channel (ms). */
#define TOUCH_DEBOUNCE_MS 180
6.2 触控驱动
#include "driver/gpio.h"
#include "esp_timer.h"
#include "ai_event.h"

/* Timestamp (ms) of the last accepted touch per channel, for debounce. */
static int64_t s_last_touch_ms[4];

/* Translate a touch GPIO number to its debounce-slot index (0..3),
 * or -1 for an unknown GPIO. */
static int touch_gpio_to_index(int gpio)
{
    if (gpio == TOUCH_HEAD_GPIO) {
        return 0;
    }
    if (gpio == TOUCH_LEFT_GPIO) {
        return 1;
    }
    if (gpio == TOUCH_RIGHT_GPIO) {
        return 2;
    }
    if (gpio == TOUCH_BASE_GPIO) {
        return 3;
    }
    return -1;
}
/* Translate a touch GPIO number to the AI event it raises,
 * or AI_EVT_NONE for an unknown GPIO. */
static ai_event_id_t touch_gpio_to_event(int gpio)
{
    if (gpio == TOUCH_HEAD_GPIO) {
        return AI_EVT_TOUCH_HEAD;
    }
    if (gpio == TOUCH_LEFT_GPIO) {
        return AI_EVT_TOUCH_LEFT;
    }
    if (gpio == TOUCH_RIGHT_GPIO) {
        return AI_EVT_TOUCH_RIGHT;
    }
    if (gpio == TOUCH_BASE_GPIO) {
        return AI_EVT_TOUCH_BASE;
    }
    return AI_EVT_NONE;
}
/**
 * Shared GPIO ISR for all four touch channels.
 *
 * Debounces per channel using esp_timer (IRAM-safe on ESP-IDF) and
 * forwards the mapped event to the AI queue from ISR context.
 *
 * NOTE(review): touch_gpio_to_index()/touch_gpio_to_event() are not
 * IRAM_ATTR; if flash cache can be disabled while this ISR fires
 * (e.g. during SPI flash writes) they must be moved to IRAM — confirm.
 */
static void IRAM_ATTR touch_isr_handler(void *arg)
{
    /* The GPIO number travels through the void* cookie; round-trip
     * via intptr_t instead of truncating a pointer-sized value. */
    int gpio = (int)(intptr_t)arg;
    int index = touch_gpio_to_index(gpio);
    if (index < 0) {
        return;
    }
    int64_t now_ms = esp_timer_get_time() / 1000;
    if (now_ms - s_last_touch_ms[index] < TOUCH_DEBOUNCE_MS) {
        return; /* still inside the debounce window: ignore bounce */
    }
    s_last_touch_ms[index] = now_ms;
    QueueHandle_t queue = ai_event_get_queue();
    if (!queue) {
        return; /* event bus not initialized yet */
    }
    ai_event_msg_t msg = {
        .id = touch_gpio_to_event(gpio),
    };
    BaseType_t high_task_wakeup = pdFALSE;
    xQueueSendFromISR(queue, &msg, &high_task_wakeup);
    if (high_task_wakeup) {
        portYIELD_FROM_ISR();
    }
}
void touch_input_init(void)
{
uint64_t mask =
(1ULL << TOUCH_HEAD_GPIO) |
(1ULL << TOUCH_LEFT_GPIO) |
(1ULL << TOUCH_RIGHT_GPIO) |
(1ULL << TOUCH_BASE_GPIO);
gpio_config_t cfg = {
.pin_bit_mask = mask,
.mode = GPIO_MODE_INPUT,
.pull_up_en = GPIO_PULLUP_ENABLE,
.pull_down_en = GPIO_PULLDOWN_DISABLE,
.intr_type = GPIO_INTR_NEGEDGE,
};
gpio_config(&cfg);
gpio_install_isr_service(0);
gpio_isr_handler_add(TOUCH_HEAD_GPIO, touch_isr_handler, (void *)TOUCH_HEAD_GPIO);
gpio_isr_handler_add(TOUCH_LEFT_GPIO, touch_isr_handler, (void *)TOUCH_LEFT_GPIO);
gpio_isr_handler_add(TOUCH_RIGHT_GPIO, touch_isr_handler, (void *)TOUCH_RIGHT_GPIO);
gpio_isr_handler_add(TOUCH_BASE_GPIO, touch_isr_handler, (void *)TOUCH_BASE_GPIO);
}
7. 震动马达驱动
震动马达主要用于触摸确认、唤醒成功、配网提示、异常提醒。
#pragma once
/*
 * vibration.h — haptic feedback patterns.
 * Used for touch confirmation, wake success, provisioning hints and
 * error signalling.
 */
#include <stdint.h>
#include <stdbool.h>

/* Predefined vibration patterns. */
typedef enum {
    VIB_MODE_SHORT = 0, /* one short burst */
    VIB_MODE_DOUBLE,    /* two short bursts */
    VIB_MODE_LONG,      /* one long burst */
    VIB_MODE_ERROR,     /* triple burst: error indication */
} vib_mode_t;

/* Configure the motor GPIO; call once at startup. */
void vibration_init(void);
/* Play a pattern. Blocking: holds the calling task for the duration. */
void vibration_play(vib_mode_t mode);
#include "vibration.h"
#include "driver/gpio.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"

/* GPIO driving the vibration motor (through its driver stage). */
#define VIBRATION_GPIO 40

/* Switch the motor on or off. */
static void vibration_set(bool on)
{
    /* bool converts to the 0/1 level gpio_set_level expects. */
    gpio_set_level(VIBRATION_GPIO, on);
}

/* One blocking vibration burst lasting @p ms milliseconds. */
static void vibration_pulse(uint32_t ms)
{
    vibration_set(true);
    vTaskDelay(pdMS_TO_TICKS(ms));
    vibration_set(false);
}
/**
 * Configure the motor GPIO as a plain push-pull output (no pulls,
 * no interrupt) and make sure the motor starts switched off.
 */
void vibration_init(void)
{
    gpio_config_t io = {
        .pin_bit_mask = 1ULL << VIBRATION_GPIO,
        .mode = GPIO_MODE_OUTPUT,
        .pull_up_en = GPIO_PULLUP_DISABLE,
        .pull_down_en = GPIO_PULLDOWN_DISABLE,
        .intr_type = GPIO_INTR_DISABLE,
    };
    gpio_config(&io);
    vibration_set(false); /* defined idle state at boot */
}
/**
 * Play one of the predefined vibration patterns.
 *
 * Blocking: runs on the caller's task via vTaskDelay(), so total
 * latency equals the pattern duration. Unknown modes do nothing.
 */
void vibration_play(vib_mode_t mode)
{
    if (mode == VIB_MODE_SHORT) {
        vibration_pulse(120);
    } else if (mode == VIB_MODE_DOUBLE) {
        vibration_pulse(80);
        vTaskDelay(pdMS_TO_TICKS(100));
        vibration_pulse(80);
    } else if (mode == VIB_MODE_LONG) {
        vibration_pulse(500);
    } else if (mode == VIB_MODE_ERROR) {
        /* three rapid bursts */
        for (int i = 0; i < 3; i++) {
            vibration_pulse(60);
            vTaskDelay(pdMS_TO_TICKS(80));
        }
    }
}
8. 三轴姿态识别
三轴传感器可以支持:
拿起:自动唤醒
摇一摇:切换内容 / 换故事
左倾:上一条
右倾:下一条
翻转:进入休眠
轻敲:触发陪伴语音
8.1 IMU 数据结构
#pragma once
/*
 * imu_sensor.h — minimal IMU access used by gesture detection.
 */
#include "esp_err.h"

/* One IMU sample. Units: accelerometer presumably in g, gyroscope in
 * dps — confirm against the sensor driver's configured full scale. */
typedef struct {
    float ax; /* acceleration X */
    float ay; /* acceleration Y */
    float az; /* acceleration Z */
    float gx; /* angular rate X */
    float gy; /* angular rate Y */
    float gz; /* angular rate Z */
} imu_data_t;

/* Initialize the sensor (I2C per the system diagram). */
esp_err_t imu_sensor_init(void);
/* Read one sample into *out. */
esp_err_t imu_sensor_read(imu_data_t *out);
8.2 姿态识别算法
#include <math.h>
#include "imu_sensor.h"

/* Gesture classes produced by gesture_detect(). */
typedef enum {
    GESTURE_NONE = 0,
    GESTURE_SHAKE,
    GESTURE_TILT_LEFT,
    GESTURE_TILT_RIGHT,
    GESTURE_PICKUP,
    GESTURE_FLIP,
} gesture_type_t;

/**
 * Classify a single accelerometer sample into a coarse gesture.
 *
 * Checks run in priority order: shake > tilt > flip > pickup.
 * Thresholds appear to be in g units — confirm against the IMU
 * driver's scaling. Gyro fields are currently unused.
 */
gesture_type_t gesture_detect(const imu_data_t *imu)
{
    if (imu == NULL) {
        return GESTURE_NONE;
    }
    const float mag_x = fabsf(imu->ax);
    const float mag_y = fabsf(imu->ay);
    const float mag_z = fabsf(imu->az);

    /* Large acceleration on any axis: the unit is being shaken. */
    if (mag_x > 1.8f || mag_y > 1.8f || mag_z > 2.2f) {
        return GESTURE_SHAKE;
    }
    /* Strong lateral gravity component: left/right tilt. */
    if (imu->ax > 0.75f) {
        return GESTURE_TILT_RIGHT;
    }
    if (imu->ax < -0.75f) {
        return GESTURE_TILT_LEFT;
    }
    /* Gravity pointing through the top face: device upside-down. */
    if (imu->az < -0.65f) {
        return GESTURE_FLIP;
    }
    /* Weak vertical component plus lateral movement: being picked up. */
    if (mag_z < 0.4f && (mag_x > 0.3f || mag_y > 0.3f)) {
        return GESTURE_PICKUP;
    }
    return GESTURE_NONE;
}
8.3 IMU 任务
/**
 * Periodic IMU polling task (~20 Hz).
 *
 * Debounce strategy: a gesture must be observed on two consecutive
 * samples of the SAME type before the event is posted. The original
 * code incremented one shared counter for ANY non-repeated gesture,
 * so two different transient readings (e.g. TILT_LEFT then SHAKE)
 * could fire the second gesture after only one sample of it.
 */
static void imu_task(void *arg)
{
    imu_data_t data;
    gesture_type_t last_gesture = GESTURE_NONE; /* last gesture actually posted */
    gesture_type_t candidate = GESTURE_NONE;    /* gesture being confirmed */
    int stable_count = 0;                       /* consecutive samples of candidate */
    while (1) {
        if (imu_sensor_read(&data) == ESP_OK) {
            gesture_type_t g = gesture_detect(&data);
            if (g == GESTURE_NONE) {
                /* Device back at rest: re-arm detection. */
                last_gesture = GESTURE_NONE;
                candidate = GESTURE_NONE;
                stable_count = 0;
            } else if (g != last_gesture) {
                if (g != candidate) {
                    /* New candidate: restart the confirmation count. */
                    candidate = g;
                    stable_count = 1;
                } else {
                    stable_count++;
                }
                if (stable_count >= 2) {
                    switch (g) {
                    case GESTURE_SHAKE:
                        ai_event_post(AI_EVT_GESTURE_SHAKE, NULL);
                        break;
                    case GESTURE_TILT_LEFT:
                        ai_event_post(AI_EVT_GESTURE_LEFT, NULL);
                        break;
                    case GESTURE_TILT_RIGHT:
                        ai_event_post(AI_EVT_GESTURE_RIGHT, NULL);
                        break;
                    case GESTURE_PICKUP:
                        ai_event_post(AI_EVT_GESTURE_PICKUP, NULL);
                        break;
                    case GESTURE_FLIP:
                        ai_event_post(AI_EVT_GESTURE_FLIP, NULL);
                        break;
                    default:
                        break;
                    }
                    /* Suppress repeats until the gesture releases. */
                    last_gesture = g;
                    candidate = GESTURE_NONE;
                    stable_count = 0;
                }
            }
        }
        vTaskDelay(pdMS_TO_TICKS(50));
    }
}
9. 双目屏状态机
四博资料中提到,1.28 寸屏幕分辨率为 240×240,0.71 寸屏幕分辨率为 160×160;双目样式也可通过替换 defaultEye_1.28.h 或 defaultEye_0.71.h 素材数组进行定制。
9.1 状态定义
/* Display states for the binocular "eye" screens. Values are
 * contiguous from 0 so they can index animation tables directly. */
typedef enum {
    EYE_STATE_IDLE = 0,   /* standby / idle blink */
    EYE_STATE_WAKEUP,     /* just woken */
    EYE_STATE_LISTENING,  /* capturing user speech */
    EYE_STATE_THINKING,   /* waiting for the cloud reply */
    EYE_STATE_TALKING,    /* TTS playback in progress */
    EYE_STATE_SMILE,      /* positive reaction (e.g. head touch) */
    EYE_STATE_SURPRISE,   /* reaction to shake gesture */
    EYE_STATE_SLEEP,      /* low-power / flipped-over state */
    EYE_STATE_ERROR,      /* fault indication */
} eye_state_t;
9.2 状态切换
/**
 * Switch the binocular display to the animation for @p state.
 * Unknown or out-of-range states fall back to the idle animation,
 * matching the original switch's default branch.
 */
void eye_set_state(eye_state_t state)
{
    /* Dispatch table indexed by eye_state_t (contiguous from 0). */
    static void (*const show_fn[])(void) = {
        [EYE_STATE_IDLE]      = eye_show_idle,
        [EYE_STATE_WAKEUP]    = eye_show_wakeup,
        [EYE_STATE_LISTENING] = eye_show_listening,
        [EYE_STATE_THINKING]  = eye_show_thinking,
        [EYE_STATE_TALKING]   = eye_show_talking,
        [EYE_STATE_SMILE]     = eye_show_smile,
        [EYE_STATE_SURPRISE]  = eye_show_surprise,
        [EYE_STATE_SLEEP]     = eye_show_sleep,
        [EYE_STATE_ERROR]     = eye_show_error,
    };
    if ((unsigned)state < sizeof(show_fn) / sizeof(show_fn[0]) &&
        show_fn[state] != 0) {
        show_fn[state]();
    } else {
        eye_show_idle();
    }
}
9.3 TTS 音频驱动眼睛动画
/**
 * Map an instantaneous audio level to one of four talking frames.
 * Thresholds: level < 10 → frame 0, < 30 → 1, < 60 → 2, else → 3.
 */
void eye_update_by_audio_level(int level)
{
    static const int thresholds[] = { 10, 30, 60 };
    int frame = 0;
    while (frame < 3 && level >= thresholds[frame]) {
        frame++;
    }
    eye_show_talking_frame(frame);
}
/**
 * Playback callback: compute the mean absolute PCM amplitude of one
 * buffer and drive the talking-eye animation with it.
 *
 * @param pcm     16-bit signed samples (assumed mono — confirm the
 *                channel layout against the audio pipeline)
 * @param samples number of samples in @p pcm
 *
 * Fixes: the original divided by @p samples unconditionally, which is
 * undefined behavior for an empty buffer, and relied on abs() without
 * including <stdlib.h>.
 */
void audio_play_callback(const int16_t *pcm, size_t samples)
{
    if (pcm == NULL || samples == 0) {
        return; /* nothing to measure; avoids division by zero */
    }
    int64_t sum = 0;
    for (size_t i = 0; i < samples; i++) {
        /* Widen before negating so INT16_MIN cannot overflow. */
        int32_t s = pcm[i];
        sum += (s < 0) ? -s : s;
    }
    int level = (int)(sum / (int64_t)samples / 256);
    eye_update_by_audio_level(level);
}
10. 小程序配置、语音克隆与知识库
10.1 配置结构
/* Per-device user configuration, persisted as one NVS blob and
 * written via the companion mini-program. */
typedef struct {
    char device_id[32]; /* unique device identifier */
    char user_id[32];   /* bound user account */
    char kb_id[64];     /* dedicated knowledge-base id */
    char voice_id[64];  /* cloned-voice id used for TTS */
    char wake_word[32]; /* custom wake word */
    uint8_t child_mode; /* nonzero = child (restricted) mode */
    uint8_t volume;     /* playback volume; range not defined here —
                           presumably 0-100, confirm against the player */
} ai_user_config_t;
10.2 NVS 保存配置
#include "nvs_flash.h"
#include "nvs.h"

/**
 * Persist the user configuration as a single blob in NVS
 * (namespace "ai_config", key "user_cfg").
 *
 * @return ESP_OK on success, ESP_ERR_INVALID_ARG for NULL cfg,
 *         otherwise the NVS error from open/set/commit.
 */
esp_err_t ai_config_save(const ai_user_config_t *cfg)
{
    if (cfg == NULL) {
        return ESP_ERR_INVALID_ARG;
    }
    nvs_handle_t nvs;
    esp_err_t err = nvs_open("ai_config", NVS_READWRITE, &nvs);
    if (err != ESP_OK) {
        return err;
    }
    err = nvs_set_blob(nvs, "user_cfg", cfg, sizeof(*cfg));
    if (err == ESP_OK) {
        err = nvs_commit(nvs); /* flush to flash */
    }
    nvs_close(nvs);
    return err;
}
10.3 读取配置
/**
 * Load the user configuration blob from NVS.
 *
 * The output struct is zeroed first so callers see deterministic
 * contents even on failure. A stored blob whose size differs from the
 * current struct layout (e.g. a firmware update changed
 * ai_user_config_t) is rejected instead of being silently accepted as
 * a partially-filled config — nvs_get_blob returns ESP_OK with a
 * smaller `size` when the stored blob is shorter than the buffer.
 *
 * @return ESP_OK on success, ESP_ERR_INVALID_ARG for NULL cfg,
 *         ESP_ERR_INVALID_SIZE on a layout mismatch, or the NVS error.
 */
esp_err_t ai_config_load(ai_user_config_t *cfg)
{
    if (!cfg) {
        return ESP_ERR_INVALID_ARG;
    }
    memset(cfg, 0, sizeof(ai_user_config_t));
    nvs_handle_t handle;
    esp_err_t ret = nvs_open("ai_config", NVS_READONLY, &handle);
    if (ret != ESP_OK) {
        return ret;
    }
    size_t size = sizeof(ai_user_config_t);
    ret = nvs_get_blob(handle, "user_cfg", cfg, &size);
    nvs_close(handle);
    if (ret == ESP_OK && size != sizeof(ai_user_config_t)) {
        memset(cfg, 0, sizeof(ai_user_config_t)); /* discard partial data */
        ret = ESP_ERR_INVALID_SIZE;
    }
    return ret;
}
11. 云端协议设计
11.1 设备事件上报
{
"type": "device_event",
"device_id": "sibo_ai_eye_001",
"event": "touch_head",
"payload": {
"touch_id": 1,
"gesture": "none",
"battery": 88
}
}
11.2 聊天请求
{
"type": "chat_request",
"device_id": "sibo_ai_eye_001",
"user_id": "user_001",
"kb_id": "child_learning_kb",
"voice_id": "clone_mom_001",
"text": "给我讲一个关于月亮的故事"
}
11.3 云端动作返回
{
"type": "ai_action",
"answer": "从前有一只小兔子,每天晚上都会看月亮......",
"tts_url": "https://server.com/tts/clone_mom_001_001.opus",
"action": {
"eye": "talking",
"vibration": "none",
"display": "story"
}
}
12. 构造聊天请求
#include "cJSON.h"

/**
 * Build a chat_request JSON message and send it over the websocket.
 * Falls back to default identifiers when no stored config exists.
 */
void ai_send_chat_request(const char *text)
{
    if (text == NULL) {
        return;
    }
    ai_user_config_t cfg;
    if (ai_config_load(&cfg) != ESP_OK) {
        /* No persisted configuration yet: use defaults. */
        snprintf(cfg.device_id, sizeof(cfg.device_id), "sibo_ai_eye_default");
        snprintf(cfg.user_id, sizeof(cfg.user_id), "default_user");
        snprintf(cfg.kb_id, sizeof(cfg.kb_id), "default_kb");
        snprintf(cfg.voice_id, sizeof(cfg.voice_id), "default_voice");
    }
    cJSON *req = cJSON_CreateObject();
    cJSON_AddStringToObject(req, "type", "chat_request");
    cJSON_AddStringToObject(req, "device_id", cfg.device_id);
    cJSON_AddStringToObject(req, "user_id", cfg.user_id);
    cJSON_AddStringToObject(req, "kb_id", cfg.kb_id);
    cJSON_AddStringToObject(req, "voice_id", cfg.voice_id);
    cJSON_AddStringToObject(req, "text", text);
    char *payload = cJSON_PrintUnformatted(req);
    if (payload != NULL) {
        ai_ws_send(payload);
        free(payload); /* cJSON_PrintUnformatted allocates; caller frees */
    }
    cJSON_Delete(req);
}
13. 解析云端动作
#include "cJSON.h"
#include "eye_display.h"
#include "vibration.h"

/* Map a cloud-provided eye keyword to a local display state.
 * Unknown keywords fall back to idle (same as the original chain). */
static void apply_eye_action(const char *eye)
{
    if (eye == NULL) {
        return;
    }
    static const struct {
        const char *name;
        eye_state_t state;
    } eye_map[] = {
        { "idle",     EYE_STATE_IDLE },
        { "smile",    EYE_STATE_SMILE },
        { "thinking", EYE_STATE_THINKING },
        { "talking",  EYE_STATE_TALKING },
        { "sleep",    EYE_STATE_SLEEP },
    };
    for (int i = 0; i < (int)(sizeof(eye_map) / sizeof(eye_map[0])); i++) {
        if (strcmp(eye, eye_map[i].name) == 0) {
            eye_set_state(eye_map[i].state);
            return;
        }
    }
    eye_set_state(EYE_STATE_IDLE);
}
static void apply_vibration_action(const char *vib)
{
if (!vib) {
return;
}
if (strcmp(vib, "short") == 0) {
vibration_play(VIB_MODE_SHORT);
} else if (strcmp(vib, "double") == 0) {
vibration_play(VIB_MODE_DOUBLE);
} else if (strcmp(vib, "long") == 0) {
vibration_play(VIB_MODE_LONG);
}
}
void ai_apply_cloud_action(const char *json)
{
cJSON *root = cJSON_Parse(json);
if (!root) {
return;
}
cJSON *action = cJSON_GetObjectItem(root, "action");
if (!cJSON_IsObject(action)) {
cJSON_Delete(root);
return;
}
cJSON *eye = cJSON_GetObjectItem(action, "eye");
cJSON *vib = cJSON_GetObjectItem(action, "vibration");
if (cJSON_IsString(eye)) {
apply_eye_action(eye->valuestring);
}
if (cJSON_IsString(vib)) {
apply_vibration_action(vib->valuestring);
}
cJSON_Delete(root);
}
14. 主状态机
/**
 * Main AI state machine: consumes the global event queue and drives
 * the eyes, vibration motor and cloud reporting for each event.
 *
 * Per-case side effects run in a fixed order (eye state, then haptic
 * feedback, then cloud report); blocks on the queue so it uses no CPU
 * while idle. Runs forever.
 */
static void ai_main_task(void *arg)
{
    ai_event_msg_t msg;
    while (1) {
        if (xQueueReceive(ai_event_get_queue(), &msg, portMAX_DELAY) == pdTRUE) {
            switch (msg.id) {
            case AI_EVT_BOOT:
                eye_set_state(EYE_STATE_IDLE);
                break;
            case AI_EVT_WAKEUP:
                /* visual + haptic confirmation of wake */
                eye_set_state(EYE_STATE_WAKEUP);
                vibration_play(VIB_MODE_SHORT);
                break;
            case AI_EVT_LISTENING:
                eye_set_state(EYE_STATE_LISTENING);
                break;
            case AI_EVT_THINKING:
                eye_set_state(EYE_STATE_THINKING);
                break;
            case AI_EVT_TALKING:
                eye_set_state(EYE_STATE_TALKING);
                break;
            case AI_EVT_TOUCH_HEAD:
                /* head touch: smile + short buzz, report to cloud */
                eye_set_state(EYE_STATE_SMILE);
                vibration_play(VIB_MODE_SHORT);
                ai_send_device_event("touch_head");
                break;
            case AI_EVT_TOUCH_LEFT:
                eye_set_state(EYE_STATE_SMILE);
                ai_send_device_event("touch_left");
                break;
            case AI_EVT_TOUCH_RIGHT:
                eye_set_state(EYE_STATE_SMILE);
                ai_send_device_event("touch_right");
                break;
            case AI_EVT_TOUCH_BASE:
                /* base touch = mode switch: long buzz as confirmation */
                eye_set_state(EYE_STATE_THINKING);
                vibration_play(VIB_MODE_LONG);
                ai_send_device_event("touch_mode");
                break;
            case AI_EVT_GESTURE_SHAKE:
                eye_set_state(EYE_STATE_SURPRISE);
                vibration_play(VIB_MODE_DOUBLE);
                ai_send_device_event("gesture_shake");
                break;
            case AI_EVT_GESTURE_LEFT:
                /* tilt gestures only report; the cloud decides the action */
                ai_send_device_event("gesture_left");
                break;
            case AI_EVT_GESTURE_RIGHT:
                ai_send_device_event("gesture_right");
                break;
            case AI_EVT_GESTURE_PICKUP:
                /* pickup doubles as a wake trigger */
                eye_set_state(EYE_STATE_WAKEUP);
                vibration_play(VIB_MODE_SHORT);
                ai_send_device_event("gesture_pickup");
                break;
            case AI_EVT_GESTURE_FLIP:
                /* flipped over: go to sleep display */
                eye_set_state(EYE_STATE_SLEEP);
                ai_send_device_event("gesture_flip");
                break;
            case AI_EVT_CLOUD_ACTION:
                /* cloud-pushed JSON drives eyes/vibration directly */
                ai_apply_cloud_action(msg.action_json);
                break;
            case AI_EVT_ERROR:
                eye_set_state(EYE_STATE_ERROR);
                vibration_play(VIB_MODE_ERROR);
                break;
            default:
                break;
            }
        }
    }
}
15. app_main 初始化
/**
 * Application entry point.
 *
 * Initialization order matters: NVS must precede Wi-Fi and config
 * access, and the event queue must exist before any producer
 * (touch ISRs, IMU task, cloud client) is started. Tasks launch last.
 */
void app_main(void)
{
    ESP_ERROR_CHECK(nvs_flash_init());  /* NVS backs Wi-Fi creds + user config */
    ESP_ERROR_CHECK(ai_event_init());   /* event bus before any producer */
    wifi_manager_init();
    blufi_config_init();                /* BLE-assisted provisioning */
    eye_display_init();
    touch_input_init();                 /* installs GPIO ISRs that post events */
    vibration_init();
    imu_sensor_init();
    audio_player_init();
    ai_ws_client_init();                /* websocket link to the cloud AI */
    xTaskCreate(ai_main_task, "ai_main_task", 8192, NULL, 5, NULL);
    xTaskCreate(imu_task, "imu_task", 4096, NULL, 4, NULL);
    ai_event_post(AI_EVT_BOOT, NULL);   /* kick the state machine */
}
16. 云端 FastAPI 示例
from fastapi import FastAPI
from pydantic import BaseModel
app = FastAPI()
class ChatRequest(BaseModel):
    """Incoming chat request from a device.

    Mirrors the on-device "chat_request" JSON message: identifies the
    device and user, and selects the knowledge base and cloned voice.
    """
    type: str                        # message type, expected "chat_request"
    device_id: str                   # unique device identifier
    user_id: str = ""                # bound user; empty when unbound
    kb_id: str = "default_kb"        # knowledge base used for RAG retrieval
    voice_id: str = "default_voice"  # cloned-voice id for TTS synthesis
    text: str                        # user utterance (ASR result)
def rag_search(kb_id: str, query: str) -> str:
    """Placeholder RAG retrieval: return a stub snippet naming *kb_id*.

    *query* is accepted but unused in this stub; replace with a real
    vector-store lookup in production.
    """
    snippet = "从知识库 {} 检索到的相关内容".format(kb_id)
    return snippet
def call_llm(context: str, query: str) -> str:
    """Placeholder LLM call.

    Builds the prompt a real deployment would send to the model, but
    returns a canned answer; the prompt is intentionally unused in
    this stub.
    """
    prompt = f"""
你是四博AI双目智能音箱中的陪伴助手。
请结合知识库内容回答用户问题。
知识库内容:
{context}
用户问题:
{query}
"""
    return "这是结合专属知识库生成的回答内容。"
def call_tts(text: str, voice_id: str) -> str:
    """Placeholder TTS: return the URL where the synthesized opus clip
    for *voice_id* would be published. *text* is unused in this stub."""
    base = "https://server.com/tts"
    return "{}/{}/audio.opus".format(base, voice_id)
@app.post("/api/v1/chat")
async def chat(req: ChatRequest):
    """One chat turn: RAG retrieval → LLM answer → TTS synthesis.

    Returns an "ai_action" message telling the device what to play
    (tts_url) and how to act (eye animation / vibration pattern).
    """
    kb_context = rag_search(req.kb_id, req.text)
    reply = call_llm(kb_context, req.text)
    audio_url = call_tts(reply, req.voice_id)
    device_action = {"eye": "talking", "vibration": "none"}
    return {
        "type": "ai_action",
        "answer": reply,
        "tts_url": audio_url,
        "action": device_action,
    }
17. 产品化建议
17.1 触控调优
1. 四路触控必须做防抖。
2. 长按、短按、双击要分开识别。
3. 触摸后 100ms 内给震动反馈。
4. 外壳厚度会影响触摸灵敏度。
5. 量产时需要针对不同外壳做阈值校准。
17.2 姿态调优
1. IMU 建议 20Hz~50Hz 采样。
2. 摇晃识别需要时间窗口。
3. 倾斜识别需要滤波。
4. 翻转休眠建议延迟确认。
5. 拿起唤醒需要避免运输过程误触发。
17.3 双目屏调优
1. 待机动画建议低帧率运行,降低功耗。
2. TTS 播放时可根据音频能量驱动眼睛动画。
3. 配网、联网、OTA、错误状态都应有专属表情。
4. 0.71 和 1.28 寸屏幕需要分别适配素材比例。
5. 眼睛素材建议放 Flash,运行缓冲建议放 PSRAM。
18. 总结
四博 AI 双目智能音箱方案的核心价值,是把传统 AI 音箱升级成一个完整的多模态交互平台。
它具备:
语音输入
四路触控
三轴姿态感应
双目屏表情
震动反馈
小程序配置
语音克隆
专属知识库
大模型对话
一句话概括:
四博 AI 双目方案,是一套基于 ESP32-S3 的高性价比 AI 智能音箱平台,通过四路触控、震动马达、三轴姿态感应、0.71/1.28