如何写一个WebRTC AEC音频应用处理模块

编写源代码：

复制代码

// Standard I/O (printf)
#include <stdio.h>
// Memory helpers (memcpy)
#include <string.h>
// File control operations (open, fcntl)
#include <fcntl.h>
// UNIX standard functions (read, write, close)
#include <unistd.h>
// File status (stat, fstat)
#include <sys/stat.h>
// Basic system data types
#include <sys/types.h>
// Non-throwing operator new (std::nothrow)
#include <new>
// WebRTC audio processing main header
#include "audio_processing.h"
// WebRTC common type definitions
#include <module_common_types.h>
// Local WebRTC AEC wrapper header
#include "webrtc_aec.h"

// Feature switches: each macro guards the configuration of one processing
// component inside tang_apm_init() (and, for WEBRTC_VAD, the muting step in
// tang_apm_set_near_frame()).
#define WEBRTC_AEC  // echo cancellation
#define WEBRTC_HP   // high-pass filter
#define WEBRTC_LE   // level estimator
#define WEBRTC_NS   // noise suppression
#define WEBRTC_VAD  // voice activity detection

/* Audio format accepted by this wrapper:
 * sample rate: 8 kHz or 16 kHz
 * channels:    1 (mono)
 * bit depth:   16 bit
 * frame time:  10 ms
 * */

// Audio parameters recorded by tang_apm_init() and read by the frame functions.
static unsigned int webrtc_sample_rate = 0;      // sample rate in Hz
static unsigned int webrtc_sample_bits = 0;      // bits per sample
static unsigned int webrtc_sample_time_ms = 0;   // frame duration in milliseconds
static unsigned int webrtc_sample_channel = 0;   // channel count

using namespace webrtc;  // AudioProcessing, AudioFrame, ... live in this namespace

// Frame-related parameters computed during initialisation.
typedef struct {
    unsigned int frame_len;  // frame length in bytes
    int time;                // set to 1 by tang_apm_init(); not read elsewhere in this file
} AEC_DEV;
AEC_DEV webrtc_dev;  // single global instance

bool webrtc_enable = false;  // true between a successful tang_apm_init() and tang_apm_destroy()

// WebRTC objects owned by this module; created in tang_apm_init().
AudioProcessing *webrtc_apm = NULL;        // audio processing instance
AudioFrame *webrtc_far_frame = NULL;       // far-end frame (loudspeaker data)
AudioFrame *webrtc_near_frame = NULL;      // near-end frame (microphone data)

/**
 * 初始化WebRTC音频处理模块
 * 功能: 配置音频参数，创建处理对象，启用各个处理模块
 * 性能: 一次性初始化，运行时开销小，但创建对象较耗时
 * 设计模式: 工厂模式 + 单例模式思想
 * 
 * @param rate 采样率指针(支持8000/16000Hz)
 * @param channel 声道数指针(强制为1)
 * @param bits 位深度指针(强制为16bit)
 * @param time 帧时长(毫秒)
 * @return 成功返回0，失败返回-1
 */
int tang_apm_init(unsigned int *rate, unsigned int *channel, unsigned int *bits, unsigned int time)
{
    int32_t sample_rate_hz = 0;                    // 采样率(Hz)
    int num_render_channels = 0;                   // 渲染声道数
    int num_capture_input_channels = 0;            // 采集输入声道数
    int num_capture_output_channels = 0;           // 采集输出声道数

    printf("webrtc aec ingenic_apm_init !\n");
    
    // 单例检查：防止重复初始化
    if (webrtc_enable == true) {
        printf("webrtc aec has initted!\n");
        return 0;
    }

    // 参数验证和强制转换 - 策略模式：参数适配
    if ((*rate != 8000) && (*rate != 16000)) {
        printf("Error: webrtc not support this rate: %d, force to 8KHZ\n", *rate);
        *rate = 8000;  // 不支持的采样率强制设为8KHz
    }

    if (*channel != 1) {
        printf("Error: webrtc not support this channel: %d, force to 1 channel !!\n", *channel);
        *channel = 1;  // 强制单声道
    }
    
    if (*bits != 16) {
        printf("Error: webrtc not support this bits: %d, force to 16 bits\n", *bits);
        *bits = 16;  // 强制16bit
    }

    // 参数赋值 - 数据封装
    webrtc_sample_rate = sample_rate_hz = *rate;
    webrtc_sample_channel = num_capture_output_channels = num_capture_input_channels = num_render_channels = *channel;
    webrtc_sample_bits = *bits;
    webrtc_sample_time_ms = time;

    // 创建远端音频帧对象 - 工厂方法
    webrtc_far_frame = new AudioFrame();
    if(!webrtc_far_frame) {
        printf("webrtc_far_frame new erro\n");
        return -1;
    }

    // 创建近端音频帧对象 - 工厂方法
    webrtc_near_frame = new AudioFrame();
    if(!webrtc_far_frame) {  // 注意：这里应该是检查webrtc_near_frame
        printf("webrtc_near_frame new erro\n");
        delete webrtc_far_frame;  // 资源清理
        return -1;
    }

    // 计算帧长度：采样率 × 时间(秒) × 声道数 × 字节数 per sample
    webrtc_dev.frame_len = webrtc_sample_rate * webrtc_sample_time_ms *
        webrtc_sample_channel * (webrtc_sample_bits / 8) / 1000;
    webrtc_dev.time = 1;

    // 创建音频处理管理器 - 工厂模式
    webrtc_apm = AudioProcessing::Create(0);
    if(webrtc_apm == NULL) {
        printf("AudioProcessing::Create() error !\n");
        delete webrtc_near_frame;
        delete webrtc_far_frame;
        return -1;
    }

    // 配置基础音频参数
    webrtc_apm->set_sample_rate_hz(sample_rate_hz);                           // 设置采样率
    webrtc_apm->set_num_reverse_channels(num_render_channels);                // 设置反向声道数
    webrtc_apm->set_num_channels(num_capture_input_channels, num_capture_output_channels);  // 设置输入输出声道数

// 配置各个音频处理模块 - 装饰器模式：动态添加功能
#ifdef WEBRTC_AEC
    // 回声消除配置
    webrtc_apm->echo_cancellation()->Enable(true);                           // 启用AEC
    webrtc_apm->echo_cancellation()->enable_metrics(true);                   // 启用指标计算
    webrtc_apm->echo_cancellation()->enable_delay_logging(true);             // 启用延迟日志
    webrtc_apm->echo_cancellation()->enable_drift_compensation(false);       // 禁用漂移补偿
    webrtc_apm->echo_cancellation()->set_suppression_level(EchoCancellation::kLowSuppression);  // 设置抑制级别
#endif

#ifdef WEBRTC_HP
    // 高通滤波器配置
    webrtc_apm->high_pass_filter()->Enable(true);  // 启用高通滤波
#endif

#ifdef WEBRTC_LE
    // 电平估计配置
    webrtc_apm->level_estimator()->Enable(true);   // 启用电平估计
#endif

#ifdef WEBRTC_NS
    // 噪声抑制配置
    webrtc_apm->noise_suppression()->Enable(true);                           // 启用噪声抑制
    webrtc_apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh); // 设置抑制级别为最高
#endif

#ifdef WEBRTC_VAD
    // 语音活动检测配置
    webrtc_apm->voice_detection()->Enable(true);                            // 启用VAD
    webrtc_apm->voice_detection()->set_frame_size_ms(10);                   // 设置帧大小为10ms
    webrtc_apm->voice_detection()->set_likelihood(VoiceDetection::kLowLikelihood);  // 设置检测灵敏度
#endif

    webrtc_apm->Initialize();  // 初始化音频处理器

    webrtc_enable = true;  // 设置使能标志

    return 0;
}

/**
 * 调试信息输出函数
 * 功能: 打印当前音频处理器的所有配置状态
 * 性能: 仅用于调试，生产环境应禁用
 * 设计模式: 访问者模式 - 遍历访问各个处理器状态
 */
void dump()
{
    int ret;
    
    // 获取基础配置信息
    int sample_rate_hz_dump = webrtc_apm->sample_rate_hz();
    int num_input_channels_dump = webrtc_apm->num_input_channels();
    int num_output_channels_dump = webrtc_apm->num_output_channels();
    int num_reverse_channels_dump = webrtc_apm->num_reverse_channels();
    int stream_delay_ms_dump = webrtc_apm->stream_delay_ms();
    
    printf("sample rate : %d\n", sample_rate_hz_dump);
    printf("num_input_channels : %d\n", num_input_channels_dump);
    printf("num_output_channels : %d\n", num_output_channels_dump);
    printf("num_reverse_channels : %d\n", num_reverse_channels_dump);
    printf("stream_delay_ms : %d\n", stream_delay_ms_dump);

    /* AEC状态输出 */
    ret = webrtc_apm->echo_cancellation()->is_enabled();
    if(ret) {
        printf("AEC enable !\n");
        ret = webrtc_apm->echo_cancellation()->is_drift_compensation_enabled();
        if(ret) {
            printf("\t\tenable_drift_compensation");
            ret = webrtc_apm->echo_cancellation()->device_sample_rate_hz();
            printf("\t\t\tdevice_sample_rate_hz : %d\n", ret);
            ret = webrtc_apm->echo_cancellation()->stream_drift_samples();
            printf("\t\t\tstream_drift_samples : %d\n", ret);
        }

        ret = webrtc_apm->echo_cancellation()->suppression_level();
        printf("\t\tsuppression_level : %d\n", ret);

        ret = webrtc_apm->echo_cancellation()->are_metrics_enabled();
        if(ret) {
            printf("\t\tenable_metrics\n");
        }

        ret = webrtc_apm->echo_cancellation()->is_delay_logging_enabled();
        if(ret) {
            printf("\t\tenable_delay_logging\n");
        }
    }

    /* 高通滤波器状态输出 */
    ret = webrtc_apm->high_pass_filter()->is_enabled();
    if(ret)
        printf("HighPassFilter is enabled\n");

    /* 电平估计状态输出 */
    ret = webrtc_apm->level_estimator()->is_enabled();
    if(ret) {
        printf("LevelEstimator is enable\n");
    }

    /* 噪声抑制状态输出 */
    ret = webrtc_apm->noise_suppression()->is_enabled();
    if(ret) {
        printf("NoiseSuppression is enabled !\n");
        ret = webrtc_apm->noise_suppression()->level();
        printf("\t\tNoiseSuppression : %d\n", ret);
    }

    /* 语音活动检测状态输出 */
    ret = webrtc_apm->voice_detection()->is_enabled();
    if(ret) {
        printf("voice activity detection is enable !\n");

        ret = webrtc_apm->voice_detection()->likelihood();
        printf("\t\tlikelihood : %d\n", ret);

        ret = webrtc_apm->voice_detection()->frame_size_ms();
        printf("\t\tframe size per ms : %d\n", ret);
    }
}

/**
 * 设置远端帧数据(扬声器输出)
 * 功能: 将扬声器数据送入AEC作为参考信号
 * 性能: 实时处理，每帧调用一次，计算复杂度中等
 * 设计模式: 命令模式 - 封装数据处理命令
 * 
 * @param buf 扬声器数据缓冲区
 * @return 成功返回0，失败返回-1
 */
int tang_apm_set_far_frame(short *buf)
{
    int i, ret;
    
    // 配置远端帧参数
    webrtc_far_frame->_audioChannel = 1;  // 单声道
    webrtc_far_frame->_frequencyInHz = webrtc_sample_rate;  // 采样率
    webrtc_far_frame->_payloadDataLengthInSamples = webrtc_far_frame->_frequencyInHz/100;  // 每帧样本数

    // 数据预处理：左移1位(可能为了放大信号)
    for(i=0; i<webrtc_far_frame->_payloadDataLengthInSamples; i++)
        webrtc_far_frame->_payloadData[i] = buf[i] << 1;

    // 分析反向流(远端信号)
    ret = webrtc_apm->AnalyzeReverseStream(webrtc_far_frame);
    if(ret < 0) {
        printf("AnalyzeReverseStream() error : %d\n", ret);
        return -1;
    }

    return 0;
}

/**
 * 设置近端帧数据(麦克风输入)并处理
 * 功能: 处理麦克风输入，应用AEC、NS、VAD等效果
 * 性能: 实时处理，计算密集型，性能关键路径
 * 设计模式: 模板方法模式 - 定义处理流程
 * 
 * @param input 麦克风输入数据
 * @param output 处理后的输出数据
 * @param time 时间参数
 * @return 成功返回0，失败返回-1
 */
int tang_apm_set_near_frame(short *input, short *output, int time)
{
    int i, ret;
    
    // 配置近端帧参数
    webrtc_near_frame->_audioChannel = 1;  // 单声道
    webrtc_near_frame->_frequencyInHz = webrtc_sample_rate;  // 采样率
    webrtc_near_frame->_payloadDataLengthInSamples = webrtc_near_frame->_frequencyInHz/100;  // 每帧样本数

    // 拷贝输入数据到近端帧
    for(i = 0; i < webrtc_near_frame->_payloadDataLengthInSamples; i++)
        webrtc_near_frame->_payloadData[i] = input[i];

    webrtc_apm->set_stream_delay_ms(2);  // 设置流延迟为2ms

    // 处理音频流 - 核心处理函数
    ret = webrtc_apm->ProcessStream(webrtc_near_frame);
    if(ret < 0) {
        printf("AnalyzeReverseStream() error : %d\n", ret);  // 注意：错误信息应该是ProcessStream
        return -1;
    }

// VAD后处理：如果检测到无语音，静音输出
#ifdef WEBRTC_VAD
    ret = webrtc_apm->voice_detection()->stream_has_voice();
    if(ret == 0)
        for(i = 0; i < webrtc_near_frame->_payloadDataLengthInSamples; i++)
            webrtc_near_frame->_payloadData[i] = 0;  // 静音处理

#endif

    // 拷贝处理结果到输出缓冲区
    memcpy(output, webrtc_near_frame->_payloadData,
            webrtc_near_frame->_payloadDataLengthInSamples * sizeof(short));
    return 0;
}

/**
 * 销毁WebRTC音频处理模块
 * 功能: 清理所有资源，释放内存
 * 性能: 一次性调用，释放系统资源
 * 设计模式: 资源获取即初始化(RAII)的反向操作
 * 
 * @return 成功返回0
 */
int tang_apm_destroy(void)
{
    // 状态检查
    if (webrtc_enable == false) {
        printf("webrtc aec has initted!\n");  // 注意：应该是"not initted"或"already destroyed"
        return 0;
    }
    
    //dump();  // 调试信息输出(被注释)
    printf("ingenic_apm_destroy!\n");
    
    // 销毁音频处理器 - 工厂模式的销毁方法
    AudioProcessing::Destroy(webrtc_apm);
    webrtc_apm = NULL;
    
    // 释放音频帧内存 - 注意：第二个条件应该是webrtc_far_frame
    if (webrtc_near_frame)
        delete webrtc_near_frame;
    if (webrtc_near_frame)  // BUG：这里应该是webrtc_far_frame
        delete webrtc_far_frame;

    webrtc_enable = false;  // 重置使能标志
    return 0;
}

/**
 * 获取缓冲区长度
 * 功能: 返回每帧音频数据的字节长度
 * 性能: 直接返回预计算值，无计算开销
 * 
 * @return 帧长度(字节数)
 */
int webrtc_aec_get_buffer_length(void)
{
    return webrtc_dev.frame_len;
}

/**
 * WebRTC AEC处理入口函数
 * 功能: 封装完整的AEC处理流程
 * 性能: 实时处理，组合了两个核心处理函数
 * 设计模式: 外观模式 - 简化复杂子系统接口
 * 
 * @param buf_record 录音数据缓冲区
 * @param buf_play 播放数据缓冲区
 * @param buf_result 处理结果缓冲区
 * @param time 时间参数
 */
void webrtc_aec_calculate(void *buf_record, void *buf_play, void *buf_result, unsigned int time)
{
    // 处理流程：先设置远端参考信号，再处理近端信号
    ingenic_apm_set_far_frame((short *)buf_play);      // 设置扬声器数据
    ingenic_apm_set_near_frame((short *)buf_record, (short *)buf_result, time);  // 处理麦克风数据
}

整体分析和改进建议：

设计模式分析：

工厂模式 ：AudioProcessing::Create() 创建处理器
单例模式思想：全局状态管理，防止重复初始化
装饰器模式：动态组合各种音频处理功能
外观模式 ：webrtc_aec_calculate() 提供简化接口
策略模式：参数适配和验证

性能分析：

优点：模块化设计，实时处理能力
缺点：存在内存拷贝，可优化数据流
计算复杂度：中等，适合实时音频处理

发现的问题：

内存释放逻辑有bug（重复检查webrtc_near_frame）
错误信息描述不准确
缺少错误处理和边界检查

改进建议：

修复内存释放bug
增加参数验证和错误处理
考虑使用智能指针管理资源
优化数据拷贝，减少内存操作

Makefile

复制代码

# Absolute path of the directory make runs in
DIR_CUR := $(shell pwd)

# Toolchain. These are defaults only; override them from the environment or
# the make command line (e.g. for cross compilation).
CC ?= gcc
CXX ?= g++
STRIP ?= strip

# Architecture-specific flags, selected from the machine name reported by
# uname. NOTE: this is the machine running make, not a cross-compilation target.
UNAME_M := $(shell uname -m)

ifeq ($(UNAME_M), armv7l)
    # 32-bit ARM
    CFLAGS += -marm -mfpu=neon -mfloat-abi=hard
    ARCH = arm32
else ifeq ($(UNAME_M), aarch64)
    # 64-bit ARM
    CFLAGS += -march=armv8-a
    ARCH = arm64
else ifeq ($(UNAME_M), x86_64)
    # x86_64
    ARCH = x86_64
else
    # anything else
    ARCH = unknown
endif

# Installation paths.
# Comments are kept on their own lines on purpose: make keeps the whitespace
# between a value and a trailing '#', which would end up inside the paths.
# DESTDIR: staging root, used for packaging or cross builds
DESTDIR =
# PREFIX: installation prefix
PREFIX = /usr
# BINDIR / LIBDIR / INCDIR: binary, library and header directories
BINDIR = $(PREFIX)/bin/
LIBDIR = $(PREFIX)/lib/
INCDIR = $(PREFIX)/include/

# Compiler flags
#   -I...              : project header directory
#   -O3                : optimisation level
#   -fPIC              : position-independent code, required for a shared library
#   -g                 : debug information
#   -D_FORTIFY_SOURCE=2: extra buffer-overflow checks
#   -DBUILDCFG         : project build-configuration macro
#   -Wall -Wextra      : enable the common warning sets
#   -Wno-unused-parameter: silence unused-parameter warnings
# Optional extra define (disabled): -DBT_ALONE
CFLAGS += -I$(DIR_CUR)/include -O3
CFLAGS += -fPIC -g -D_FORTIFY_SOURCE=2 -DBUILDCFG -Wall -Wextra -Wno-unused-parameter

# Linker flags
#   -L: library search path
#   -l: libraries to link against
LDFLAGS += -L$(DIR_CUR)/lib/ -lwebrtc_audio_processing -lpthread

# Every .cpp file in this directory is compiled to <name>.c.o
SRCS = $(wildcard *.cpp)
OBJS = $(patsubst %.cpp,%.c.o,$(SRCS))

# Name of the shared library that is built
TARGET = libwebrtc.so

# Install command
INSTALL = install

# Make the flags visible to child processes
export CFLAGS LDFLAGS

# Default target
all: $(TARGET)

# Link the object files into the shared library with the C++ driver.
# Recipe lines must start with a TAB character.
$(TARGET): $(OBJS)
	$(CXX) $(OBJS) $(LDFLAGS) -shared -o $@

# Compile one .cpp file into its object file (-c: compile only, no link).
%.c.o: %.cpp
	$(CXX) $(CFLAGS) -c $< -o $@

# Install the library, the WebRTC library it depends on, and the public header.
# The header is installed read-only for group/others (644).
install: all
	$(INSTALL) -d $(DESTDIR)$(LIBDIR)
	$(INSTALL) -d $(DESTDIR)$(INCDIR)
	$(INSTALL) -m 755 $(DIR_CUR)/$(TARGET) $(DESTDIR)$(LIBDIR)
	cp -a $(DIR_CUR)/lib/libwebrtc_audio_processing.so* $(DESTDIR)$(LIBDIR)
	$(INSTALL) -m 644 $(DIR_CUR)/include/webrtc_aec.h $(DESTDIR)$(INCDIR)

# Remove everything the install target copied.
uninstall:
	-rm -f $(DESTDIR)$(INCDIR)/webrtc_aec.h
	-rm -f $(DESTDIR)$(LIBDIR)/$(TARGET)
	-rm -f $(DESTDIR)$(LIBDIR)/libwebrtc_audio_processing.so*

# Remove the object files.
clean:
	-rm -f $(OBJS)

# Remove the object files and the built library.
distclean: clean
	-rm -f $(TARGET)

# Targets that do not name real files. $(TARGET) is a real file and is
# deliberately not listed, otherwise it would be relinked on every run.
.PHONY: all clean distclean install uninstall

主要修改内容：

架构检测：添加了自动检测当前系统架构的逻辑
编译器设置：将硬编码的mipsel编译器（原设置为 CC = mipsel-linux-、STRIP = mipsel-linux-strip）改为通用的gcc/g++，支持通过环境变量覆盖
架构特定标志：为arm32和arm64设置了合适的编译标志
安装目录创建：在install目标中添加了目录创建步骤
变量使用：使用$(TARGET)变量代替硬编码的文件名

使用方式：

复制代码

# 自动检测架构编译
make

# 交叉编译（需要在环境中设置CC、CXX等变量）
export CC=arm-linux-gnueabihf-gcc
export CXX=arm-linux-gnueabihf-g++
make

# 安装到指定目录
make install DESTDIR=/tmp/install

这样修改后的Makefile可以自动适应不同的架构环境，包括arm32、arm64和x86_64。

函数调用流程：

复制代码

tang_apm_init()
    ↓
webrtc_aec_get_buffer_length()  [可选，用于查询缓冲区大小]
    ↓
webrtc_aec_calculate()          [循环调用，实时处理]
    ↓
tang_apm_destroy()

使用场景：

实时语音通信：消除扬声器到麦克风的回声
音频处理应用：需要实时回声消除的嵌入式系统
跨平台开发：C接口便于不同平台集成