Filters in Practice: Audio Mixing
This demo shows how to use FFmpeg's filter system to mix audio: for example, how to combine a microphone feed with background music (BGM) when live streaming, plus a few practical details along the way.
Unlike the watermark demo, the logic here is wrapped in a class (an object-oriented design).
Header file
cpp
#ifndef AUDIO_MIX_H
#define AUDIO_MIX_H
extern "C" {
#include "libavutil/samplefmt.h"
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavfilter/version.h>
#include <libavutil/opt.h>
}
#include <cstdio>
#include <map>
#include <memory>
#include <mutex>
#include <string>
class AudioMixer {
public:
    AudioMixer();
    virtual ~AudioMixer();

    // Register an input audio stream (must be called before init())
    int addAudioInput(int index, uint32_t sample_rate, uint32_t channels,
                      uint32_t bit_rate, AVSampleFormat format);
    // Register the output audio stream (must be called before init())
    int addAudioOutput(const uint32_t sample_rate, uint32_t channels,
                       uint32_t bit_rate, AVSampleFormat format);

    int init(const char *duration = "longest");
    int exit();

    // Feed raw PCM for one input; pass inbuf == nullptr to flush that input
    int addFrame(int index, uint8_t *inbuf, uint32_t size);
    // Fetch mixed PCM; returns the number of bytes written to outbuf
    int getFrame(uint8_t *outbuf, uint32_t maxSize);
    void dumpGraph();

private:
    struct AudioInfo {
        uint32_t sample_rate;
        uint32_t channels;
        uint32_t bit_rate;      // used as bits per sample in this demo
        AVSampleFormat format;
        std::string name;
        AVFilterContext *filter_ctx;
        AudioInfo() { filter_ctx = nullptr; }
    };

    bool initialized_ = false;
    std::mutex mutex_;
    std::map<uint32_t, AudioInfo> audio_input_info_;
    std::shared_ptr<AudioInfo> audio_sink_info_;
    std::shared_ptr<AudioInfo> audio_mix_info_;
    std::shared_ptr<AudioInfo> audio_output_info_;
    AVFilterGraph *filter_graph_ = nullptr;
};
#endif
Filter graph architecture
plain
+--------+
output:default------------[48000Hz flt:stereo]--input0| amix |default--[48000Hz flt:stereo]--auto_resampler_1:default
auto_resampler_0:default--[48000Hz flt:stereo]--input1| (amix) |
+--------+
+---------------+
aformat:default--[96000Hz s16:stereo]--default| sink |
| (abuffersink) |
+---------------+
+-----------+
| output |default--[48000Hz flt:stereo]--amix:input0
| (abuffer) |
+-----------+
+-----------+
| output |default--[48000Hz s16:stereo]--auto_resampler_0:default
| (abuffer) |
+-----------+
+-----------+
auto_resampler_1:default--[96000Hz s16:stereo]--default| aformat |default--[96000Hz s16:stereo]--sink:default
| (aformat) |
+-----------+
+------------------+
output:default--[48000Hz s16:stereo]--default| auto_resampler_0 |default--[48000Hz flt:stereo]--amix:input1
| (aresample) |
+------------------+
+------------------+
amix:default--[48000Hz flt:stereo]--default| auto_resampler_1 |default--[96000Hz s16:stereo]--aformat:default
| (aresample) |
+------------------+
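This is the graph that init() builds for the parameters used in main(): each input gets its own abuffer source, both sources feed amix, and amix feeds aformat and then the abuffersink. Because input 1 is s16 and the requested output is 96 kHz s16 while amix mixes in 48 kHz float, avfilter_graph_config() automatically inserts the auto_resampler_0 and auto_resampler_1 conversion filters. The dump itself is printed by dumpGraph(), whose body is not shown in this article; here is a minimal sketch, assuming it simply wraps avfilter_graph_dump():
cpp
void AudioMixer::dumpGraph() {
    if (filter_graph_ == nullptr) {
        return;
    }
    // Render the configured graph as the ASCII boxes shown above.
    // The returned string is heap-allocated and must be freed by the caller.
    char* graph_str = avfilter_graph_dump(filter_graph_, nullptr);
    if (graph_str != nullptr) {
        printf("%s\n", graph_str);
        av_free(graph_str);
    }
}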
Source file
Adding an input audio stream
cpp
int AudioMixer::addAudioInput(int index, uint32_t sample_rate, uint32_t channels,
                              uint32_t bit_rate, AVSampleFormat format) {
    std::lock_guard<std::mutex> locker(mutex_);
    if (initialized_) {
        return -1;
    }
    // Refuse duplicate indexes
    if (audio_input_info_.find(index) != audio_input_info_.end()) {
        return -1;
    }
    // operator[] creates a fresh AudioInfo for this index
    auto& filter_info = audio_input_info_[index];
    // Record the stream parameters
    filter_info.sample_rate = sample_rate;
    filter_info.channels = channels;
    filter_info.bit_rate = bit_rate;
    filter_info.format = format;
    // Give it a name, e.g. "input0"
    filter_info.name = std::string("input") + std::to_string(index);
    return 0;
}
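For example, the main() function later in this article registers two input streams like this. Note that, despite its name, the bit_rate argument is effectively bits per sample (32 for AV_SAMPLE_FMT_FLT, 16 for AV_SAMPLE_FMT_S16); addFrame() uses it that way when computing nb_samples:
cpp
AudioMixer amix;
// index 0: 48 kHz, stereo, 32-bit float PCM
amix.addAudioInput(0, 48000, 2, 32, AV_SAMPLE_FMT_FLT);
// index 1: 48 kHz, stereo, 16-bit signed PCM
amix.addAudioInput(1, 48000, 2, 16, AV_SAMPLE_FMT_S16);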
Adding the output audio stream
cpp
int AudioMixer::addAudioOutput(const uint32_t sample_rate, uint32_t channels,
                               uint32_t bit_rate, AVSampleFormat format) {
    std::lock_guard<std::mutex> locker(mutex_);
    if (initialized_) {
        return -1;
    }
    // Allocate the output description
    audio_output_info_.reset(new AudioInfo);
    // Record the requested output parameters
    audio_output_info_->sample_rate = sample_rate;
    audio_output_info_->channels = channels;
    audio_output_info_->bit_rate = bit_rate;
    audio_output_info_->format = format;
    audio_output_info_->name = "output";
    return 0;
}
Initialization
cpp
int AudioMixer::init(const char* duration) {
    std::lock_guard<std::mutex> locker(mutex_);
    if (initialized_) {
        return -1;
    }
    // At least one input stream is required
    if (audio_input_info_.size() == 0) {
        return -1;
    }
    // The filter graph owns and manages all filter instances
    filter_graph_ = avfilter_graph_alloc();
    if (filter_graph_ == nullptr) {
        return -1;
    }

    char args[512] = {0};

    // amix: the mixing filter
    audio_mix_info_.reset(new AudioInfo);
    const AVFilter* amix = avfilter_get_by_name("amix");
    audio_mix_info_->filter_ctx = avfilter_graph_alloc_filter(filter_graph_, amix, "amix");
    // Option string: number of inputs, how long to keep mixing, fade time when an input ends
    snprintf(args, sizeof(args), "inputs=%d:duration=%s:dropout_transition=0",
             (int)audio_input_info_.size(), duration);
    // Initialize the filter from the option string
    if (avfilter_init_str(audio_mix_info_->filter_ctx, args) != 0) {
        printf("[AudioMixer] avfilter_init_str(amix) failed.\n");
        return -1;
    }

    // abuffersink: the sink the mixed frames are pulled from
    audio_sink_info_.reset(new AudioInfo);
    const AVFilter* abuffersink = avfilter_get_by_name("abuffersink");
    audio_sink_info_->filter_ctx = avfilter_graph_alloc_filter(filter_graph_, abuffersink, "sink");
    if (avfilter_init_str(audio_sink_info_->filter_ctx, nullptr) != 0) {
        printf("[AudioMixer] avfilter_init_str(abuffersink) failed\n");
        return -1;
    }

    // One abuffer source per input stream
    for (auto& iter : audio_input_info_) {
        const AVFilter* abuffer = avfilter_get_by_name("abuffer");
        // Sample rate, sample format and channel layout of this input
        snprintf(args, sizeof(args), "sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
                 iter.second.sample_rate,
                 av_get_sample_fmt_name(iter.second.format),
                 av_get_default_channel_layout(iter.second.channels));
        printf("[AudioMixer] input(%d) args:%s\n", iter.first, args);
        iter.second.filter_ctx = avfilter_graph_alloc_filter(filter_graph_, abuffer,
                                                             iter.second.name.c_str());
        if (avfilter_init_str(iter.second.filter_ctx, args) != 0) {
            printf("[AudioMixer] avfilter_init_str(abuffer) failed\n");
            return -1;
        }
        // abuffer output pad 0 -> amix input pad <index>
        if (avfilter_link(iter.second.filter_ctx, 0, audio_mix_info_->filter_ctx, iter.first) != 0) {
            printf("[AudioMixer] avfilter_link(abuffer, amix) failed\n");
            return -1;
        }
    }

    if (audio_output_info_ != nullptr) {
        // aformat: constrain the output sample rate / format / channel layout
        const AVFilter* aformat = avfilter_get_by_name("aformat");
        snprintf(args, sizeof(args), "sample_rates=%d:sample_fmts=%s:channel_layouts=0x%" PRIx64,
                 audio_output_info_->sample_rate,
                 av_get_sample_fmt_name(audio_output_info_->format),
                 av_get_default_channel_layout(audio_output_info_->channels));
        audio_output_info_->filter_ctx = avfilter_graph_alloc_filter(filter_graph_, aformat, "aformat");
        if (avfilter_init_str(audio_output_info_->filter_ctx, args) != 0) {
            printf("[AudioMixer] avfilter_init_str(aformat) failed\n");
            return -1;
        }
        // amix -> aformat -> abuffersink
        if (avfilter_link(audio_mix_info_->filter_ctx, 0, audio_output_info_->filter_ctx, 0) != 0) {
            printf("[AudioMixer] avfilter_link(amix, aformat) failed\n");
            return -1;
        }
        if (avfilter_link(audio_output_info_->filter_ctx, 0, audio_sink_info_->filter_ctx, 0) != 0) {
            printf("[AudioMixer] avfilter_link(aformat, abuffersink) failed\n");
            return -1;
        }
    }

    if (avfilter_graph_config(filter_graph_, nullptr) < 0) {
        printf("[AudioMixer] avfilter_graph_config failed\n");
        return -1;
    }
    initialized_ = true;
    return 0;
}
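Releasing the graph
The header also declares a constructor, a destructor and exit(), none of which are shown in the article. A minimal sketch, assuming exit() only needs to release the filter graph (avfilter_graph_free() also frees every filter context the graph owns):
cpp
AudioMixer::AudioMixer() {}

AudioMixer::~AudioMixer() {
    exit();
}

int AudioMixer::exit() {
    std::lock_guard<std::mutex> locker(mutex_);
    if (filter_graph_ != nullptr) {
        // Frees the graph together with all filter contexts allocated from it
        avfilter_graph_free(&filter_graph_);
    }
    audio_input_info_.clear();
    audio_sink_info_.reset();
    audio_mix_info_.reset();
    audio_output_info_.reset();
    initialized_ = false;
    return 0;
}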
Feeding data into the filter graph
cpp
int AudioMixer::addFrame(int index, uint8_t* inbuf, uint32_t size) {
    std::lock_guard<std::mutex> locker(mutex_);
    if (!initialized_) {
        return -1;
    }
    auto iter = audio_input_info_.find(index);
    if (iter == audio_input_info_.end()) {
        return -1;
    }
    if (inbuf && size > 0) {
        // shared_ptr with a custom deleter keeps the frame memory-safe
        std::shared_ptr<AVFrame> avframe(av_frame_alloc(), [](AVFrame* f) {
            av_frame_free(&f);
        });
        avframe->sample_rate = iter->second.sample_rate;
        // avframe->channels = iter->second.channels;
        avframe->format = iter->second.format;
        avframe->channel_layout = av_get_default_channel_layout(iter->second.channels);
        // bytes -> bits, divided by bits per sample and by channel count
        avframe->nb_samples = size * 8 / iter->second.bit_rate / iter->second.channels;
        av_frame_get_buffer(avframe.get(), 0);
        memcpy(avframe->extended_data[0], inbuf, size);
        if (av_buffersrc_add_frame(iter->second.filter_ctx, avframe.get()) != 0) {
            printf("[AudioMixer] av_buffersrc_add_frame failed\n");
            return -1;
        }
    } else {
        // A null frame flushes this input (signals EOF to the graph)
        if (av_buffersrc_add_frame(iter->second.filter_ctx, nullptr) != 0) {
            return -1;
        }
    }
    return 0;
}
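With the parameters used in main(), the arithmetic checks out: an 8192-byte frame of 32-bit float stereo PCM gives 8192 * 8 / 32 / 2 = 1024 samples per channel. Despite the field name, bit_rate is really bits per sample here. As a sketch, the same value can be derived from the sample format itself with FFmpeg's helper (the samples_in_buffer() name is made up for illustration):
cpp
#include <cstdint>
extern "C" {
#include <libavutil/samplefmt.h>
}

// Hypothetical helper: samples per channel contained in `size` bytes of
// packed (interleaved) PCM with the given sample format and channel count.
static int samples_in_buffer(uint32_t size, AVSampleFormat format, uint32_t channels) {
    int bytes_per_sample = av_get_bytes_per_sample(format); // 4 for FLT, 2 for S16
    return size / (bytes_per_sample * channels);
}
// e.g. samples_in_buffer(8192, AV_SAMPLE_FMT_FLT, 2) == 1024
//      samples_in_buffer(4096, AV_SAMPLE_FMT_S16, 2) == 1024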
Pulling mixed data out of the filter graph
cpp
int AudioMixer::getFrame(uint8_t* outbuf, uint32_t maxSize) {
    std::lock_guard<std::mutex> locker(mutex_);
    if (!initialized_) {
        return -1;
    }
    // shared_ptr with a custom deleter (the lambda) releases the frame automatically
    std::shared_ptr<AVFrame> avframe(av_frame_alloc(), [](AVFrame* f) {
        av_frame_free(&f);
    });
    int ret = av_buffersink_get_frame(audio_sink_info_->filter_ctx, avframe.get());
    if (ret < 0) {
        return -1;
    }
    // Size in bytes of this frame's interleaved samples
    int size = av_samples_get_buffer_size(nullptr, avframe->channels, avframe->nb_samples,
                                          (AVSampleFormat)avframe->format, 1);
    if (size > (int)maxSize) {
        return 0;
    }
    memcpy(outbuf, avframe->extended_data[0], size);
    return size;
}
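av_buffersink_get_frame() reports AVERROR(EAGAIN) when the graph simply has no mixed frame ready yet and AVERROR_EOF once every input has been flushed; getFrame() above folds both into -1. If the caller needs to tell 'try again later' apart from 'finished', the return code can be inspected more finely, as in this sketch (the MixStatus enum and pull_status() are made up for illustration):
cpp
extern "C" {
#include <libavfilter/buffersink.h>
#include <libavutil/error.h>
#include <libavutil/frame.h>
}

enum MixStatus { MIX_OK = 0, MIX_AGAIN, MIX_EOF, MIX_ERROR };

static MixStatus pull_status(AVFilterContext* sink, AVFrame* frame) {
    int ret = av_buffersink_get_frame(sink, frame);
    if (ret >= 0)
        return MIX_OK;               // got a mixed frame
    if (ret == AVERROR(EAGAIN))
        return MIX_AGAIN;            // nothing ready yet, feed more input first
    if (ret == AVERROR_EOF)
        return MIX_EOF;              // all inputs flushed, mixing is finished
    return MIX_ERROR;                // a real error
}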
Main function
cpp
#include "audio_mixer.h"
// PCM1: 48000_2_f32le.pcm
// PCM2: 48000_2_s16le.pcm
#define PCM1_FRAME_SIZE (4*2*1024)
#define PCM2_FRAME_SIZE (2*2*1024)
#define PCM_OUT_FRAME_SIZE (40000)
int main(int argc, char* argv[]) {
AudioMixer amix;
amix.addAudioInput(0, 48000, 2, 32, AV_SAMPLE_FMT_FLT);
amix.addAudioInput(1, 48000, 2, 16, AV_SAMPLE_FMT_S16);
amix.addAudioOutput(96000, 2, 16, AV_SAMPLE_FMT_S16);
if (amix.init("longest") < 0) {
return -1;
}
amix.dumpGraph();
int len1 = 0;
int len2 = 0;
uint8_t buf1[PCM1_FRAME_SIZE];
uint8_t buf2[PCM2_FRAME_SIZE];
FILE* file1 = fopen("48000_2_f32le.pcm", "rb");
if (!file1) {
printf("open file1 failed\n");
return -1;
}
FILE* file2 = fopen("48000_2_s16le.pcm", "rb");
if (!file2) {
printf("open file2 failed\n");
return -1;
}
FILE* file_out = fopen("output.pcm", "wb");
if (!file_out) {
printf("open file_out failed\n");
return -1;
}
int file1_finish = 0;
int file2_finish = 0;
uint8_t out_buf[PCM_OUT_FRAME_SIZE];
uint32_t out_size = 0;
while(1) {
len1 = fread(buf1, 1, PCM1_FRAME_SIZE, file1);
len2 = fread(buf2, 1, PCM2_FRAME_SIZE, file2);
if (len1 > 0 || len2 > 0) {
if (len1 > 0) {
if (amix.addFrame(0, buf1, PCM1_FRAME_SIZE) < 0) {
printf("addFrame(pcm1) failed\n");
break;
}
}
else {
if (file1_finish == 0) {
file1_finish = 1;
amix.addFrame(0, NULL, 0);
}
}
if (len2 > 0) {
if (amix.addFrame(1, buf2, PCM2_FRAME_SIZE) < 0) {
printf("addFrame(pcm2) failed\n");
break;
}
}
else {
if (file2_finish == 0) {
file2_finish = 1;
amix.addFrame(1, NULL, 0); // 冲刷编码器
}
}
int ret = 0;
while((ret = amix.getFrame(out_buf, PCM_OUT_FRAME_SIZE)) > 0) {
out_size += ret;
if (out_size % 1024 == 0) {
printf("mix audio out size: %d\n", out_size);
}
fwrite(out_buf, 1, ret, file_out);
}
}
else {
printf("two file finish\n");
break;
}
}
amix.exit();
if (file1) {
fclose(file1);
}
if (file2) {
fclose(file2);
}
if (file_out) {
fclose(file_out);
}
getchar();
return 0;
}
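Building and running
A build line along these lines should work, assuming the two source files are named audio_mixer.cpp and main.cpp and that FFmpeg's development packages are visible to pkg-config (both the file names and the pkg-config setup are assumptions, not taken from the article):
plain
g++ -std=c++11 audio_mixer.cpp main.cpp -o audio_mixer_demo \
    $(pkg-config --cflags --libs libavfilter libavcodec libavutil)
Run the binary in a directory containing 48000_2_f32le.pcm and 48000_2_s16le.pcm; the mixed result is written to output.pcm.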