// C++ — FFmpeg (8.x API) MP4 recorder with built-in test demo.
#include <chrono>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <mutex>
#include <random>
#include <string>
#include <thread>
#include <vector>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/channel_layout.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
}
// Configuration for one recording session.  Defaults describe a 640x480
// 25 fps H.264 stream with 44.1 kHz stereo AAC audio.
struct MP4InitParams {
// Output path of the MP4 file to create.
std::string filename;
int video_width = 640;
int video_height = 480;
int video_frame_rate = 25;
// Target video bitrate in bits per second.
int video_bit_rate = 1000000;
// Encoder name resolved via avcodec_find_encoder_by_name().
std::string video_codec_name = "libx264";
bool enable_video = true;
int audio_sample_rate = 44100;
int audio_channels = 2;
// Target audio bitrate in bits per second.
int audio_bit_rate = 128000;
// Encoder name resolved via avcodec_find_encoder_by_name().
std::string audio_codec_name = "aac";
bool enable_audio = true;
MP4InitParams(const std::string& name) : filename(name) {}
};
// Muxes raw YUV420p video and interleaved signed-16-bit PCM audio (or
// pre-encoded elementary streams) into an MP4 file using FFmpeg.
// Only pcm_buffer_ and the audio-encoding path are protected by
// audio_mutex_; the remaining members are not synchronized.
class FFmpegMp4Saver {
public:
FFmpegMp4Saver();
~FFmpegMp4Saver();
// Creates encoders/streams, opens the file and writes the container header.
bool initialize(const MP4InitParams& params);
// Encodes one YUV420p frame; pts is in codec time-base ticks (1/frame_rate).
bool writeYUVFrame(const uint8_t* yuv_data, int64_t pts);
// Muxes an already-encoded sample directly; pts must be in the stream time base.
bool writeESFrame(const uint8_t* es_data, int size, int64_t pts, bool is_video, bool is_key_frame = true);
// Buffers interleaved S16 PCM.  NOTE(review): the pts argument is ignored —
// audio timestamps come from a running sample counter (audio_pts_).
bool writePCMFrame(const uint8_t* pcm_data, int samples, int64_t pts);
// Flushes both encoders, writes the MP4 trailer and releases resources.
bool finish();
bool isInitialized() const { return initialized_; }
bool isVideoEnabled() const { return video_initialized_; }
bool isAudioEnabled() const { return audio_initialized_; }
// Bytes of PCM currently buffered.  NOTE(review): narrows size_t to int.
int getAudioBufferSize() const { return pcm_buffer_.size(); }
private:
bool initVideoCodec(const MP4InitParams& params);
bool initAudioCodec(const MP4InitParams& params);
// Encodes every complete encoder frame in pcm_buffer_; caller holds audio_mutex_.
bool encodeBufferedAudio();
// Sends `frame` (nullptr = flush) to the encoder and muxes all output packets.
bool writeEncodedFrame(AVFrame* frame, AVStream* stream, AVCodecContext* codec_ctx);
void cleanup();
AVFormatContext* format_ctx_;
AVCodecContext* video_codec_ctx_;
AVCodecContext* audio_codec_ctx_;
AVStream* video_stream_;
AVStream* audio_stream_;
// Never allocated anywhere in this file; only freed in cleanup().
SwsContext* sws_ctx_;
// Interleaved S16 -> encoder sample-format resampler.
SwrContext* swr_ctx_;
int video_width_;
int video_height_;
int video_frame_rate_;
int audio_sample_rate_;
int audio_channels_;
bool initialized_;
bool video_initialized_;
bool audio_initialized_;
// video_pts_ is set in the constructor but never used afterwards.
int64_t video_pts_;
// Running count of audio samples already handed to the encoder.
int64_t audio_pts_;
// Pending interleaved S16 samples awaiting a full encoder frame.
std::vector<uint8_t> pcm_buffer_;
// Guards pcm_buffer_ and the audio-encode path.
std::mutex audio_mutex_;
};
// All FFmpeg handles start null and all state flags false; the real setup is
// deferred to initialize().  Initializer order matches declaration order.
FFmpegMp4Saver::FFmpegMp4Saver()
    : format_ctx_(nullptr), video_codec_ctx_(nullptr), audio_codec_ctx_(nullptr),
      video_stream_(nullptr), audio_stream_(nullptr),
      sws_ctx_(nullptr), swr_ctx_(nullptr),
      video_width_(0), video_height_(0), video_frame_rate_(25),
      audio_sample_rate_(44100), audio_channels_(2),
      initialized_(false), video_initialized_(false), audio_initialized_(false),
      video_pts_(0), audio_pts_(0) {}
// Flushes and finalizes the file if the caller forgot to call finish().
FFmpegMp4Saver::~FFmpegMp4Saver() {
finish();
}
bool FFmpegMp4Saver::initialize(const MP4InitParams& params) {
cleanup();
video_width_ = params.video_width;
video_height_ = params.video_height;
video_frame_rate_ = params.video_frame_rate;
audio_sample_rate_ = params.audio_sample_rate;
audio_channels_ = params.audio_channels;
int ret = avformat_alloc_output_context2(&format_ctx_, nullptr, nullptr, params.filename.c_str());
if (ret < 0 || !format_ctx_) {
std::cerr << "Failed to allocate output context: " << ret << std::endl;
return false;
}
if (params.enable_video) {
if (!initVideoCodec(params)) {
return false;
}
video_initialized_ = true;
}
if (params.enable_audio) {
if (!video_initialized_) {
std::cerr << "Video must be enabled when audio is enabled" << std::endl;
return false;
}
if (!initAudioCodec(params)) {
return false;
}
audio_initialized_ = true;
}
if (!(format_ctx_->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&format_ctx_->pb, params.filename.c_str(), AVIO_FLAG_WRITE);
if (ret < 0) {
std::cerr << "Failed to open output file: " << ret << std::endl;
return false;
}
}
ret = avformat_write_header(format_ctx_, nullptr);
if (ret < 0) {
std::cerr << "Failed to write header: " << ret << std::endl;
return false;
}
initialized_ = true;
std::cout << "MP4 saver initialized successfully" << std::endl;
return true;
}
// Creates the video encoder (libx264 by default), its output stream, and
// copies the codec parameters into the stream.
// Fix vs. original: the av_dict_set/avcodec_open2 calls contained a
// mis-encoded "&param_dict" (garbled to an HTML entity), which does not
// compile; the dictionary is now also freed on exactly one path.
bool FFmpegMp4Saver::initVideoCodec(const MP4InitParams& params) {
    int ret = 0;
    const AVCodec* codec = avcodec_find_encoder_by_name(params.video_codec_name.c_str());
    if (!codec) {
        std::cerr << "Failed to find video codec: " << params.video_codec_name << std::endl;
        return false;
    }
    video_stream_ = avformat_new_stream(format_ctx_, codec);
    if (!video_stream_) {
        std::cerr << "Failed to create video stream" << std::endl;
        return false;
    }
    video_codec_ctx_ = avcodec_alloc_context3(codec);
    if (!video_codec_ctx_) {
        std::cerr << "Failed to allocate video codec context" << std::endl;
        return false;
    }
    video_codec_ctx_->width = params.video_width;
    video_codec_ctx_->height = params.video_height;
    // One tick per frame: pts values are plain frame indices.
    video_codec_ctx_->time_base = av_make_q(1, params.video_frame_rate);
    video_codec_ctx_->framerate = av_make_q(params.video_frame_rate, 1);
    video_codec_ctx_->pix_fmt = AV_PIX_FMT_YUV420P;
    video_codec_ctx_->bit_rate = params.video_bit_rate;
    video_codec_ctx_->gop_size = 50;
    video_codec_ctx_->max_b_frames = 0;  // no B-frames, so dts == pts downstream
    video_codec_ctx_->codec_id = codec->id;
    video_codec_ctx_->codec_type = AVMEDIA_TYPE_VIDEO;
    video_codec_ctx_->qmin = 10;
    video_codec_ctx_->qmax = 51;
    // MP4 requires extradata in the container header rather than in-band.
    if (format_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {
        video_codec_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
    AVDictionary* param_dict = nullptr;
    av_dict_set(&param_dict, "preset", "medium", 0);
    av_dict_set(&param_dict, "tune", "zerolatency", 0);
    ret = avcodec_open2(video_codec_ctx_, codec, &param_dict);
    av_dict_free(&param_dict);
    if (ret < 0) {
        std::cerr << "Failed to open video codec: " << ret << std::endl;
        return false;
    }
    ret = avcodec_parameters_from_context(video_stream_->codecpar, video_codec_ctx_);
    if (ret < 0) {
        std::cerr << "Failed to copy codec parameters to stream: " << ret << std::endl;
        return false;
    }
    return true;
}
// Creates the audio encoder (AAC by default), its output stream, and the
// resampler that converts interleaved S16 input into the encoder's format.
// Fixes vs. original: the av_channel_layout_copy return value is checked,
// the temporary layouts are uninitialized, and the redundant swr_alloc()
// before swr_alloc_set_opts2() (which allocates when *ps is null) is gone.
bool FFmpegMp4Saver::initAudioCodec(const MP4InitParams& params) {
    int ret = 0;
    const AVCodec* codec = avcodec_find_encoder_by_name(params.audio_codec_name.c_str());
    if (!codec) {
        std::cerr << "Failed to find audio codec: " << params.audio_codec_name << std::endl;
        return false;
    }
    audio_stream_ = avformat_new_stream(format_ctx_, codec);
    if (!audio_stream_) {
        std::cerr << "Failed to create audio stream" << std::endl;
        return false;
    }
    audio_codec_ctx_ = avcodec_alloc_context3(codec);
    if (!audio_codec_ctx_) {
        std::cerr << "Failed to allocate audio codec context" << std::endl;
        return false;
    }
    AVChannelLayout layout = {};
    av_channel_layout_default(&layout, params.audio_channels);
    ret = av_channel_layout_copy(&audio_codec_ctx_->ch_layout, &layout);
    av_channel_layout_uninit(&layout);
    if (ret < 0) {
        std::cerr << "Failed to set channel layout: " << ret << std::endl;
        return false;
    }
    audio_codec_ctx_->sample_rate = params.audio_sample_rate;
    audio_codec_ctx_->sample_fmt = AV_SAMPLE_FMT_FLTP;  // planar float, as used by the AAC encoder
    audio_codec_ctx_->bit_rate = params.audio_bit_rate;
    audio_codec_ctx_->time_base = av_make_q(1, params.audio_sample_rate);
    if (format_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {
        audio_codec_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
    ret = avcodec_open2(audio_codec_ctx_, codec, nullptr);
    if (ret < 0) {
        std::cerr << "Failed to open audio codec: " << ret << std::endl;
        return false;
    }
    ret = avcodec_parameters_from_context(audio_stream_->codecpar, audio_codec_ctx_);
    if (ret < 0) {
        std::cerr << "Failed to copy codec parameters to stream: " << ret << std::endl;
        return false;
    }
    // Resampler: interleaved S16 at the input rate -> encoder format.
    // swr_ctx_ is null here, so swr_alloc_set_opts2 allocates the context.
    AVChannelLayout input_layout = {};
    av_channel_layout_default(&input_layout, params.audio_channels);
    ret = swr_alloc_set_opts2(&swr_ctx_,
        &audio_codec_ctx_->ch_layout, audio_codec_ctx_->sample_fmt, audio_codec_ctx_->sample_rate,
        &input_layout, AV_SAMPLE_FMT_S16, params.audio_sample_rate,
        0, nullptr);
    av_channel_layout_uninit(&input_layout);
    if (ret < 0 || !swr_ctx_) {
        std::cerr << "Failed to set resampler options: " << ret << std::endl;
        return false;
    }
    ret = swr_init(swr_ctx_);
    if (ret < 0) {
        std::cerr << "Failed to initialize audio resampler: " << ret << std::endl;
        return false;
    }
    return true;
}
// Encodes one YUV420p frame.  yuv_data must contain width*height luma bytes
// followed by the two quarter-size chroma planes; pts is in codec time-base
// ticks (i.e. the frame index).
// Fix vs. original: planes are copied row-by-row honouring frame->linesize.
// av_frame_get_buffer() may pad each row for alignment, in which case the
// original flat memcpy of width*height bytes produced a sheared picture.
bool FFmpegMp4Saver::writeYUVFrame(const uint8_t* yuv_data, int64_t pts) {
    if (!video_initialized_ || !video_codec_ctx_) {
        std::cerr << "Video encoder not initialized" << std::endl;
        return false;
    }
    AVFrame* frame = av_frame_alloc();
    if (!frame) {
        std::cerr << "Failed to allocate video frame" << std::endl;
        return false;
    }
    frame->width = video_width_;
    frame->height = video_height_;
    frame->format = AV_PIX_FMT_YUV420P;
    int ret = av_frame_get_buffer(frame, 32);
    if (ret < 0) {
        std::cerr << "Failed to allocate frame buffer: " << ret << std::endl;
        av_frame_free(&frame);
        return false;
    }
    const int chroma_w = video_width_ / 2;
    const int chroma_h = video_height_ / 2;
    const uint8_t* src_y = yuv_data;
    const uint8_t* src_u = src_y + static_cast<size_t>(video_width_) * video_height_;
    const uint8_t* src_v = src_u + static_cast<size_t>(chroma_w) * chroma_h;
    for (int row = 0; row < video_height_; ++row) {
        memcpy(frame->data[0] + static_cast<size_t>(row) * frame->linesize[0],
               src_y + static_cast<size_t>(row) * video_width_, video_width_);
    }
    for (int row = 0; row < chroma_h; ++row) {
        memcpy(frame->data[1] + static_cast<size_t>(row) * frame->linesize[1],
               src_u + static_cast<size_t>(row) * chroma_w, chroma_w);
        memcpy(frame->data[2] + static_cast<size_t>(row) * frame->linesize[2],
               src_v + static_cast<size_t>(row) * chroma_w, chroma_w);
    }
    frame->pts = pts;
    bool result = writeEncodedFrame(frame, video_stream_, video_codec_ctx_);
    av_frame_free(&frame);
    return result;
}
bool FFmpegMp4Saver::writeESFrame(const uint8_t* es_data, int size, int64_t pts, bool is_video, bool is_key_frame) {
if (!format_ctx_) {
std::cerr << "Format context not initialized" << std::endl;
return false;
}
AVStream* stream = is_video ? video_stream_ : audio_stream_;
if (!stream) {
std::cerr << (is_video ? "Video" : "Audio") << " stream not initialized" << std::endl;
return false;
}
AVPacket* packet = av_packet_alloc();
if (!packet) {
std::cerr << "Failed to allocate packet" << std::endl;
return false;
}
packet->data = (uint8_t*)es_data;
packet->size = size;
packet->pts = pts;
packet->dts = pts;
packet->stream_index = stream->index;
if (is_key_frame) {
packet->flags |= AV_PKT_FLAG_KEY;
}
else {
packet->flags &= ~AV_PKT_FLAG_KEY;
}
int ret = av_interleaved_write_frame(format_ctx_, packet);
av_packet_free(&packet);
if (ret < 0) {
std::cerr << "Failed to write ES frame: " << ret << std::endl;
return false;
}
return true;
}
// Appends `samples` interleaved signed-16-bit samples to the internal PCM
// buffer and, once at least one full encoder frame has accumulated, encodes
// everything that fits.  Thread-safe with respect to other audio calls.
// NOTE(review): the `pts` argument is unused — audio timestamps are derived
// from the running sample counter inside encodeBufferedAudio().
bool FFmpegMp4Saver::writePCMFrame(const uint8_t* pcm_data, int samples, int64_t pts) {
    if (!audio_initialized_ || !audio_codec_ctx_ || !swr_ctx_) {
        std::cerr << "Audio encoder not initialized" << std::endl;
        return false;
    }
    std::lock_guard<std::mutex> lock(audio_mutex_);
    const size_t incoming_bytes = samples * audio_channels_ * 2;  // 2 bytes per S16 sample
    const size_t previous_size = pcm_buffer_.size();
    pcm_buffer_.resize(previous_size + incoming_bytes);
    memcpy(pcm_buffer_.data() + previous_size, pcm_data, incoming_bytes);
    const int encoder_frame_samples = audio_codec_ctx_->frame_size;
    if (encoder_frame_samples > 0) {
        const size_t buffered = pcm_buffer_.size() / (audio_channels_ * 2);
        if (buffered >= static_cast<size_t>(encoder_frame_samples)) {
            return encodeBufferedAudio();
        }
    }
    return true;
}
bool FFmpegMp4Saver::encodeBufferedAudio() {
if (!audio_codec_ctx_ || pcm_buffer_.empty()) {
return false;
}
int frame_size = audio_codec_ctx_->frame_size;
if (frame_size <= 0) {
return false;
}
int bytes_per_sample = audio_channels_ * 2;
int max_frames = pcm_buffer_.size() / (frame_size * bytes_per_sample);
bool all_success = true;
for (int i = 0; i < max_frames; i++) {
AVFrame* input_frame = av_frame_alloc();
if (!input_frame) {
return false;
}
input_frame->nb_samples = frame_size;
input_frame->format = AV_SAMPLE_FMT_S16;
AVChannelLayout input_layout = {};
av_channel_layout_default(&input_layout, audio_channels_);
av_channel_layout_copy(&input_frame->ch_layout, &input_layout);
int ret = av_frame_get_buffer(input_frame, 0);
if (ret < 0) {
av_frame_free(&input_frame);
return false;
}
size_t data_offset = i * frame_size * bytes_per_sample;
memcpy(input_frame->data[0], pcm_buffer_.data() + data_offset, frame_size * bytes_per_sample);
static FILE* fp_16 = NULL;
if (fp_16 == NULL)
{
fp_16 = fopen("./testRecord_int16.pcm", "wb");
}
if (fp_16)
{
fwrite(pcm_buffer_.data() + data_offset, 1, frame_size * bytes_per_sample, fp_16);
}
AVFrame* resampled_frame = av_frame_alloc();
if (!resampled_frame) {
av_frame_free(&input_frame);
return false;
}
resampled_frame->sample_rate = audio_codec_ctx_->sample_rate;
resampled_frame->nb_samples = frame_size;
resampled_frame->format = audio_codec_ctx_->sample_fmt;
av_channel_layout_copy(&resampled_frame->ch_layout, &audio_codec_ctx_->ch_layout);
ret = av_frame_get_buffer(resampled_frame, 0);
if (ret < 0) {
av_frame_free(&input_frame);
av_frame_free(&resampled_frame);
return false;
}
ret = swr_convert(swr_ctx_,
resampled_frame->data, resampled_frame->nb_samples,
(const uint8_t**)input_frame->data, input_frame->nb_samples);
if (ret < 0) {
av_frame_free(&input_frame);
av_frame_free(&resampled_frame);
return false;
}
resampled_frame->pts = audio_pts_;
audio_pts_ += frame_size;
static FILE* fp_float = NULL;
if (fp_float == NULL)
{
fp_float = fopen("./testRecord_float.pcm", "wb");
}
if (fp_float)
{
fwrite(resampled_frame->data[0], 1, resampled_frame->linesize[0], fp_float);
}
bool frame_success = writeEncodedFrame(resampled_frame, audio_stream_, audio_codec_ctx_);
av_frame_free(&input_frame);
av_frame_free(&resampled_frame);
if (!frame_success) {
all_success = false;
break;
}
}
if (max_frames > 0) {
size_t encoded_bytes = max_frames * frame_size * bytes_per_sample;
pcm_buffer_.erase(pcm_buffer_.begin(), pcm_buffer_.begin() + encoded_bytes);
}
return all_success;
}
// Sends `frame` to the encoder (nullptr enters flush mode), drains every
// packet it produces, rescales timestamps from codec to stream time base and
// muxes them.  Returns false on any encoder or muxer error.
// Fix vs. original: removed the per-packet debug printf pair, which both
// spammed stdout on every packet and used "%lld" for int64_t (non-portable;
// would need PRId64).
bool FFmpegMp4Saver::writeEncodedFrame(AVFrame* frame, AVStream* stream, AVCodecContext* codec_ctx) {
    if (!stream || !codec_ctx) {
        return false;
    }
    AVPacket* packet = av_packet_alloc();
    if (!packet) {
        return false;
    }
    int ret = avcodec_send_frame(codec_ctx, frame);
    if (ret < 0) {
        std::cerr << "Failed to send frame to encoder: " << ret << std::endl;
        av_packet_free(&packet);
        return false;
    }
    while (ret >= 0) {
        ret = avcodec_receive_packet(codec_ctx, packet);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;  // encoder needs more input / is fully drained — not an error
        }
        else if (ret < 0) {
            std::cerr << "Failed to receive packet from encoder: " << ret << std::endl;
            av_packet_free(&packet);
            return false;
        }
        packet->stream_index = stream->index;
        av_packet_rescale_ts(packet, codec_ctx->time_base, stream->time_base);
        ret = av_interleaved_write_frame(format_ctx_, packet);
        av_packet_unref(packet);
        if (ret < 0) {
            std::cerr << "Failed to write packet: " << ret << std::endl;
            av_packet_free(&packet);
            return false;
        }
    }
    av_packet_free(&packet);
    return true;
}
bool FFmpegMp4Saver::finish() {
if (!format_ctx_) {
return false;
}
if (audio_initialized_) {
std::lock_guard<std::mutex> lock(audio_mutex_);
if (!pcm_buffer_.empty()) {
encodeBufferedAudio();
}
}
if (video_codec_ctx_) {
writeEncodedFrame(nullptr, video_stream_, video_codec_ctx_);
}
if (audio_codec_ctx_) {
writeEncodedFrame(nullptr, audio_stream_, audio_codec_ctx_);
}
int ret = av_write_trailer(format_ctx_);
if (ret < 0) {
std::cerr << "Failed to write trailer: " << ret << std::endl;
return false;
}
cleanup();
std::cout << "MP4 file saved successfully" << std::endl;
return true;
}
void FFmpegMp4Saver::cleanup() {
if (video_codec_ctx_) {
avcodec_free_context(&video_codec_ctx_);
}
if (audio_codec_ctx_) {
avcodec_free_context(&audio_codec_ctx_);
}
if (format_ctx_) {
if (!(format_ctx_->oformat->flags & AVFMT_NOFILE)) {
avio_closep(&format_ctx_->pb);
}
avformat_free_context(format_ctx_);
format_ctx_ = nullptr;
}
if (sws_ctx_) {
sws_freeContext(sws_ctx_);
sws_ctx_ = nullptr;
}
if (swr_ctx_) {
swr_free(&swr_ctx_);
}
std::lock_guard<std::mutex> lock(audio_mutex_);
pcm_buffer_.clear();
}
#include <iostream>
#include <vector>
#include <thread>
#include <chrono>
#include <cmath>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
// Generates `duration_ms` of interleaved 16-bit little-endian PCM containing
// a 440 Hz sine at half amplitude, identical across all channels.
static std::vector<uint8_t> generateTestPCM(int duration_ms, int sample_rate, int channels) {
    constexpr double kPi = 3.14159265358979323846;
    constexpr double kToneHz = 440.0;  // concert-pitch A4
    constexpr double kGain = 0.5;
    const int total_samples = duration_ms * sample_rate / 1000;
    std::vector<uint8_t> out(total_samples * channels * 2);  // 2 bytes per S16 sample
    for (int s = 0; s < total_samples; ++s) {
        const double t = static_cast<double>(s) / sample_rate;
        const int16_t value = static_cast<int16_t>(kGain * 32767.0 * sin(2.0 * kPi * kToneHz * t));
        // Duplicate the sample into every channel slot (interleaved layout).
        for (int ch = 0; ch < channels; ++ch) {
            const int pos = (s * channels + ch) * 2;
            out[pos] = value & 0xFF;             // low byte first (little endian)
            out[pos + 1] = (value >> 8) & 0xFF;  // then high byte
        }
    }
    return out;
}
// Generates one YUV420p test frame: mid-grey background with a 50x50 white
// square whose position advances with frame_index.
// Fix vs. original: when the square does not fit (width or height <= 50) the
// original computed `x % non_positive`, which is undefined/negative; the
// square is now clamped to the top-left corner and clipped to the frame.
// The whole buffer is value-initialized to 128 (grey luma + neutral chroma),
// replacing the two fill loops, and the signed/unsigned loop compare is gone.
static std::vector<uint8_t> generateTestYUV(int width, int height, int frame_index) {
    const size_t luma_size = static_cast<size_t>(width) * height;
    // Y = 128 (grey) everywhere; U/V = 128 is neutral chroma, so one fill does it all.
    std::vector<uint8_t> yuv_data(luma_size * 3 / 2, 128);
    const int block_size = 50;
    const int range_x = width - block_size;
    const int range_y = height - block_size;
    // Animate only when there is room to move; otherwise pin to the corner.
    const int block_x = range_x > 0 ? (frame_index * 10) % range_x : 0;
    const int block_y = range_y > 0 ? (frame_index * 5) % range_y : 0;
    const int x_end = (block_x + block_size < width) ? block_x + block_size : width;
    const int y_end = (block_y + block_size < height) ? block_y + block_size : height;
    for (int y = block_y; y < y_end; ++y) {
        for (int x = block_x; x < x_end; ++x) {
            yuv_data[static_cast<size_t>(y) * width + x] = 255;  // white square
        }
    }
    return yuv_data;
}
#pragma comment(lib, "SDL2.lib")
#undef main
// Demo: records 100 synthetic video frames with accompanying sine-wave audio
// into test_output.mp4.
int main() {
std::cout << "Testing FFmpeg MP4 Saver with YUV and PCM input..." << std::endl;
FFmpegMp4Saver saver;
// Configure recording parameters: 640x480 @ 25 fps video, 8 kHz mono audio.
MP4InitParams params("test_output.mp4");
params.video_width = 640;
params.video_height = 480;
params.video_frame_rate = 25;
params.enable_audio = true;
params.audio_sample_rate = 8000;
params.audio_channels = 1;
if (!saver.initialize(params)) {
std::cerr << "Failed to initialize MP4 saver" << std::endl;
return -1;
}
// Test data buffers, refilled every iteration.
std::vector<uint8_t> yuv_data;
std::vector<uint8_t> pcm_data;
int video_frame_count = 100; // 4 seconds of video at 25 fps
int audio_frame_size = 882; // 20 ms at 44100 Hz — NOTE(review): unused below
std::cout << "Starting to write frames..." << std::endl;
for (int i = 0; i < video_frame_count; i++) {
// Generate one synthetic YUV frame (moving white square).
yuv_data = generateTestYUV(640, 480, i);
// Write the video frame; PTS is in frame units (one tick per frame, see
// the 1/frame_rate codec time base).
int64_t video_pts = i; // one PTS tick per frame
saver.writeYUVFrame(yuv_data.data(), video_pts);
// 40 ms of mono PCM at 8 kHz = 320 samples, matching one frame period.
pcm_data = generateTestPCM(40, params.audio_sample_rate, params.audio_channels);
// The pts argument (0) is ignored by writePCMFrame — audio timestamps are
// derived from the saver's internal sample counter.
saver.writePCMFrame(pcm_data.data(), 320, 0);
// Audio PTS would be sample-rate based if passed explicitly:
//int64_t audio_pts = i * (audio_sample_rate / video_frame_rate);
//std::cout << "Frame " << i << " - Video PTS: " << video_pts
//<< ", Audio PTS: " << audio_pts
//<< ", Audio buffer size: " << saver.getAudioBufferSize() << " bytes" << std::endl;
// Simulate real-time pacing (one frame every 40 ms).
std::this_thread::sleep_for(std::chrono::milliseconds(40));
}
if (!saver.finish()) {
std::cerr << "Failed to finish writing" << std::endl;
return -1;
}
std::cout << "Test completed successfully!" << std::endl;
std::cout << "Output file: test_output.mp4" << std::endl;
return 0;
}
// Notes:
// 1. Built against the FFmpeg 8.0 API.
// 2. Accepts YUV420 video input.
// 3. Accepts 16-bit integer PCM audio input.
// 4. Accepts raw H.264/H.265 elementary-stream (ES) input.
// 5. Includes a self-contained test demo.
// Link against the FFmpeg shared libraries to build and run.
// The program produces an MP4 file.