// C++ — FFmpeg (8.x API) MP4 recorder with built-in test demo.
#include <chrono>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <mutex>
#include <random>
#include <string>
#include <thread>
#include <vector>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/channel_layout.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
}
// Configuration for one recording session.  Defaults describe a 640x480
// 25 fps H.264 stream with 44.1 kHz stereo AAC audio.
struct MP4InitParams {
// Output path of the MP4 file to create.
std::string filename;
int video_width = 640;
int video_height = 480;
int video_frame_rate = 25;
// Target video bitrate in bits per second.
int video_bit_rate = 1000000;
// Encoder name resolved via avcodec_find_encoder_by_name().
std::string video_codec_name = "libx264";
bool enable_video = true;
int audio_sample_rate = 44100;
int audio_channels = 2;
// Target audio bitrate in bits per second.
int audio_bit_rate = 128000;
// Encoder name resolved via avcodec_find_encoder_by_name().
std::string audio_codec_name = "aac";
bool enable_audio = true;
MP4InitParams(const std::string& name) : filename(name) {}
};
// Muxes raw YUV420p video and interleaved signed-16-bit PCM audio (or
// pre-encoded elementary streams) into an MP4 file using FFmpeg.
// Only pcm_buffer_ and the audio-encoding path are protected by
// audio_mutex_; the remaining members are not synchronized.
class FFmpegMp4Saver {
public:
FFmpegMp4Saver();
~FFmpegMp4Saver();
// Creates encoders/streams, opens the file and writes the container header.
bool initialize(const MP4InitParams& params);
// Encodes one YUV420p frame; pts is in codec time-base ticks (1/frame_rate).
bool writeYUVFrame(const uint8_t* yuv_data, int64_t pts);
// Muxes an already-encoded sample directly; pts must be in the stream time base.
bool writeESFrame(const uint8_t* es_data, int size, int64_t pts, bool is_video, bool is_key_frame = true);
// Buffers interleaved S16 PCM.  NOTE(review): the pts argument is ignored —
// audio timestamps come from a running sample counter (audio_pts_).
bool writePCMFrame(const uint8_t* pcm_data, int samples, int64_t pts);
// Flushes both encoders, writes the MP4 trailer and releases resources.
bool finish();
bool isInitialized() const { return initialized_; }
bool isVideoEnabled() const { return video_initialized_; }
bool isAudioEnabled() const { return audio_initialized_; }
// Bytes of PCM currently buffered.  NOTE(review): narrows size_t to int.
int getAudioBufferSize() const { return pcm_buffer_.size(); }
private:
bool initVideoCodec(const MP4InitParams& params);
bool initAudioCodec(const MP4InitParams& params);
// Encodes every complete encoder frame in pcm_buffer_; caller holds audio_mutex_.
bool encodeBufferedAudio();
// Sends `frame` (nullptr = flush) to the encoder and muxes all output packets.
bool writeEncodedFrame(AVFrame* frame, AVStream* stream, AVCodecContext* codec_ctx);
void cleanup();
AVFormatContext* format_ctx_;
AVCodecContext* video_codec_ctx_;
AVCodecContext* audio_codec_ctx_;
AVStream* video_stream_;
AVStream* audio_stream_;
// Never allocated anywhere in this file; only freed in cleanup().
SwsContext* sws_ctx_;
// Interleaved S16 -> encoder sample-format resampler.
SwrContext* swr_ctx_;
int video_width_;
int video_height_;
int video_frame_rate_;
int audio_sample_rate_;
int audio_channels_;
bool initialized_;
bool video_initialized_;
bool audio_initialized_;
// video_pts_ is set in the constructor but never used afterwards.
int64_t video_pts_;
// Running count of audio samples already handed to the encoder.
int64_t audio_pts_;
// Pending interleaved S16 samples awaiting a full encoder frame.
std::vector<uint8_t> pcm_buffer_;
// Guards pcm_buffer_ and the audio-encode path.
std::mutex audio_mutex_;
};
// All FFmpeg handles start null and all state flags false; the real setup is
// deferred to initialize().  Initializer order matches declaration order.
FFmpegMp4Saver::FFmpegMp4Saver()
    : format_ctx_(nullptr), video_codec_ctx_(nullptr), audio_codec_ctx_(nullptr),
      video_stream_(nullptr), audio_stream_(nullptr),
      sws_ctx_(nullptr), swr_ctx_(nullptr),
      video_width_(0), video_height_(0), video_frame_rate_(25),
      audio_sample_rate_(44100), audio_channels_(2),
      initialized_(false), video_initialized_(false), audio_initialized_(false),
      video_pts_(0), audio_pts_(0) {}
// Flushes and finalizes the file if the caller forgot to call finish().
FFmpegMp4Saver::~FFmpegMp4Saver() {
finish();
}
bool FFmpegMp4Saver::initialize(const MP4InitParams& params) {
cleanup();
video_width_ = params.video_width;
video_height_ = params.video_height;
video_frame_rate_ = params.video_frame_rate;
audio_sample_rate_ = params.audio_sample_rate;
audio_channels_ = params.audio_channels;
int ret = avformat_alloc_output_context2(&format_ctx_, nullptr, nullptr, params.filename.c_str());
if (ret < 0 || !format_ctx_) {
std::cerr << "Failed to allocate output context: " << ret << std::endl;
return false;
}
if (params.enable_video) {
if (!initVideoCodec(params)) {
return false;
}
video_initialized_ = true;
}
if (params.enable_audio) {
if (!video_initialized_) {
std::cerr << "Video must be enabled when audio is enabled" << std::endl;
return false;
}
if (!initAudioCodec(params)) {
return false;
}
audio_initialized_ = true;
}
if (!(format_ctx_->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&format_ctx_->pb, params.filename.c_str(), AVIO_FLAG_WRITE);
if (ret < 0) {
std::cerr << "Failed to open output file: " << ret << std::endl;
return false;
}
}
ret = avformat_write_header(format_ctx_, nullptr);
if (ret < 0) {
std::cerr << "Failed to write header: " << ret << std::endl;
return false;
}
initialized_ = true;
std::cout << "MP4 saver initialized successfully" << std::endl;
return true;
}
// Creates the video encoder (libx264 by default), its output stream, and
// copies the codec parameters into the stream.
// Fix vs. original: the av_dict_set/avcodec_open2 calls contained a
// mis-encoded "&param_dict" (garbled to an HTML entity), which does not
// compile; the dictionary is now also freed on exactly one path.
bool FFmpegMp4Saver::initVideoCodec(const MP4InitParams& params) {
    int ret = 0;
    const AVCodec* codec = avcodec_find_encoder_by_name(params.video_codec_name.c_str());
    if (!codec) {
        std::cerr << "Failed to find video codec: " << params.video_codec_name << std::endl;
        return false;
    }
    video_stream_ = avformat_new_stream(format_ctx_, codec);
    if (!video_stream_) {
        std::cerr << "Failed to create video stream" << std::endl;
        return false;
    }
    video_codec_ctx_ = avcodec_alloc_context3(codec);
    if (!video_codec_ctx_) {
        std::cerr << "Failed to allocate video codec context" << std::endl;
        return false;
    }
    video_codec_ctx_->width = params.video_width;
    video_codec_ctx_->height = params.video_height;
    // One tick per frame: pts values are plain frame indices.
    video_codec_ctx_->time_base = av_make_q(1, params.video_frame_rate);
    video_codec_ctx_->framerate = av_make_q(params.video_frame_rate, 1);
    video_codec_ctx_->pix_fmt = AV_PIX_FMT_YUV420P;
    video_codec_ctx_->bit_rate = params.video_bit_rate;
    video_codec_ctx_->gop_size = 50;
    video_codec_ctx_->max_b_frames = 0;  // no B-frames, so dts == pts downstream
    video_codec_ctx_->codec_id = codec->id;
    video_codec_ctx_->codec_type = AVMEDIA_TYPE_VIDEO;
    video_codec_ctx_->qmin = 10;
    video_codec_ctx_->qmax = 51;
    // MP4 requires extradata in the container header rather than in-band.
    if (format_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {
        video_codec_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
    AVDictionary* param_dict = nullptr;
    av_dict_set(&param_dict, "preset", "medium", 0);
    av_dict_set(&param_dict, "tune", "zerolatency", 0);
    ret = avcodec_open2(video_codec_ctx_, codec, &param_dict);
    av_dict_free(&param_dict);
    if (ret < 0) {
        std::cerr << "Failed to open video codec: " << ret << std::endl;
        return false;
    }
    ret = avcodec_parameters_from_context(video_stream_->codecpar, video_codec_ctx_);
    if (ret < 0) {
        std::cerr << "Failed to copy codec parameters to stream: " << ret << std::endl;
        return false;
    }
    return true;
}
// Creates the audio encoder (AAC by default), its output stream, and the
// resampler that converts interleaved S16 input into the encoder's format.
// Fixes vs. original: the av_channel_layout_copy return value is checked,
// the temporary layouts are uninitialized, and the redundant swr_alloc()
// before swr_alloc_set_opts2() (which allocates when *ps is null) is gone.
bool FFmpegMp4Saver::initAudioCodec(const MP4InitParams& params) {
    int ret = 0;
    const AVCodec* codec = avcodec_find_encoder_by_name(params.audio_codec_name.c_str());
    if (!codec) {
        std::cerr << "Failed to find audio codec: " << params.audio_codec_name << std::endl;
        return false;
    }
    audio_stream_ = avformat_new_stream(format_ctx_, codec);
    if (!audio_stream_) {
        std::cerr << "Failed to create audio stream" << std::endl;
        return false;
    }
    audio_codec_ctx_ = avcodec_alloc_context3(codec);
    if (!audio_codec_ctx_) {
        std::cerr << "Failed to allocate audio codec context" << std::endl;
        return false;
    }
    AVChannelLayout layout = {};
    av_channel_layout_default(&layout, params.audio_channels);
    ret = av_channel_layout_copy(&audio_codec_ctx_->ch_layout, &layout);
    av_channel_layout_uninit(&layout);
    if (ret < 0) {
        std::cerr << "Failed to set channel layout: " << ret << std::endl;
        return false;
    }
    audio_codec_ctx_->sample_rate = params.audio_sample_rate;
    audio_codec_ctx_->sample_fmt = AV_SAMPLE_FMT_FLTP;  // planar float, as used by the AAC encoder
    audio_codec_ctx_->bit_rate = params.audio_bit_rate;
    audio_codec_ctx_->time_base = av_make_q(1, params.audio_sample_rate);
    if (format_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {
        audio_codec_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
    ret = avcodec_open2(audio_codec_ctx_, codec, nullptr);
    if (ret < 0) {
        std::cerr << "Failed to open audio codec: " << ret << std::endl;
        return false;
    }
    ret = avcodec_parameters_from_context(audio_stream_->codecpar, audio_codec_ctx_);
    if (ret < 0) {
        std::cerr << "Failed to copy codec parameters to stream: " << ret << std::endl;
        return false;
    }
    // Resampler: interleaved S16 at the input rate -> encoder format.
    // swr_ctx_ is null here, so swr_alloc_set_opts2 allocates the context.
    AVChannelLayout input_layout = {};
    av_channel_layout_default(&input_layout, params.audio_channels);
    ret = swr_alloc_set_opts2(&swr_ctx_,
        &audio_codec_ctx_->ch_layout, audio_codec_ctx_->sample_fmt, audio_codec_ctx_->sample_rate,
        &input_layout, AV_SAMPLE_FMT_S16, params.audio_sample_rate,
        0, nullptr);
    av_channel_layout_uninit(&input_layout);
    if (ret < 0 || !swr_ctx_) {
        std::cerr << "Failed to set resampler options: " << ret << std::endl;
        return false;
    }
    ret = swr_init(swr_ctx_);
    if (ret < 0) {
        std::cerr << "Failed to initialize audio resampler: " << ret << std::endl;
        return false;
    }
    return true;
}
// Encodes one YUV420p frame.  yuv_data must contain width*height luma bytes
// followed by the two quarter-size chroma planes; pts is in codec time-base
// ticks (i.e. the frame index).
// Fix vs. original: planes are copied row-by-row honouring frame->linesize.
// av_frame_get_buffer() may pad each row for alignment, in which case the
// original flat memcpy of width*height bytes produced a sheared picture.
bool FFmpegMp4Saver::writeYUVFrame(const uint8_t* yuv_data, int64_t pts) {
    if (!video_initialized_ || !video_codec_ctx_) {
        std::cerr << "Video encoder not initialized" << std::endl;
        return false;
    }
    AVFrame* frame = av_frame_alloc();
    if (!frame) {
        std::cerr << "Failed to allocate video frame" << std::endl;
        return false;
    }
    frame->width = video_width_;
    frame->height = video_height_;
    frame->format = AV_PIX_FMT_YUV420P;
    int ret = av_frame_get_buffer(frame, 32);
    if (ret < 0) {
        std::cerr << "Failed to allocate frame buffer: " << ret << std::endl;
        av_frame_free(&frame);
        return false;
    }
    const int chroma_w = video_width_ / 2;
    const int chroma_h = video_height_ / 2;
    const uint8_t* src_y = yuv_data;
    const uint8_t* src_u = src_y + static_cast<size_t>(video_width_) * video_height_;
    const uint8_t* src_v = src_u + static_cast<size_t>(chroma_w) * chroma_h;
    for (int row = 0; row < video_height_; ++row) {
        memcpy(frame->data[0] + static_cast<size_t>(row) * frame->linesize[0],
               src_y + static_cast<size_t>(row) * video_width_, video_width_);
    }
    for (int row = 0; row < chroma_h; ++row) {
        memcpy(frame->data[1] + static_cast<size_t>(row) * frame->linesize[1],
               src_u + static_cast<size_t>(row) * chroma_w, chroma_w);
        memcpy(frame->data[2] + static_cast<size_t>(row) * frame->linesize[2],
               src_v + static_cast<size_t>(row) * chroma_w, chroma_w);
    }
    frame->pts = pts;
    bool result = writeEncodedFrame(frame, video_stream_, video_codec_ctx_);
    av_frame_free(&frame);
    return result;
}
bool FFmpegMp4Saver::writeESFrame(const uint8_t* es_data, int size, int64_t pts, bool is_video, bool is_key_frame) {
if (!format_ctx_) {
std::cerr << "Format context not initialized" << std::endl;
return false;
}
AVStream* stream = is_video ? video_stream_ : audio_stream_;
if (!stream) {
std::cerr << (is_video ? "Video" : "Audio") << " stream not initialized" << std::endl;
return false;
}
AVPacket* packet = av_packet_alloc();
if (!packet) {
std::cerr << "Failed to allocate packet" << std::endl;
return false;
}
packet->data = (uint8_t*)es_data;
packet->size = size;
packet->pts = pts;
packet->dts = pts;
packet->stream_index = stream->index;
if (is_key_frame) {
packet->flags |= AV_PKT_FLAG_KEY;
}
else {
packet->flags &= ~AV_PKT_FLAG_KEY;
}
int ret = av_interleaved_write_frame(format_ctx_, packet);
av_packet_free(&packet);
if (ret < 0) {
std::cerr << "Failed to write ES frame: " << ret << std::endl;
return false;
}
return true;
}
// Appends `samples` interleaved signed-16-bit samples to the internal PCM
// buffer and, once at least one full encoder frame has accumulated, encodes
// everything that fits.  Thread-safe with respect to other audio calls.
// NOTE(review): the `pts` argument is unused — audio timestamps are derived
// from the running sample counter inside encodeBufferedAudio().
bool FFmpegMp4Saver::writePCMFrame(const uint8_t* pcm_data, int samples, int64_t pts) {
    if (!audio_initialized_ || !audio_codec_ctx_ || !swr_ctx_) {
        std::cerr << "Audio encoder not initialized" << std::endl;
        return false;
    }
    std::lock_guard<std::mutex> lock(audio_mutex_);
    const size_t incoming_bytes = samples * audio_channels_ * 2;  // 2 bytes per S16 sample
    const size_t previous_size = pcm_buffer_.size();
    pcm_buffer_.resize(previous_size + incoming_bytes);
    memcpy(pcm_buffer_.data() + previous_size, pcm_data, incoming_bytes);
    const int encoder_frame_samples = audio_codec_ctx_->frame_size;
    if (encoder_frame_samples > 0) {
        const size_t buffered = pcm_buffer_.size() / (audio_channels_ * 2);
        if (buffered >= static_cast<size_t>(encoder_frame_samples)) {
            return encodeBufferedAudio();
        }
    }
    return true;
}
bool FFmpegMp4Saver::encodeBufferedAudio() {
if (!audio_codec_ctx_ || pcm_buffer_.empty()) {
return false;
}
int frame_size = audio_codec_ctx_->frame_size;
if (frame_size <= 0) {
return false;
}
int bytes_per_sample = audio_channels_ * 2;
int max_frames = pcm_buffer_.size() / (frame_size * bytes_per_sample);
bool all_success = true;
for (int i = 0; i < max_frames; i++) {
AVFrame* input_frame = av_frame_alloc();
if (!input_frame) {
return false;
}
input_frame->nb_samples = frame_size;
input_frame->format = AV_SAMPLE_FMT_S16;
AVChannelLayout input_layout = {};
av_channel_layout_default(&input_layout, audio_channels_);
av_channel_layout_copy(&input_frame->ch_layout, &input_layout);
int ret = av_frame_get_buffer(input_frame, 0);
if (ret < 0) {
av_frame_free(&input_frame);
return false;
}
size_t data_offset = i * frame_size * bytes_per_sample;
memcpy(input_frame->data[0], pcm_buffer_.data() + data_offset, frame_size * bytes_per_sample);
static FILE* fp_16 = NULL;
if (fp_16 == NULL)
{
fp_16 = fopen("./testRecord_int16.pcm", "wb");
}
if (fp_16)
{
fwrite(pcm_buffer_.data() + data_offset, 1, frame_size * bytes_per_sample, fp_16);
}
AVFrame* resampled_frame = av_frame_alloc();
if (!resampled_frame) {
av_frame_free(&input_frame);
return false;
}
resampled_frame->sample_rate = audio_codec_ctx_->sample_rate;
resampled_frame->nb_samples = frame_size;
resampled_frame->format = audio_codec_ctx_->sample_fmt;
av_channel_layout_copy(&resampled_frame->ch_layout, &audio_codec_ctx_->ch_layout);
ret = av_frame_get_buffer(resampled_frame, 0);
if (ret < 0) {
av_frame_free(&input_frame);
av_frame_free(&resampled_frame);
return false;
}
ret = swr_convert(swr_ctx_,
resampled_frame->data, resampled_frame->nb_samples,
(const uint8_t**)input_frame->data, input_frame->nb_samples);
if (ret < 0) {
av_frame_free(&input_frame);
av_frame_free(&resampled_frame);
return false;
}
resampled_frame->pts = audio_pts_;
audio_pts_ += frame_size;
static FILE* fp_float = NULL;
if (fp_float == NULL)
{
fp_float = fopen("./testRecord_float.pcm", "wb");
}
if (fp_float)
{
fwrite(resampled_frame->data[0], 1, resampled_frame->linesize[0], fp_float);
}
bool frame_success = writeEncodedFrame(resampled_frame, audio_stream_, audio_codec_ctx_);
av_frame_free(&input_frame);
av_frame_free(&resampled_frame);
if (!frame_success) {
all_success = false;
break;
}
}
if (max_frames > 0) {
size_t encoded_bytes = max_frames * frame_size * bytes_per_sample;
pcm_buffer_.erase(pcm_buffer_.begin(), pcm_buffer_.begin() + encoded_bytes);
}
return all_success;
}
// Sends `frame` to the encoder (nullptr enters flush mode), drains every
// packet it produces, rescales timestamps from codec to stream time base and
// muxes them.  Returns false on any encoder or muxer error.
// Fix vs. original: removed the per-packet debug printf pair, which both
// spammed stdout on every packet and used "%lld" for int64_t (non-portable;
// would need PRId64).
bool FFmpegMp4Saver::writeEncodedFrame(AVFrame* frame, AVStream* stream, AVCodecContext* codec_ctx) {
    if (!stream || !codec_ctx) {
        return false;
    }
    AVPacket* packet = av_packet_alloc();
    if (!packet) {
        return false;
    }
    int ret = avcodec_send_frame(codec_ctx, frame);
    if (ret < 0) {
        std::cerr << "Failed to send frame to encoder: " << ret << std::endl;
        av_packet_free(&packet);
        return false;
    }
    while (ret >= 0) {
        ret = avcodec_receive_packet(codec_ctx, packet);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;  // encoder needs more input / is fully drained — not an error
        }
        else if (ret < 0) {
            std::cerr << "Failed to receive packet from encoder: " << ret << std::endl;
            av_packet_free(&packet);
            return false;
        }
        packet->stream_index = stream->index;
        av_packet_rescale_ts(packet, codec_ctx->time_base, stream->time_base);
        ret = av_interleaved_write_frame(format_ctx_, packet);
        av_packet_unref(packet);
        if (ret < 0) {
            std::cerr << "Failed to write packet: " << ret << std::endl;
            av_packet_free(&packet);
            return false;
        }
    }
    av_packet_free(&packet);
    return true;
}
bool FFmpegMp4Saver::finish() {
if (!format_ctx_) {
return false;
}
if (audio_initialized_) {
std::lock_guard<std::mutex> lock(audio_mutex_);
if (!pcm_buffer_.empty()) {
encodeBufferedAudio();
}
}
if (video_codec_ctx_) {
writeEncodedFrame(nullptr, video_stream_, video_codec_ctx_);
}
if (audio_codec_ctx_) {
writeEncodedFrame(nullptr, audio_stream_, audio_codec_ctx_);
}
int ret = av_write_trailer(format_ctx_);
if (ret < 0) {
std::cerr << "Failed to write trailer: " << ret << std::endl;
return false;
}
cleanup();
std::cout << "MP4 file saved successfully" << std::endl;
return true;
}
void FFmpegMp4Saver::cleanup() {
if (video_codec_ctx_) {
avcodec_free_context(&video_codec_ctx_);
}
if (audio_codec_ctx_) {
avcodec_free_context(&audio_codec_ctx_);
}
if (format_ctx_) {
if (!(format_ctx_->oformat->flags & AVFMT_NOFILE)) {
avio_closep(&format_ctx_->pb);
}
avformat_free_context(format_ctx_);
format_ctx_ = nullptr;
}
if (sws_ctx_) {
sws_freeContext(sws_ctx_);
sws_ctx_ = nullptr;
}
if (swr_ctx_) {
swr_free(&swr_ctx_);
}
std::lock_guard<std::mutex> lock(audio_mutex_);
pcm_buffer_.clear();
}
#include <iostream>
#include <vector>
#include <thread>
#include <chrono>
#include <cmath>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
// Generates `duration_ms` of interleaved 16-bit little-endian PCM containing
// a 440 Hz sine at half amplitude, identical across all channels.
static std::vector<uint8_t> generateTestPCM(int duration_ms, int sample_rate, int channels) {
    constexpr double kPi = 3.14159265358979323846;
    constexpr double kToneHz = 440.0;  // concert-pitch A4
    constexpr double kGain = 0.5;
    const int total_samples = duration_ms * sample_rate / 1000;
    std::vector<uint8_t> out(total_samples * channels * 2);  // 2 bytes per S16 sample
    for (int s = 0; s < total_samples; ++s) {
        const double t = static_cast<double>(s) / sample_rate;
        const int16_t value = static_cast<int16_t>(kGain * 32767.0 * sin(2.0 * kPi * kToneHz * t));
        // Duplicate the sample into every channel slot (interleaved layout).
        for (int ch = 0; ch < channels; ++ch) {
            const int pos = (s * channels + ch) * 2;
            out[pos] = value & 0xFF;             // low byte first (little endian)
            out[pos + 1] = (value >> 8) & 0xFF;  // then high byte
        }
    }
    return out;
}
// Generates one YUV420p test frame: mid-grey background with a 50x50 white
// square whose position advances with frame_index.
// Fix vs. original: when the square does not fit (width or height <= 50) the
// original computed `x % non_positive`, which is undefined/negative; the
// square is now clamped to the top-left corner and clipped to the frame.
// The whole buffer is value-initialized to 128 (grey luma + neutral chroma),
// replacing the two fill loops, and the signed/unsigned loop compare is gone.
static std::vector<uint8_t> generateTestYUV(int width, int height, int frame_index) {
    const size_t luma_size = static_cast<size_t>(width) * height;
    // Y = 128 (grey) everywhere; U/V = 128 is neutral chroma, so one fill does it all.
    std::vector<uint8_t> yuv_data(luma_size * 3 / 2, 128);
    const int block_size = 50;
    const int range_x = width - block_size;
    const int range_y = height - block_size;
    // Animate only when there is room to move; otherwise pin to the corner.
    const int block_x = range_x > 0 ? (frame_index * 10) % range_x : 0;
    const int block_y = range_y > 0 ? (frame_index * 5) % range_y : 0;
    const int x_end = (block_x + block_size < width) ? block_x + block_size : width;
    const int y_end = (block_y + block_size < height) ? block_y + block_size : height;
    for (int y = block_y; y < y_end; ++y) {
        for (int x = block_x; x < x_end; ++x) {
            yuv_data[static_cast<size_t>(y) * width + x] = 255;  // white square
        }
    }
    return yuv_data;
}
#pragma comment(lib, "SDL2.lib")
#undef main
// Demo: records 100 synthetic video frames with accompanying sine-wave audio
// into test_output.mp4.
int main() {
std::cout << "Testing FFmpeg MP4 Saver with YUV and PCM input..." << std::endl;
FFmpegMp4Saver saver;
// Configure recording parameters: 640x480 @ 25 fps video, 8 kHz mono audio.
MP4InitParams params("test_output.mp4");
params.video_width = 640;
params.video_height = 480;
params.video_frame_rate = 25;
params.enable_audio = true;
params.audio_sample_rate = 8000;
params.audio_channels = 1;
if (!saver.initialize(params)) {
std::cerr << "Failed to initialize MP4 saver" << std::endl;
return -1;
}
// Test data buffers, refilled every iteration.
std::vector<uint8_t> yuv_data;
std::vector<uint8_t> pcm_data;
int video_frame_count = 100; // 4 seconds of video at 25 fps
int audio_frame_size = 882; // 20 ms at 44100 Hz — NOTE(review): unused below
std::cout << "Starting to write frames..." << std::endl;
for (int i = 0; i < video_frame_count; i++) {
// Generate one synthetic YUV frame (moving white square).
yuv_data = generateTestYUV(640, 480, i);
// Write the video frame; PTS is in frame units (one tick per frame, see
// the 1/frame_rate codec time base).
int64_t video_pts = i; // one PTS tick per frame
saver.writeYUVFrame(yuv_data.data(), video_pts);
// 40 ms of mono PCM at 8 kHz = 320 samples, matching one frame period.
pcm_data = generateTestPCM(40, params.audio_sample_rate, params.audio_channels);
// The pts argument (0) is ignored by writePCMFrame — audio timestamps are
// derived from the saver's internal sample counter.
saver.writePCMFrame(pcm_data.data(), 320, 0);
// Audio PTS would be sample-rate based if passed explicitly:
//int64_t audio_pts = i * (audio_sample_rate / video_frame_rate);
//std::cout << "Frame " << i << " - Video PTS: " << video_pts
//<< ", Audio PTS: " << audio_pts
//<< ", Audio buffer size: " << saver.getAudioBufferSize() << " bytes" << std::endl;
// Simulate real-time pacing (one frame every 40 ms).
std::this_thread::sleep_for(std::chrono::milliseconds(40));
}
if (!saver.finish()) {
std::cerr << "Failed to finish writing" << std::endl;
return -1;
}
std::cout << "Test completed successfully!" << std::endl;
std::cout << "Output file: test_output.mp4" << std::endl;
return 0;
}
// Notes:
// 1. Built against the FFmpeg 8.0 API.
// 2. Accepts YUV420 video input.
// 3. Accepts 16-bit integer PCM audio input.
// 4. Accepts raw H.264/H.265 elementary-stream (ES) input.
// 5. Includes a self-contained test demo.
// Link against the FFmpeg shared libraries to build and run.
// The program produces an MP4 file.