PCM -> AAC
YUV -> H.264
AAC + H.264 -> MP4
MP4 提取测试文件
mp4 提取的视频裸数据
ffmpeg -i test.mp4 -vf "scale=1920:1080,fps=25" -pix_fmt yuv420p -an 25FPS_1920*1080_YUV420P.yuv
播放
ffplay -f rawvideo -pixel_format yuv420p -video_size 1920x1080 -framerate 25 25FPS_1920*1080_YUV420P.yuv
mp4 提取的音频裸数据
ffmpeg -i test.mp4 -vn -ar 44100 -ac 2 -sample_fmt s16 -f s16le 44KHZ_S16_2CH_PCM.raw
播放
ffplay -ar 44100 -ac 2 -f s16le 44KHZ_S16_2CH_PCM.raw
流程如下:
1、初始化控制器 - 打开输入文件 + 初始化编码器 + 确定帧间隔
2、分配 Audio、Video 的帧buffer
3、新建流
4、打开输出文件IO
5、写包 - header + packet + trailer
PCM -> AAC -- 编码器封装代码如下
AAC编码PCM采样格式必须是fltp,这是因为FFmpeg默认的AAC编码器不支持PCM数据直接以AV_SAMPLE_FMT_S16格式进行编码。AV_SAMPLE_FMT_S16格式的PCM数据是两个声道交替存储,每个样点占用2个字节,而FFmpeg的AAC编码器需要PCM数据以平面存储格式(AV_SAMPLE_FMT_FLTP)提供,其中样点为float类型,且每个声道单独存储。这种格式转换(重采样)是为了适应编码器的输入要求,确保音频数据能够被正确编码成AAC格式。此外,重采样的需求不仅限于编码过程,还包括播放设备和其他音频处理步骤的需要。例如,有些声音设备只能播放特定采样率和采样格式的音频数据,如果音频数据的格式与设备要求不匹配,就需要进行重采样以适应设备的播放要求
因此对重采样器代码封装:
cpp
#include "global.h"
using namespace std;
// Allocate an AVFrame configured for planar-float (FLTP) PCM with the given
// channel count and samples-per-channel; returns nullptr on failure.
AVFrame *AllocFltpPcmFrame(int channels, int nb_samples);
// Free a frame returned by AllocFltpPcmFrame (null-safe).
void FreePcmFrame(AVFrame *frame);
// Wraps a libswresample context that converts interleaved S16 PCM into the
// planar-float (FLTP) layout required by FFmpeg's native AAC encoder.
class AudioResampler
{
public:
    AudioResampler();
    ~AudioResampler();
    // Configure the S16 -> FLTP converter; returns false on failure.
    bool InitFromS16ToFLTP(int InChannels, int InSampleRate, int OutChannels, int OutSampleRate);
    // Convert one frame of interleaved S16 PCM into out_frame; returns
    // samples-per-channel converted, or -1 on failure.
    int ResampleFromS16ToFLTP(uint8_t *in_data, AVFrame *out_frame);
protected:
    // Translate an FFmpeg error code into ErrorString_.
    void SetErrorString(int ret);
private:
    string ErrorString_;   // last FFmpeg error, human readable
    int InChannels_;
    int InSampleRate_;
    int OutChannels_;
    int OutSampleRate_;
    SwrContext *ctx_;      // owned; freed in the destructor
};
cpp
#include "AudioResampler.h"
using namespace std;
// Allocate an AVFrame holding planar-float (FLTP) PCM, the input layout the
// AAC encoder requires. Returns nullptr on any allocation failure.
AVFrame *AllocFltpPcmFrame(int channels, int nb_samples)
{
    AVFrame *pcm = av_frame_alloc();
    if (pcm == nullptr) // fix: av_frame_alloc can fail; previously dereferenced unchecked
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] av frame alloc error! -- line:%d\n", __FUNCTION__, __LINE__);
        return nullptr;
    }
    pcm->format = AV_SAMPLE_FMT_FLTP;
    pcm->channels = channels;
    pcm->channel_layout = av_get_default_channel_layout(channels);
    pcm->nb_samples = nb_samples;
    int ret = av_frame_get_buffer(pcm, 0);
    if (ret != 0)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] av frame alloc error! -- line:%d\n", __FUNCTION__, __LINE__);
        av_frame_free(&pcm); // fix: don't leak the frame when buffer allocation fails
        return nullptr;
    }
    return pcm;
}
// Release a PCM frame allocated by AllocFltpPcmFrame. Safe to call with nullptr.
void FreePcmFrame(AVFrame *frame)
{
    if (frame == nullptr)
        return;
    av_frame_free(&frame);
}
// Start in an unconfigured state; InitFromS16ToFLTP() fills everything in.
AudioResampler::AudioResampler()
    : InChannels_(-1),
      InSampleRate_(-1),
      OutChannels_(-1),
      OutSampleRate_(-1),
      ctx_(nullptr)
{
}
// Release the swresample context, if one was ever created.
AudioResampler::~AudioResampler()
{
    av_log(NULL, AV_LOG_WARNING, "[AudioResampler::~AudioResampler] -- line:%d\n", __LINE__);
    if (ctx_ != nullptr)
        swr_free(&ctx_);
}
// Configure the converter: interleaved S16 @ InSampleRate -> planar float
// (FLTP) @ OutSampleRate, with default channel layouts derived from the
// channel counts. Returns false and logs on any failure.
bool AudioResampler::InitFromS16ToFLTP(int InChannels, int InSampleRate, int OutChannels, int OutSampleRate)
{
    InChannels_ = InChannels;
    InSampleRate_ = InSampleRate;
    OutChannels_ = OutChannels;
    OutSampleRate_ = OutSampleRate;
    ctx_ = swr_alloc_set_opts(ctx_,
                              av_get_default_channel_layout(OutChannels), AV_SAMPLE_FMT_FLTP, OutSampleRate,
                              // fix: the input side must use InSampleRate; it previously
                              // passed OutSampleRate, silently disabling real rate conversion.
                              av_get_default_channel_layout(InChannels), AV_SAMPLE_FMT_S16, InSampleRate,
                              0, nullptr);
    if (ctx_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[AudioResampler::InitFromS16ToFLTP] swr alloc set opts error -- line:%d\n", __LINE__);
        return false;
    }
    int ret = swr_init(ctx_);
    if (ret < 0)
    {
        SetErrorString(ret);
        // fix: the message said "avcodec_open2 failed" but the failing call is swr_init
        av_log(NULL, AV_LOG_ERROR, "[AudioResampler::InitFromS16ToFLTP] swr_init failed:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        return false;
    }
    av_log(NULL, AV_LOG_INFO, "[AudioResampler::InitFromS16ToFLTP] success -- line:%d\n", __LINE__);
    return true;
}
// Convert one frame of interleaved S16 PCM (InData_) into the planar-float
// frame OutFrame. Returns samples converted per channel, or -1 on failure.
// NOTE(review): both the input and output sample counts are
// OutFrame->nb_samples, which assumes equal in/out sample rates -- confirm
// callers never use this helper across differing rates.
int AudioResampler::ResampleFromS16ToFLTP(uint8_t *InData_, AVFrame *OutFrame)
{
    // swr_convert wants one pointer per input plane; interleaved S16 is a single plane.
    const uint8_t *InData[AV_NUM_DATA_POINTERS] = {0};
    InData[0] = InData_;
    int samples = swr_convert(ctx_, OutFrame->data, OutFrame->nb_samples, InData, OutFrame->nb_samples);
    if (samples <= 0)
    {
        return -1;
    }
    return samples;
}
// Store a human-readable description of FFmpeg error code `ret`.
void AudioResampler::SetErrorString(int ret)
{
    char buf[1024] = {0};
    av_strerror(ret, buf, sizeof(buf) - 1);
    ErrorString_.assign(buf);
}
AAC 音频编码器封装
cpp
#include "global.h"
using namespace std;
// Thin wrapper over FFmpeg's AAC encoder: owns the AVCodecContext and exposes
// send-frame/receive-packet encoding in single-packet and batched forms.
class AudioEncoder
{
public:
    AudioEncoder();
    ~AudioEncoder();
    // Create and open the AAC encoder; returns false on failure.
    bool InitAAC(int channels, int SampleRate, int BitRate);
    // Encode one frame, returning at most one packet (nullptr on error / no output).
    AVPacket *Encode(AVFrame *frame, int StreamIndex, int64_t pts, int64_t TimeBase);
    // Encode one frame (nullptr = flush), appending all produced packets.
    bool Encode(AVFrame *frame, int StreamIndex, int64_t pts, int64_t TimeBase, vector<AVPacket *> &packets);
    int GetFrameSize(); // samples per channel the encoder expects in one input frame
    int GetSampleFormat(); // sample format the encoder requires
    AVCodecContext *GetCodecContext();
    int GetChannels();
    int GetSampleRate();
protected:
    // Translate an FFmpeg error code into ErrorString_.
    void SetErrorString(int ret);
private:
    string ErrorString_;
    int channels_;
    int SampleRate_;
    int BitRate_;
    int64_t pts_;
    AVCodecContext *AudioCodecCtx_; // owned; freed in the destructor
};
cpp
#include "AudioEncoder.h"
// Start unconfigured; InitAAC() sets up the real state.
AudioEncoder::AudioEncoder()
    : channels_(-1),
      SampleRate_(-1),
      BitRate_(-1),
      pts_(-1),
      AudioCodecCtx_(nullptr)
{
}
// Free the encoder context (avcodec_free_context also nulls the pointer).
AudioEncoder::~AudioEncoder()
{
    av_log(NULL, AV_LOG_WARNING, "[AudioEncoder::~AudioEncoder] -- line:%d\n", __LINE__);
    if (AudioCodecCtx_ != nullptr)
        avcodec_free_context(&AudioCodecCtx_);
}
// Create and open an AAC encoder context: FLTP samples, default channel
// layout, caller-supplied rate/bitrate. Returns false and logs on failure.
bool AudioEncoder::InitAAC(int channels, int SampleRate, int BitRate)
{
    channels_ = channels;
    SampleRate_ = SampleRate;
    BitRate_ = BitRate;
    const AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_AAC);
    AudioCodecCtx_ = avcodec_alloc_context3(encoder);
    if (encoder == nullptr || AudioCodecCtx_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::InitAAC] AV_CODEC_ID_AAC codec find encoder or codec alloc context error -- line:%d\n", __LINE__);
        return false;
    }
    // fix: AV_CODEC_FLAG_GLOBAL_HEADER belongs in `flags`, not `flags2` --
    // MP4 muxing needs the extradata emitted globally (matches VideoEncoder::InitH264).
    AudioCodecCtx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    AudioCodecCtx_->bit_rate = BitRate_;
    AudioCodecCtx_->sample_rate = SampleRate_;
    AudioCodecCtx_->sample_fmt = AV_SAMPLE_FMT_FLTP; // native AAC encoder only accepts FLTP
    AudioCodecCtx_->channels = channels_;
    AudioCodecCtx_->channel_layout = av_get_default_channel_layout(AudioCodecCtx_->channels);
    int ret = avcodec_open2(AudioCodecCtx_, NULL, NULL);
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::InitAAC] av codec open error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        return false;
    }
    av_log(NULL, AV_LOG_INFO, "[AudioEncoder::InitAAC] success -- line:%d\n", __LINE__);
    return true;
}
// Encode one frame and return at most ONE packet (caller owns it), or nullptr
// on error. pts is rescaled from {1, TimeBase} into the encoder's time base.
// NOTE(review): a single avcodec_receive_packet call treats EAGAIN (encoder
// needs more input) as an error and can leave buffered packets unread -- the
// vector overload below handles this correctly.
AVPacket *AudioEncoder::Encode(AVFrame *frame, int StreamIndex, int64_t pts, int64_t TimeBase)
{
    if (AudioCodecCtx_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::Encode] AudioCodecCtx_ == nullptr -- line:%d\n", __LINE__);
        return nullptr;
    }
    // Convert caller's tick (in 1/TimeBase seconds) to the encoder's time base.
    pts = av_rescale_q(pts, AVRational{1, (int)TimeBase}, AudioCodecCtx_->time_base);
    if (frame != nullptr)
    {
        frame->pts = pts;
    }
    int ret = avcodec_send_frame(AudioCodecCtx_, frame);
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::Encode] av codec send frame error:%s-- line:%d\n", ErrorString_.c_str(), __LINE__);
        return nullptr;
    }
    AVPacket *packet = av_packet_alloc();
    ret = avcodec_receive_packet(AudioCodecCtx_, packet);
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::Encode] av codec receive packet error:%s-- line:%d\n", ErrorString_.c_str(), __LINE__);
        if (packet != nullptr)
        {
            av_packet_free(&packet);
        }
        return nullptr;
    }
    packet->stream_index = StreamIndex;
    return packet;
}
// Encode one frame (frame == nullptr flushes the encoder) and append every
// produced packet to `packets` (caller owns them). pts is rescaled from
// {1, TimeBase} into the encoder's time base. Returns false on hard errors.
bool AudioEncoder::Encode(AVFrame *frame, int StreamIndex, int64_t pts, int64_t TimeBase, vector<AVPacket *> &packets)
{
    if (AudioCodecCtx_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::Encode] AudioCodecCtx_ == nullptr -- line:%d\n", __LINE__);
        return false;
    }
    pts = av_rescale_q(pts, AVRational{1, (int)TimeBase}, AudioCodecCtx_->time_base);
    if (frame != nullptr)
    {
        frame->pts = pts;
    }
    int ret = avcodec_send_frame(AudioCodecCtx_, frame);
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::Encode] av codec send frame error:%s-- line:%d\n", ErrorString_.c_str(), __LINE__);
        return false;
    }
    // Drain all packets the encoder has ready; EAGAIN/EOF just ends the loop.
    while (true)
    {
        AVPacket *packet = av_packet_alloc();
        ret = avcodec_receive_packet(AudioCodecCtx_, packet);
        packet->stream_index = StreamIndex;
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        {
            av_packet_free(&packet);
            break;
        }
        else if (ret < 0)
        {
            SetErrorString(ret);
            av_log(NULL, AV_LOG_ERROR, "[AudioEncoder::Encode] av codec receive packet error:%s-- line:%d\n", ErrorString_.c_str(), __LINE__);
            if (packet != nullptr)
            {
                av_packet_free(&packet);
            }
            return false;
        }
        packets.push_back(packet);
    }
    return true;
}
// Samples per channel the encoder expects in one input frame (-1 before InitAAC).
int AudioEncoder::GetFrameSize()
{
    return (AudioCodecCtx_ != nullptr) ? AudioCodecCtx_->frame_size : -1;
}
// Sample format the encoder requires (-1 before InitAAC).
int AudioEncoder::GetSampleFormat()
{
    return (AudioCodecCtx_ != nullptr) ? AudioCodecCtx_->sample_fmt : -1;
}
// Expose the owned codec context (may be nullptr before InitAAC); caller must not free it.
AVCodecContext *AudioEncoder::GetCodecContext() { return AudioCodecCtx_; }
// Channel count configured on the encoder (-1 before InitAAC).
int AudioEncoder::GetChannels()
{
    return (AudioCodecCtx_ != nullptr) ? AudioCodecCtx_->channels : -1;
}
// Sample rate (Hz) configured on the encoder, or -1 before InitAAC.
int AudioEncoder::GetSampleRate()
{
    if (AudioCodecCtx_ != nullptr)
    {
        // fix: previously returned sample_fmt, so the resampler was initialized
        // with a bogus output rate (e.g. 8 instead of 44100).
        return AudioCodecCtx_->sample_rate;
    }
    return -1;
}
// Store a human-readable description of FFmpeg error code `ret`.
void AudioEncoder::SetErrorString(int ret)
{
    char buf[1024] = {0};
    av_strerror(ret, buf, sizeof(buf) - 1);
    ErrorString_.assign(buf);
}
H264 视频编码器封装
cpp
#include "global.h"
using namespace std;
// Thin wrapper over the H.264 (libx264) encoder: owns the codec context, a
// reusable AVFrame whose data pointers are mapped onto caller-provided YUV420P
// buffers, and the x264 option dictionary.
class VideoEncoder
{
public:
    VideoEncoder();
    ~VideoEncoder();
    // Create and open the H.264 encoder; returns false on failure.
    bool InitH264(int width, int height, int fps, int BitRate);
    // Encode one raw YUV420P picture, returning at most one packet.
    AVPacket *Encode(uint8_t *yuvData, int yuvSize, int StreamIndex, int64_t pts, int64_t TimeBase);
    // Encode one raw picture (nullptr = flush), appending all produced packets.
    bool Encode(uint8_t *yuvData, int yuvSize, int StreamIndex, int64_t pts, int64_t TimeBase, vector<AVPacket *> &packets);
    AVCodecContext *GetCodecContext();
protected:
    // Translate an FFmpeg error code into ErrorString_.
    void SetErrorString(int ret);
private:
    int width_;
    int height_;
    int fps_;
    int BitRate_;
    int64_t pts_;
    AVCodecContext *VideoCodecCtx_; // owned
    AVFrame *frame_;                // owned; reused for every Encode call
    AVDictionary *dict_;            // owned; x264 private options
    string ErrorString_;
};
cpp
#include "VideoEncoder.h"
// Start unconfigured; InitH264() sets up the real state.
VideoEncoder::VideoEncoder()
{
    width_ = -1;
    height_ = -1;
    fps_ = -1;
    BitRate_ = -1;
    // fix: was `int64_t pts_ = -1;`, which declared a shadowing LOCAL and left
    // the member pts_ uninitialized.
    pts_ = -1;
    VideoCodecCtx_ = nullptr;
    frame_ = nullptr;
    dict_ = nullptr;
    ErrorString_ = "";
}
// Release the codec context, the reusable frame, and the option dictionary.
VideoEncoder::~VideoEncoder()
{
    av_log(NULL, AV_LOG_WARNING, "[VideoEncoder::~VideoEncoder] -- line:%d\n", __LINE__);
    if (VideoCodecCtx_ != nullptr)
        avcodec_free_context(&VideoCodecCtx_);
    if (frame_ != nullptr)
        av_frame_free(&frame_);
    if (dict_ != nullptr)
        av_dict_free(&dict_);
}
// Create and open an H.264 encoder (YUV420P input, no B-frames, GOP = fps,
// microsecond time base) and allocate the reusable input frame.
// Returns false and logs on failure.
bool VideoEncoder::InitH264(int width, int height, int fps, int BitRate)
{
    width_ = width;
    height_ = height;
    fps_ = fps;
    BitRate_ = BitRate;
    const AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
    VideoCodecCtx_ = avcodec_alloc_context3(encoder);
    if (encoder == nullptr || VideoCodecCtx_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::InitH264] AV_CODEC_ID_H264 codec find encoder or codec alloc context error -- line:%d\n", __LINE__);
        return false;
    }
    // Global header: MP4 needs SPS/PPS in extradata rather than in-band.
    VideoCodecCtx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    VideoCodecCtx_->bit_rate = BitRate_;
    VideoCodecCtx_->width = width_;
    VideoCodecCtx_->height = height_;
    VideoCodecCtx_->framerate = {fps_, 1};
    VideoCodecCtx_->time_base = {1, 1000000}; // unit: microseconds
    VideoCodecCtx_->gop_size = fps_;          // one keyframe per second
    VideoCodecCtx_->max_b_frames = 0;         // no B-frames -> pts == dts, simpler muxing
    VideoCodecCtx_->pix_fmt = AV_PIX_FMT_YUV420P;
    // x264 private options: slowest preset for quality, High profile, film tuning.
    av_opt_set(VideoCodecCtx_->priv_data, "preset", "veryslow", 0);
    av_opt_set(VideoCodecCtx_->priv_data, "profile", "high", 0);
    av_dict_set(&dict_, "tune", "film", 0);
    int ret = avcodec_open2(VideoCodecCtx_, NULL, &dict_);
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::InitH264] av codec open error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        return false;
    }
    // Reusable frame: only metadata is set here; data pointers are mapped onto
    // the caller's YUV buffer in Encode() via av_image_fill_arrays.
    frame_ = av_frame_alloc();
    if (frame_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::InitH264] av frame alloc:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        return false;
    }
    frame_->width = width_;
    frame_->height = height_;
    frame_->format = VideoCodecCtx_->pix_fmt;
    av_log(NULL, AV_LOG_INFO, "[VideoEncoder::InitH264] success -- line:%d\n", __LINE__);
    return true;
}
// Encode one YUV420P picture (yuvData == nullptr flushes the encoder) and
// return at most ONE packet, or nullptr on error. pts is rescaled from
// {1, TimeBase} into the encoder's time base.
AVPacket *VideoEncoder::Encode(uint8_t *yuvData, int yuvSize, int StreamIndex, int64_t pts, int64_t TimeBase)
{
    if (VideoCodecCtx_ == nullptr)
    {
        av_log(NULL, AV_LOG_INFO, "[VideoEncoder::Encode] VideoCodecCtx_ == nullptr -- line:%d\n", __LINE__);
        return nullptr;
    }
    int ret;
    pts = av_rescale_q(pts, AVRational{1, (int)TimeBase}, VideoCodecCtx_->time_base);
    frame_->pts = pts;
    if (yuvData != nullptr)
    {
        // Map frame_->data/linesize onto the caller's buffer (no copy); the
        // return value is the byte size the format/dimensions require.
        size_t SrcReqByteSize = av_image_fill_arrays(frame_->data, frame_->linesize, yuvData, (AVPixelFormat)frame_->format, frame_->width, frame_->height, 1);
        if (SrcReqByteSize != yuvSize)
        {
            av_log(NULL, AV_LOG_INFO, "[VideoEncoder::Encode] The number of bytes required:%ld != yuvSize:%d -- line:%d\n", SrcReqByteSize, yuvSize, __LINE__);
            return nullptr;
        }
        ret = avcodec_send_frame(VideoCodecCtx_, frame_);
    }
    else
    {
        // nullptr frame: enter drain (flush) mode.
        ret = avcodec_send_frame(VideoCodecCtx_, NULL);
    }
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::Encode] avcodec send frame error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        return nullptr;
    }
    // TODO: should loop on avcodec_receive_packet here; a single receive treats
    // EAGAIN as an error and can drop buffered packets (the vector overload
    // below does this correctly).
    AVPacket *packet = av_packet_alloc();
    ret = avcodec_receive_packet(VideoCodecCtx_, packet);
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::Encode] avcodec receive packet error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        if (packet != nullptr)
        {
            av_packet_free(&packet);
        }
        return nullptr;
    }
    packet->stream_index = StreamIndex;
    return packet;
}
// Encode one YUV420P picture (yuvData == nullptr flushes the encoder) and
// append every produced packet to `packets` (caller owns them).
// Returns false on hard errors.
bool VideoEncoder::Encode(uint8_t *yuvData, int yuvSize, int StreamIndex, int64_t pts, int64_t TimeBase, vector<AVPacket *> &packets)
{
    if (VideoCodecCtx_ == nullptr)
    {
        av_log(NULL, AV_LOG_INFO, "[VideoEncoder::Encode] VideoCodecCtx_ == nullptr -- line:%d\n", __LINE__);
        return false;
    }
    int ret;
    /** Rescale the timestamp pts from the caller's time base (TimeBase) into the
     * encoder's time base (VideoCodecCtx_->time_base).
     * av_rescale_q is FFmpeg's helper for converting timestamps between bases.
     * Why: different stages of the pipeline use different time bases; without
     * conversion, frames can play out of sync, skip, or be reordered. Keeping
     * timestamps in a consistent base is required for correct playback and for
     * interoperability across formats/codecs.
     **/
    pts /*converted timestamp*/ = av_rescale_q(pts /*source timestamp*/, AVRational{1, (int)TimeBase} /*source time base (as a fraction)*/, VideoCodecCtx_->time_base /*destination time base*/);
    frame_->pts = pts;
    if (yuvData != nullptr)
    {
        // Map the frame's data/linesize arrays onto the caller's YUV buffer (no copy).
        size_t SrcReqByteSize = av_image_fill_arrays(frame_->data, frame_->linesize, yuvData, (AVPixelFormat)frame_->format, frame_->width, frame_->height, 1);
        if (SrcReqByteSize != yuvSize)
        {
            av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::Encode] frame_->width:%d frame_->height:%d (AVPixelFormat)frame_->format:%d -- line:%d\n", frame_->width, frame_->height, (AVPixelFormat)frame_->format, __LINE__);
            av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::Encode] The number of bytes required:%ld != yuvSize:%d -- line:%d\n", SrcReqByteSize, yuvSize, __LINE__);
            return false;
        }
        // Submit the frame to the encoder.
        ret = avcodec_send_frame(VideoCodecCtx_, frame_);
    }
    else
    {
        // nullptr frame: enter drain (flush) mode.
        ret = avcodec_send_frame(VideoCodecCtx_, NULL);
    }
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::Encode] avcodec send frame error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        return false;
    }
    // Drain all packets the encoder has ready; EAGAIN/EOF just ends the loop.
    while (true)
    {
        AVPacket *packet = av_packet_alloc();
        ret = avcodec_receive_packet(VideoCodecCtx_, packet); // retrieve the next encoded packet
        packet->stream_index = StreamIndex;
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        {
            av_packet_free(&packet);
            break;
        }
        else if (ret < 0)
        {
            SetErrorString(ret);
            av_log(NULL, AV_LOG_ERROR, "[VideoEncoder::Encode] avcodec receive packet error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
            av_packet_free(&packet);
            return false;
        }
        packets.push_back(packet);
    }
    return true;
}
// Expose the owned codec context (may be nullptr before InitH264); caller must not free it.
AVCodecContext *VideoEncoder::GetCodecContext() { return VideoCodecCtx_; }
// Store a human-readable description of FFmpeg error code `ret`.
void VideoEncoder::SetErrorString(int ret)
{
    char buf[1024] = {0};
    av_strerror(ret, buf, sizeof(buf) - 1);
    ErrorString_.assign(buf);
}
封装 MP4 封装器
cpp
#include "global.h"
using namespace std;
// MP4 muxer wrapper: owns the output AVFormatContext, holds non-owning
// references to the audio/video encoder contexts and their output streams,
// and writes header / interleaved packets / trailer.
class Muxer
{
public:
    Muxer();
    ~Muxer();
    // Allocate the output context for the given file path.
    bool Init(const char *url);
    // Create an output stream from an encoder context (audio or video).
    bool AddStream(AVCodecContext *CodecCtx);
    /** Writing sequence:
     * avformat_write_header
     * av_interleaved_write_frame (per packet)
     * av_write_trailer
     */
    bool SendHeader();
    bool SendPacket(AVPacket *packet);
    bool SendTrailer();
    bool Open(); // avio_open the output file for writing
    int GetAudioStreamIndex();
    int GetVideoStreamIndex();
protected:
    // Translate an FFmpeg error code into ErrorString_.
    void SetErrorString(int ret);
private:
    AVFormatContext *FmtCtx_; // owned output context
    string url_;
    string ErrorString_;
    // Encoder contexts: NOT owned (owned by the encoder wrappers).
    AVCodecContext *AudioCodecCtx_;
    AVStream *AudioStream_;
    AVCodecContext *VideoCodecCtx_;
    AVStream *VideoStream_;
    int AudioIndex_;
    int VideoIndex_;
};
cpp
#include "muxer.h"
// Start with no output context and no registered streams.
Muxer::Muxer()
    : FmtCtx_(nullptr),
      url_(""),
      ErrorString_(""),
      AudioCodecCtx_(nullptr),
      AudioStream_(nullptr),
      VideoCodecCtx_(nullptr),
      VideoStream_(nullptr),
      AudioIndex_(-1),
      VideoIndex_(-1)
{
}
// 资源释放
// Release the output context and drop the non-owning stream/codec references.
Muxer::~Muxer()
{
    av_log(NULL, AV_LOG_WARNING, "[Muxer::~Muxer] -- line:%d\n", __LINE__);
    if (FmtCtx_ != nullptr)
    {
        // fix: FmtCtx_ is an OUTPUT context from avformat_alloc_output_context2;
        // avformat_close_input() is the demuxer teardown and is wrong here.
        // Close the AVIO handle opened in Open(), then free the muxer context.
        if (FmtCtx_->pb != nullptr && !(FmtCtx_->oformat->flags & AVFMT_NOFILE))
        {
            avio_closep(&FmtCtx_->pb);
        }
        avformat_free_context(FmtCtx_);
        FmtCtx_ = nullptr;
    }
    AudioCodecCtx_ = nullptr;
    AudioStream_ = nullptr;
    AudioIndex_ = -1;
    VideoCodecCtx_ = nullptr;
    VideoStream_ = nullptr;
    VideoIndex_ = -1; // fix: was never reset alongside the other video members
}
// 初始化 输出文件
// Allocate the output (muxing) context; the container format is inferred from
// the url's file extension. Returns false and logs on failure.
bool Muxer::Init(const char *url)
{
    const int ret = avformat_alloc_output_context2(&FmtCtx_, NULL, NULL, url);
    if (ret >= 0)
    {
        url_ = url;
        av_log(NULL, AV_LOG_DEBUG, "[Muxer::Init] success -- line:%d\n", __LINE__);
        return true;
    }
    SetErrorString(ret);
    av_log(NULL, AV_LOG_ERROR, "[Muxer::Init] avformat alloc output context error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
    return false;
}
// 创建流
// Create an output stream in the MP4 container and copy the encoder's
// parameters into it. Records the stream/context/index for audio or video
// based on the codec type. Returns false and logs on failure.
bool Muxer::AddStream(AVCodecContext *CodecCtx)
{
    if (FmtCtx_ == nullptr || CodecCtx == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[Muxer::AddStream] FmtCtx_ == nullptr || CodecCtx == nullptr -- line:%d\n", __LINE__);
        return false;
    }
    // avformat_new_stream creates the stream inside the AVFormatContext.
    AVStream *stream = avformat_new_stream(FmtCtx_, NULL);
    if (stream == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, " avformat new stream failed -- line:%d\n", __LINE__);
        return false;
    }
    // Copy codec parameters (extradata, dimensions, sample rate, ...) from the encoder.
    int ret = avcodec_parameters_from_context(stream->codecpar, CodecCtx);
    if (ret < 0) // fix: the return value was previously ignored
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[Muxer::AddStream] avcodec_parameters_from_context error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        return false;
    }
    av_log(NULL, AV_LOG_INFO, "[Muxer::AddStream] dump format -- line:%d\n", __LINE__);
    av_dump_format(FmtCtx_, 0, url_.c_str(), 1);
    // Record the stream as audio or video so SendPacket can pick time bases.
    if (CodecCtx->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        AudioCodecCtx_ = CodecCtx;
        AudioStream_ = stream;
        AudioIndex_ = stream->index;
    }
    else if (CodecCtx->codec_type == AVMEDIA_TYPE_VIDEO)
    {
        VideoCodecCtx_ = CodecCtx;
        VideoStream_ = stream;
        VideoIndex_ = stream->index;
    }
    return true;
}
// 写流
// Write the container header; must be called after Open() and before any packets.
bool Muxer::SendHeader()
{
    if (FmtCtx_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[Muxer::SendHeader] FmtCtx_ == nullptr -- line:%d\n", __LINE__);
        return false;
    }
    const int ret = avformat_write_header(FmtCtx_, NULL);
    if (ret >= 0)
        return true;
    SetErrorString(ret);
    av_log(NULL, AV_LOG_ERROR, "[Muxer::SendHeader] format write header error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
    return false;
}
// Rescale an encoded packet's timestamps from the encoder time base to the
// output stream time base and hand it to the interleaving muxer.
// Takes ownership of `packet`: it is freed on every path.
bool Muxer::SendPacket(AVPacket *packet)
{
    if (packet == nullptr || packet->size <= 0 || packet->data == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[Muxer::SendPacket] packet == nullptr || packet->size <= 0 || packet->data == nullptr -- line:%d\n", __LINE__);
        if (packet != nullptr)
        {
            av_packet_free(&packet);
        }
        return false;
    }
    av_log(NULL, AV_LOG_DEBUG, "[Muxer::SendPacket] packet->stream_index:%d, pts:%ld -- line:%d\n", packet->stream_index, packet->pts, __LINE__);
    AVRational SrcTimeBase; // time base the encoder stamped the packet with
    AVRational DstTimeBase; // time base of the corresponding mp4 output stream
    if (VideoStream_ && VideoCodecCtx_ && packet->stream_index == VideoIndex_)
    {
        SrcTimeBase = VideoCodecCtx_->time_base;
        DstTimeBase = VideoStream_->time_base;
    }
    else if (AudioStream_ && AudioCodecCtx_ && packet->stream_index == AudioIndex_)
    {
        SrcTimeBase = AudioCodecCtx_->time_base;
        DstTimeBase = AudioStream_->time_base;
    }
    else
    {
        // fix: SrcTimeBase/DstTimeBase were previously read UNINITIALIZED when the
        // packet matched neither known stream (undefined behavior). Reject instead.
        av_log(NULL, AV_LOG_ERROR, "[Muxer::SendPacket] unknown stream_index:%d -- line:%d\n", packet->stream_index, __LINE__);
        av_packet_free(&packet);
        return false;
    }
    // Time-base conversion of pts/dts/duration.
    packet->pts = av_rescale_q(packet->pts, SrcTimeBase, DstTimeBase);
    packet->dts = av_rescale_q(packet->dts, SrcTimeBase, DstTimeBase);
    packet->duration = av_rescale_q(packet->duration, SrcTimeBase, DstTimeBase);
    // Not written immediately: the muxer buffers internally to interleave by timestamp.
    int ret = av_interleaved_write_frame(FmtCtx_, packet);
    if (ret != 0)
    {
        SetErrorString(ret);
        av_log(NULL, AV_LOG_ERROR, "[Muxer::SendPacket] interleaved write frame error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
        av_packet_free(&packet);
        return false;
    }
    av_packet_free(&packet);
    return true;
}
bool Muxer::SendTrailer()
{
if (FmtCtx_ == nullptr)
{
av_log(NULL, AV_LOG_ERROR, "[Muxer::SendTrailer] FmtCtx_ == nullptr -- line:%d\n", __LINE__);
return false;
}
int ret = av_write_trailer(FmtCtx_);
if (ret != 0)
{
SetErrorString(ret);
av_log(NULL, AV_LOG_ERROR, "[Muxer::SendTrailer] interleaved write frame error:%s -- line:%d\n", ErrorString_.c_str(), __LINE__);
return false;
}
return true;
}
// Open the output file for writing via AVIO; must precede SendHeader().
bool Muxer::Open()
{
    const int ret = avio_open(&FmtCtx_->pb, url_.c_str(), AVIO_FLAG_WRITE);
    if (ret >= 0)
        return true;
    SetErrorString(ret);
    av_log(NULL, AV_LOG_ERROR, "[Muxer::Open] av io open error:%s url:%s -- line:%d\n", ErrorString_.c_str(), url_.c_str(), __LINE__);
    return false;
}
// Index of the audio output stream (-1 until AddStream registers one).
int Muxer::GetAudioStreamIndex() { return AudioIndex_; }
// Index of the video output stream (-1 until AddStream registers one).
int Muxer::GetVideoStreamIndex() { return VideoIndex_; }
// Store a human-readable description of FFmpeg error code `ret`.
void Muxer::SetErrorString(int ret)
{
    char buf[1024] = {0};
    av_strerror(ret, buf, sizeof(buf) - 1);
    ErrorString_.assign(buf);
}
封装流程控制器
cpp
#include "global.h"
#include "AudioEncoder.h"
#include "AudioResampler.h"
#include "VideoEncoder.h"
#include "muxer.h"
using namespace std;
// Pipeline controller: reads raw YUV and PCM files, drives the H.264/AAC
// encoders and the S16->FLTP resampler, and muxes the result into an MP4.
class ManagementControl
{
public:
    ManagementControl();
    ~ManagementControl();
    // Initialize the controller - open input files + init encoders + compute frame durations
    bool InitControl();
    // Allocate the audio and video frame buffers
    bool AllocFrameBuffer();
    // Create the output streams
    bool CreateStream();
    // Open the output file IO
    bool OpenOutputFileIO();
    // Write packets - header + packets + trailer
    bool WritingPackage();
private:
    string InYuvName_;   // raw YUV420P input path
    string InPcmName_;   // raw S16 interleaved PCM input path
    string OutMp4Name_;  // MP4 output path
    FILE *IN_YUV_FD_;
    FILE *IN_PCM_FD_;
    bool VideoEnd_;      // true once the YUV file is exhausted and flushed
    bool AudioEnd_;      // true once the PCM file is exhausted and flushed
    double AudioFrameDuration_; // per-frame pts increment, in time-base ticks
    double VideoFrameDuration_;
    int VideoIndex_;
    int AudioIndex_;
    double VideoPts_;
    double AudioPts_;
    int64_t VideoTimeBase_;
    int64_t AudioTimeBase_;
    int VideoWidth_;
    int VideoHeight_;
    int VideoFps_;
    int VideoBitRate_;
    int AudioChannels_;
    int AudioSampleRate_;
    int AudioSampleFormat_;
    int AudioBitRate_;
    size_t AudioFrameSize_; // bytes per S16 input frame
    size_t VideoFrameSize_; // bytes per YUV420P input picture
    Muxer Mp4Muxer_;
    VideoEncoder VideoEncoder_;
    AudioEncoder AudioEncoder_;
    AudioResampler AudioResampler_;
    vector<AVPacket *> packets_; // scratch list reused per encode call
    uint8_t *VideoFrameBuf_;
    uint8_t *AudioFrameBuf_;
};
cpp
#include "ManagementControl.h"
// Seed all configuration from the project-wide constants; real setup happens
// in InitControl()/AllocFrameBuffer().
ManagementControl::ManagementControl()
{
    // fix: the assignment operator was missing here (`InYuvName_ "...";`),
    // which does not compile.
    InYuvName_ = "/*YUV 路径*/";
    InPcmName_ = "/*PCM 路径*/";
    OutMp4Name_ = "/*MP4 文件输出路径*/";
    IN_YUV_FD_ = nullptr;
    IN_PCM_FD_ = nullptr;
    VideoFrameBuf_ = nullptr;
    AudioFrameBuf_ = nullptr;
    VideoWidth_ = YUV_WIDTH;
    VideoHeight_ = YUV_HEIGHT;
    VideoFps_ = YUV_FPS;
    VideoBitRate_ = VIDEO_BIT_RATE;
    AudioChannels_ = PCM_CHANNELS;
    AudioSampleRate_ = PCM_SAMPLE_RATE;
    AudioSampleFormat_ = PCM_SAMPLE_FROMAT;
    AudioBitRate_ = AUDIO_BIT_RATE;
    AudioEnd_ = false;
    VideoEnd_ = false;
    AudioIndex_ = -1;
    VideoIndex_ = -1;
    AudioTimeBase_ = AUDIO_TIME_BASE;
    VideoTimeBase_ = VIDEO_TIME_BASE;
    AudioPts_ = 0;
    VideoPts_ = 0;
    AudioFrameDuration_ = -1;
    VideoFrameDuration_ = -1;
    // NOTE(review): these members are size_t, so -1 wraps to SIZE_MAX; they are
    // overwritten in AllocFrameBuffer() before use.
    AudioFrameSize_ = -1;
    VideoFrameSize_ = -1;
}
// Release the frame buffers and close both input files (all null-checked).
ManagementControl::~ManagementControl()
{
    av_log(NULL, AV_LOG_WARNING, "[ManagementControl::~ManagementControl] -- line:%d\n", __LINE__);
    if (VideoFrameBuf_ != nullptr)
    {
        free(VideoFrameBuf_);
    }
    if (AudioFrameBuf_ != nullptr)
    {
        free(AudioFrameBuf_);
    }
    if (IN_YUV_FD_ != nullptr)
    {
        fclose(IN_YUV_FD_);
    }
    if (IN_PCM_FD_ != nullptr)
    {
        fclose(IN_PCM_FD_);
    }
}
// Open both input files, initialize the muxer, both encoders and the
// resampler, then derive the per-frame pts increments. Returns false on any
// initialization failure.
bool ManagementControl::InitControl()
{
    IN_YUV_FD_ = fopen(InYuvName_.c_str(), "rb");
    IN_PCM_FD_ = fopen(InPcmName_.c_str(), "rb");
    if (IN_YUV_FD_ == nullptr || IN_PCM_FD_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::InitControl] InYuvName_:%s | IN_PCM_FD_ :%s open error -- line:%d\n", InYuvName_.c_str(), InPcmName_.c_str(), __LINE__);
        return false;
    }
    bool ret = Mp4Muxer_.Init(OutMp4Name_.c_str());
    if (ret == false)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::InitControl] Mp4Muxer_.Init error -- line:%d\n", __LINE__);
        return false;
    }
    ret = VideoEncoder_.InitH264(VideoWidth_, VideoHeight_, VideoFps_, VideoBitRate_);
    if (ret == false)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::InitControl] VideoEncoder_.InitH264 error -- line:%d\n", __LINE__);
        return false;
    }
    ret = AudioEncoder_.InitAAC(AudioChannels_, AudioSampleRate_, AudioBitRate_);
    if (ret == false)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::InitControl] AudioEncoder_.InitAAC error -- line:%d\n", __LINE__);
        return false;
    }
    // Resampler output must match what the encoder was opened with.
    // NOTE(review): verify AudioEncoder::GetSampleRate() actually returns the
    // sample rate -- as written elsewhere in this file it returns sample_fmt.
    ret = AudioResampler_.InitFromS16ToFLTP(AudioChannels_, AudioSampleRate_, AudioEncoder_.GetChannels(), AudioEncoder_.GetSampleRate());
    if (ret == false)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::InitControl] AudioResampler_.InitFromS16ToFLTP error -- line:%d\n", __LINE__);
        return false;
    }
    // Per-frame pts increment, expressed in time-base ticks:
    // audio = frame_size / sample_rate seconds, video = 1 / fps seconds.
    AudioFrameDuration_ = 1.0 * AudioEncoder_.GetFrameSize() / AudioSampleRate_ * AudioTimeBase_;
    VideoFrameDuration_ = 1.0 / VideoFps_ * VideoTimeBase_;
    if (AudioFrameDuration_ <= 0 || VideoFrameDuration_ <= 0)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::InitControl] AudioFrameDuration_ <= 0 || VideoFrameDuration_ <= 0 -- line:%d\n", __LINE__);
        return false;
    }
    av_log(NULL, AV_LOG_INFO, "[ManagementControl::InitControl] VideoFrameDuration_: %lf || AudioFrameDuration_: %lf -- line:%d\n", VideoFrameDuration_, AudioFrameDuration_, __LINE__);
    return true;
}
// Compute the byte size of one input audio frame and one input video picture,
// then allocate a read buffer for each. Returns false on failure.
bool ManagementControl::AllocFrameBuffer()
{
    // Audio frame size = bytes per sample * channels * samples per channel.
    AudioFrameSize_ = av_get_bytes_per_sample((AVSampleFormat)AudioSampleFormat_) * AudioChannels_ * AudioEncoder_.GetFrameSize();
    // Video frame size for YUV420P: Y plane + quarter-size U and V planes.
    VideoFrameSize_ = (VideoWidth_ * VideoHeight_) /*Y*/ + (VideoWidth_ * VideoHeight_ / 4) /*U*/ + (VideoWidth_ * VideoHeight_ / 4) /*V*/;
    // NOTE: members are size_t, so "<= 0" only catches 0 (negative values wrap).
    if (AudioFrameSize_ <= 0 || VideoFrameSize_ <= 0)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::AllocFrameBuffer] frame size error -- line:%d\n", __LINE__);
        return false;
    }
    // fix: %zu is the portable conversion for size_t (%ld is wrong on LLP64 targets)
    av_log(NULL, AV_LOG_INFO, "[ManagementControl::AllocFrameBuffer] VideoFrameSize = %zu | AudioFrameSize = %zu -- line:%d\n", VideoFrameSize_, AudioFrameSize_, __LINE__);
    AudioFrameBuf_ = (uint8_t *)malloc(AudioFrameSize_);
    VideoFrameBuf_ = (uint8_t *)malloc(VideoFrameSize_);
    if (AudioFrameBuf_ == nullptr || VideoFrameBuf_ == nullptr)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::AllocFrameBuffer] AudioFrameBuf_ == nullptr || VideoFrameBuf_ == nullptr -- line:%d\n", __LINE__);
        return false;
    }
    return true;
}
// Register the video stream and then the audio stream with the MP4 muxer.
bool ManagementControl::CreateStream()
{
    if (!Mp4Muxer_.AddStream(VideoEncoder_.GetCodecContext()))
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::CreateStreamAndOpenIO] AddStream Video failed -- line:%d\n", __LINE__);
        return false;
    }
    if (!Mp4Muxer_.AddStream(AudioEncoder_.GetCodecContext()))
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::CreateStreamAndOpenIO] AddStream Audio failed -- line:%d\n", __LINE__);
        return false;
    }
    return true;
}
// Open the muxer's output file for writing.
bool ManagementControl::OpenOutputFileIO()
{
    if (Mp4Muxer_.Open())
        return true;
    av_log(NULL, AV_LOG_ERROR, "[ManagementControl::OpenOutputFileIO] Mp4Muxer_.Open failed -- line:%d\n", __LINE__);
    return false;
}
// Drive the mux loop: write the container header, then alternately read/encode
// audio and video (picking whichever stream's pts lags), flushing each encoder
// when its input file is exhausted, and finally write the trailer.
bool ManagementControl::WritingPackage()
{
    // Write the header
    bool ret = Mp4Muxer_.SendHeader();
    if (ret == false)
    {
        av_log(NULL, AV_LOG_ERROR, "[ManagementControl::WritingPackage] SendHeader failed -- line:%d\n", __LINE__);
        return false;
    }
    // Write packets
    size_t ReadSize;
    // NOTE(review): these locals shadow the members of the same name.
    int AudioIndex_ = Mp4Muxer_.GetAudioStreamIndex();
    int VideoIndex_ = Mp4Muxer_.GetVideoStreamIndex();
    av_log(NULL, AV_LOG_INFO, "[ManagementControl::WritingVideoFrameDuration_Package] Audio Index:%d | Video Index:%d -- line:%d\n", AudioIndex_, VideoIndex_, __LINE__);
    while (AudioEnd_ == false || VideoEnd_ == false) // some audio or video is still unprocessed
    {
        av_log(NULL, AV_LOG_INFO, "[ManagementControl::WritingPackage] Audio pts:%lf | Video pts: %lf \n", AudioPts_ / 1000, VideoPts_ / 1000);
        // Video's turn when its pts is behind audio's, or audio is already done.
        if (VideoEnd_ == false && (AudioPts_ > VideoPts_ || AudioEnd_ == true))
        {
            ReadSize = fread(VideoFrameBuf_, 1, VideoFrameSize_, IN_YUV_FD_); // read one whole picture
            if (ReadSize < VideoFrameSize_)
            {
                VideoEnd_ = true;
                av_log(NULL, AV_LOG_INFO, "[ManagementControl::WritingPackage] fread yuv frame buffer finish -- line:%d\n", __LINE__);
            }
            AVPacket *packet = nullptr; // NOTE(review): unused leftover from the single-packet API
            if (VideoEnd_ == false)
            {
                ret = VideoEncoder_.Encode(VideoFrameBuf_, VideoFrameSize_, VideoIndex_, VideoPts_, VideoTimeBase_, packets_);
            }
            else
            {
                // EOF: send a null frame to flush the encoder's buffered pictures.
                av_log(NULL, AV_LOG_WARNING, "[ManagementControl::WritingPackage] Video flush encoder -- line:%d\n", __LINE__);
                ret = VideoEncoder_.Encode(nullptr, 0, VideoIndex_, VideoPts_, VideoTimeBase_, packets_);
            }
            VideoPts_ += VideoFrameDuration_; // advance pts by one frame duration
            if (ret == true)
            {
                for (int i = 0; i < packets_.size(); i++)
                {
                    ret = Mp4Muxer_.SendPacket(packets_[i]); // SendPacket frees each packet
                }
            }
            packets_.clear();
        }
        else if (AudioEnd_ == false)
        {
            ReadSize = fread(AudioFrameBuf_, 1, AudioFrameSize_, IN_PCM_FD_);
            if (ReadSize < AudioFrameSize_) // remaining data is less than one full frame
            {
                AudioEnd_ = true;
                av_log(NULL, AV_LOG_INFO, "[ManagementControl::WritingPackage] fread pcm frame buf finish -- line:%d\n", __LINE__);
            }
            if (AudioEnd_ == false)
            {
                // Resample S16 interleaved -> FLTP planar before encoding.
                AVFrame *fltpFrame = AllocFltpPcmFrame(AudioChannels_, AudioEncoder_.GetFrameSize());
                // NOTE(review): `ret` is bool, so the int result collapses to
                // true/false and the (ret < 0) check below can never fire -- fix.
                ret = AudioResampler_.ResampleFromS16ToFLTP(AudioFrameBuf_, fltpFrame);
                if (ret < 0)
                {
                    av_log(NULL, AV_LOG_ERROR, "[ManagementControl::WritingPackage] ResampleFromS16ToFLTP error!\n");
                }
                ret = AudioEncoder_.Encode(fltpFrame, AudioIndex_, AudioPts_, AudioTimeBase_, packets_);
                FreePcmFrame(fltpFrame);
            }
            else
            {
                // EOF: flush the audio encoder.
                av_log(NULL, AV_LOG_WARNING, "[ManagementControl::WritingPackage] Audio flush encoder -- line:%d\n", __LINE__);
                ret = AudioEncoder_.Encode(nullptr, AudioIndex_, AudioPts_, AudioTimeBase_, packets_);
            }
            AudioPts_ += AudioFrameDuration_; // advance pts by one frame duration
            if (ret >= 0)
            {
                for (int i = 0; i < packets_.size(); i++)
                {
                    ret = Mp4Muxer_.SendPacket(packets_[i]);
                }
            }
            packets_.clear();
        }
    }
    // Write the trailer
    ret = Mp4Muxer_.SendTrailer();
    if (ret == false)
    {
        av_log(NULL, AV_LOG_INFO, "[ManagementControl::WritingPackage] mp4_muxer,SendTrailer failed -- line:%d\n", __LINE__);
    }
    av_log(NULL, AV_LOG_WARNING, "[ManagementControl::WritingPackage] wirte mp4 finish -- line:%d\n", __LINE__);
    return true;
}
代码测试
cpp
#include "ManagementControl.h"
// Run the full PCM+YUV -> MP4 pipeline; abort on the first failing stage.
int main(int argc, char const *argv[])
{
    av_log_set_level(AV_LOG_DEBUG);
    ManagementControl EncapsulationMP4;
    if (!EncapsulationMP4.InitControl())
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] EncapsulationMP4.InitControl error -- line:%d\n", __FUNCTION__, __LINE__);
        exit(-1);
    }
    if (!EncapsulationMP4.AllocFrameBuffer())
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] EncapsulationMP4.AllocFrameBuffer error -- line:%d\n", __FUNCTION__, __LINE__);
        exit(-1);
    }
    if (!EncapsulationMP4.CreateStream())
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] EncapsulationMP4.CreateStream error -- line:%d\n", __FUNCTION__, __LINE__);
        exit(-1);
    }
    if (!EncapsulationMP4.OpenOutputFileIO())
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] EncapsulationMP4.OpenOutputFileIO error -- line:%d\n", __FUNCTION__, __LINE__);
        exit(-1);
    }
    if (!EncapsulationMP4.WritingPackage())
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] EncapsulationMP4.WritingPackage error -- line:%d\n", __FUNCTION__, __LINE__);
        exit(-1);
    }
    return 0;
}