11 - FFmpeg - 编码 AAC

Planar 模式是 ffmpeg内部存储模式，我们实际使用的音频文件都是Packed模式的。

FFmpeg 解码不同格式的音频时，输出的音频采样格式并不一样。

其中AAC解码输出的数据为浮点型的 AV_SAMPLE_FMT_FLTP 格式，MP3 解码输出的数据为 AV_SAMPLE_FMT_S16P 格式(使用的mp3文件为16位深)。

具体采样格式可以查看解码后的 AVFrame 中的 format 成员，或解码器 AVCodecContext 中的 sample_fmt 成员。

Planar或者Packed模式直接影响到保存文件时写文件的操作，操作数据的时候一定要先检测音频采样格式。

方法1

cpp 复制代码

/**
 * Feed one frame to the encoder and write every packet it produces to dest_fp.
 *
 * @param encoderCtx opened encoder context
 * @param frame      frame to encode, or NULL to put the encoder into flush mode
 * @param packet     caller-owned packet reused as scratch for each output packet
 * @param dest_fp    output stream; the raw packet payload is appended to it
 * @return 0 when the encoder needs more input or has been fully drained,
 *         -1 if sending, receiving or writing failed
 */
int encodeAudioInterface(AVCodecContext *encoderCtx, AVFrame *frame, AVPacket *packet, FILE *dest_fp)
{
    int ret = avcodec_send_frame(encoderCtx, frame);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "send frame to encoder failed:%s\n", av_err2str(ret));
        // Report the failure to the caller instead of falling through to "return 0".
        return -1;
    }

    // One send may yield zero or more packets: keep receiving until the encoder
    // asks for more input (EAGAIN) or reports that it is drained (EOF).
    for (;;)
    {
        ret = avcodec_receive_packet(encoderCtx, packet);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        {
            av_log(NULL, AV_LOG_WARNING, "[encodeAudioInterface] -- AVERROR(EAGAIN) || AVERROR_EOF \n");
            return 0;
        }
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "encode frame failed:%s\n", av_err2str(ret));
            return -1;
        }

        // Remember the size first: av_packet_unref() resets the packet fields.
        size_t expected = packet->size > 0 ? (size_t)packet->size : 0;
        size_t written = fwrite(packet->data, 1, expected, dest_fp);
        av_packet_unref(packet);
        if (written != expected)
        {
            av_log(NULL, AV_LOG_ERROR, "write packet to file failed!\n");
            return -1;
        }
    }
}

cpp 复制代码

int encodeAudio(const char *inFileName, const char *outFileName)
{
    int ret = 0;
    /*****************************************************************************/
    FILE *src_fp = fopen(inFileName, "rb");
    if (src_fp == NULL)
    {
        av_log(NULL, AV_LOG_ERROR, "open infile:%s failed!\n", inFileName);
        ret = -1;
        goto end;
    }
    FILE *dest_fp = fopen(outFileName, "wb+");
    if (dest_fp == NULL)
    {
        av_log(NULL, AV_LOG_ERROR, "open outfile:%s failed!\n", outFileName);
        ret = -1;
        goto end;
    }
    /*****************************************************************************/
    AVFrame *frame = av_frame_alloc();
    frame->sample_rate = 48 * 1000; // 采样率 - 48K
    frame->channels = 2;
    frame->channel_layout = AV_CH_LAYOUT_STEREO;
    frame->format = AV_SAMPLE_FMT_S16;
    frame->nb_samples = 1024;

    // libfdk_aac
    av_frame_get_buffer(frame, 0);
    AVCodec *encoder = avcodec_find_encoder_by_name("libfdk_aac");
    if (encoder == NULL)
    {
        av_log(NULL, AV_LOG_ERROR, "find encoder failed!\n");
        ret = -1;
        goto end;
    }
    AVCodecContext *encoderCtx = avcodec_alloc_context3(encoder);
    if (encoderCtx == NULL)
    {
        av_log(NULL, AV_LOG_ERROR, "alloc encoder context failed!\n");
        ret = -1;
        goto end;
    }
    encoderCtx->sample_fmt = frame->format;
    encoderCtx->sample_rate = frame->sample_rate;
    encoderCtx->channels = frame->channels;
    encoderCtx->channel_layout = frame->channel_layout;
    ret = avcodec_open2(encoderCtx, encoder, NULL);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "open encoder failed:%s\n", av_err2str(ret));
        goto end;
    }


    AVPacket packet;
    av_init_packet(&packet);
    while (1)
    {
        // packet L R L R
        // 2 * 2 * 1024 = 4096
        int readSize = fread(frame->data[0], 1, frame->linesize[0], src_fp);
        if (readSize == 0)
        {
            av_log(NULL, AV_LOG_INFO, "finish read infile!\n");
            break;
        }
        encodeAudioInterface(encoderCtx, frame, &packet, dest_fp);
    }
    encodeAudioInterface(encoderCtx, NULL, &packet, dest_fp);

end:
    if (frame)
    {
        av_frame_free(&frame);
    }
    if (encoderCtx)
    {
        avcodec_free_context(&encoderCtx);
        return ret;
    }
    if (src_fp)
    {
        fclose(src_fp);
    }
    if (dest_fp)
    {
        fclose(dest_fp);
    }
}

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

方法 2

cpp 复制代码

int CheckSampleRate(const AVCodec *encoder, const int SampleRate)
{
    // encoder->supported_samplerates 支持的音频采样数组，如果未知则为NULL，数组以0结尾
    const int *SupportSampleRate = encoder->supported_samplerates;
    while (*SupportSampleRate != 0)
    {
        av_log(NULL, AV_LOG_DEBUG, "[%s] encoder->name: %s, support %d hz -- line:%d\n", __FUNCTION__, encoder->name, *SupportSampleRate, __LINE__); // 受 frame->format等参数影响
        if (*SupportSampleRate == SampleRate)
        {
            av_log(NULL, AV_LOG_INFO, "[%s] This sampling rate is supported by the encoder %d hz -- line:%d\n", __FUNCTION__, *SupportSampleRate, __LINE__); // 受 frame->format等参数影响
            return 1;
        }
        SupportSampleRate++;
    }
    return 0;
}

cpp 复制代码

/**
 * Check whether an encoder lists a given channel layout as supported.
 *
 * @param encoder       encoder description to inspect
 * @param ChannelLayout channel layout mask to look for
 * @return 1 if the layout is listed, or if the encoder publishes no list at all;
 *         0 if a list exists and the layout is not in it
 */
int CheckChannelLayout(const AVCodec *encoder, const uint64_t ChannelLayout)
{
    // Array of supported channel layouts, terminated by 0, or NULL when unknown.
    const uint64_t *SupportsChannelLayout = encoder->channel_layouts;
    if (!SupportsChannelLayout)
    { // not every AVCodec publishes its supported channel layouts
        av_log(NULL, AV_LOG_WARNING, "[%s] the encoder %s no set channel_layouts -- line:%d\n", __FUNCTION__, encoder->name, __LINE__);
        return 1;
    }
    for (; *SupportsChannelLayout != 0; SupportsChannelLayout++)
    {
        // uint64_t is printed through unsigned long long so the conversion
        // specifier matches the argument on every platform.
        av_log(NULL, AV_LOG_DEBUG, "[%s] encoder->name: %s, support channel_layout %llu -- line:%d\n", __FUNCTION__, encoder->name, (unsigned long long)*SupportsChannelLayout, __LINE__);
        if (*SupportsChannelLayout == ChannelLayout)
        {
            av_log(NULL, AV_LOG_INFO, "[%s] This channel layout is supported by the encoder %llu -- line:%d\n", __FUNCTION__, (unsigned long long)*SupportsChannelLayout, __LINE__);
            return 1;
        }
    }
    return 0;
}

cpp 复制代码

int CheckSampleFmt(const AVCodec *codecCtx, enum AVSampleFormat SampleFmt)
{ // 数组支持的样例格式，如果未知则为NULL，数组以-1结尾
    const enum AVSampleFormat *SampleFmts = codecCtx->sample_fmts;
    while (*SampleFmts != AV_SAMPLE_FMT_NONE) // 通过 AV_SAMPLE_FMT_NONE 作为结束
    {
        if (*SampleFmts == SampleFmt)
        {
            return 1;
        }
        *SampleFmts++;
    }
    return 0;
}

cpp 复制代码

/**
 * Build the 7-byte ADTS header that precedes one raw AAC frame.
 *
 * @param codecCtx   encoder context; sample_rate, channels and profile are read
 * @param adtsHeader output buffer, at least 7 bytes
 * @param aacLength  size in bytes of the AAC payload that follows the header
 *
 * The profile value is shifted into the top two bits of byte 2 as-is, so it is
 * expected to already be the 2-bit ADTS profile (the caller in this file uses
 * FF_PROFILE_AAC_LOW).
 */
void GetAdtsHeader(AVCodecContext *codecCtx, uint8_t *adtsHeader, int aacLength)
{
    // Position in this table is the ADTS sampling-frequency index:
    // 0 -> 96000 Hz, 3 -> 48000 Hz, 4 -> 44100 Hz, ...
    static const int kSampleRates[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000,
        22050, 16000, 12000, 11025, 8000, 7350};
    const int rateCount = (int)(sizeof(kSampleRates) / sizeof(kSampleRates[0]));

    uint8_t freqIdx = 4; // rates missing from the table fall back to the 44100 Hz index
    for (int idx = 0; idx < rateCount; idx++)
    {
        if (kSampleRates[idx] == codecCtx->sample_rate)
        {
            freqIdx = (uint8_t)idx;
            break;
        }
    }

    uint8_t chanCfg = codecCtx->channels;
    uint32_t frameLength = aacLength + 7; // the length field counts the header too

    adtsHeader[0] = 0xff; // first 8 bits of the syncword
    adtsHeader[1] = 0xF1; // rest of the syncword plus the fixed flag bits
    // profile (2 bits) | frequency index (4 bits) | top bit of the channel config
    adtsHeader[2] = ((codecCtx->profile) << 6) + (freqIdx << 2) + (chanCfg >> 2);
    // low 2 bits of the channel config | top 2 bits of the frame length
    adtsHeader[3] = (((chanCfg & 3) << 6) + (frameLength >> 11));
    // middle 8 bits of the frame length
    adtsHeader[4] = ((frameLength & 0x7FF) >> 3);
    // low 3 bits of the frame length | 5 set bits
    adtsHeader[5] = (((frameLength & 7) << 5) + 0x1F);
    adtsHeader[6] = 0xFC;
}

cpp 复制代码

/**
 * Encode one frame and append every resulting packet to output, each one
 * preceded by a 7-byte ADTS header. Despite its name this function encodes.
 *
 * @param codecCtx opened encoder context
 * @param frame    frame to encode, or NULL to flush the encoder
 * @param pkt      caller-owned packet reused for each output packet
 * @param output   destination stream
 * @return 0 once the encoder needs more input or is drained, -1 on any failure
 */
int decodeAudioInterface(AVCodecContext *codecCtx, AVFrame *frame, AVPacket *pkt, FILE *output)
{
    uint8_t aacHeader[7];
    int rc = avcodec_send_frame(codecCtx, frame);

    if (rc < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] sending the frame to the encoder error! -- line:%d\n", __FUNCTION__, __LINE__);
        return -1;
    }

    // Encoding and decoding share one pattern: send once, then receive
    // repeatedly until AVERROR(EAGAIN) or AVERROR_EOF.
    for (;;)
    {
        rc = avcodec_receive_packet(codecCtx, pkt);
        if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF)
        {
            return 0;
        }
        if (rc < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "[%s] encoding audio frameerror! -- line:%d\n", __FUNCTION__, __LINE__);
            return -1;
        }

        // Header first, then the payload it describes.
        GetAdtsHeader(codecCtx, aacHeader, pkt->size);
        if (fwrite(aacHeader, 1, 7, output) != 7)
        {
            av_log(NULL, AV_LOG_ERROR, "[%s] fwrite aac_header failed! -- line:%d\n", __FUNCTION__, __LINE__);
            return -1;
        }
        if (fwrite(pkt->data, 1, pkt->size, output) != (size_t)pkt->size)
        {
            av_log(NULL, AV_LOG_ERROR, "[%s] fwrite aac data failed! -- line:%d\n", __FUNCTION__, __LINE__);
            return -1;
        }
    }
}

cpp 复制代码

/**
 * De-interleave packed stereo float samples (L R L R ...) into planar layout.
 *
 * @param f32le      input, 2 * nb_samples interleaved floats
 * @param fltp       output, 2 * nb_samples floats: the first nb_samples are the
 *                   left channel, the next nb_samples are the right channel
 * @param nb_samples number of samples per channel
 */
void f32leConvert2fltp(float *f32le, float *fltp, int nb_samples)
{
    const float *src = f32le;
    float *left = fltp;               // left-channel plane
    float *right = fltp + nb_samples; // right-channel plane follows immediately

    for (int remaining = nb_samples; remaining > 0; --remaining)
    {
        *left++ = *src++;  // even input positions feed the left plane
        *right++ = *src++; // odd input positions feed the right plane
    }
}

cpp 复制代码

int decodeAudio(const char *pcmFileName, const char *aacFileName, const char *encoderName)
{
    FILE *pcmfile = fopen(pcmFileName, "rb");
    FILE *aacfile = fopen(aacFileName, "wb");
    if (pcmfile == NULL || aacfile == NULL)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] open %s or %s file failed -- line:%d \n", __FUNCTION__, aacFileName, pcmFileName, __LINE__);
        goto _end;
    }

    const AVCodec *encoder = NULL;
    if (encoderName != NULL && (strcmp(encoderName, "libfdk_aac") == 0 || strcmp(encoderName, "aac") == 0)) // encoderName 如果制定了编码器
    {
        encoder = avcodec_find_encoder_by_name(encoderName); // 设置为指定编码器
        av_log(NULL, AV_LOG_INFO, "[%s] force codec name: %s -- line:%d\n", __FUNCTION__, encoderName, __LINE__);
    }
    else
    {
        encoder = avcodec_find_encoder(AV_CODEC_ID_AAC);
        av_log(NULL, AV_LOG_INFO, "[%s] default codec name: %s -- line:%d\n", __FUNCTION__, "aac", __LINE__);
    }
    if (encoder == NULL)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Codec found error! -- line:%d\n", __FUNCTION__, __LINE__);
        goto _end;
    }

    // 创建编码器上下文
    AVCodecContext *codecCtx = avcodec_alloc_context3(encoder);
    if (codecCtx == NULL)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Conld not allocate audio codec context -- line:%d\n", __FUNCTION__, __LINE__);
        goto _end;
    }

    codecCtx->codec_id = AV_CODEC_ID_AAC;
    codecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
    codecCtx->bit_rate = 128 * 1024;
    codecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
    codecCtx->sample_rate = 48000;
    codecCtx->channels = av_get_channel_layout_nb_channels(codecCtx->channel_layout);
    codecCtx->profile = FF_PROFILE_AAC_LOW;

    if (strcmp(encoder->name, "libfdk_aac") == 0)
    {
        codecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
    }
    else
    {
        codecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    }
    // 检测采样格式的支持情况
    if (!CheckSampleFmt(encoder, codecCtx->sample_fmt))
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Encoder does not support sample format %s -- line:%d\n", __FUNCTION__, av_get_sample_fmt_name(codecCtx->sample_fmt), __LINE__);
        goto _end;
    }
    if (!CheckSampleRate(encoder, codecCtx->sample_rate))
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Encoder does not support sample rate codecCtx->sample_rate:%d -- line:%d\n", __FUNCTION__, codecCtx->sample_rate, __LINE__);
        goto _end;
    }
    if (!CheckChannelLayout(encoder, codecCtx->channel_layout))
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Encoder does not support sample channel_layout %lu -- line:%d\n", __FUNCTION__, codecCtx->channel_layout, __LINE__);
        goto _end;
    }

    av_log(NULL, AV_LOG_INFO, "\n[%s] ------------------------ Audio encode config ------------------------ line:%d \n", __FUNCTION__, __LINE__);
    av_log(NULL, AV_LOG_INFO, "[%s] codecCtx->bit_rate: %ld kbps -- line:%d\n", __FUNCTION__, codecCtx->bit_rate / 1024, __LINE__);
    av_log(NULL, AV_LOG_INFO, "[%s] codecCtx->sample_rate: %d -- line:%d\n", __FUNCTION__, codecCtx->sample_rate, __LINE__);
    av_log(NULL, AV_LOG_INFO, "[%s] codecCtx->sample_fmt: %s -- line:%d\n", __FUNCTION__, av_get_sample_fmt_name(codecCtx->sample_fmt), __LINE__);
    av_log(NULL, AV_LOG_INFO, "[%s] codecCtx->channels: %d -- line:%d\n", __FUNCTION__, codecCtx->channels, __LINE__);

    // frame_size 是在 av_coedc_open2后进行关联
    av_log(NULL, AV_LOG_INFO, "[%s] Before frame size %d -- line:%d\n", __FUNCTION__, codecCtx->frame_size, __LINE__);
    if (avcodec_open2(codecCtx, encoder, NULL) < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Could not open codec -- line:%d\n", __FUNCTION__, __LINE__);
        goto _end;
    }
    av_log(NULL, AV_LOG_INFO, "[%s] Once frame_size %d -- line:%d\n\n", __FUNCTION__, codecCtx->frame_size, __LINE__); // 决定每次送多少个采样点

    AVPacket *packet = av_packet_alloc();
    if (!packet)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] packet alloc error! -- line:%d \n", __FUNCTION__, __LINE__);
        goto _end;
    }

    AVFrame *frame = av_frame_alloc();
    if (!frame)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Could not allocate audio frame -- line:%d\n", __FUNCTION__, __LINE__);
        goto _end;
    }

    frame->nb_samples = codecCtx->frame_size;
    frame->format = codecCtx->sample_fmt;
    frame->channel_layout = codecCtx->channel_layout;
    frame->channels = av_get_channel_layout_nb_channels(frame->channel_layout);

    av_log(NULL, AV_LOG_INFO, "[%s] frame nb_samples: %d -- line:%d\n", __FUNCTION__, frame->nb_samples, __LINE__);
    av_log(NULL, AV_LOG_INFO, "[%s] frame sample_fmt: %s -- line:%d\n", __FUNCTION__, av_get_sample_fmt_name(frame->format), __LINE__);
    av_log(NULL, AV_LOG_INFO, "[%s] frame channel_layout: %lu -- line:%d\n", __FUNCTION__, frame->channel_layout, __LINE__);

    // 为frame分配buffer
    int ret = av_frame_get_buffer(frame, 0);
    if (ret < 0)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] Could not allocate audio data buffers -- line:%d\n", __FUNCTION__, __LINE__);
        goto _end;
    }

    // 计算出每一帧的数据 单个采样点的字节 * 通道数目 * 每帧采样点的数量
    size_t FrameByteSize = av_get_bytes_per_sample(frame->format) * frame->channels * frame->nb_samples;
    av_log(NULL, AV_LOG_INFO, "[%s] frame_bytes %ld frame->channels %d frame->nb_samples %d -- line:%d\n", __FUNCTION__, FrameByteSize, frame->channels, frame->nb_samples, __LINE__);

    uint8_t *pcmBuf = (uint8_t *)malloc(FrameByteSize);
    uint8_t *pcmBufTemp = (uint8_t *)malloc(FrameByteSize);
    if (!pcmBuf || !pcmBufTemp)
    {
        av_log(NULL, AV_LOG_ERROR, "[%s] pcmBuf or pcmBufTemp malloc failed -- line:%d\n", __FUNCTION__, __LINE__);
        goto _end;
    }
    memset(pcmBuf, 0, FrameByteSize);
    memset(pcmBufTemp, 0, FrameByteSize);
    int64_t pts = 0;
    av_log(NULL, AV_LOG_INFO, "\n[%s] ------------------------ start enode ------------------------ line:%d \n", __FUNCTION__, __LINE__);

    while (1)
    {
        memset(pcmBuf, 0, FrameByteSize);
        size_t ReadByteSize = fread(pcmBuf, 1, FrameByteSize, pcmfile);
        if (ReadByteSize <= 0)
        {
            av_log(NULL, AV_LOG_INFO, "[%s] read file finish -- line:%d \n", __FUNCTION__, __LINE__);
            break;
        }

        /*确保该 frame 可写, 如果编码器内部保持了内存参数计数，则需要重新拷贝一个备份
        目的是新写入的数据和编码器保存的数据不能产生冲突*/
        ret = av_frame_make_writable(frame);
        if (ret != 0)
        {
            av_log(NULL, AV_LOG_ERROR, "[%s] av_frame_make_writable failed!!! -- line:%d\n", __FUNCTION__, __LINE__);
        }
        if (AV_SAMPLE_FMT_S16 == frame->format)
        {
            // 将读取到的PCM数据填充到frame去，但是要注意匹配格式，（planner | packet ）
            ret = av_samples_fill_arrays(frame->data, frame->linesize, pcmBuf, frame->channels, frame->nb_samples, frame->format, 0);
        }
        else
        {
            // 将读取到的PCM数据填充到frame去，但是要注意匹配格式，（planner | packet ）
            // 将本地的f32le packed 模式的数据转为 float palanner
            memset(pcmBufTemp, 0, FrameByteSize);
            f32leConvert2fltp((float *)pcmBuf, (float *)pcmBufTemp, frame->nb_samples);
            ret = av_samples_fill_arrays(frame->data, frame->linesize, pcmBufTemp, frame->channels, frame->nb_samples, frame->format, 0);
        }

        // 设置 pts
        pts += frame->nb_samples;
        frame->pts = pts; // 使用采样率作为pts的单位，具体换算成秒 pts*1/采样率
        ret = decodeAudioInterface(codecCtx, frame, packet, aacfile);
        if (ret < 0)
        {
            av_log(NULL, AV_LOG_ERROR, "[%s] encode failed -- line:%d\n", __FUNCTION__, __LINE__);
            break;
        }
    }
    /*冲刷编码器*/
    decodeAudioInterface(codecCtx, NULL, packet, aacfile);
_end:
    if (aacfile)
    {
        fclose(aacfile);
    }
    if (pcmfile)
    {
        fclose(pcmfile);
    }
    if (pcmBuf)
    {
        free(pcmBuf);
    }
    if (pcmBufTemp)
    {
        free(pcmBufTemp);
    }
    if (packet)
    {
        av_packet_free(&packet);
    }
    if (frame)
    {
        av_frame_free(&frame);
    }
    if (codecCtx)
    {
        avcodec_free_context(&codecCtx);
    }
    return ret;
}

对于 flush encoder 的操作:

编码器通常的冲洗方法:调用一次 avcodec_send_frame(NULL)(返回成功)，

然后不停调用 avcodec_receive_packet() 直到其返回 AVERROR_EOF，取出所有缓存帧,

avcodec_receive_packet() 返回 AVERROR_EOF 的这一次调用没有有效数据，仅仅是获取到一个结束标志。