// C++ snippet: the AVSampleFormat enumeration
/**
 * Audio sample formats. Each entry lists its short name, the element type of
 * one sample, and whether the channel layout is packed or planar.
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,

    // packed formats
    AV_SAMPLE_FMT_U8 = 0,    // "u8", uint8_t, packed
    AV_SAMPLE_FMT_S16 = 1,   // "s16", int16_t, packed
    AV_SAMPLE_FMT_S32 = 2,   // "s32", int32_t, packed
    AV_SAMPLE_FMT_FLT = 3,   // "flt", float, packed
    AV_SAMPLE_FMT_DBL = 4,   // "dbl", double, packed

    // planar formats
    AV_SAMPLE_FMT_U8P = 5,   // "u8p", uint8_t, planar
    AV_SAMPLE_FMT_S16P = 6,  // "s16p", int16_t, planar
    AV_SAMPLE_FMT_S32P = 7,  // "s32p", int32_t, planar
    AV_SAMPLE_FMT_FLTP = 8,  // "fltp", float, planar
    AV_SAMPLE_FMT_DBLP = 9,  // "dblp", double, planar

    // 64-bit integer formats
    AV_SAMPLE_FMT_S64 = 10,  // "s64", int64_t, packed
    AV_SAMPLE_FMT_S64P = 11, // "s64p", int64_t, planar

    AV_SAMPLE_FMT_NB = 12    // Number of sample formats
};
// C++ snippet: decode an AAC file to packed PCM with FFmpeg
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
#include <glog/logging.h>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <string_view> // std=c++17
// Number of raw input bytes held in the read buffer; AV_INPUT_BUFFER_PADDING_SIZE is added on top when the buffer is allocated
static constexpr std::size_t kInputAudioBufferSize = 20480;
// The read buffer is refilled from the input file once fewer than this many unparsed bytes remain
static constexpr int kInputAudioBufferRefillThreshold = 4096;
// Per-thread scratch buffer that ErrorToString() fills with the FFmpeg error message
thread_local static char error_buffer[AV_ERROR_MAX_STRING_SIZE] = {};
/**
 * @brief Produce the textual description of an FFmpeg error code
 * @param error_code FFmpeg error code
 * @return the value returned by av_make_error_string() after it has filled the
 *         thread-local error_buffer; the text is overwritten by the next call
 *         made on the same thread
 */
static char *ErrorToString(const int error_code) {
    // start from an all-zero buffer so no text from a previous call survives
    std::memset(error_buffer, 0, sizeof(error_buffer));
    char *const message = av_make_error_string(error_buffer, sizeof(error_buffer), error_code);
    return message;
}
/**
 * @brief Get the lower-cased file extension from a file name or path
 * @param file_name file name, optionally with leading directories ('/' or '\\' separated)
 * @return the text after the last '.', converted to lower case; an empty string
 *         if there is no '.', or if the last '.' belongs to a directory
 *         component rather than to the final path element
 */
static std::string GetFileExtension(std::string_view file_name) {
    const std::size_t dot_pos = file_name.rfind('.');
    if (dot_pos == std::string_view::npos) {
        return "";
    }
    // a dot that sits before the last path separator ("dir.v1/file") is part of
    // a directory name, not an extension separator
    const std::size_t separator_pos = file_name.find_last_of("/\\");
    if (separator_pos != std::string_view::npos && separator_pos > dot_pos) {
        return "";
    }
    std::string extension(file_name.substr(dot_pos + 1));
    for (char &c : extension) {
        // std::tolower requires a value representable as unsigned char (or EOF);
        // passing a negative char directly is undefined behavior
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return extension;
}
/**
 * @brief Send one packet to the decoder and write every PCM frame it returns
 *        to ofs in packed (interleaved) layout
 * @param codec_ctx opened codec context; must not be null
 * @param pkt packet to decode (an AAC frame); must not be null. DecodeAudio
 *        passes a packet with data == nullptr and size == 0 to drain the decoder
 * @param ofs output file stream; once it is in a failed state, frames are
 *        still received from the decoder but no longer written
 * @return true if receiving stopped with EAGAIN or EOF and ofs is still usable;
 *         false on null arguments, decoder errors, allocation failure or a
 *         broken output stream
 */
static bool InnerDecodeAudio(AVCodecContext *codec_ctx, AVPacket *pkt, std::ofstream &ofs) {
    if (!codec_ctx || !pkt) {
        return false;
    }
    int error_code{};
    bool logged = false;
    // send packet to decoder; EAGAIN / EOF are tolerated so that the frames
    // still pending inside the decoder are received below
    if ((error_code = avcodec_send_packet(codec_ctx, pkt)) < 0) {
        if (error_code != AVERROR(EAGAIN) && error_code != AVERROR_EOF) {
            LOG(ERROR) << "Failed to send packet to decoder: " << ErrorToString(error_code);
            return false;
        }
    }
    // allocate AVFrame
    AVFrame *frame = av_frame_alloc();
    if (frame == nullptr) {
        LOG(ERROR) << "Failed to allocate AVFrame: av_frame_alloc()";
        return false;
    }
    // receive pcm data from decoder until it reports EAGAIN / EOF / an error
    // the pcm memory referenced by the frame does not need to be managed here
    while ((error_code = avcodec_receive_frame(codec_ctx, frame)) == 0) {
        const int is_planar = av_sample_fmt_is_planar(codec_ctx->sample_fmt);
        const int channels = codec_ctx->ch_layout.nb_channels;
        // log once per call, i.e. once per packet
        if (!logged) {
            LOG(INFO) << "Decode a " << pkt->size << " bytes AAC frame"
                      << ", sample_rate=" << codec_ctx->sample_rate
                      << ", channels=" << channels
                      << ", sample_format=" << av_get_sample_fmt_name(codec_ctx->sample_fmt)
                      << ", is_planar=" << is_planar;
            logged = true;
        }
        // output already broken: keep pulling frames from the decoder, but skip writing
        if (!ofs) {
            continue;
        }
        const int bytes_per_sample = av_get_bytes_per_sample(codec_ctx->sample_fmt);
        if (bytes_per_sample <= 0) {
            LOG(ERROR) << "Failed to get bytes per sample: " << codec_ctx->sample_fmt;
            continue;
        }
        // write to output file
        // if planar format: LL...LLRR...RR, one plane per channel
        // if packed format: LRLR...LRLR, everything in data[0]
        // output format only support packed
        bool write_ok = true;
        if (is_planar) {
            for (int i = 0; write_ok && i < frame->nb_samples; ++i) {
                for (int j = 0; j < channels; ++j) {
                    // extended_data holds one pointer per channel; data[] is a
                    // fixed-size array and cannot be indexed by arbitrary channel counts
                    const uint8_t *sample =
                            frame->extended_data[j] + static_cast<std::ptrdiff_t>(i) * bytes_per_sample;
                    if (!ofs.write(reinterpret_cast<const char *>(sample), bytes_per_sample)) {
                        // stop at the first failure instead of failing (and logging) once per sample
                        write_ok = false;
                        break;
                    }
                }
            }
        } else {
            const std::streamsize byte_count =
                    static_cast<std::streamsize>(frame->nb_samples) * bytes_per_sample * channels;
            if (!ofs.write(reinterpret_cast<const char *>(frame->data[0]), byte_count)) {
                write_ok = false;
            }
        }
        if (!write_ok) {
            LOG(ERROR) << "Failed to write pcm file, ofstream is broken";
        }
    }
    av_frame_free(&frame);
    // EAGAIN (decoder needs more input) and EOF (decoder fully drained) are the normal ways to leave the loop
    if (error_code != AVERROR(EAGAIN) && error_code != AVERROR_EOF) {
        LOG(ERROR) << "Failed to receive frame from decoder: " << ErrorToString(error_code);
        return false;
    }
    if (!ofs) {
        return false;
    }
    return true;
}
/**
 * @brief Decode an AAC audio file and write the decoded samples as packed PCM
 *
 * The input is read in chunks, split into frames with an AVCodecParserContext,
 * and each frame is handed to InnerDecodeAudio(). At end of input the parser
 * is flushed and the decoder is drained. Failures are reported through LOG(ERROR);
 * nothing is returned to the caller.
 *
 * @param input_file input file path, extension must be "aac" (case-insensitive)
 * @param output_file output file path, packed pcm format
 */
void DecodeAudio(std::string_view input_file, std::string_view output_file) {
    int error_code{};
    // check file extension
    AVCodecID codec_id{};
    const std::string file_extension = GetFileExtension(input_file);
    if (file_extension == "aac") {
        codec_id = AV_CODEC_ID_AAC;
        LOG(INFO) << "Decode AAC audio start";
    } else {
        LOG(ERROR) << "Unsupported audio format: " << file_extension << ", only AAC is supported";
        return;
    }
    // find AVCodec
    const AVCodec *codec = avcodec_find_decoder(codec_id);
    if (codec == nullptr) {
        LOG(ERROR) << "AVCodec not found: " << codec_id;
        return;
    }
    // std::string_view is not guaranteed to be null-terminated, so the paths
    // are copied into std::string before being handed to the file streams
    const std::string input_path(input_file);
    const std::string output_path(output_file);
    // open input_file
    std::ifstream ifs(input_path, std::ios::in | std::ios::binary);
    if (!ifs.is_open()) {
        LOG(ERROR) << "Failed to open input file: " << input_file;
        return;
    }
    // open output_file
    std::ofstream ofs(output_path, std::ios::out | std::ios::binary);
    if (!ofs.is_open()) {
        LOG(ERROR) << "Failed to open output file: " << output_file;
        return;
    }
    // initialize AVCodecParserContext
    AVCodecParserContext *parser_ctx = av_parser_init(codec->id);
    if (parser_ctx == nullptr) {
        LOG(ERROR) << "Failed to init AVCodecParserContext: " << codec->id;
        return;
    }
    // allocate AVCodecContext
    AVCodecContext *codec_ctx = avcodec_alloc_context3(codec);
    if (codec_ctx == nullptr) {
        LOG(ERROR) << "Failed to allocate AVCodecContext: " << codec->id;
        av_parser_close(parser_ctx);
        return;
    }
    // initialize AVCodecContext
    if ((error_code = avcodec_open2(codec_ctx, codec, nullptr)) < 0) {
        LOG(ERROR) << "Failed to init AVCodecContext: " << ErrorToString(error_code);
        avcodec_free_context(&codec_ctx);
        av_parser_close(parser_ctx);
        return;
    }
    // allocate AVPacket
    AVPacket *pkt = av_packet_alloc();
    if (pkt == nullptr) {
        LOG(ERROR) << "Failed to allocate AVPacket: av_packet_alloc()";
        avcodec_free_context(&codec_ctx);
        av_parser_close(parser_ctx);
        return;
    }
    // allocate input buffer; make_unique<T[]> value-initializes, so it starts zero-filled
    const std::size_t input_buffer_size = kInputAudioBufferSize + AV_INPUT_BUFFER_PADDING_SIZE;
    auto input_buffer = std::make_unique<uint8_t[]>(input_buffer_size); // std=c++17
    uint8_t *data = input_buffer.get(); // start of the not yet parsed bytes
    std::size_t data_size{};            // number of not yet parsed bytes
    while (true) {
        // refill input buffer
        if (data_size < static_cast<std::size_t>(kInputAudioBufferRefillThreshold) && !ifs.eof()) {
            if (data_size > 0) {
                // source and destination are inside the same buffer, so memmove is the safe call
                std::memmove(input_buffer.get(), data, data_size);
            }
            data = input_buffer.get();
            const std::size_t bytes_to_read = kInputAudioBufferSize - data_size;
            if (!ifs.read(reinterpret_cast<char *>(data) + data_size, static_cast<std::streamsize>(bytes_to_read))) {
                if (!ifs.eof()) {
                    LOG(ERROR) << "Failed to read input file: " << input_file << ", ifstream is broken";
                    break;
                }
                LOG(INFO) << "End of ifstream: " << input_file;
            }
            data_size += static_cast<std::size_t>(ifs.gcount());
            // after a short read the bytes behind the valid data are leftovers from the
            // previous fill; zero the padding region that follows the valid data again
            std::memset(data + data_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
        }
        // parse an audio frame. if success, pkt->data / pkt->size describe the frame
        const int parsed = av_parser_parse2(parser_ctx, codec_ctx,
                                            &pkt->data, &pkt->size,
                                            data, static_cast<int>(data_size),
                                            AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
        if (parsed < 0) {
            LOG(ERROR) << "Failed to parse audio: " << ErrorToString(parsed);
            break;
        }
        data += parsed;
        data_size -= static_cast<std::size_t>(parsed);
        // decode audio and write to output_file
        if (pkt->size > 0) {
            InnerDecodeAudio(codec_ctx, pkt, ofs);
        }
        // nothing more can be written once the output stream has failed
        if (!ofs) {
            LOG(ERROR) << "Failed to write output file: " << output_file << ", ofstream is broken";
            break;
        }
        // all input consumed: flush the parser, then drain the decoder
        if (data_size == 0 && ifs.eof()) {
            // a zero-length input signals EOF to the parser so it can output a frame it still buffers
            error_code = av_parser_parse2(parser_ctx, codec_ctx,
                                          &pkt->data, &pkt->size,
                                          nullptr, 0,
                                          AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (error_code < 0) {
                LOG(ERROR) << "Failed to parse audio: " << ErrorToString(error_code);
            } else if (pkt->size > 0) {
                InnerDecodeAudio(codec_ctx, pkt, ofs);
            }
            // an empty packet puts the decoder into draining mode
            pkt->data = nullptr;
            pkt->size = 0;
            InnerDecodeAudio(codec_ctx, pkt, ofs);
            break;
        }
    }
    LOG(INFO) << "Decode AAC audio end";
    av_packet_free(&pkt);
    avcodec_free_context(&codec_ctx);
    av_parser_close(parser_ctx);
}
#if 0
// Example entry point; compiled out by the surrounding #if 0.
// Sets up glog, decodes "audio.aac" into "audio.pcm", then shuts glog down.
int main(int argc, char *argv[]) {
google::InitGoogleLogging(argv[0]);
// send log output to stderr
FLAGS_logtostderr = true;
// set the minimum log level to INFO
FLAGS_minloglevel = google::GLOG_INFO;
DecodeAudio("audio.aac", "audio.pcm");
google::ShutdownGoogleLogging();
return 0;
}
#endif