FFmpeg 实现从设备端获取音视频流并通过RTMP推流

使用FFmpeg库(版本号为:4.4.2-0ubuntu0.22.04.1)实现从摄像头和麦克风获取音视频流并通过RTMP推流。

RTMP 服务器使用的是 SRS,部署在 Ubuntu 上。建议关闭系统防火墙,否则可能连接不上服务器;拉流端使用 VLC。如果想降低延时,请参考我另一篇博客中介绍的方法。

代码如下:

代码(C 语言):
#include <stdio.h>
#include <pthread.h>

#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavformat/avformat.h>
#include <libavutil/audio_fifo.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>

/* Per-thread state for the video capture/encode/push pipeline. */
typedef struct st_video
{
    enum AVPixelFormat camera_pix_fmt; // raw pixel format delivered by the camera (YUYV422 here)
    AVStream *stream_out;              // video stream on the output (FLV/RTMP) muxer
    AVFormatContext *context_in;       // v4l2 capture context
    AVFormatContext **context_out;     // shared output muxer context (owned by main)
    int streamid;                      // index of the video stream inside context_in
    struct SwsContext *sws_ctx;        // camera format -> YUV420P converter
    AVCodecContext *codec_context;     // H.264 encoder context
    pthread_mutex_t *lock_write_frame; // serializes av_interleaved_write_frame with the audio thread
} st_video;

/* Per-thread state for the audio capture/encode/push pipeline. */
typedef struct st_audio
{
    AVStream *stream_out;              // audio stream on the output (FLV/RTMP) muxer
    AVFormatContext *context_in;       // ALSA capture context
    AVFormatContext **context_out;     // shared output muxer context (owned by main)
    int streamid;                      // index of the audio stream inside context_in
    struct SwrContext *swr_ctx;        // PCM resampler (capture format -> encoder format)
    AVCodecContext *codec_context;     // AAC encoder context
    pthread_mutex_t *lock_write_frame; // serializes av_interleaved_write_frame with the video thread
} st_audio;

int initVideo(st_video *s_video);  // open camera, set up H.264 encoder + sws converter
int initAudio(st_audio *s_audio);  // open microphone, set up AAC encoder + swr resampler
void *thread_v(void *arg);         // video capture/encode/push loop (arg: st_video*)
void *thread_a(void *arg);         // audio capture/encode/push loop (arg: st_audio*)

int main(void)
{
    int ret = -1;
    const char *url = "rtmp://192.168.3.230/live/livestream"; // rtmp地址
    AVFormatContext *context_out = NULL;
    pthread_mutex_t lock_write_frame;
    st_video s_video;
    s_video.context_out = &context_out;
    s_video.lock_write_frame = &lock_write_frame;
    st_audio s_audio;
    s_audio.context_out = &context_out;
    s_audio.lock_write_frame = &lock_write_frame;

    // 打印ffmpeg版本信息
    printf("ffmpeg version: %s\n", av_version_info());

    // 注册所有设备
    avdevice_register_all();

    // 分配输出格式上下文
    avformat_alloc_output_context2(&context_out, NULL, "flv", NULL);
    if (!context_out)
    {
        printf("avformat_alloc_output_context2 failed\n");
        return -1;
    }

    // 初始化视频流
    initVideo(&s_video);
    // 初始化音频流
    initAudio(&s_audio);

    // 打开url
    if (!(context_out->oformat->flags & AVFMT_NOFILE))
    {
        ret = avio_open(&context_out->pb, url, AVIO_FLAG_WRITE);
        if (ret < 0)
        {
            printf("avio_open error (errmsg '%s')\n", av_err2str(ret));
            return -1;
        }
    }

    // 写入头部信息
    ret = avformat_write_header(context_out, NULL);
    if (ret < 0)
    {
        avio_close(context_out->pb);
        printf("avformat_write_header failed\n");
        return -1;
    }

    pthread_t thread1, thread2;
    pthread_mutex_init(&lock_write_frame, NULL);
    pthread_create(&thread1, NULL, thread_v, &s_video);
    pthread_create(&thread2, NULL, thread_a, &s_audio);

    pthread_join(thread1, NULL);
    pthread_join(thread2, NULL);
    pthread_mutex_destroy(&lock_write_frame);

    if (s_video.sws_ctx)
        sws_freeContext(s_video.sws_ctx);
    if (s_video.context_in)
        avformat_close_input(&s_video.context_in);
    if (s_video.codec_context)
        avcodec_free_context(&s_video.codec_context);

    if (s_audio.codec_context)
        avcodec_free_context(&s_audio.codec_context);
    if (s_audio.swr_ctx)
        swr_free(&s_audio.swr_ctx);
    if (s_audio.context_in)
        avformat_close_input(&s_audio.context_in);

    if (context_out && !(context_out->flags & AVFMT_NOFILE))
        avio_close(context_out->pb);
    if (context_out)
        avformat_free_context(context_out);

    return 0;
}

/**
 * Open a capture device, locate the stream of the requested media type and
 * create a matching (still unconfigured) stream on the output muxer.
 *
 * @param context_in        out: capture format context (caller closes)
 * @param type              AVMEDIA_TYPE_VIDEO or AVMEDIA_TYPE_AUDIO
 * @param streamid          out: index of the found stream in *context_in
 * @param input_format_name device demuxer name ("video4linux2", "alsa", ...)
 * @param device_name       device path/identifier ("/dev/video0", "hw:1,0", ...)
 * @param options           capture options; always freed before returning
 * @param context_out       output muxer (owned by the caller, never freed here)
 * @param stream_out        out: newly created output stream
 * @return 0 on success, -1 on failure.
 */
int initStream(AVFormatContext **context_in, enum AVMediaType type, int *streamid,
               const char *input_format_name, const char *device_name, AVDictionary **options,
               AVFormatContext *context_out, AVStream **stream_out)
{
    // Look up the capture input format.
    AVInputFormat *fmt = av_find_input_format(input_format_name);
    if (!fmt)
    {
        printf("av_find_input_format error\n");
        return -1;
    }

    // Open the device; avformat_open_input consumes recognized options.
    int ret = avformat_open_input(context_in, device_name, fmt, options);
    // Free leftover (unconsumed) options unconditionally — the old code only
    // freed them on failure, leaking the dictionary on the success path.
    av_dict_free(options);
    if (ret != 0)
    {
        printf("avformat_open_input error\n");
        return -1;
    }

    // Probe the input so codecpar (width/height/sample_rate/...) is filled in.
    if (avformat_find_stream_info(*context_in, NULL) < 0)
    {
        printf("avformat_find_stream_info error\n");
        return -1;
    }

    // Find the best stream of the requested type.
    *streamid = av_find_best_stream(*context_in, type, -1, -1, NULL, 0);
    if (*streamid < 0)
    {
        printf("cannot find video stream\n");
        return -1;
    }

    // Create the output stream on the muxer.
    *stream_out = avformat_new_stream(context_out, NULL);
    if (!(*stream_out))
    {
        // Do NOT free context_out here: it is owned by the caller, and
        // freeing it caused a double free in main's cleanup path.
        printf("avformat_new_stream failed\n");
        return -1;
    }

    return 0;
}

/**
 * Create a sws scaling/pixel-format-conversion context mapping the capture
 * stream's geometry/format to the output stream's.
 *
 * @return 0 on success, -1 if sws_getContext failed.
 */
int initSws(struct SwsContext **sws_ctx, AVStream *stream_in, AVStream *stream_out)
{
    *sws_ctx = sws_getContext(
        stream_in->codecpar->width, stream_in->codecpar->height, stream_in->codecpar->format,
        stream_out->codecpar->width, stream_out->codecpar->height, stream_out->codecpar->format,
        SWS_BILINEAR, NULL, NULL, NULL);
    // Fix: the old code tested `!sws_ctx` (address of the out-parameter,
    // never NULL) instead of the context it just created.
    if (!(*sws_ctx))
    {
        printf("sws_getContext error\n");
        return -1;
    }
    return 0;
}

/**
 * Create and initialize a resampler converting the capture stream's PCM
 * layout/format/rate into the encoder stream's.
 *
 * @return 0 on success, -1 on failure.
 */
int initSwr(struct SwrContext **swr_ctx, AVStream *stream_in, AVStream *stream_out)
{
    // Derive default channel layouts from the channel counts —
    // codecpar->channel_layout is not set by the capture device.
    int64_t chlayout_in = av_get_default_channel_layout(stream_in->codecpar->channels);
    int64_t chlayout_out = av_get_default_channel_layout(stream_out->codecpar->channels);
    // swr_alloc_set_opts takes the OUTPUT layout/format/rate first, then the
    // INPUT triple. The old code passed chlayout_in in the output slot and
    // chlayout_out in the input slot — the layouts were swapped.
    *swr_ctx = swr_alloc_set_opts(
        NULL,
        chlayout_out, stream_out->codecpar->format, stream_out->codecpar->sample_rate,
        chlayout_in, stream_in->codecpar->format, stream_in->codecpar->sample_rate,
        0, NULL);
    if (!(*swr_ctx) || swr_init(*swr_ctx) < 0)
    {
        printf("allocate resampler context failed\n");
        return -1;
    }
    return 0;
}

/**
 * Allocate, configure and open an H.264 encoder matched to the capture
 * stream's geometry, then copy its parameters onto the output stream.
 *
 * @param codec_context out: opened encoder context (caller frees)
 * @param stream_in     capture stream providing width/height
 * @param frame_rate    target frame rate (also sets time base and GOP size)
 * @param context_out   output muxer, consulted for AVFMT_GLOBALHEADER
 * @param stream_out    output stream receiving the encoder parameters
 * @return 0 on success, -1 on failure.
 */
int setVcodec(AVCodecContext **codec_context, AVStream *stream_in,
              int frame_rate, AVFormatContext *context_out, AVStream *stream_out)
{
    // Locate the H.264 encoder.
    AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!encoder)
    {
        printf("Codec not found\n");
        return -1;
    }
    printf("codec name: %s\n", encoder->name);

    // Allocate an encoder context bound to it.
    *codec_context = avcodec_alloc_context3(encoder);
    if (!(*codec_context))
    {
        printf("avcodec_alloc_context3 failed\n");
        return -1;
    }

    AVCodecContext *enc = *codec_context;
    // Encoder configuration: geometry comes from the capture stream.
    enc->codec_id = AV_CODEC_ID_H264;
    enc->codec_type = AVMEDIA_TYPE_VIDEO;
    enc->pix_fmt = AV_PIX_FMT_YUV420P;
    enc->width = stream_in->codecpar->width;
    enc->height = stream_in->codecpar->height;
    enc->time_base = (AVRational){1, frame_rate}; // one tick per frame
    enc->framerate = (AVRational){frame_rate, 1};
    enc->bit_rate = 750 * 1000;
    enc->gop_size = frame_rate;  // roughly one keyframe per second
    enc->max_b_frames = 0;       // no B-frames, keeps latency down
    // x264 private options: baseline profile, zero-latency tuning.
    av_opt_set(enc->priv_data, "profile", "baseline", 0);
    av_opt_set(enc->priv_data, "tune", "zerolatency", 0);
    // Formats like FLV want SPS/PPS in extradata rather than repeated
    // in-band before each keyframe.
    if (context_out->oformat->flags & AVFMT_GLOBALHEADER)
    {
        printf("set AV_CODEC_FLAG_GLOBAL_HEADER\n");
        enc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

    // Open the encoder.
    if (avcodec_open2(enc, encoder, NULL) < 0)
    {
        avcodec_free_context(codec_context);
        printf("avcodec_open2 failed\n");
        return -1;
    }

    // Mirror the encoder settings onto the output stream.
    if (avcodec_parameters_from_context(stream_out->codecpar, enc) < 0)
    {
        avcodec_free_context(codec_context);
        printf("avcodec_parameters_from_context failed\n");
        return -1;
    }

    return 0;
}

/**
 * Allocate, configure and open an AAC encoder matched to the capture
 * stream's sample rate and channel count, then copy its parameters onto the
 * output stream.
 *
 * @param codec_context out: opened encoder context (caller frees)
 * @param stream_in     capture stream providing sample_rate/channels
 * @param context_out   output muxer, consulted for AVFMT_GLOBALHEADER
 * @param stream_out    output stream receiving the encoder parameters
 * @return 0 on success, -1 on failure.
 */
int setAcodec(AVCodecContext **codec_context, AVStream *stream_in,
              AVFormatContext *context_out, AVStream *stream_out)
{
    AVCodec *c = NULL;

    // Locate the AAC encoder.
    c = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!c)
    {
        printf("Codec not found\n");
        return -1;
    }
    printf("codec name: %s\n", c->name);

    // Allocate the encoder context.
    *codec_context = avcodec_alloc_context3(c);
    // Fix: the old code re-tested `!c` here (already known non-NULL), so an
    // allocation failure was never caught; test the context that was allocated.
    if (!(*codec_context))
    {
        printf("avcodec_alloc_context3 failed\n");
        return -1;
    }

    AVCodecContext *ctx = *codec_context;
    // Encoder configuration: rate/channels come from the capture stream;
    // the native AAC encoder requires planar float input.
    ctx->codec_id = AV_CODEC_ID_AAC;
    ctx->codec_type = AVMEDIA_TYPE_AUDIO;
    ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    ctx->sample_rate = stream_in->codecpar->sample_rate;
    ctx->channels = stream_in->codecpar->channels;
    ctx->channel_layout = av_get_default_channel_layout(stream_in->codecpar->channels);
    ctx->bit_rate = 64000;
    ctx->profile = FF_PROFILE_AAC_LOW;
    // FLV wants the AudioSpecificConfig in extradata.
    if (context_out->oformat->flags & AVFMT_GLOBALHEADER)
    {
        printf("set AV_CODEC_FLAG_GLOBAL_HEADER\n");
        ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

    // Open the encoder.
    if (avcodec_open2(ctx, c, NULL) < 0)
    {
        avcodec_free_context(codec_context);
        printf("avcodec_open2 failed\n");
        return -1;
    }

    // Mirror the encoder settings onto the output stream.
    int ret = avcodec_parameters_from_context(stream_out->codecpar, ctx);
    if (ret < 0)
    {
        avcodec_free_context(codec_context);
        printf("avcodec_parameters_from_context failed\n");
        return -1;
    }

    return 0;
}

/**
 * Open the camera, create the output video stream, configure the H.264
 * encoder and the pixel-format converter.
 *
 * @return 0 on success, -1 on failure.
 */
int initVideo(st_video *s_video)
{
    s_video->streamid = -1;
    const char *input_format_name = "video4linux2"; // capture demuxer: video4linux2 (a.k.a. v4l2) on Linux
    const char *device_name = "/dev/video0";        // camera device node
    const char *camera_resolution = "640x480";      // requested capture resolution
    s_video->camera_pix_fmt = AV_PIX_FMT_YUYV422;   // pixel format the camera delivers
    int frame_rate = 25;                            // target frame rate
    AVDictionary *options = NULL;

    // Ask the driver for the desired resolution.
    av_dict_set(&options, "video_size", camera_resolution, 0);
    if (initStream(&s_video->context_in, AVMEDIA_TYPE_VIDEO, &s_video->streamid,
                   input_format_name, device_name, &options,
                   *(s_video->context_out), &s_video->stream_out) < 0)
    {
        printf("initStream failed\n");
        return -1;
    }

    AVStream *cam_stream = s_video->context_in->streams[s_video->streamid];
    printf("video stream, width: %d, height: %d, format: %s\n",
           cam_stream->codecpar->width, cam_stream->codecpar->height,
           av_get_pix_fmt_name((enum AVPixelFormat)cam_stream->codecpar->format));

    // Configure and open the H.264 encoder for the output stream.
    if (setVcodec(&s_video->codec_context, cam_stream, frame_rate,
                  *(s_video->context_out), s_video->stream_out) < 0)
    {
        printf("setVcodec failed\n");
        return -1;
    }

    // Set up the camera-format -> YUV420P converter.
    if (initSws(&s_video->sws_ctx, cam_stream, s_video->stream_out) < 0)
    {
        printf("initSws failed\n");
        return -1;
    }

    return 0;
}

/**
 * Open the microphone, create the output audio stream, configure the AAC
 * encoder and the PCM resampler.
 *
 * @return 0 on success, -1 on failure.
 */
int initAudio(st_audio *s_audio)
{
    const char *input_format_name = "alsa";
    const char *device_name = "hw:1,0";   // microphone device (ALSA card,device)
    const char *in_sample_rate = "16000"; // capture sample rate
    const char *in_channels = "1";        // capture channel count
    AVDictionary *options = NULL;

    // Request the capture parameters from the ALSA device.
    av_dict_set(&options, "sample_rate", in_sample_rate, 0);
    av_dict_set(&options, "channels", in_channels, 0);
    if (initStream(&s_audio->context_in, AVMEDIA_TYPE_AUDIO, &s_audio->streamid,
                   input_format_name, device_name, &options,
                   *(s_audio->context_out), &s_audio->stream_out) < 0)
    {
        printf("initStream failed\n");
        return -1;
    }

    AVStream *mic_stream = s_audio->context_in->streams[s_audio->streamid];
    printf("audio stream, sample_rate: %d, channels: %d, format: %s\n",
           mic_stream->codecpar->sample_rate, mic_stream->codecpar->channels,
           av_get_sample_fmt_name((enum AVSampleFormat)mic_stream->codecpar->format));

    // Configure and open the AAC encoder for the output stream.
    if (setAcodec(&s_audio->codec_context, mic_stream, *(s_audio->context_out), s_audio->stream_out) < 0)
    {
        printf("setAcodec failed\n");
        return -1;
    }

    // Set up the capture-PCM -> encoder-PCM resampler.
    if (initSwr(&s_audio->swr_ctx, mic_stream, s_audio->stream_out) < 0)
    {
        printf("initSwr failed\n");
        return -1;
    }

    return 0;
}

/**
 * Video thread: read raw frames from the camera, convert to YUV420P, encode
 * to H.264 and push the packets to the RTMP muxer.
 *
 * @param arg st_video* set up by initVideo()
 * @return NULL always.
 */
void *thread_v(void *arg)
{
    int ret = -1;
    int64_t frame_index = 0;
    st_video *s_video = (st_video *)arg;
    AVStream *stream_v = s_video->context_in->streams[s_video->streamid];
    // All resources freed at 'end' are declared (and NULL-initialized) up
    // front: the old code could `goto end` across packet's initializer and
    // then free an uninitialized pointer — undefined behavior.
    AVFrame *input_frame = NULL;
    AVFrame *frame_yuv420p = NULL;
    AVPacket *packet = NULL;

    input_frame = av_frame_alloc();
    frame_yuv420p = av_frame_alloc();
    if (!input_frame || !frame_yuv420p)
    {
        printf("av_frame_alloc error\n");
        goto end;
    }
    packet = av_packet_alloc();
    if (!packet)
    {
        printf("av_packet_alloc failed\n");
        goto end;
    }

    // Describe the raw camera frame (data pointers filled per packet below).
    input_frame->format = s_video->camera_pix_fmt;
    input_frame->width = stream_v->codecpar->width;
    input_frame->height = stream_v->codecpar->height;

    frame_yuv420p->format = AV_PIX_FMT_YUV420P;
    frame_yuv420p->width = stream_v->codecpar->width;
    frame_yuv420p->height = stream_v->codecpar->height;

    // Allocate the YUV420P frame buffer once; it is reused every iteration.
    ret = av_frame_get_buffer(frame_yuv420p, 0);
    if (ret < 0)
    {
        printf("av_frame_get_buffer error\n");
        goto end;
    }

    // Capture loop: one packet from v4l2 is one raw frame.
    AVPacket pkt;
    while (av_read_frame(s_video->context_in, &pkt) >= 0)
    {
        if (pkt.stream_index == s_video->streamid)
        {
            // Point input_frame's planes at the raw packet data (no copy).
            ret = av_image_fill_arrays(input_frame->data, input_frame->linesize, pkt.data, s_video->camera_pix_fmt,
                                       stream_v->codecpar->width, stream_v->codecpar->height, 1);
            if (ret < 0)
            {
                av_packet_unref(&pkt);
                printf("av_image_fill_arrays error\n");
                break;
            }

            // Convert YUYV422 -> YUV420P for the encoder.
            sws_scale(s_video->sws_ctx, (const uint8_t *const *)input_frame->data, input_frame->linesize, 0,
                      input_frame->height, frame_yuv420p->data, frame_yuv420p->linesize);

            // PTS counts frames; the encoder time base is 1/frame_rate.
            frame_yuv420p->pts = frame_index;
            frame_index++;
            ret = avcodec_send_frame(s_video->codec_context, frame_yuv420p);
            if (ret < 0)
            {
                printf("avcodec_send_frame error (errmsg '%s')\n", av_err2str(ret));
                break;
            }

            // Drain every packet the encoder has ready.
            while (ret >= 0)
            {
                ret = avcodec_receive_packet(s_video->codec_context, packet);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                {
                    break;
                }
                else if (ret < 0)
                {
                    printf("avcodec_receive_packet error (errmsg '%s')\n", av_err2str(ret));
                    goto end;
                }

                packet->stream_index = s_video->stream_out->index;
                // Rescale timestamps from encoder time base to stream time base.
                av_packet_rescale_ts(packet, s_video->codec_context->time_base, s_video->stream_out->time_base);
                packet->pos = -1;
                // Push to the RTMP server; the lock serializes writes with
                // the audio thread.
                pthread_mutex_lock(s_video->lock_write_frame);
                ret = av_interleaved_write_frame(*(s_video->context_out), packet);
                pthread_mutex_unlock(s_video->lock_write_frame);
                if (ret < 0)
                {
                    printf("av_interleaved_write_frame error (errmsg '%d')\n", ret);
                    av_packet_unref(packet);
                    goto end;
                }

                av_packet_unref(packet);
            }
        }
        av_packet_unref(&pkt);
    }

end:
    // These free functions all accept NULL-holding pointers safely.
    av_frame_free(&input_frame);
    av_frame_free(&frame_yuv420p);
    av_packet_free(&packet);

    return NULL;
}

void *thread_a(void *arg)
{
    st_audio *s_audio = (st_audio *)arg;
    int ret = -1;
    int fsize = 0;
    int64_t pts = 0;
    AVFrame *frame_out = NULL;
    AVAudioFifo *fifo = NULL;

    frame_out = av_frame_alloc();
    if (!frame_out)
    {
        printf("av_frame_alloc failed\n");
        goto end;
    }
    // 设置帧参数, av_frame_get_buffer 在分配缓冲区时会用到
    frame_out->format = s_audio->codec_context->sample_fmt;
    frame_out->nb_samples = s_audio->codec_context->frame_size;
    frame_out->channel_layout = s_audio->codec_context->channel_layout;
    // 分配帧缓冲区
    ret = av_frame_get_buffer(frame_out, 0);
    if (ret < 0)
    {
        printf("av_frame_get_buffer failed\n");
        goto end;
    }

    AVStream *stream_a = s_audio->context_in->streams[s_audio->streamid];
    // 计算编码每帧aac所需的pcm数据的大小 = 采样个数 * 采样格式大小 * 声道数
    fsize = s_audio->codec_context->frame_size *
            av_get_bytes_per_sample(stream_a->codecpar->format) *
            stream_a->codecpar->channels;
    printf("frame size: %d\n", fsize);

    fifo = av_audio_fifo_alloc((enum AVSampleFormat)stream_a->codecpar->format,
                               stream_a->codecpar->channels, s_audio->codec_context->frame_size * 5);
    if (!fifo)
    {
        printf("av_audio_fifo_alloc failed\n");
        goto end;
    }

    uint8_t *buf = av_malloc(fsize);
    if (!buf)
    {
        printf("av_malloc failed\n");
        goto end;
    }

    AVPacket *recv_ptk = av_packet_alloc();
    if (!recv_ptk)
    {
        printf("av_packet_alloc failed\n");
        goto end;
    }

    int sample_size = av_get_bytes_per_sample(stream_a->codecpar->format);
    // 读取帧
    AVPacket read_pkt;
    while (av_read_frame(s_audio->context_in, &read_pkt) >= 0)
    {
        if (read_pkt.stream_index == s_audio->streamid)
        {
            av_audio_fifo_write(fifo, (void **)&read_pkt.buf->data,
                                read_pkt.size / sample_size);
            if (av_audio_fifo_size(fifo) < s_audio->codec_context->frame_size)
            {
                // 不够一帧aac编码所需的数据
                continue;
            }
            av_audio_fifo_read(fifo, (void **)&buf, s_audio->codec_context->frame_size);
            // 重采样
            ret = swr_convert(s_audio->swr_ctx, frame_out->data, frame_out->nb_samples,
                              (const uint8_t **)&buf, frame_out->nb_samples);
            if (ret < 0)
            {
                printf("swr_convert failed\n");
                goto end;
            }

            frame_out->pts = pts;
            pts += frame_out->nb_samples;

            // 发送帧给编码器
            ret = avcodec_send_frame(s_audio->codec_context, frame_out);
            if (ret < 0)
            {
                printf("avcodec_send_frame failed\n");
                goto end;
            }

            // 接收编码后的数据包
            while (ret >= 0)
            {
                ret = avcodec_receive_packet(s_audio->codec_context, recv_ptk);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                {
                    break;
                }
                else if (ret < 0)
                {
                    printf("avcodec_receive_packet error (errmsg '%s')\n", av_err2str(ret));
                    goto end;
                }

                recv_ptk->stream_index = s_audio->stream_out->index;
                av_packet_rescale_ts(recv_ptk, s_audio->codec_context->time_base,
                                     s_audio->stream_out->time_base);
                pthread_mutex_lock(s_audio->lock_write_frame);
                ret = av_interleaved_write_frame(*s_audio->context_out, recv_ptk);
                pthread_mutex_unlock(s_audio->lock_write_frame);
                if (ret < 0)
                {
                    printf("av_interleaved_write_frame failed\n");
                    av_packet_unref(recv_ptk);
                    goto end;
                }
                av_packet_unref(recv_ptk);
            }
        }
        av_packet_unref(&read_pkt);
    }

end:
    if (frame_out)
        av_frame_free(&frame_out);
    if (recv_ptk)
        av_packet_free(&recv_ptk);
    if (fifo)
        av_audio_fifo_free(fifo);
    if (buf)
        av_free(buf);

    return NULL;
}

相关博客链接:FFmpeg 实现从摄像头获取流并通过RTMP推流

FFmpeg 实现从麦克风获取流并通过RTMP推流

相关推荐
道亦无名8 小时前
音频数据特征值提取 方法和步骤
android·音视频
西***634711 小时前
声画合一 智控全场 —— 高清数字会议系统重构现代会议新生态
音视频·会议系统
REDcker12 小时前
RTSP 直播技术详解
linux·服务器·网络·音视频·实时音视频·直播·rtsp
微尘hjx12 小时前
【Gstreamer 应用程序开发手册 01】关于GSTREAMER
linux·音视频·媒体
石去皿13 小时前
轻量级 Web 应用 —— 把一堆图片按指定频率直接拼成视频,零特效、零依赖、零命令行
前端·音视频
runner365.git13 小时前
做一个基于ffmpeg的AI Agent智能体
人工智能·ffmpeg·大模型
进击的小头14 小时前
FIR滤波器实战:音频信号降噪
c语言·python·算法·音视频
Black蜡笔小新15 小时前
终结“监控盲区”:EasyGBS视频质量诊断技术多场景应用设计
人工智能·音视频·视频质量诊断
彷徨而立16 小时前
【FFmpeg】理解 av_packet_from_data 和 av_packet_unref 接口
ffmpeg
liliangcsdn18 小时前
视频嵌入表示生成方案的探索
数据库·人工智能·音视频