ffmpeg ffplay 基于h264中SEI信息进行双摄画面拆分播放实践

1.背景

工作中用到IPCamera支持双摄（即一个IPCamera带两个摄像头），IPC端将两个摄像头的画面上下拼接成了一个画面发布dash到云端，并且携带SEI信息。SEI信息中带两个frame（x, y, width, height），app端（iOS、安卓）根据这个信息拆分画面通过opengl展示到两个view上，以便以不同的排列方式展示双摄画面。

2. 使用ffplay播放单个画面，请参考：

ffplay+SDL2+opengles在iOS中使用(参考ijkplayer)_ffplay swift ios-CSDN博客

3. ffp_handleSEI方法用于将AVPacket中的SEI信息读取到multi成员变量中。

cpp 复制代码

// Reads the dual-camera layout out of the SEI carried by a video AVPacket and
// caches it in the `multi` member so the combined picture can be split later.
//
// The function is a no-op when SEI lookup is disabled (`needSearchSEI` is
// false) or when the layout has already been captured (`multi.acquired` is
// set); the layout is captured only once.
//
// SEI layout as described by the original author:
//   NAL start code (00 00 00 01 or 00 00 01) + NAL type 6 (SEI)
//   + SEI payload type 5 (user defined) + data length + UUID + payload + 0x80
// where the data length equals the UUID length plus the payload length and
// 0x80 is the fixed trailer.
//
// @param pkt  Video packet to inspect; ignored when null or empty.
void FSPlay::ffp_handleSEI(AVPacket *pkt) {
    if (!needSearchSEI || multi.acquired) {
        return;
    }

    // Nothing to parse in a null or empty packet.
    if (pkt == nullptr || pkt->data == nullptr || pkt->size <= 0) {
        return;
    }

    // Locate the custom SEI bytes by UUID; an empty buffer means "not found".
    fsbase::ByteBuf buf = fsbase::sei_saas_data_without_nal_unit(pkt->data, pkt->size);
    if (buf.size() == 0) {
        return;
    }

    // Value-initialised so the fields tested below are never indeterminate if
    // sei_frame_make leaves them untouched.
    // NOTE(review): the second argument (0) is passed through unchanged from the
    // original code; its meaning is not visible here.
    fsbase::sei_frame_t f{};
    fsbase::sei_frame_make(buf.data(), 0, &f);
    if (f.saas_data == nullptr || f.saas_len <= 0) {
        return;
    }

    std::shared_ptr<fsbase::multi_rect_t> multi_cpp = fsbase::sei_query_multi_rect(f.saas_data, f.saas_len);
    if (!multi_cpp) {
        return;
    }

    // Both rects[0] and rects[1] are read below, so at least two entries are
    // required. The previous test (`count + 1 >= 2`) also accepted count == 1.
    if (multi_cpp->count < 2) {
        return;
    }

    // rects[0] -> primary camera region.
    FSFrameRange primary = FSFrameRange();
    primary.x = multi_cpp->rects[0].x;
    primary.y = multi_cpp->rects[0].y;
    primary.width = multi_cpp->rects[0].w;
    primary.height = multi_cpp->rects[0].h;

    // rects[1] -> secondary camera region.
    FSFrameRange secondary = FSFrameRange();
    secondary.x = multi_cpp->rects[1].x;
    secondary.y = multi_cpp->rects[1].y;
    secondary.width = multi_cpp->rects[1].w;
    secondary.height = multi_cpp->rects[1].h;

    multi.primary = primary;
    multi.secondary = secondary;

    // Set the flag last so code that tests `multi.acquired` does not see it
    // raised before the rectangles have been stored.
    multi.acquired = 1;
}

4. 在获取到视频的AVPacket时调用ffp_handleSEI方法读取SEI信息

cpp 复制代码

// Excerpt of the demux/read loop: for every video packet that is inside the
// play range and is not an attached picture, the SEI is inspected and the
// packet is then queued for decoding.
int FSPlay::read_thread(void *arg) {
    // ... code omitted ...
    if (pkt->stream_index == is->video_stream && pkt_in_play_range
                   && !(is->video_st->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
        // Parse the SEI BEFORE queueing. In stock ffplay, packet_queue_put()
        // hands the packet's buffers over to the queue (recent versions move
        // the reference out of `pkt`, leaving it empty), so reading
        // pkt->data afterwards either sees nothing or races with the decoder.
        // NOTE(review): confirm this project's packet_queue_put behaves the same.
        ffp_handleSEI(pkt);

        packet_queue_put(&is->videoq, pkt);
    }
    // ... code omitted ...
}

video_image_display方法中根据multi的primary和secondary将原rgb数据拆分成两个画面分别回调给opengl端显示。

cpp 复制代码

// Converts the last picture in the picture queue to packed RGB24 and passes it
// to the render callbacks.
//
// - Single-camera path (SEI lookup disabled, layout not acquired yet, or the
//   acquired layout does not fit inside the picture): the whole picture is
//   delivered through renderCallback/openglesView.
// - Dual-camera path: the regions described by multi.primary and
//   multi.secondary are cut out of the RGB picture and delivered through
//   renderCallback/openglesView and renderSecondCallback/renderSecondView.
//
// Every buffer allocated here is released before returning; the frames given
// to the callbacks are only valid for the duration of the callback.
//
// @param is  Player state whose picture queue supplies the frame.
void FSPlay::video_image_display(VideoState *is)
{
    Frame *vp = frame_queue_peek_last(&is->pictq);

    if (rgbFrame == NULL) {
        rgbFrame = av_frame_alloc();
        if (rgbFrame == NULL) {
            return;
        }
    }

    // av_image_alloc reports failure with a negative value; nothing to clean up yet.
    if (av_image_alloc(rgbFrame->data, rgbFrame->linesize, vp->width, vp->height, AV_PIX_FMT_RGB24, 1) < 0) {
        return;
    }

    enum AVPixelFormat sw_pix_fmt = (enum AVPixelFormat)(vp->format);

    swsContext = sws_getContext(vp->width, vp->height, sw_pix_fmt, vp->width, vp->height, AV_PIX_FMT_RGB24, SWS_BILINEAR, NULL, NULL, NULL);
    if (swsContext == NULL) {
        av_freep(&rgbFrame->data[0]);
        return;
    }

    SDL_LockMutex(is->pictq.mutex);
    sws_scale(swsContext, vp->frame->data, vp->frame->linesize, 0, vp->frame->height, rgbFrame->data, rgbFrame->linesize);
    SDL_UnlockMutex(is->pictq.mutex);

    // The rectangles come from stream data, so verify that they lie completely
    // inside the converted picture before they are used as copy offsets.
    const auto fitsInPicture = [vp](const FSFrameRange &r) -> bool {
        return r.x >= 0 && r.y >= 0 && r.width > 0 && r.height > 0 &&
               static_cast<int64_t>(r.x) + r.width <= vp->width &&
               static_cast<int64_t>(r.y) + r.height <= vp->height;
    };

    // Allocates a packed RGB24 VideoFrame of the given size, or returns NULL.
    // The struct is zeroed so fields not set here are not left indeterminate.
    const auto makeFrame = [](int width, int height) -> VideoFrame * {
        VideoFrame *frame = (VideoFrame *)calloc(1, sizeof(VideoFrame));
        if (frame == NULL) {
            return NULL;
        }
        frame->width = width;
        frame->height = height;
        frame->planar = 1;
        frame->format = AV_PIX_FMT_RGB24;
        frame->pixels[0] = (uint8_t *)malloc((size_t)width * (size_t)height * 3);
        if (frame->pixels[0] == NULL) {
            free(frame);
            return NULL;
        }
        return frame;
    };

    const auto releaseFrame = [](VideoFrame *frame) {
        if (frame != NULL) {
            free(frame->pixels[0]);
            free(frame);
        }
    };

    const bool split = needSearchSEI && multi.acquired &&
                       fitsInPicture(multi.primary) && fitsInPicture(multi.secondary);

    if (!split) {
        // Single-camera path: deliver the whole picture.
        if (renderCallback != NULL && openglesView != NULL) {
            VideoFrame *videoFrame = makeFrame(vp->width, vp->height);
            if (videoFrame != NULL) {
                copyFrameData(videoFrame, rgbFrame);
                renderCallback(openglesView, videoFrame);
                releaseFrame(videoFrame);
            }
        }
    } else {
        // Primary camera region.
        if (renderCallback != NULL && openglesView != NULL) {
            VideoFrame *destination1 = makeFrame(multi.primary.width, multi.primary.height);
            if (destination1 != NULL) {
                copyFrameData(destination1, rgbFrame, &(multi.primary));
                renderCallback(openglesView, destination1);
                releaseFrame(destination1);
            }
        }

        // Secondary camera region.
        if (renderSecondCallback != NULL && renderSecondView != NULL) {
            VideoFrame *destination2 = makeFrame(multi.secondary.width, multi.secondary.height);
            if (destination2 != NULL) {
                copyFrameData(destination2, rgbFrame, &(multi.secondary));
                renderSecondCallback(renderSecondView, destination2);
                releaseFrame(destination2);
            }
        }
    }

    // Release the per-call RGB buffer and scaler context.
    av_freep(&rgbFrame->data[0]);
    sws_freeContext(swsContext);
    swsContext = NULL;
}

copyFrameData方法用于将原rgb数据以指定的range拷贝到目标Frame中。

cpp 复制代码

// Copies a rectangular region of packed RGB24 pixels (3 bytes per pixel) from
// `source` into `destination`.
//
// The region size is taken from destination->width / destination->height; its
// top-left corner inside the source is (range->x, range->y). Source rows are
// stepped by source->linesize[0]; destination rows are written back to back.
//
// @param destination  Target frame; pixels[0] must hold width * height * 3 bytes.
// @param source       Source picture; plane 0 is read.
// @param range        Region origin inside the source. A null pointer is treated
//                     as origin (0, 0) so the two-argument call for the full
//                     picture is safe.
//                     NOTE(review): assumes the declaration defaults `range`
//                     to null — confirm against the header.
//
// The caller must ensure the region lies inside the source picture; this
// function has no knowledge of the source dimensions.
void FSPlay::copyFrameData(VideoFrame *destination, AVFrame *source, FSFrameRange *range) {
    if (destination == NULL || source == NULL) {
        return;
    }

    const uint8_t *src = source->data[0];
    uint8_t *dst = destination->pixels[0];
    if (src == NULL || dst == NULL) {
        return;
    }

    // A non-positive size would turn into a huge byte count below.
    if (destination->width <= 0 || destination->height <= 0) {
        return;
    }

    const int linesize = source->linesize[0];
    const size_t rowBytes = (size_t)destination->width * 3;
    const int rows = destination->height;

    // Move to the first byte of the region: down `y` rows, right `x` pixels.
    if (range != NULL) {
        src += (int64_t)linesize * (int64_t)range->y + (int64_t)range->x * 3;
    }

    // Each destination row is fully overwritten, so no prior clearing is needed.
    for (int row = 0; row < rows; ++row) {
        memcpy(dst, src, rowBytes);
        dst += rowBytes;
        src += linesize;
    }
}

sei_saas_data_without_nal_unit方法用于根据uuid去搜索SEI信息，搜索前256字节和后256字节。

cpp 复制代码

// Looks for the custom SEI data in `buf` by UUID, in up to two passes:
//   1. starting at offset 0 when the buffer is non-empty;
//   2. if nothing was found and bytes remain past the first probe window,
//      starting at max(window end, size - IV_SEI_PROBE_SIZE).
// Returns the bytes found, or an empty ByteBuf.
//
// NOTE(review): each pass hands search_sei_data_by_uuid the whole remainder of
// the buffer (size - offset), not just IV_SEI_PROBE_SIZE bytes.
ByteBuf sei_saas_data_without_nal_unit(const uint8_t *buf, int size) {
    int offset = 0;
    int limit = MIN(IV_SEI_PROBE_SIZE, size);

    for (;;) {
        if (offset < limit) {
            ByteBuf found = search_sei_data_by_uuid(buf + offset, size - offset);
            if (found.size() != 0) {
                return found;
            }
            // Nothing in this pass; mark the window as consumed.
            offset = limit;
        }

        // Stop once the whole buffer has been covered.
        if (!(offset < size)) {
            break;
        }

        // Set up the tail pass.
        offset = MAX(offset, size - IV_SEI_PROBE_SIZE);
        limit = size;
    }

    return ByteBuf();
}

/**
 Searches a byte range for custom SEI data identified by one of the known UUIDs.

 For each candidate position the function checks that:
   - the bytes match one of SEI_UUIDs (first 4 bytes, then the rest);
   - the byte before the length field is IV_SEI_USER_DATA;
   - the declared data lies inside the searched range and is followed by
     IV_SEI_DATA_END.
 The length field is the byte just before the UUID plus 0xFF for every
 consecutive 0xFF byte preceding it.

 - Parameters:
 - p: start address of the range to search
 - size: number of bytes available at p
 @return the SEI data starting at the UUID (copied verbatim for SEI_UUIDs[0],
         passed through remove_redundant_bytes for the other UUIDs), or an
         empty ByteBuf when nothing valid is found
 */
static ByteBuf search_sei_data_by_uuid(const uint8_t *p, int size) {
    if (p == NULL) {
        return ByteBuf();
    }

    const size_t versionCount = sizeof(SEI_UUIDs) / sizeof(SEI_UUIDs[0]);
    // Compare as signed ints so a tiny `size` cannot wrap to a large value
    // should the length constant have an unsigned type.
    const int uuidLen = static_cast<int>(IV_SEI_UUID_LEN);

    // Start at 2 so a type byte and one length byte can precede the UUID.
    for (int i = 2; size - i > uuidLen; ++i) {
        // Check which protocol version (UUID) matches at this position.
        for (size_t v = 0; v < versionCount; ++v) {
            auto &uuid = SEI_UUIDs[v];

            // a. cheap pre-check on the first 4 bytes
            // b. full comparison of the remaining UUID bytes
            if (!(CHECK_FIRST_4BYTES_EQUAL(p+i, uuid) && memcmp(p + i + 4, &uuid[4], IV_SEI_UUID_LEN-4) == 0)) {
                continue;
            }

            // c. decode the length: last length byte plus 0xFF per preceding 0xFF byte
            int k = i - 1;
            int payloadLen = p[k];
            while (k > 0 && p[--k] == 0xFF) {
                payloadLen += 0xFF;
            }

            // d. the byte before the length field must be the user-data SEI type
            if (p[k] != IV_SEI_USER_DATA) {
                continue;
            }

            // The length comes from the stream: the data and the trailer byte
            // at p[i + payloadLen] must lie inside the searched range.
            if (payloadLen > size - i - 1) {
                continue;
            }

            // e. the data must be followed by the end marker
            if (p[i + payloadLen] != IV_SEI_DATA_END) {
                continue;
            }

            // Build the result only after every check has passed.
            if (v == 0) {
                return ByteBuf(p + i, p + i + payloadLen);
            }
            return remove_redundant_bytes(p + i, payloadLen);
        }
    }
    return ByteBuf();
}

通过renderCallback和renderSecondCallback回调的画面数据，用opengles进行渲染，显示到两个对应的view上。