WebRTC视频 05 - 视频采集类 VideoCaptureDS 下篇

WebRTC视频 01 - 视频采集整体架构
 WebRTC视频 02 - 视频采集类 VideoCaptureModule
WebRTC视频 03 - 视频采集类 VideoCaptureDS 上篇
 WebRTC视频 04 - 视频采集类 VideoCaptureDS 中篇

WebRTC视频 05 - 视频采集类 VideoCaptureDS 下篇（本文）

一、前言：

前面介绍了视频数据的采集，那么采集到的数据是如何上传给上层应用的呢？本节就来看看。

二、流程图：

CaptureFilter从Camera采集到数据之后交给SinkFilter；
通过SinkFilter获取到数据之后，会调用VideoCaptureImpl的IncomingFrame；
接着调用DeliverCapturedFrame，里面会调用此前通过RegisterCaptureDataCallback注册的回调对象（即VcmCapturer）的OnFrame，将数据传给该回调；
接下来再交给VcmCapturer::OnFrame，里面会交给VideoBroadcaster；
VideoBroadcaster::OnFrame会将采集到的数据分成两路，分别交给本地渲染器和编码器（注意看数据有没有copy）；
还记得本地渲染器和编码器什么时候加入到VideoBroadcaster当中的吗？本地渲染器是StartLocalRenderer时候加入的，编码器是媒体协商最后一步SetRemoteDescription的时候加入的；

三、代码走读：

首先看看SinkFilter接收数据的地方：

cpp 复制代码

/**
 * Entry point for captured video data: invoked when a media sample is
 * delivered to this input pin.
 *
 * @param media_sample The sample carrying the captured data.
 * @return S_FALSE while the pin is flushing, VFW_E_RUNTIME_ERROR once a
 *         runtime error has been flagged, VFW_E_INVALIDMEDIATYPE when a
 *         mid-stream format change cannot be translated, S_OK otherwise.
 */
STDMETHODIMP CaptureInputPin::Receive(IMediaSample* media_sample) {
  RTC_DCHECK_RUN_ON(&capture_checker_);
  // Filter() yields the filter that owns this pin; it is treated as the
  // CaptureSinkFilter here.
  CaptureSinkFilter* const filter = static_cast<CaptureSinkFilter*>(Filter());

  if (flushing_.load(std::memory_order_relaxed))
    return S_FALSE;

  if (runtime_error_.load(std::memory_order_relaxed))
    return VFW_E_RUNTIME_ERROR;
  // On the first sample, remember the capture thread id and name the thread
  // "webrtc_video_capture".
  if (!capture_thread_id_) {
    // Make sure we set the thread name only once.
    capture_thread_id_ = GetCurrentThreadId();
    rtc::SetCurrentThreadName("webrtc_video_capture");
  }
  // Fetch the sample properties; a sample flagged AM_SAMPLE_TYPECHANGED
  // carries a new media type that must be translated before use.
  AM_SAMPLE2_PROPERTIES sample_props = {};
  GetSampleProperties(media_sample, &sample_props);
  // Has the format changed in this sample?
  if (sample_props.dwSampleFlags & AM_SAMPLE_TYPECHANGED) {
    // Check the derived class accepts the new format.
    // This shouldn't fail as the source must call QueryAccept first.

    // Note: This will modify resulting_capability_.
    // That should be OK as long as resulting_capability_ is only modified
    // on this thread while it is running (filter is not stopped), and only
    // modified on the main thread when the filter is stopped (i.e. this thread
    // is not running).
    if (!TranslateMediaTypeToVideoCaptureCapability(sample_props.pMediaType,
                                                    &resulting_capability_)) {
      // Raise a runtime error if we fail the media type
      runtime_error_ = true;
      EndOfStream();
      Filter()->NotifyEvent(EC_ERRORABORT, VFW_E_TYPE_NOT_ACCEPTED, 0);
      return VFW_E_INVALIDMEDIATYPE;
    }
  }
  // Hand the sample buffer, its actual length and the current capability
  // from the pin over to the filter.
  filter->ProcessCapturedFrame(sample_props.pbBuffer, sample_props.lActual,
                               resulting_capability_);

  return S_OK;
}

其实，关键的代码就是最后一步，ProcessCapturedFrame将数据从pin传给filter。

cpp 复制代码

/**
 * Forwards a captured buffer, handed over by the input pin, to the capture
 * observer.
 *
 * @param buffer     Start of the captured sample data.
 * @param length     Length of the captured data, passed through unchanged.
 * @param frame_info Capability describing the format of the buffer.
 */
void CaptureSinkFilter::ProcessCapturedFrame(
    unsigned char* buffer,
    size_t length,
    const VideoCaptureCapability& frame_info) {
  // Called on the capture thread.
  // NOTE(review): the observer is presumably the VideoCaptureImpl instance —
  // confirm against the code that sets capture_observer_.
  capture_observer_->IncomingFrame(buffer, length, frame_info);
}

由于使用的是观察者模式，这个capture_observer_其实就是VideoCaptureImpl，于是我们去VideoCaptureImpl的IncomingFrame看看。

cpp 复制代码

/**
 * Receives one raw captured frame, converts it to I420 (applying rotation to
 * the pixels when requested), wraps the result in a VideoFrame and delivers
 * it via DeliverCapturedFrame().
 *
 * @param videoFrame       Raw captured frame data.
 * @param videoFrameLength Length of `videoFrame`; for every type except
 *                         kMJPEG it must equal CalcBufferSize() for the
 *                         frame's type and dimensions.
 * @param frameInfo        Width, height and video type of the incoming data.
 *                         A negative height is passed to libyuv as is (see
 *                         the note on inverted images below).
 * @param captureTime      Stored as the frame's NTP time (defaults to 0).
 * @return 0 on success, -1 if the length check or the conversion fails.
 */
int32_t VideoCaptureImpl::IncomingFrame(uint8_t* videoFrame,
                                        size_t videoFrameLength,
                                        const VideoCaptureCapability& frameInfo,
                                        int64_t captureTime /*=0*/) {
  MutexLock lock(&api_lock_);

  const int32_t width = frameInfo.width;
  const int32_t height = frameInfo.height;

  TRACE_EVENT1("webrtc", "VC::IncomingFrame", "capture_time", captureTime);

  // Not encoded, convert to I420.
  // kMJPEG is skipped by this check; for every other type the buffer length
  // must match the size computed from the type and dimensions, otherwise the
  // frame is rejected.
  if (frameInfo.videoType != VideoType::kMJPEG &&
      CalcBufferSize(frameInfo.videoType, width, abs(height)) !=
          videoFrameLength) {
    RTC_LOG(LS_ERROR) << "Wrong incoming frame length.";
    return -1;
  }
  // Strides and dimensions of the destination I420 buffer.
  int stride_y = width;
  int stride_uv = (width + 1) / 2;
  int target_width = width;
  int target_height = abs(height);

  // SetApplyRotation doesn't take any lock. Make a local copy here.
  // When true, the rotation in _rotateFrame is applied to the pixels during
  // conversion and the delivered frame is tagged kVideoRotation_0; when
  // false, the pixels are left untouched and the frame is tagged with
  // _rotateFrame instead.
  bool apply_rotation = apply_rotation_;
  if (apply_rotation) {
    // Rotating resolution when for 90/270 degree rotations.
    if (_rotateFrame == kVideoRotation_90 ||
        _rotateFrame == kVideoRotation_270) {
      target_width = abs(height);
      target_height = width;
    }
  }

  // Setting absolute height (in case it was negative).
  // In Windows, the image starts bottom left, instead of top left.
  // Setting a negative source height, inverts the image (within LibYuv).

  // TODO(nisse): Use a pool?
  // Allocate the destination I420 buffer that the captured data is
  // converted into.
  rtc::scoped_refptr<I420Buffer> buffer = I420Buffer::Create(
      target_width, target_height, stride_y, stride_uv, stride_uv);

  // Map the configured rotation onto libyuv's rotation mode; no rotation is
  // applied to the pixels unless apply_rotation is set.
  libyuv::RotationMode rotation_mode = libyuv::kRotate0;
  if (apply_rotation) {
    switch (_rotateFrame) {
      case kVideoRotation_0:
        rotation_mode = libyuv::kRotate0;
        break;
      case kVideoRotation_90:
        rotation_mode = libyuv::kRotate90;
        break;
      case kVideoRotation_180:
        rotation_mode = libyuv::kRotate180;
        break;
      case kVideoRotation_270:
        rotation_mode = libyuv::kRotate270;
        break;
    }
  }

  // Convert (and, if requested, rotate) the captured data into the I420
  // buffer with libyuv.
  const int conversionResult = libyuv::ConvertToI420(
      videoFrame, videoFrameLength, buffer->MutableDataY(), buffer->StrideY(),
      buffer->MutableDataU(), buffer->StrideU(), buffer->MutableDataV(),
      buffer->StrideV(), 0, 0,  // No Cropping
      width, height, target_width, target_height, rotation_mode,
      ConvertVideoType(frameInfo.videoType));
  if (conversionResult < 0) {
    // The leading space in " to I420." keeps the type value and the word
    // "to" from running together in the log output.
    RTC_LOG(LS_ERROR) << "Failed to convert capture frame from type "
                      << static_cast<int>(frameInfo.videoType) << " to I420.";
    return -1;
  }
  // Wrap the converted buffer in a VideoFrame.
  VideoFrame captureFrame =
      VideoFrame::Builder()
          .set_video_frame_buffer(buffer)
          .set_timestamp_rtp(0)
          .set_timestamp_ms(rtc::TimeMillis())
          .set_rotation(!apply_rotation ? _rotateFrame : kVideoRotation_0)
          .build();
  captureFrame.set_ntp_time_ms(captureTime);
  // Pass the frame on; DeliverCapturedFrame invokes OnFrame on the
  // registered data callback, if any.
  DeliverCapturedFrame(captureFrame);

  return 0;
}

主要完成了：

将采集到的原始数据（可能是其他YUV格式，也可能是MJPEG等）统一转换成I420格式。
将转换后的数据重新封装成一个 VideoFrame 格式。
调用DeliverCapturedFrame将数据继续向上抛。

看看抛给谁了？

cpp 复制代码

/**
 * Counts the frame and, when a data callback has been registered through
 * RegisterCaptureDataCallback, passes the frame to that callback's OnFrame.
 *
 * @param captureFrame The frame to deliver.
 * @return Always 0.
 */
int32_t VideoCaptureImpl::DeliverCapturedFrame(VideoFrame& captureFrame) {
  // Frame count used for the local frame rate callback.
  UpdateFrameCount();

  // Nothing to deliver to when no sink has been registered.
  if (_dataCallBack == nullptr) {
    return 0;
  }

  _dataCallBack->OnFrame(captureFrame);
  return 0;
}

小插曲开始：

那么问题来了，这个_dataCallBack是谁？看上面流程图也可以知道，或者看下面代码也可以知道是VcmCapturer：

cpp 复制代码

/**
 * Creates the capture module and registers this VcmCapturer as the receiver
 * of its captured frames.
 *
 * NOTE(review): this is an abridged excerpt. `unique_name` is not declared
 * here and width/height/target_fps/capture_device_index are unused, so the
 * device lookup and capture start-up were presumably elided — confirm
 * against the full source.
 *
 * @return false if the capture module could not be created, true otherwise.
 */
bool VcmCapturer::Init(size_t width,
                       size_t height,
                       size_t target_fps,
                       size_t capture_device_index) {
  // Create the VideoCapture object.
  vcm_ = webrtc::VideoCaptureFactory::Create(unique_name);
  if (!vcm_) {
    return false;
  }
  // Register this VcmCapturer itself as the data callback.
  vcm_->RegisterCaptureDataCallback(this);

  // Every path of a bool function must return a value; flowing off the end
  // is undefined behaviour.
  return true;
}
// Registers the sink that receives captured frames. The pointer is stored
// in _dataCallBack while holding api_lock_.
void VideoCaptureImpl::RegisterCaptureDataCallback(
    rtc::VideoSinkInterface<VideoFrame>* dataCallBack) {
  MutexLock lock(&api_lock_);
  _dataCallBack = dataCallBack;
}

小插曲完毕！

于是，我们来到VcmCapturer的OnFrame函数看看：

cpp 复制代码

// Receives a captured video frame and forwards it unchanged to
// TestVideoCapturer::OnFrame.
void VcmCapturer::OnFrame(const VideoFrame& frame) {
  TestVideoCapturer::OnFrame(frame);
}

继续向上层看：

cpp 复制代码

/**
 * 从 VcmCapturer::OnFrame 抛上来的
 */
void TestVideoCapturer::OnFrame(const VideoFrame& original_frame) {
  int cropped_width = 0;
  int cropped_height = 0;
  int out_width = 0;
  int out_height = 0;
  // 对原始视频帧进行处理（比如你加一些特效）
  VideoFrame frame = MaybePreprocess(original_frame);
  // 对初步处理后的视频帧分辨率进行调整（如果不是16:9或者4:3要调整成这样）
  if (!video_adapter_.AdaptFrameResolution(
          frame.width(), frame.height(), frame.timestamp_us() * 1000,
          &cropped_width, &cropped_height, &out_width, &out_height)) {
    // Drop frame in order to respect frame rate constraint.
    return;
  }

  if (out_height != frame.height() || out_width != frame.width()) {
    // Video adapter has requested a down-scale. Allocate a new buffer and
    // return scaled version.
    // For simplicity, only scale here without cropping.
    // 如果视频宽高和请求的不一致，进行缩放（不裁剪）
    rtc::scoped_refptr<I420Buffer> scaled_buffer =
        I420Buffer::Create(out_width, out_height);
    scaled_buffer->ScaleFrom(*frame.video_frame_buffer()->ToI420());
    VideoFrame::Builder new_frame_builder =
        VideoFrame::Builder()
            .set_video_frame_buffer(scaled_buffer)
            .set_rotation(kVideoRotation_0)
            .set_timestamp_us(frame.timestamp_us())
            .set_id(frame.id());
    if (frame.has_update_rect()) {
      VideoFrame::UpdateRect new_rect = frame.update_rect().ScaleWithFrame(
          frame.width(), frame.height(), 0, 0, frame.width(), frame.height(),
          out_width, out_height);
      new_frame_builder.set_update_rect(new_rect);
    }
    // 缩放后的视频帧交给VideoBroadCaster
    broadcaster_.OnFrame(new_frame_builder.build());

  } else {
    // 如果不需要缩放，那么直接交给 VideoBroadcaster 进行分发
    // No adaptations needed, just return the frame as is.
    broadcaster_.OnFrame(frame);
  }
}

里面做了几件事：

可以对裸数据做一些处理，比如加特效什么的；
通过video_adapter_计算本帧的裁剪尺寸和输出尺寸（为满足帧率限制，这一步也可能直接丢帧）；
宽高和请求的不一致，进行一些缩放；
最终都是调用视频分发器VideoBroadcaster的OnFrame方法，进入视频分发器之后，数据就会被分发给本地渲染器和编码器（编码后发送出去），前面已经讲过，不再赘述；

四、总结：

其实逻辑还是非常明确的，用的也是常见的设计模式（观察者模式）：将底层数据一层层抛给应用层。需要注意的是，媒体数据量非常大（尤其是高清视频），我们所说的“抛数据”并不是每一层都拷贝一份。阅读代码时要留意每个环节是怎么做的：哪些环节拷贝了数据，又是因为什么才迫不得已拷贝的——这对诸位写出实时性好的软件有很大影响。

各位看官，觉得写得还行，动动小手点个赞，真的通宵达旦在写文章！！！