本文深入分析 ZLMediaKit 中 RTP 包的封装与解封装、RTCP 反馈报文的构造、RTP 排序与抖动缓冲、Frame 抽象层等核心实现,追到网络收发的系统调用。
1. RTP 在 ZLMediaKit 中的位置
RTP 是流媒体数据传输的基础承载协议,在 ZLMediaKit 中:
- RTSP 推拉流:音视频数据通过 RTP 包在 TCP/UDP 上传输
- WebRTC:SRTP 加密封装的 RTP 包通过 UDP 传输
- GB28181:设备推流使用 RTP/PS 封装
   ┌──────────┐
   │ RTSP     │ ← RTP over TCP/UDP
   │ WebRTC   │ ← SRTP over UDP
   │ GB28181  │ ← RTP/PS over UDP
   └────┬─────┘
        │
┌───────┴──────────┐
│ RtpPacket        │ ← RTP 封装/解封装
│ RtpReceiver      │ ← RTP 排序/抖动缓冲
│ Frame / Track    │ ← 帧抽象 / 媒体轨道
└──────────────────┘
2. RtpPacket --- RTP 包封装
2.1 数据结构
文件:src/Rtp/RtpPacket.h
cpp
// An RTP packet: a Buffer subclass that exposes a bit-field view of the
// 12-byte fixed RTP header, accessors for the payload, and a few extra
// bookkeeping fields.
class RtpPacket : public Buffer {
public:
    using Ptr = std::shared_ptr<RtpPacket>;

    // Fixed RTP header fields (mapped directly onto RFC 3550).
    // The bit-fields of the first two octets are declared in two different
    // orders and the preprocessor selects one based on the host byte order.
    // NOTE(review): if __BYTE_ORDER / __BIG_ENDIAN are not defined by an earlier
    // include, both evaluate to 0 inside #if and the big-endian branch is
    // selected silently — confirm the defining header is always included.
    struct Header {
#if __BYTE_ORDER == __BIG_ENDIAN
        uint8_t version : 2;   // Version, fixed at 2
        uint8_t padding : 1;   // Padding flag
        uint8_t extension : 1; // Extension flag
        uint8_t csrc : 4;      // CSRC count
        uint8_t marker : 1;    // Marker bit (end of a video frame)
        uint8_t pt : 7;        // Payload type
#else
        // Same fields as above, declared in reverse order within each octet.
        uint8_t csrc : 4;
        uint8_t extension : 1;
        uint8_t padding : 1;
        uint8_t version : 2;
        uint8_t pt : 7;
        uint8_t marker : 1;
#endif
        // The three fields below are written by setHeader() through
        // htons()/htonl(), i.e. they hold network-byte-order values.
        uint16_t seq;   // Sequence number
        uint32_t stamp; // Timestamp
        uint32_t ssrc;  // Synchronization source identifier
    };

    Header *getHeader() const;
    uint8_t *getPayload() const;
    size_t getPayloadSize() const;

    // ZLMediaKit extension fields (members of the packet object, declared
    // outside the Header struct)
    uint64_t _ntp_stamp = 0;        // NTP timestamp (used for audio/video sync)
    uint64_t _time_stamp = 0;       // PTS in milliseconds
    TrackType _type = TrackInvalid; // Media track type
};
2.2 RTP 包构造
cpp
// Factory: returns a freshly default-constructed RtpPacket held by a
// shared pointer.
RtpPacket::Ptr RtpPacket::create() {
    auto packet = std::make_shared<RtpPacket>();
    return packet;
}
// 设置 RTP 头部
void RtpPacket::setHeader(uint16_t seq, uint32_t stamp,
uint32_t ssrc, bool marker,
uint8_t pt) {
auto header = getHeader();
header->version = 2;
header->padding = 0;
header->extension = 0;
header->csrc = 0;
header->marker = marker ? 1 : 0;
header->pt = pt;
header->seq = htons(seq);
header->stamp = htonl(stamp);
header->ssrc = htonl(ssrc);
}
2.3 RTP 打包发送 --- H264
文件:src/Extension/H264Rtp.cpp
ZLMediaKit 将 H264 NALU 按 RTP 协议打包,遵循 RFC 6184:
cpp
// Packetize one H.264 NALU into RTP packets (RFC 6184) and hand each packet
// to _on_rtp.
//
// @param data   Pointer to the NALU; data[0] is read as the NALU header byte.
// @param len    NALU length in bytes. Nothing is emitted when it is 0.
// @param stamp  RTP timestamp written into every packet produced here.
// @param marker Marker bit requested by the caller for this NALU. It is placed
//               on the single packet, or on the LAST fragment only when the
//               NALU is fragmented.
//
// A NALU of at most kMaxRtpPayload bytes is sent as a Single NAL Unit Packet.
// Larger NALUs are split into FU-A fragments; each fragment carries the 2-byte
// FU indicator + FU header followed by up to kMaxRtpPayload - 2 bytes of NALU
// body (the original NALU header byte is not repeated).
void H264RtpEncoder::makeRtp(const uint8_t *data, size_t len,
                             uint32_t stamp, bool marker) {
    const size_t kMaxRtpPayload = 1400;             // max RTP payload (MTU 1500 minus header overhead)
    const size_t kFuOverhead = 2;                   // FU indicator + FU header
    const size_t kMaxFuData = kMaxRtpPayload - kFuOverhead;

    // An empty NALU has no header byte to read and nothing worth sending.
    if (data == nullptr || len == 0) {
        return;
    }

    if (len <= kMaxRtpPayload) {
        // Single NAL Unit Packet mode.
        auto rtp = RtpPacket::create();
        rtp->setHeader(_seq++, stamp, _ssrc, marker, _pt);
        rtp->setPayload(data, len);
        _on_rtp(rtp);
        return;
    }

    // FU-A fragmentation mode.
    const uint8_t kStartBit = 0x80; // S bit of the FU header
    const uint8_t kEndBit = 0x40;   // E bit of the FU header
    const uint8_t fu_indicator = static_cast<uint8_t>((data[0] & 0xE0) | 28); // F/NRI kept, type = 28 (FU-A)
    const uint8_t nal_type = static_cast<uint8_t>(data[0] & 0x1F);            // original NAL type

    // Skip the original NALU header; it is rebuilt by the receiver from the
    // FU indicator and FU header.
    data += 1;
    len -= 1;

    // len is at least kMaxRtpPayload here, so the first fragment is never also
    // the last one and the S and E bits are never set together.
    bool first = true;
    while (len > 0) {
        const bool last = len <= kMaxFuData;
        const size_t chunk = last ? len : kMaxFuData;

        uint8_t fu_header = nal_type;
        if (first) {
            fu_header |= kStartBit;
        }
        if (last) {
            fu_header |= kEndBit;
        }

        auto rtp = RtpPacket::create();
        // Only the final fragment may carry the marker, and only if the caller
        // asked for it: the last fragment of a NALU is not necessarily the last
        // packet of the access unit.
        rtp->setHeader(_seq++, stamp, _ssrc, last && marker, _pt);
        rtp->setPayload({fu_indicator, fu_header}, data, chunk);
        _on_rtp(rtp);

        data += chunk;
        len -= chunk;
        first = false;
    }
}
RFC 6184 FU-A 包格式:
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| FU indicator  |   FU header   |                               |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               |
|                                                               |
|                         FU payload                            |
|                                                               |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
2.4 RTP 解包 --- H264
cpp
// Depacketize one H.264 RTP packet.
//
// The low 5 bits of the first payload byte select the packet kind:
//   1..23 : single NAL unit packet -> wrapped into an H264Frame and emitted
//   24    : STAP-A                 -> handleSTAP_A()
//   28    : FU-A fragment          -> handleFU_A()
// Any other value is ignored.
//
// @param rtp  Packet to depacketize; its payload pointer/size and header
//             marker bit are read.
void H264RtpDecoder::onRtp(const RtpPacket::Ptr &rtp) {
    auto payload = rtp->getPayload();
    auto len = rtp->getPayloadSize();

    // payload[0] is read below; an empty payload carries no NAL header.
    if (payload == nullptr || len == 0) {
        return;
    }

    const uint8_t nal_type = payload[0] & 0x1F;

    if (nal_type >= 1 && nal_type <= 23) {
        // Single NAL unit packet. The buffer must really start with the
        // 4-byte start code, because _prefix_size tells readers to skip 4
        // bytes to reach the NAL header.
        _frame = std::make_shared<H264Frame>();
        _frame->_buffer = {0x00, 0x00, 0x00, 0x01};
        _frame->_buffer.append(payload, len);
        _frame->_prefix_size = 4; // 00 00 00 01

        // Deliver the frame the same way the FU-A end-of-NALU path does.
        _frame->_pts = _last_rtp_stamp;
        onFrame(_frame);
        _frame.reset(); // a later FU-A fragment must not append to an emitted frame
        return;
    }

    switch (nal_type) {
    case 28: // FU-A
        handleFU_A(payload, len, rtp->getHeader()->marker);
        break;
    case 24: // STAP-A
        handleSTAP_A(payload, len);
        break;
    default:
        // Packet kinds not handled here are dropped.
        break;
    }
}
// Reassemble a NALU from FU-A fragments.
//
// @param data    RTP payload: data[0] = FU indicator, data[1] = FU header,
//                data[2..] = fragment body.
// @param len     Payload length in bytes; payloads shorter than 2 are ignored.
// @param marker  RTP marker bit of the packet. Not used: fragment boundaries
//                are taken from the S/E bits of the FU header.
//
// A fragment with the S bit starts a new frame (start code + rebuilt NALU
// header). Every fragment appends its body. A fragment with the E bit stamps
// the frame with _last_rtp_stamp, passes it to onFrame() and clears _frame.
void H264RtpDecoder::handleFU_A(const uint8_t *data, size_t len,
                                bool marker) {
    (void)marker;

    // Both FU bytes are read below and len - 2 must not underflow.
    if (data == nullptr || len < 2) {
        return;
    }

    const uint8_t fu_indicator = data[0];
    const uint8_t fu_header = data[1];
    const bool start = (fu_header & 0x80) != 0; // S bit
    const bool end = (fu_header & 0x40) != 0;   // E bit

    if (start) {
        // First fragment: rebuild the original NALU header from the F/NRI bits
        // of the indicator and the type bits of the FU header.
        const uint8_t nalu_header = (fu_indicator & 0xE0) | (fu_header & 0x1F);
        _frame = std::make_shared<H264Frame>();
        _frame->_buffer = {0x00, 0x00, 0x00, 0x01, nalu_header};
        _frame->_prefix_size = 4;
    } else if (!_frame) {
        // Middle/end fragment without a start fragment (e.g. the start was
        // lost): there is no frame to append to, so drop it.
        return;
    }

    // Append the fragment body.
    _frame->_buffer.append(data + 2, len - 2);

    if (end) {
        // Last fragment: emit the completed frame.
        _frame->_pts = _last_rtp_stamp;
        onFrame(_frame);
        // Release our reference so stray fragments cannot modify a frame that
        // has already been handed out.
        _frame.reset();
    }
}
3. RtpReceiver --- 排序与抖动缓冲
3.1 核心问题
网络传输中 RTP 包可能:
- 乱序:后发的包先到(seq 非递增)
- 丢包:某个 seq 缺失
- 重复:同一 seq 收到两次
3.2 RtpReceiver 实现
文件:src/Rtp/RtpReceiver.h
cpp
template<typename T>
class RtpReceiver {
public:
void onRtp(const RtpPacket::Ptr &rtp) {
auto seq = rtp->getHeader()->seq;
// 1. 检查是否是期望的下一个 seq
if (seq == (uint16_t)(_next_seq)) {
// 顺序到达
outputFrame(rtp);
_next_seq = seq + 1;
// 检查缓存的后续包
while (_rtp_cache.count(_next_seq)) {
outputFrame(_rtp_cache[_next_seq]);
_rtp_cache.erase(_next_seq);
_next_seq++;
}
} else if (seqAfter(seq, _next_seq)) {
// 乱序到达,缓存起来
_rtp_cache[seq] = rtp;
// 缓存超限,丢弃最旧的
if (_rtp_cache.size() > kMaxCacheSize) {
auto it = _rtp_cache.begin();
_next_seq = it->first + 1;
outputFrame(it->second);
_rtp_cache.erase(it);
}
}
// 重复包,丢弃
}
private:
uint16_t _next_seq = 0;
std::map<uint16_t, RtpPacket::Ptr> _rtp_cache;
static bool seqAfter(uint16_t seq, uint16_t expected) {
// 处理 seq 环回(65535 → 0)
return (seq > expected && seq - expected < 32768) ||
(seq < expected && expected - seq > 32768);
}
};
4. Frame 抽象层
4.1 Frame 类层次
Frame (基类)
├─ FrameFromPtr --- 持有原始指针的帧
│ ├─ H264Frame --- H264 帧
│ ├─ H265Frame --- H265 帧
│ └─ AACFrame --- AAC 帧
└─ FrameMerger --- 多帧合并器
cpp
// Abstract media frame. Derives from Buffer and adds codec/track metadata
// queries plus presentation/decoding timestamps.
class Frame : public Buffer {
public:
    // Track type of this frame
    virtual TrackType getTrackType() const = 0;
    // Codec of this frame
    virtual CodecId getCodecId() const = 0;
    // Whether this is a key frame
    virtual bool keyFrame() const = 0;
    // Whether this is a configuration frame (H264Frame in this file reports
    // SPS/PPS NAL units here)
    virtual bool configFrame() const = 0;
    // Length of the prefix preceding the frame data (e.g. 00 00 00 01)
    virtual size_t prefixSize() const = 0;
    // Presentation timestamp (PTS)
    uint64_t _pts = 0;
    // Decoding timestamp (DTS)
    uint64_t _dts = 0;
};
4.2 H264Frame
cpp
// H.264 frame. The NAL unit type is taken from the low 5 bits of the byte
// located right after the prefix (data()[_prefix_size]).
class H264Frame : public FrameFromPtr {
    // H.264 frames always belong to a video track.
    TrackType getTrackType() const override { return TrackVideo; }

    CodecId getCodecId() const override { return CodecH264; }

    // Key frame == IDR slice (NAL type 5).
    bool keyFrame() const override {
        return (data()[_prefix_size] & 0x1F) == 5;
    }

    // Configuration frame == SPS (7) or PPS (8).
    bool configFrame() const override {
        switch (data()[_prefix_size] & 0x1F) {
        case 7:
        case 8:
            return true;
        default:
            return false;
        }
    }
};
5. Track --- 媒体轨道
5.1 Track 基类
cpp
// Base class of a media track. Concrete tracks must report their codec,
// sample rate and channel count; the video dimensions default to 0.
class Track : public TrackSource {
public:
    // Codec carried by this track
    virtual CodecId getCodecId() const = 0;
    // Sample rate
    virtual int getSampleRate() const = 0;
    // Channel count
    virtual int getChannel() const = 0;
    // Video width; the base implementation returns 0
    virtual int getVideoWidth() const { return 0; }
    // Video height; the base implementation returns 0
    virtual int getVideoHeight() const { return 0; }
    // Feed in a frame -> dispatched to RtpEncoder / Muxer
    void inputFrame(const Frame::Ptr &frame);
};
5.2 H264Track
cpp
// H.264 video track.
class H264Track : public VideoTrack {
public:
    // Build a track from the SPS/PPS blobs and the stated width/height/fps.
    H264Track(const std::string &sps, const std::string &pps,
              int width, int height, int fps);

    CodecId getCodecId() const override { return CodecH264; }

    // When an SPS is available, let getAVCInfo() fill in _width / _height /
    // _fps from it; without an SPS this does nothing.
    void ready() override {
        if (!_sps.empty()) {
            getAVCInfo(_sps, _width, _height, _fps);
        }
    }
};
6. RTCP 实现
6.1 SR(Sender Report)
cpp
// src/Rtp/Rtcp.cpp
// RTCP Sender Report (packet type 200) laid out to match the RFC 3550 wire
// format: 8-byte header (V/P/RC, PT, length, SSRC) followed by the 20-byte
// sender info block, 28 bytes in total.
//
// The 64-bit NTP timestamp is carried as two 32-bit words (most and least
// significant), so both halves are uint32_t.
//
// The bit-fields of the first octet are declared in opposite orders for big-
// and little-endian targets so that `version` ends up in the two most
// significant bits of the octet on the common compilers (bit-field allocation
// order is implementation-defined).
//
// Multi-byte fields are plain integers here; converting them to/from network
// byte order is left to the code that fills or reads the struct.
class RtcpSR {
public:
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    uint8_t version : 2;
    uint8_t padding : 1;
    uint8_t rc : 5;       // Reception report count
#else
    uint8_t rc : 5;       // Reception report count
    uint8_t padding : 1;
    uint8_t version : 2;
#endif
    uint8_t pt = 200;     // Packet type: SR
    uint16_t length;
    uint32_t ssrc;
    uint32_t ntpmsb;      // NTP timestamp, most significant 32 bits
    uint32_t ntplsb;      // NTP timestamp, least significant 32 bits
    uint32_t rtpstamp;    // RTP timestamp
    uint32_t packetcount; // Number of packets sent
    uint32_t octetcount;  // Number of payload bytes sent
};
6.2 RR(Receiver Report)
cpp
// RTCP Receiver Report (packet type 201) laid out to match the RFC 3550 wire
// format: an 8-byte header followed by zero or more 24-byte report blocks.
//
// The bit-fields of the first octet are declared in opposite orders for big-
// and little-endian targets so that `version` ends up in the two most
// significant bits of the octet on the common compilers (bit-field allocation
// order is implementation-defined).
//
// Multi-byte fields are plain integers here; converting them to/from network
// byte order is left to the code that fills or reads the struct.
class RtcpRR {
public:
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    uint8_t version : 2;
    uint8_t padding : 1;
    uint8_t rc : 5;
#else
    uint8_t rc : 5;
    uint8_t padding : 1;
    uint8_t version : 2;
#endif
    uint8_t pt = 201;     // Packet type: RR
    uint16_t length;
    uint32_t ssrc;

    // One reception report block (24 bytes on the wire).
    struct ReportBlock {
        uint32_t ssrc;          // SSRC of the source being reported on
        // Fraction lost (8 bits) and cumulative packets lost (24 bits) share a
        // single 32-bit word on the wire.
        uint32_t fraction : 8;  // Fraction lost, in 1/256 units
        uint32_t lost : 24;     // Cumulative number of packets lost
        uint32_t lastseq;       // Extended highest sequence number received
        uint32_t jitter;        // Interarrival jitter
        uint32_t lsr;           // Last SR timestamp (32 bits of the NTP timestamp)
        uint32_t dlsr;          // Delay since last SR
    };
};
6.3 丢包率计算
cpp
// Compute the RTCP "fraction lost" value: lost / expected as an 8-bit
// fixed-point number (lost * 256 / expected).
//
// @param expected  Number of packets expected in the interval.
// @param received  Number of packets actually received in the interval.
// @return 0 when nothing was expected or when at least as many packets were
//         received as expected (e.g. because of duplicates); otherwise the
//         ratio scaled by 256 and clamped to 255.
uint8_t RtcpRR::calculateFractionLost(uint32_t expected,
                                      uint32_t received) {
    // received > expected would underflow the unsigned subtraction below.
    if (expected == 0 || received >= expected) {
        return 0;
    }

    // 64-bit arithmetic: lost << 8 can exceed 32 bits for large counts.
    const uint64_t lost = static_cast<uint64_t>(expected) - received;
    const uint64_t fraction = (lost << 8) / expected;

    // Total loss yields exactly 256, which would wrap to 0 in a uint8_t.
    return static_cast<uint8_t>(fraction > 255 ? 255 : fraction);
}
6.4 抖动计算
cpp
// Update and return the running jitter estimate kept in _jitter.
//
// @param rtp_stamp     RTP timestamp of the packet, in clock_rate units.
// @param arrival_time  Arrival time of the packet; presumably milliseconds,
//                      since the RTP timestamp is scaled by 1000 / clock_rate
//                      before being subtracted — TODO confirm with the caller.
// @param clock_rate    RTP clock rate in Hz. When 0 the estimate is returned
//                      unchanged.
// @return The updated estimate truncated to 32 bits.
//
// RFC 3550 Section 6.4.1 defines, for consecutive packets i-1 and i:
//   D(i-1,i) = (R_i - S_i) - (R_{i-1} - S_{i-1})
//   J(i)     = J(i-1) + (|D(i-1,i)| - J(i-1)) / 16
// NOTE(review): this function feeds the transit time (R_i - S_i) of the current
// packet alone into the filter; it does not subtract the previous packet's
// transit time. Matching the RFC requires remembering the previous transit in
// a member, which is not declared in the code shown here.
uint32_t RtcpRR::calculateJitter(uint32_t rtp_stamp,
                                 uint64_t arrival_time,
                                 uint32_t clock_rate) {
    // Avoid dividing by zero when the clock rate is unknown.
    if (clock_rate == 0) {
        return static_cast<uint32_t>(_jitter);
    }

    // Widen before multiplying: rtp_stamp * 1000 overflows 32 bits for any
    // timestamp above ~4.29 million.
    const uint64_t media_time = static_cast<uint64_t>(rtp_stamp) * 1000 / clock_rate;

    auto D = (arrival_time - media_time);
    // Gain of 1/16, with +8 for rounding.
    _jitter += (D - _jitter + 8) / 16;
    return static_cast<uint32_t>(_jitter);
}
7. RTP 收发完整调用链
7.1 RTP 发送(RTSP 推流方向)
推流端发送 H264 NALU
  → H264RtpEncoder::makeRtp(data, len, stamp, marker)
    → len <= 1400: 单 NALU 包
      → RtpPacket::create() → setHeader() → setPayload()
      → _on_rtp(rtp) → RtspSession::onSendRtp()
        → Socket::send(rtp->data(), rtp->size())
          → ::send(fd, data, len, MSG_NOSIGNAL) 系统调用
    → len > 1400: FU-A 分片
      → 循环创建 RtpPacket(首片/中间片/末片,marker 只会出现在末片)
      → 逐片 Socket::send()
7.2 RTP 接收(RTSP 拉流方向)
网卡收到数据 → TCP 协议栈 → 接收缓冲区
  → epoll_wait → EPOLLIN
    → Socket::onRead()
      → recv(fd, buf, len, 0)
      → RtspSession::onRecv(buf)
        → RtspSplitter::onRecv() —— 解析 RTSP 信令/RTP 交织
          → 如果是 RTP 交织包:
            → RtpReceiver::onRtp(rtp)
              → 排序/缓存
              → H264RtpDecoder::onRtp(rtp)
                → FU-A 重组 / 单包解析
                → 输出 H264Frame
                  → Track::inputFrame(frame)
                    → MediaSource::onWrite(frame)
8. 小结
ZLMediaKit 的 RTP/RTCP 实现要点:
- RtpPacket 直接映射 RFC 3550:位域与协议头一一对应,零拷贝解析
- FU-A 分片/重组:H264 NALU 超过最大 RTP 载荷(1400 字节)时自动分片,接收端按 FU header 的 S/E 位重组
- 排序缓冲:std::map<uint16_t, RtpPacket::Ptr> 按 seq 排序,处理乱序和环回
- Frame 抽象:RTP 解封装后统一为 Frame,与协议无关
- RTCP 反馈:SR/RR 按 RFC 3550 标准实现,支持丢包率和抖动计算
下一篇:RTSP 服务端推拉流全流程分析