1、README
前言
注意:flv是不支持h.265封装的。
a. demo使用
bash
$ make clean && make DEBUG=1
$
$ ./flv_mux_h264_aac
Usage:
./flv_mux_h264_aac avfile/test1_856x480_24fps.h264 24 avfile/test1_44100_stereo.aac out1.flv
./flv_mux_h264_aac avfile/test2_960x544_25fps.h264 25 avfile/test2_44100_mono.aac out2.flv
b. 参考链接
【参考文章】
【参考源码】
【工具下载】
-
SpecialAAAC.exe:https://sourceforge.net/projects/aacstreamanalysis/
-
H264BSAnalyzer.exe:https://github.com/latelee/H264BSAnalyzer/tree/master/release
-
FlvParse.exe:https://github.com/ty6815/AvStackDocs/tree/master/media format/flv
c. demo目录架构
bash
$ tree
.
├── aac_adts.c
├── aac_adts.h
├── avfile
│ ├── out1.flv
│ ├── out2.flv
│ ├── test1_44100_stereo.aac
│ ├── test1_856x480_24fps.h264
│ ├── test2_44100_mono.aac
│ └── test2_960x544_25fps.h264
├── docs
│ ├── FLV封装格式介绍及解析 - 简书.mhtml
│ ├── FLV格式详解_JT同学的博客-CSDN博客_flv格式.mhtml
│ ├── 音视频封装:FLV格式详解和打包H264、AAC方案(上) - 云+社区 - 腾讯云.mhtml
│ └── 音视频封装:FLV格式详解和打包H264、AAC方案(下) - 云+社区 - 腾讯云.mhtml
├── flv.c
├── flv_format.h
├── flv.h
├── h264_nalu.c
├── h264_nalu.h
├── main.c
├── Makefile
├── README.md
├── reference_code
│ ├── flvmuxer-master.zip
│ └── H.264toFLV-master.zip
└── tools
├── FlvParse.exe
├── H264BSAnalyzer.exe
└── SpecialAAAC.exe
2、主要代码片段
flv_format.h
c
/***************************************************************
* describe: Flv file format description(Mainly for H.264 & AAC)
* author: linriming
* e-mail: linriming20@163.com
***************************************************************/
#ifndef __FLV_FORMAT_H__
#define __FLV_FORMAT_H__
#include <stdint.h>
#define AUDIO_SUPPORT(x) (x << 2) /* bit[2] in flvheader's type_flag */
#define VIDEO_SUPPORT(x) (x << 0) /* bit[0] in flvheader's type_flag */
#define SIZE_FLV_HEADER sizeof(struct flvHeader) /* 9 Bytes */
#define SIZE_FLV_TAG_HEADER sizeof(struct flvTagHeader) /* 11 Bytes */
#define SIZE_PREVIOUS_TAG_SIZE sizeof(uint32_t) /* 4 Bytes */
/* FLV tag type */
typedef enum{
FLVTAG_TYPE_AUDIO = 0x08,
FLVTAG_TYPE_VIDEO = 0x09,
FLVTAG_TYPE_SCRIPT = 0x12,
}flvTagType;
/* AMF data type in <Script Tag> */
typedef enum{
AMF_DATA_TYPE_NUMBER = 0x00,
AMF_DATA_TYPE_BOOL = 0x01,
AMF_DATA_TYPE_STRING = 0x02,
AMF_DATA_TYPE_OBJECT = 0x03,
AMF_DATA_TYPE_NULL = 0x05,
AMF_DATA_TYPE_UNDEFINED = 0x06,
AMF_DATA_TYPE_REFERENCE = 0x07,
AMF_DATA_TYPE_MIXEDARRAY = 0x08,
AMF_DATA_TYPE_OBJECT_END = 0x09,
AMF_DATA_TYPE_ARRAY = 0x0a,
AMF_DATA_TYPE_DATE = 0x0b,
AMF_DATA_TYPE_LONG_STRING = 0x0c,
AMF_DATA_TYPE_UNSUPPORTED = 0x0d,
} amfDataType;
/* audio tag */
typedef enum{
SFI_LINEAR_PCM_PLATFORM_ENDIAN = 0,
SFI_ADPCM = 1,
SFI_MP3 = 2,
SFI_LINEAR_PCM_LITTLE_ENDIAN = 3,
SFI_NELLYMOSER_16KHZ_MONO = 4,
SFI_NELLYMOSER_8KHZ_MONO = 5,
SFI_NELLYMOSER = 6,
SFI_G711A = 7,
SFI_G711MU = 8,
SFI_RESERVED = 9,
SFI_AAC = 10,
SFI_SPEEX = 11,
SFI_MP3_8KHZ = 14,
SFI_DEVIVE_SPECIFIC_SOUND = 15,
}soundFormatIndex;
typedef enum{
SRI_5_5KHZ = 0,
SRI_11KHZ = 1,
SRI_22KHZ = 2,
SRI_44KHZ = 3,
}soundSamplerateIndex;
typedef enum{
SSI_8BIT = 0,
SSI_16BIT = 1,
}soundSizeIndex;
typedef enum{
STI_MONO = 0,
STI_STEREO = 1,
}soundTypeIndex;
#define AAC_PACKET_TYPE_SEQUENCE_HEADER (0)
#define AAC_PACKET_TYPE_RAW (1)
typedef enum{
AAC_MAIN = 1,
AAC_LC = 2,
AAC_SSR = 3,
}aacProfileIndex;
typedef enum{
AAC_96KHz = 0x0,
AAC_88_2KHz = 0x1,
AAC_64KHz = 0x2,
AAC_48KHz = 0x3,
AAC_44_1KHz = 0x4,
AAC_32KHz = 0x5,
AAC_24KHz = 0x6,
AAC_22_05KHz = 0x7,
AAC_16KHz = 0x8,
AAC_12KHz = 0x9,
AAC_11_025KHz = 0xa,
AAC_8KHz = 0xb,
AAC_RESERVED = 0xc,
}aacSamplerateIndex;
typedef enum{
AAC_CHANNEL_SPECIAL = 0x0,
AAC_CHANNEL_MONO = 0x1,
AAC_CHANNEL_STEREO = 0x2,
AAC_CHANNEL_3 = 0x3,
AAC_CHANNEL_4 = 0x4,
AAC_CHANNEL_5 = 0x5,
AAC_CHANNEL_5_1 = 0x6,
AAC_CHANNEL_7_1 = 0x7,
AAC_CHANNELRESERVED = 0x8,
}aacChannelIndex;
#define AVC_PACKET_TYPE_SEQUENCE_HEADER (0)
#define AVC_PACKET_TYPE_NALU (1)
#define AVC_PACKET_TYPE_END_OF_SEQUENCE (2)
/* next for video tag */
#define VIDEOTAG_FRAMETYPE_KEYFRAME (1)
#define VIDEOTAG_FRAMETYPE_INTER_FRAME (2)
#define VIDEOTAG_FRAMETYPE_DISPOSABLE_INTER_FRAME (3)
#define VIDEOTAG_FRAMETYPE_GENERATED_KEYFRAME (4)
#define VIDEOTAG_FRAMETYPE_VIDEO_INFO_FRAME (5)
#define VIDEOTAG_CODECID_JPEG (1)
#define VIDEOTAG_CODECID_SORENSON_H263 (2)
#define VIDEOTAG_CODECID_SCREEN_VIDEO (3)
#define VIDEOTAG_CODECID_ON2_VP6 (4)
#define VIDEOTAG_CODECID_ON2_VP6_WITH_ALPHA_CHANNEL (5)
#define VIDEOTAG_CODECID_SCREEN_VIDEO_VERSION_2 (6)
#define VIDEOTAG_CODECID_AVC (7)
#pragma pack(push)
#pragma pack(1) /* 1 bytes align */
typedef struct flvHeader{
uint8_t signature[3]; /* signature bytes always 'F' 'L' 'V': 0x46 0x4C 0x56 */
uint8_t version; /* file version, always 0x01 */
uint8_t type_flag; /* bit[7:3] and bit[1] always 0, bit[2] for aduio, bit[0] for video */
uint32_t data_offset; /* size of header, 00 00 00 09(big-endian) for version 1 */
}T_FlvHeader, *PT_FlvHeader; /* 9 bytes totally */
typedef struct flvTagHeader{
uint8_t TagType; /* Type of this tag. Value are 8(audio), 9(video), 18(script), other(reserved). */
uint8_t DataSize[3]; /* Length of the data in the Data filed. */
uint8_t Timestamp[3]; /* Time in milliseconds at which the data in this applies. 0 in first tag in the FLV file. */
uint8_t TimestampExtended; /* Extension of Timestamp field to form a SI32 value, it is upper 8 bits. */
uint8_t StreamID[3]; /* Always 0 */
}T_FlvTagHeader, *PT_FlvTagHeader; /* 11 bytes total */
typedef struct flvTag{
T_FlvTagHeader flvheader; /* tag header */
uint8_t flvdata[0]; /* tag data index */
}T_FlvTag, *PT_FlvTag;
typedef struct avcVideoPacket{
T_FlvTagHeader flvheader;
uint8_t flvdata[0]; /* flv tag data index */
}T_AvcVideoPacket, *PT_AvcVideoPacket;
#pragma pack(pop)
#endif /* __FLV_FORMAT_H__ */
flv.c
c
#include "h264_nalu.h"
#include "aac_adts.h"
#include "flv.h"
static int generateFlvHeader(int hasVideo, int hasAudio, uint8_t *pOutData, uint32_t *pOutDataLen)
{
T_FlvHeader flvheader = {0};
if(!pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
flvheader.signature[0] = 'F';
flvheader.signature[1] = 'L';
flvheader.signature[2] = 'V';
flvheader.version = 0x01;
flvheader.type_flag = AUDIO_SUPPORT(hasAudio) | VIDEO_SUPPORT(hasVideo);
flvheader.data_offset = 0x09000000; /* 9 Bytes, size of flv header. big-endian. */
memcpy(pOutData, (uint8_t *)&flvheader, sizeof(T_FlvHeader));
*pOutDataLen = SIZE_FLV_HEADER;
return 0;
}
static int generatePreviousTagSize(uint32_t size, uint8_t *pOutData, uint32_t *pOutDataLen)
{
if(!pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
/* storge in file with big-endian */
pOutData[0] = (uint8_t)((size >> 24) & 0xFF);
pOutData[1] = (uint8_t)((size >> 16) & 0xFF);
pOutData[2] = (uint8_t)((size >> 8) & 0xFF);
pOutData[3] = (uint8_t)(size & 0xFF);
*pOutDataLen = SIZE_PREVIOUS_TAG_SIZE;
return 0;
}
static int generateScriptTag(uint8_t *pOutData, uint32_t *pOutDataLen)
{
PT_FlvTag ptScriptTag = NULL;
char *pString = NULL;
uint32_t stringLen = -1;
uint32_t dataSize = -1;
uint32_t amf2ArrayCnt = 0; /* have no member in this demo! */
uint32_t curPos = 0;
#if 0
/* have no member in this demo, so not need to convert! */
union{
double d;
uint8_t c[8];
} un;
#endif
if(!pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
ptScriptTag = (PT_FlvTag)pOutData;
//dataSize = -1; // calcurate bihind.
//*pOutDataLen = -1; // calcurate bihind.
ptScriptTag->flvheader.TagType = FLVTAG_TYPE_SCRIPT;
//ptScriptTag->flvheader.DataSize[0] = -1; // calcurate bihind.
//ptScriptTag->flvheader.DataSize[1] = -1;
//ptScriptTag->flvheader.DataSize[2] = -1;
ptScriptTag->flvheader.Timestamp[0] = 0;
ptScriptTag->flvheader.Timestamp[1] = 0;
ptScriptTag->flvheader.Timestamp[2] = 0;
ptScriptTag->flvheader.TimestampExtended = 0;
ptScriptTag->flvheader.StreamID[0] = 0;
ptScriptTag->flvheader.StreamID[1] = 0;
ptScriptTag->flvheader.StreamID[2] = 0;
/* AMF1 */
pString = "onMetaData";
stringLen = strlen(pString);
ptScriptTag->flvdata[curPos + 0] = AMF_DATA_TYPE_STRING;
ptScriptTag->flvdata[curPos + 1] = (uint8_t)((stringLen >> 8) & 0xFF);
ptScriptTag->flvdata[curPos + 2] = (uint8_t)(stringLen & 0xFF);
memcpy(&ptScriptTag->flvdata[curPos + 3], pString, stringLen);
curPos += (3 + stringLen);
/* AMF2 */
ptScriptTag->flvdata[curPos + 0] = AMF_DATA_TYPE_MIXEDARRAY;
ptScriptTag->flvdata[curPos + 1] = (uint8_t)((amf2ArrayCnt >> 24) & 0xFF);
ptScriptTag->flvdata[curPos + 2] = (uint8_t)((amf2ArrayCnt >> 16) & 0xFF);
ptScriptTag->flvdata[curPos + 3] = (uint8_t)((amf2ArrayCnt >> 8) & 0xFF);
ptScriptTag->flvdata[curPos + 4] = (uint8_t)((amf2ArrayCnt >> 0) & 0xFF);
curPos += 5;
#if 0 /* reference to `uint32_t amf2ArrayCnt = ...` */
pString = "duration";
stringLen = strlen(pString);
ptScriptTag->flvdata[curPos + 0] = (uint8_t)((stringLen >> 8) & 0xFF);
ptScriptTag->flvdata[curPos + 1] = (uint8_t)(stringLen & 0xFF);
memcpy(&ptScriptTag->flvdata[curPos + 2], pString, stringLen);
curPos += (2 + stringLen);
un.d = 30; /* Second */
ptScriptTag->flvdata[curPos + 0] = AMF_DATA_TYPE_NUMBER;
ptScriptTag->flvdata[curPos + 1] = un.c[7];
ptScriptTag->flvdata[curPos + 2] = un.c[6];
ptScriptTag->flvdata[curPos + 3] = un.c[5];
ptScriptTag->flvdata[curPos + 4] = un.c[4];
ptScriptTag->flvdata[curPos + 5] = un.c[3];
ptScriptTag->flvdata[curPos + 6] = un.c[2];
ptScriptTag->flvdata[curPos + 7] = un.c[1];
ptScriptTag->flvdata[curPos + 8] = un.c[0];
curPos += (1+8);
#endif
/* end */
ptScriptTag->flvdata[curPos + 0] = 0x00;
ptScriptTag->flvdata[curPos + 1] = 0x00;
ptScriptTag->flvdata[curPos + 2] = AMF_DATA_TYPE_OBJECT_END;
curPos += 3;
// now we can calculate it.
dataSize = curPos;
*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;
ptScriptTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
ptScriptTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
ptScriptTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
return 0;
}
static int generateAvcSequenceHeader(uint8_t *spsBuf, uint16_t spsLen, uint8_t *ppsBuf, uint16_t ppsLen,
uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
PT_FlvTag ptVideoTag = NULL;
uint32_t dataSize = 0;
if(!spsBuf || !spsLen || !ppsBuf || !ppsLen || !pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
ptVideoTag = (PT_FlvTag)pOutData;
dataSize = (13 + spsLen) + (3 + ppsLen);
*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;
ptVideoTag->flvheader.TagType = FLVTAG_TYPE_VIDEO;
ptVideoTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
ptVideoTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
ptVideoTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
ptVideoTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
ptVideoTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
ptVideoTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
ptVideoTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
ptVideoTag->flvheader.StreamID[0] = 0;
ptVideoTag->flvheader.StreamID[1] = 0;
ptVideoTag->flvheader.StreamID[2] = 0;
ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_KEYFRAME << 4) |\
(VIDEOTAG_CODECID_AVC)); /* 0x17, keyframe, avc */
/* next for AVCVIDEOPACKET */
ptVideoTag->flvdata[1] = AVC_PACKET_TYPE_SEQUENCE_HEADER; /* AVCPacketType: 0, AVC sequence header */
ptVideoTag->flvdata[2] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
ptVideoTag->flvdata[3] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
ptVideoTag->flvdata[4] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
/* next for AVCDecoderConfigurationRecord */
ptVideoTag->flvdata[5] = 0x01; /* ConfigurationVersion: always 0x01*/
ptVideoTag->flvdata[6] = spsBuf[1]; /* AVCProfileIndication: the first byte after the 'nalu type'(buf no include 'start code') */
ptVideoTag->flvdata[7] = spsBuf[2]; /* profile_compatibility: the second byte after the 'nalu type'(buf no include 'start code') */
ptVideoTag->flvdata[8] = spsBuf[3]; /* AVCLevelIndication: the third byte after the 'nalu type'(buf no include 'start code') */
ptVideoTag->flvdata[9] = 0xFF; /* lengthSizeMinusOne: always 0xFF, bit[7:2]: '111111b'reversed */
ptVideoTag->flvdata[10] = 0xE1; /* NumOfSequenceParmeterSets: always 0xE1, bit[7:5]: '111b'reversed */
ptVideoTag->flvdata[11] = (uint8_t)((spsLen >> 8) & 0xFF); /* SequenceParamterSetLength: big-endian, H */
ptVideoTag->flvdata[12] = (uint8_t)(spsLen & 0xFF); /* SequenceParamterSetLength: big-endian, L */
memcpy(&ptVideoTag->flvdata[13], spsBuf, spsLen);
ptVideoTag->flvdata[13+spsLen] = 0x01; /* NumOfPictureParmeterSets: always 0x01 */
ptVideoTag->flvdata[13+spsLen+1] = (uint8_t)((ppsLen >> 8) & 0xFF); /* PictureParamterSetLength: big-endian, H */
ptVideoTag->flvdata[13+spsLen+2] = (uint8_t)(ppsLen& 0xFF); /* PictureParamterSetLength: big-endian, L */
memcpy(&ptVideoTag->flvdata[13+spsLen+3], ppsBuf, ppsLen);
return 0;
}
static int generateAvcNALU(uint8_t *pNaluData, uint32_t naluDataLen, uint32_t isIDRNalu, uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
PT_FlvTag ptVideoTag = NULL;
uint32_t dataSize = 0;
if(!pNaluData || !naluDataLen || !pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
ptVideoTag = (PT_FlvTag)pOutData;
dataSize = 9 + naluDataLen;
*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;
ptVideoTag->flvheader.TagType = FLVTAG_TYPE_VIDEO;
ptVideoTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
ptVideoTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
ptVideoTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
ptVideoTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
ptVideoTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
ptVideoTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
ptVideoTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
ptVideoTag->flvheader.StreamID[0] = 0;
ptVideoTag->flvheader.StreamID[1] = 0;
ptVideoTag->flvheader.StreamID[2] = 0;
if(isIDRNalu)
{
ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_KEYFRAME << 4) |\
(VIDEOTAG_CODECID_AVC)); /* 0x17, keyframe, avc */
}
else
{
ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_INTER_FRAME << 4) |\
(VIDEOTAG_CODECID_AVC)); /* 0x27, inter frame, avc */
}
/* next for AVCVIDEOPACKET */
ptVideoTag->flvdata[1] = AVC_PACKET_TYPE_NALU; /* AVCPacketType: 1, NALU */
ptVideoTag->flvdata[2] = 0x00; /* CompositionTime: CTS = 0, because no b'frame */
ptVideoTag->flvdata[3] = 0x00; /* CompositionTime: CTS = 0, because no b'frame */
ptVideoTag->flvdata[4] = 0x00; /* CompositionTime: CTS = 0, because no b'frame */
ptVideoTag->flvdata[5] = (uint8_t)((naluDataLen >> 24 & 0xFF));
ptVideoTag->flvdata[6] = (uint8_t)((naluDataLen >> 16 & 0xFF));
ptVideoTag->flvdata[7] = (uint8_t)((naluDataLen >> 8 & 0xFF));
ptVideoTag->flvdata[8] = (uint8_t)(naluDataLen & 0xFF);
memcpy(&ptVideoTag->flvdata[9], pNaluData, naluDataLen);
return 0;
}
static int generateAvcEndOfSequence(uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
PT_FlvTag ptVideoTag = NULL;
uint32_t dataSize = 0;
if(!pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
ptVideoTag = (PT_FlvTag)pOutData;
dataSize = 5; /* fixed, flvdata[0~4] */
*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;
ptVideoTag->flvheader.TagType = FLVTAG_TYPE_VIDEO;
ptVideoTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
ptVideoTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
ptVideoTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
ptVideoTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
ptVideoTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
ptVideoTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
ptVideoTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
ptVideoTag->flvheader.StreamID[0] = 0;
ptVideoTag->flvheader.StreamID[1] = 0;
ptVideoTag->flvheader.StreamID[2] = 0;
ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_KEYFRAME << 4) |\
(VIDEOTAG_CODECID_AVC)); /* 0x17, keyframe, avc */
/* next for AVCVIDEOPACKET */
ptVideoTag->flvdata[1] = AVC_PACKET_TYPE_END_OF_SEQUENCE; /* AVCPacketType: 2, AVC end of sequence header */
ptVideoTag->flvdata[2] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
ptVideoTag->flvdata[3] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
ptVideoTag->flvdata[4] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
return 0;
}
static int generateAacSequenceHeader(uint32_t timestamp_ms, PT_AdtsHeader adtsInfo, uint8_t *pOutData, uint32_t *pOutDataLen)
{
PT_FlvTag ptAudioTag = NULL;
uint32_t dataSize = 0;
soundSamplerateIndex sri;
soundTypeIndex sti;
aacSamplerateIndex asi;
aacChannelIndex aci;
if(!pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
switch(adtsInfo->sampling_freq_index)
{
case SFI_44100: sri = SRI_44KHZ;asi = AAC_44_1KHz; break;
case SFI_22050: sri = SRI_22KHZ;asi = AAC_22_05KHz; break;
case SFI_11025: sri = SRI_11KHZ;asi = AAC_11_025KHz;break;
default:
printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
return -1;
}
switch(adtsInfo->channel_configuration)
{
case 1: sti = STI_MONO; aci = AAC_CHANNEL_MONO; break;
case 2: sti = STI_STEREO; aci = AAC_CHANNEL_STEREO; break;
default:
printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
return -1;
}
ptAudioTag = (PT_FlvTag)pOutData;
dataSize = 4; /* fixed, flvdata[0~3] */
*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;
ptAudioTag->flvheader.TagType = FLVTAG_TYPE_AUDIO;
ptAudioTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
ptAudioTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
ptAudioTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
ptAudioTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
ptAudioTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
ptAudioTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
ptAudioTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
ptAudioTag->flvheader.StreamID[0] = 0;
ptAudioTag->flvheader.StreamID[1] = 0;
ptAudioTag->flvheader.StreamID[2] = 0;
ptAudioTag->flvdata[0] = (SFI_AAC << 4) |\
(sri << 2) |\
(SSI_16BIT << 1) |\
(sti);
ptAudioTag->flvdata[1] = AAC_PACKET_TYPE_SEQUENCE_HEADER;
ptAudioTag->flvdata[2] = (AAC_LC << 3) |\
((asi >> 1) & 0x7);
ptAudioTag->flvdata[3] = ((asi & 0x1) << 7) |\
(aci << 3);
return 0;
}
static int generateAacRaw(uint8_t *pAtdsRawData, PT_AdtsHeader pAdtsInfo, uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
PT_FlvTag ptAudioTag = NULL;
uint32_t dataSize = 0;
soundSamplerateIndex sri;
soundTypeIndex sti;
if(!pAtdsRawData || !pAdtsInfo || !pOutData || !pOutDataLen)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
switch(pAdtsInfo->sampling_freq_index)
{
case SFI_44100: sri = SRI_44KHZ; break;
case SFI_22050: sri = SRI_22KHZ; break;
case SFI_11025: sri = SRI_11KHZ; break;
default:
printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
return -1;
}
switch(pAdtsInfo->channel_configuration)
{
case 1: sti = STI_MONO; break;
case 2: sti = STI_STEREO; break;
default:
printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
return -1;
}
ptAudioTag = (PT_FlvTag)pOutData;
dataSize = 2 + (pAdtsInfo->aac_frame_length - AAC_ADTS_HEADER_SIZE);
*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;
ptAudioTag->flvheader.TagType = FLVTAG_TYPE_AUDIO;
ptAudioTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
ptAudioTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
ptAudioTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
ptAudioTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
ptAudioTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
ptAudioTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
ptAudioTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
ptAudioTag->flvheader.StreamID[0] = 0;
ptAudioTag->flvheader.StreamID[1] = 0;
ptAudioTag->flvheader.StreamID[2] = 0;
ptAudioTag->flvdata[0] = (SFI_AAC << 4) |\
(sri << 2) |\
(SSI_16BIT << 1) |\
(sti);
ptAudioTag->flvdata[1] = AAC_PACKET_TYPE_RAW;
memcpy(&ptAudioTag->flvdata[2], pAtdsRawData, pAdtsInfo->aac_frame_length - AAC_ADTS_HEADER_SIZE);
return 0;
}
int flv_mux_h264_aac(char *h264FileName, uint32_t vFps, char *aacFileName, char *flvFileName)
{
FILE *fpH264 = NULL;
FILE *fpAAC = NULL;
FILE *fpFLV = NULL;
uint8_t *h264Buf = NULL;
uint8_t *aacBuf = NULL;
uint8_t *flvBuf = NULL;
uint32_t flvBufLen = 0;
uint64_t timeStamp_ms = 0; // the timestamp is start from 0 in flv file.
uint8_t spsBuf[1024] = {0}; // note!!!: it maybe happen the 'Segmentation fault', as 1024 is too long for 'sps', but it maybe save the 'sei' or other unused data if first nalu isn't 'sps'.
uint8_t ppsBuf[64] = {0};
uint32_t previousTagSize = 0;
uint32_t videoFps = vFps;
uint32_t audioFps = -1; // calcurate bihind.
int ret = -1;
T_NaluInfo spsNaluInfo = {};
T_NaluInfo ppsNaluInfo = {};
T_NaluInfo naluInfo = {};
T_AdtsHeader adtsHeader = {};
if(!h264FileName || !vFps || !aacFileName || !flvFileName)
{
printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
return -1;
}
/* open file */
fpH264 = fopen(h264FileName, "rb");
if (!fpH264)
{
printf("open %s error!\n", h264FileName);
goto exit;
}
fpAAC = fopen(aacFileName, "rb");
if (!fpAAC)
{
printf("open %s error!\n", aacFileName);
goto exit;
}
fpFLV = fopen(flvFileName, "wb");
if (!fpFLV)
{
printf("open %s error!\n", flvFileName);
goto exit;
}
/* alloc memory */
h264Buf = (uint8_t *)malloc(MAX_NALU_SIZE);
if (!h264Buf)
{
printf("malloc error!\n");
goto exit;
}
aacBuf = (uint8_t *)malloc(MAX_ADTS_SIZE);
if (!aacBuf)
{
printf("malloc error!\n");
goto exit;
}
flvBuf = (uint8_t *)malloc(MAX_FLV_BUF_SIZE);
if (!flvBuf)
{
printf("malloc error!\n");
goto exit;
}
/* parse AAC-ADTS */
ret = getAdtsFrame(fpAAC, aacBuf, &adtsHeader);
if(!ret)
{
fseek(fpAAC, 0, SEEK_SET); // reset
switch(adtsHeader.sampling_freq_index)
{
case SFI_96000: audioFps = 1000.0/(1024*1000/96000); break;
case SFI_88200: audioFps = 1000.0/(1024*1000/88200); break;
case SFI_64000: audioFps = 1000.0/(1024*1000/64000); break;
case SFI_48000: audioFps = 1000.0/(1024*1000/48000); break;
case SFI_44100: audioFps = 1000.0/(1024*1000/44100); break;
case SFI_32000: audioFps = 1000.0/(1024*1000/32000); break;
case SFI_24000: audioFps = 1000.0/(1024*1000/24000); break;
case SFI_22050: audioFps = 1000.0/(1024*1000/22050); break;
case SFI_16000: audioFps = 1000.0/(1024*1000/16000); break;
case SFI_12000: audioFps = 1000.0/(1024*1000/12000); break;
case SFI_11025: audioFps = 1000.0/(1024*1000/11025); break;
case SFI_8000: audioFps = 1000.0/(1024*1000/ 8000); break;
case SFI_7350: audioFps = 1000.0/(1024*1000/ 7350); break;
default: audioFps = -1; break;
}
DEBUG("AAC Info:\n"
"\t id: %d\n"
"\t profile: %d\n"
"\t freq index: %d\n"
"\t fps: %d\n"
"\t channels: %d\n",
adtsHeader.id, adtsHeader.profile,
adtsHeader.sampling_freq_index, audioFps,
adtsHeader.channel_configuration);
}
/* part 1/7: FLV Header */
generateFlvHeader(1, 1, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = 0; // 0, because there's no tag before
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
#if 1 //todo
/* part 2/7: Script Tag */
generateScriptTag(flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
#endif
/* part 3/7: Video Tag (AVC sequence header) */
while(1)
{
DEBUG("find sps now!\n");
ret = getOneH264Nalu(fpH264, spsBuf, &spsNaluInfo);
if (ret == -1)
{
printf("get h264 nalu failed!\n");
goto exit;
}
DEBUG("type = 0x%02x\n", spsBuf[spsNaluInfo.startcode_len]);
if(spsNaluInfo.nalu_type == NALU_TYPE_SPS)
{
DEBUG("had found sps, now find pps!\n");
ret = getOneH264Nalu(fpH264, ppsBuf, &ppsNaluInfo);
if (ret == -1)
{
printf("get h264 nalu failed!\n");
goto exit;
}
DEBUG("ppsBuf[%d] = 0x%02x\n", ppsNaluInfo.startcode_len, ppsBuf[ppsNaluInfo.startcode_len]);
if(ppsNaluInfo.nalu_type == NALU_TYPE_PPS)
{
DEBUG("had found pps\n");
break;
}
}
else
{
DEBUG("sps not found, continue!\n");
/* find next NALU and judge */
continue;
}
}
generateAvcSequenceHeader(spsBuf + spsNaluInfo.startcode_len, spsNaluInfo.data_len - spsNaluInfo.startcode_len,
ppsBuf + ppsNaluInfo.startcode_len, ppsNaluInfo.data_len - ppsNaluInfo.startcode_len,
timeStamp_ms/* =0 */, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
/* part 4/7: Audio Tag (Audio sequence header) */
generateAacSequenceHeader(timeStamp_ms/* =0 */, &adtsHeader, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
#if 0
/* just for debug !!! */
ret = getOneH264Nalu(fpH264, h264Buf, &naluInfo);
if (ret < 0)
{
printf("getOneH264Nalu error!\n");
}
generateAvcNALU(h264Buf + naluInfo.startcode_len, naluInfo.data_len - naluInfo.startcode_len, 1, timeStamp_ms, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
ret = getAdtsFrame(fpAAC, aacBuf, &adtsHeader);
if (ret < 0)
{
printf("getAdtsFrame error!\n");
}
generateAacRaw(aacBuf+AAC_ADTS_HEADER_SIZE, &adtsHeader, timeStamp_ms, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
#else
while(1)
{
if (timeStamp_ms % (1000/videoFps) == 0)
{
/* part 5/7: Video Tag (AVC NALU) */
do
{
ret = getOneH264Nalu(fpH264, h264Buf, &naluInfo);
if (ret < 0)
{
if(ret == -2)
DEBUG("h264 file end!\n");
else
printf(RED"getOneH264Nalu error!\n"COLOR_END);
goto mux_end;
}
DEBUG(GREEN"[video] get one H.264 NALU(0x%02X) with length: %d\n"COLOR_END, h264Buf[naluInfo.startcode_len], naluInfo.data_len);
}while((naluInfo.nalu_type != NALU_TYPE_IDR) &&\
(naluInfo.nalu_type != NALU_TYPE_SLICE));
if (naluInfo.nalu_type == NALU_TYPE_IDR)
{
generateAvcNALU(h264Buf + naluInfo.startcode_len, naluInfo.data_len - naluInfo.startcode_len, 1, timeStamp_ms, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
}
else
{
generateAvcNALU(h264Buf + naluInfo.startcode_len, naluInfo.data_len - naluInfo.startcode_len, 0, timeStamp_ms, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
}
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
}
if (timeStamp_ms % (1000/audioFps) == 0)
{
/* part 6/7: Audio Tag (AAC raw) */
ret = getAdtsFrame(fpAAC, aacBuf, &adtsHeader);
if (ret < 0)
{
if(ret == -2)
DEBUG("aac file end!\n");
else
printf(RED"getAdtsFrame error!\n"COLOR_END);
goto mux_end;
}
DEBUG(RED"[audio] get one AAC-ADTS frame with length: %d\n"COLOR_END, adtsHeader.aac_frame_length);
generateAacRaw(aacBuf+AAC_ADTS_HEADER_SIZE, &adtsHeader, timeStamp_ms, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
}
timeStamp_ms++;
}
#endif
mux_end:
/* part 7/7: Video Tag (AVC end of sequence) */
generateAvcEndOfSequence(timeStamp_ms, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
previousTagSize = flvBufLen;
generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
fwrite(flvBuf, flvBufLen, 1, fpFLV);
exit:
if(flvBuf) free(flvBuf);
if(h264Buf) free(h264Buf);
if(aacBuf) free(aacBuf);
if(fpH264) fclose(fpH264);
if(fpAAC) fclose(fpAAC);
if(fpFLV) {fflush(fpFLV); fclose(fpFLV);}
return 0;
}
main.c
c
#include <stdio.h>
#include "flv.h"
int main(int argc, char *argv[])
{
if(argc == 1)
{
printf("Usage: \n"
" %s avfile/test1_856x480_24fps.h264 24 avfile/test1_44100_stereo.aac out1.flv\n"
" %s avfile/test2_960x544_25fps.h264 25 avfile/test2_44100_mono.aac out2.flv\n",
argv[0], argv[0]);
return -1;
}
flv_mux_h264_aac(argv[1], atoi(argv[2]), argv[3], argv[4]);
printf("\e[32mSuccess!\n\e[0m");
return 0;
}