音视频封装demo:将h264数据和aac数据封装(mux)成FLV文件(纯手工,不依赖第三方开源库)

1、README

前言

注意:Adobe 官方的 FLV 规范并未定义 H.265 的封装方式,因此本 demo 只处理 H.264。

a. demo使用
bash 复制代码
$ make clean && make DEBUG=1
$
$ ./flv_mux_h264_aac
Usage:
   ./flv_mux_h264_aac avfile/test1_856x480_24fps.h264 24 avfile/test1_44100_stereo.aac out1.flv
   ./flv_mux_h264_aac avfile/test2_960x544_25fps.h264 25 avfile/test2_44100_mono.aac   out2.flv
b. 参考链接

【参考文章】

【参考源码】

【工具下载】

c. demo目录架构
bash 复制代码
$ tree
.
├── aac_adts.c
├── aac_adts.h
├── avfile
│   ├── out1.flv
│   ├── out2.flv
│   ├── test1_44100_stereo.aac
│   ├── test1_856x480_24fps.h264
│   ├── test2_44100_mono.aac
│   └── test2_960x544_25fps.h264
├── docs
│   ├── FLV封装格式介绍及解析 - 简书.mhtml
│   ├── FLV格式详解_JT同学的博客-CSDN博客_flv格式.mhtml
│   ├── 音视频封装:FLV格式详解和打包H264、AAC方案(上) - 云+社区 - 腾讯云.mhtml
│   └── 音视频封装:FLV格式详解和打包H264、AAC方案(下) - 云+社区 - 腾讯云.mhtml
├── flv.c
├── flv_format.h
├── flv.h
├── h264_nalu.c
├── h264_nalu.h
├── main.c
├── Makefile
├── README.md
├── reference_code
│   ├── flvmuxer-master.zip
│   └── H.264toFLV-master.zip
└── tools
    ├── FlvParse.exe
    ├── H264BSAnalyzer.exe
    └── SpecialAAAC.exe

2、主要代码片段

flv_format.h
c 复制代码
/***************************************************************
 * describe: Flv file format description(Mainly for H.264 & AAC)
 * author: linriming
 * e-mail: linriming20@163.com
 ***************************************************************/

#ifndef __FLV_FORMAT_H__
#define __FLV_FORMAT_H__


#include <stdint.h>


/*
 * Build the audio/video presence bits of the FLV header's type_flag byte.
 * The argument is parenthesized so that expressions such as `flags & 1`
 * are shifted as a whole instead of being re-associated by precedence.
 */
#define AUDIO_SUPPORT(x)    ((x) << 2)    /* bit[2] in flvheader's type_flag */
#define VIDEO_SUPPORT(x)    ((x) << 0)    /* bit[0] in flvheader's type_flag */


/* Byte sizes of the fixed-length pieces of an FLV file, derived from the packed types. */
#define SIZE_FLV_HEADER         (sizeof(struct flvHeader))      /* file header: 9 bytes */
#define SIZE_FLV_TAG_HEADER     (sizeof(struct flvTagHeader))   /* per-tag header: 11 bytes */
#define SIZE_PREVIOUS_TAG_SIZE  (sizeof(uint32_t))              /* back-pointer field: 4 bytes */


/* FLV tag type: the value written into the TagType byte of a tag header. */
typedef enum{
	FLVTAG_TYPE_AUDIO  = 8,     /* 0x08: audio tag */
	FLVTAG_TYPE_VIDEO  = 9,     /* 0x09: video tag */
	FLVTAG_TYPE_SCRIPT = 18,    /* 0x12: script (metadata) tag */
}flvTagType;


/*
 * AMF value type markers used inside a script tag.
 * Note: no enumerator is defined here for the value 0x04.
 */
typedef enum{
    AMF_DATA_TYPE_NUMBER      = 0,
    AMF_DATA_TYPE_BOOL        = 1,
    AMF_DATA_TYPE_STRING      = 2,
    AMF_DATA_TYPE_OBJECT      = 3,
    AMF_DATA_TYPE_NULL        = 5,
    AMF_DATA_TYPE_UNDEFINED   = 6,
    AMF_DATA_TYPE_REFERENCE   = 7,
    AMF_DATA_TYPE_MIXEDARRAY  = 8,
    AMF_DATA_TYPE_OBJECT_END  = 9,
    AMF_DATA_TYPE_ARRAY       = 10,
    AMF_DATA_TYPE_DATE        = 11,
    AMF_DATA_TYPE_LONG_STRING = 12,
    AMF_DATA_TYPE_UNSUPPORTED = 13,
} amfDataType;


/* audio tag */
/*
 * SoundFormat: codec identifier stored in the upper 4 bits of the first
 * byte of audio tag data.
 */
typedef enum{
	SFI_LINEAR_PCM_PLATFORM_ENDIAN = 0,
	SFI_ADPCM                      = 1,
	SFI_MP3                        = 2,
	SFI_LINEAR_PCM_LITTLE_ENDIAN   = 3,
	SFI_NELLYMOSER_16KHZ_MONO      = 4,
	SFI_NELLYMOSER_8KHZ_MONO       = 5,
	SFI_NELLYMOSER                 = 6,
	SFI_G711A                      = 7,
	SFI_G711MU                     = 8,
	SFI_RESERVED                   = 9,
	SFI_AAC                        = 10,
	SFI_SPEEX                      = 11,
	SFI_MP3_8KHZ                   = 14,
	SFI_DEVICE_SPECIFIC_SOUND      = 15,
	SFI_DEVIVE_SPECIFIC_SOUND      = SFI_DEVICE_SPECIFIC_SOUND, /* misspelled legacy name, kept for existing users */
}soundFormatIndex;


/* SoundRate: 2-bit sample-rate code placed at bits [3:2] of the first audio data byte. */
typedef enum{
	SRI_5_5KHZ = 0x0,
	SRI_11KHZ  = 0x1,
	SRI_22KHZ  = 0x2,
	SRI_44KHZ  = 0x3,
}soundSamplerateIndex;


/* SoundSize: 1-bit sample-width code placed at bit [1] of the first audio data byte. */
typedef enum{
	SSI_8BIT  = 0x0,
	SSI_16BIT = 0x1,
}soundSizeIndex;


/* SoundType: 1-bit channel code placed at bit [0] of the first audio data byte. */
typedef enum{
	STI_MONO   = 0x0,
	STI_STEREO = 0x1,
}soundTypeIndex;



/* AACPacketType: second byte of AAC audio tag data. */
#define  AAC_PACKET_TYPE_SEQUENCE_HEADER    (0x00)  /* payload is the AAC sequence header */
#define  AAC_PACKET_TYPE_RAW                (0x01)  /* payload is raw AAC frame data */

/* AAC profile code written into the top 5 bits of the AAC sequence header payload. */
typedef enum{
	AAC_MAIN = 0x1,
	AAC_LC   = 0x2,
	AAC_SSR  = 0x3,
}aacProfileIndex;

/*
 * AAC sampling-frequency index written into the AAC sequence header payload
 * (4 bits, split across two bytes by generateAacSequenceHeader).
 */
typedef enum{
	AAC_96KHz     = 0x0,
	AAC_88_2KHz   = 0x1,
	AAC_64KHz     = 0x2,
	AAC_48KHz     = 0x3,
	AAC_44_1KHz   = 0x4,
	AAC_32KHz     = 0x5,
	AAC_24KHz     = 0x6,
	AAC_22_05KHz  = 0x7,
	AAC_16KHz     = 0x8,
	AAC_12KHz     = 0x9,
	AAC_11_025KHz = 0xa,
	AAC_8KHz      = 0xb,
	AAC_7_35KHz   = 0xc,  /* MPEG-4 Audio assigns index 12 to 7350 Hz */
	AAC_RESERVED  = 0xc,  /* legacy name kept for compatibility; 0xc is not actually reserved */
}aacSamplerateIndex;

/* AAC channel-configuration code written into the AAC sequence header payload. */
typedef enum{
	AAC_CHANNEL_SPECIAL = 0,
	AAC_CHANNEL_MONO    = 1,
	AAC_CHANNEL_STEREO  = 2,
	AAC_CHANNEL_3       = 3,
	AAC_CHANNEL_4       = 4,
	AAC_CHANNEL_5       = 5,
	AAC_CHANNEL_5_1     = 6,
	AAC_CHANNEL_7_1     = 7,
	AAC_CHANNELRESERVED = 8,
}aacChannelIndex;



/* AVCPacketType: second byte of AVC video tag data. */
#define  AVC_PACKET_TYPE_SEQUENCE_HEADER    (0x00)  /* payload is the AVC sequence header */
#define  AVC_PACKET_TYPE_NALU               (0x01)  /* payload is length-prefixed NALU data */
#define  AVC_PACKET_TYPE_END_OF_SEQUENCE    (0x02)  /* no payload, marks end of sequence */

/* Video tag: frame type, stored in the upper 4 bits of the first video data byte. */
#define  VIDEOTAG_FRAMETYPE_KEYFRAME                 (0x1)
#define  VIDEOTAG_FRAMETYPE_INTER_FRAME              (0x2)
#define  VIDEOTAG_FRAMETYPE_DISPOSABLE_INTER_FRAME   (0x3)
#define  VIDEOTAG_FRAMETYPE_GENERATED_KEYFRAME       (0x4)
#define  VIDEOTAG_FRAMETYPE_VIDEO_INFO_FRAME         (0x5)

#define  VIDEOTAG_CODECID_JPEG                       (1)
#define  VIDEOTAG_CODECID_SORENSON_H263              (2)
#define  VIDEOTAG_CODECID_SCREEN_VIDEO               (3)
#define  VIDEOTAG_CODECID_ON2_VP6                    (4)
#define  VIDEOTAG_CODECID_ON2_VP6_WITH_ALPHA_CHANNEL (5)
#define  VIDEOTAG_CODECID_SCREEN_VIDEO_VERSION_2     (6)
#define  VIDEOTAG_CODECID_AVC                        (7)



/*
 * On-disk layouts. All structs below are packed to 1-byte alignment so that
 * sizeof() matches the number of bytes written to the file.
 */
#pragma pack(push, 1)

/* FLV file header (first 9 bytes of the file). */
typedef struct flvHeader{
	uint8_t  signature[3]; 		/* signature bytes always 'F' 'L' 'V': 0x46 0x4C 0x56 */
	uint8_t  version; 			/* file version, always 0x01 */
	uint8_t  type_flag; 		/* bit[7:3] and bit[1] always 0, bit[2] for audio, bit[0] for video */
	uint32_t data_offset; 		/* size of header, 00 00 00 09(big-endian) for version 1 */
}T_FlvHeader, *PT_FlvHeader;   	    /* 9 bytes totally */


/* Header that precedes the data of every tag. Multi-byte fields are big-endian byte arrays. */
typedef struct flvTagHeader{
	uint8_t TagType; 					/* Type of this tag. Values are 8(audio), 9(video), 18(script), other(reserved). */
	uint8_t DataSize[3]; 				/* Length of the data in the Data field. */
	uint8_t Timestamp[3]; 				/* Time in milliseconds at which the data in this tag applies. 0 in the first tag of the FLV file. */
	uint8_t TimestampExtended; 			/* Extension of the Timestamp field to form a 32-bit value; it is the upper 8 bits. */
	uint8_t StreamID[3]; 				/* Always 0 */
}T_FlvTagHeader, *PT_FlvTagHeader; 	/* 11 bytes total */


/* Generic tag view: header followed by a variable-length payload. */
typedef struct flvTag{
	T_FlvTagHeader flvheader; 	/* tag header */
	uint8_t flvdata[]; 			/* tag data (C99 flexible array member, contributes 0 bytes to sizeof) */
}T_FlvTag, *PT_FlvTag;


/* Same layout as T_FlvTag, named for AVC video packets. */
typedef struct avcVideoPacket{
	T_FlvTagHeader flvheader;
	uint8_t flvdata[]; 			/* flv tag data (C99 flexible array member) */
}T_AvcVideoPacket, *PT_AvcVideoPacket;


#pragma pack(pop)


#endif /* __FLV_FORMAT_H__ */
flv.c
c 复制代码
#include "h264_nalu.h"
#include "aac_adts.h"
#include "flv.h"


/*
 * Serialize the FLV file header into pOutData.
 *
 * hasVideo / hasAudio: non-zero to set the corresponding presence bit.
 * pOutData:            destination, must hold at least SIZE_FLV_HEADER bytes.
 * pOutDataLen:         receives the number of bytes written (SIZE_FLV_HEADER).
 *
 * Returns 0 on success, -1 if an output pointer is NULL.
 *
 * The header is written byte by byte, so the big-endian DataOffset field is
 * correct regardless of the host's byte order.
 */
static int generateFlvHeader(int hasVideo, int hasAudio, uint8_t *pOutData, uint32_t *pOutDataLen)
{
	const uint32_t headerLen = (uint32_t)SIZE_FLV_HEADER; /* 9 Bytes */
	/* Normalize to 0/1 so only bit[2] / bit[0] can ever be set. */
	const int audioBit = hasAudio ? 1 : 0;
	const int videoBit = hasVideo ? 1 : 0;

	if(!pOutData || !pOutDataLen)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	pOutData[0] = 'F';
	pOutData[1] = 'L';
	pOutData[2] = 'V';
	pOutData[3] = 0x01; /* version */
	pOutData[4] = (uint8_t)(AUDIO_SUPPORT(audioBit) | VIDEO_SUPPORT(videoBit));
	/* DataOffset: size of this header, stored big-endian. */
	pOutData[5] = (uint8_t)((headerLen >> 24) & 0xFF);
	pOutData[6] = (uint8_t)((headerLen >> 16) & 0xFF);
	pOutData[7] = (uint8_t)((headerLen >> 8) & 0xFF);
	pOutData[8] = (uint8_t)(headerLen & 0xFF);

	*pOutDataLen = headerLen;

	return 0;
}


/*
 * Serialize a PreviousTagSize field: `size` as a 4-byte big-endian integer.
 *
 * pOutData:    destination, at least 4 bytes.
 * pOutDataLen: receives SIZE_PREVIOUS_TAG_SIZE.
 *
 * Returns 0 on success, -1 if an output pointer is NULL.
 */
static int generatePreviousTagSize(uint32_t size, uint8_t *pOutData, uint32_t *pOutDataLen)
{
	int byteIdx = 0;

	if(pOutData == NULL || pOutDataLen == NULL)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	/* Most significant byte first. */
	for(byteIdx = 0; byteIdx < 4; byteIdx++)
	{
		pOutData[byteIdx] = (uint8_t)((size >> (8 * (3 - byteIdx))) & 0xFF);
	}

	*pOutDataLen = SIZE_PREVIOUS_TAG_SIZE;

	return 0;
}


/*
 * Build a minimal script tag ("onMetaData" followed by an empty mixed array)
 * into pOutData.
 *
 * pOutData:    destination for tag header + data.
 * pOutDataLen: receives the total number of bytes produced.
 *
 * Returns 0 on success, -1 if an output pointer is NULL.
 *
 * The array carries no members in this demo. A member would be appended
 * before the end marker as: 2-byte big-endian name length, the name bytes,
 * then a type marker and its value (e.g. AMF_DATA_TYPE_NUMBER followed by an
 * 8-byte big-endian double), with the array count raised accordingly.
 */
static int generateScriptTag(uint8_t *pOutData, uint32_t *pOutDataLen)
{
	static const char metaName[] = "onMetaData";
	const uint32_t metaNameLen = (uint32_t)(sizeof(metaName) - 1);
	const uint32_t arrayCount = 0; /* have no member in this demo! */
	PT_FlvTag pTag = NULL;
	uint8_t *pCursor = NULL;
	uint32_t payloadLen = 0;

	if(pOutData == NULL || pOutDataLen == NULL)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	pTag = (PT_FlvTag)pOutData;
	pCursor = pTag->flvdata;

	/* AMF1: string "onMetaData" (type marker, 16-bit length, bytes) */
	*pCursor++ = AMF_DATA_TYPE_STRING;
	*pCursor++ = (uint8_t)((metaNameLen >> 8) & 0xFF);
	*pCursor++ = (uint8_t)(metaNameLen & 0xFF);
	memcpy(pCursor, metaName, metaNameLen);
	pCursor += metaNameLen;

	/* AMF2: mixed array (type marker, 32-bit big-endian element count) */
	*pCursor++ = AMF_DATA_TYPE_MIXEDARRAY;
	*pCursor++ = (uint8_t)((arrayCount >> 24) & 0xFF);
	*pCursor++ = (uint8_t)((arrayCount >> 16) & 0xFF);
	*pCursor++ = (uint8_t)((arrayCount >>  8) & 0xFF);
	*pCursor++ = (uint8_t)((arrayCount >>  0) & 0xFF);

	/* end marker: empty name followed by the object-end type */
	*pCursor++ = 0x00;
	*pCursor++ = 0x00;
	*pCursor++ = AMF_DATA_TYPE_OBJECT_END;

	/* The payload length is only known now, so the header is filled last. */
	payloadLen = (uint32_t)(pCursor - pTag->flvdata);

	pTag->flvheader.TagType = FLVTAG_TYPE_SCRIPT;
	pTag->flvheader.DataSize[0] = (uint8_t)((payloadLen >> 16) & 0xFF);
	pTag->flvheader.DataSize[1] = (uint8_t)((payloadLen >> 8) & 0xFF);
	pTag->flvheader.DataSize[2] = (uint8_t)(payloadLen & 0xFF);
	memset(pTag->flvheader.Timestamp, 0, sizeof(pTag->flvheader.Timestamp));
	pTag->flvheader.TimestampExtended = 0;
	memset(pTag->flvheader.StreamID, 0, sizeof(pTag->flvheader.StreamID));

	*pOutDataLen = SIZE_FLV_TAG_HEADER + payloadLen;

	return 0;
}


/*
 * Build the video tag that carries the AVC sequence header
 * (AVCDecoderConfigurationRecord with one SPS and one PPS).
 *
 * spsBuf/spsLen: SPS NALU without start code; at least 4 bytes, because the
 *                profile/compatibility/level bytes are read from spsBuf[1..3].
 * ppsBuf/ppsLen: PPS NALU without start code.
 * timestamp_ms:  tag timestamp in milliseconds.
 * pOutData:      destination for tag header + data.
 * pOutDataLen:   receives the total number of bytes produced.
 *
 * Returns 0 on success, -1 on invalid parameters.
 */
static int generateAvcSequenceHeader(uint8_t *spsBuf, uint16_t spsLen, uint8_t *ppsBuf, uint16_t ppsLen,
									uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
	PT_FlvTag ptVideoTag = NULL;
	uint32_t dataSize = 0;

	if(!spsBuf || !spsLen || !ppsBuf || !ppsLen || !pOutData || !pOutDataLen)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}
	if(spsLen < 4)
	{
		/* spsBuf[1..3] are read below; a shorter SPS would be read out of bounds. */
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	ptVideoTag = (PT_FlvTag)pOutData;
	dataSize = (13 + spsLen) + (3 + ppsLen);
	*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;

	ptVideoTag->flvheader.TagType = FLVTAG_TYPE_VIDEO;
	ptVideoTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
	ptVideoTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
	ptVideoTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
	ptVideoTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
	ptVideoTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
	ptVideoTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
	ptVideoTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
	ptVideoTag->flvheader.StreamID[0] = 0;
	ptVideoTag->flvheader.StreamID[1] = 0;
	ptVideoTag->flvheader.StreamID[2] = 0;

	ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_KEYFRAME << 4) |\
							  (VIDEOTAG_CODECID_AVC)); /* 0x17, keyframe, avc */
	/* next for AVCVIDEOPACKET */
	ptVideoTag->flvdata[1] = AVC_PACKET_TYPE_SEQUENCE_HEADER; /* AVCPacketType: 0, AVC sequence header */
	ptVideoTag->flvdata[2] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
	ptVideoTag->flvdata[3] = 0x00;
	ptVideoTag->flvdata[4] = 0x00;
								   /* next for AVCDecoderConfigurationRecord */
	ptVideoTag->flvdata[5] = 0x01; /* ConfigurationVersion: always 0x01 */
	ptVideoTag->flvdata[6] = spsBuf[1]; /* AVCProfileIndication: the first byte after the 'nalu type' (buf does not include 'start code') */
	ptVideoTag->flvdata[7] = spsBuf[2]; /* profile_compatibility: the second byte after the 'nalu type' */
	ptVideoTag->flvdata[8] = spsBuf[3]; /* AVCLevelIndication: the third byte after the 'nalu type' */
	ptVideoTag->flvdata[9] = 0xFF; /* lengthSizeMinusOne: always 0xFF here, bit[7:2] '111111b' reserved; low bits 3 match the 4-byte NALU length written by generateAvcNALU */
	ptVideoTag->flvdata[10] = 0xE1; /* NumOfSequenceParameterSets: always 0xE1, bit[7:5] '111b' reserved, count = 1 */
	ptVideoTag->flvdata[11] = (uint8_t)((spsLen >> 8) & 0xFF); /* SequenceParameterSetLength: big-endian, H */
	ptVideoTag->flvdata[12] = (uint8_t)(spsLen & 0xFF); /* SequenceParameterSetLength: big-endian, L */
	memcpy(&ptVideoTag->flvdata[13], spsBuf, spsLen);

	ptVideoTag->flvdata[13+spsLen] = 0x01; /* NumOfPictureParameterSets: always 0x01 */
	ptVideoTag->flvdata[13+spsLen+1] = (uint8_t)((ppsLen >> 8) & 0xFF); /* PictureParameterSetLength: big-endian, H */
	ptVideoTag->flvdata[13+spsLen+2] = (uint8_t)(ppsLen & 0xFF); /* PictureParameterSetLength: big-endian, L */
	memcpy(&ptVideoTag->flvdata[13+spsLen+3], ppsBuf, ppsLen);

	return 0;
}


static int generateAvcNALU(uint8_t *pNaluData, uint32_t naluDataLen, uint32_t isIDRNalu, uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
	PT_FlvTag ptVideoTag = NULL;
	uint32_t dataSize = 0;

	if(!pNaluData || !naluDataLen || !pOutData || !pOutDataLen)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	ptVideoTag = (PT_FlvTag)pOutData;
	dataSize = 9 + naluDataLen;
	*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;

	ptVideoTag->flvheader.TagType = FLVTAG_TYPE_VIDEO;
	ptVideoTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
	ptVideoTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
	ptVideoTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
	ptVideoTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
	ptVideoTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
	ptVideoTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
	ptVideoTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
	ptVideoTag->flvheader.StreamID[0] = 0;
	ptVideoTag->flvheader.StreamID[1] = 0;
	ptVideoTag->flvheader.StreamID[2] = 0;

	if(isIDRNalu)
	{
		ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_KEYFRAME << 4) |\
									(VIDEOTAG_CODECID_AVC)); /* 0x17, keyframe, avc */
	}
	else
	{
		ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_INTER_FRAME << 4) |\
									(VIDEOTAG_CODECID_AVC)); /* 0x27, inter frame, avc */
	}
	/* next for AVCVIDEOPACKET */
	ptVideoTag->flvdata[1] = AVC_PACKET_TYPE_NALU; /* AVCPacketType: 1, NALU */
	ptVideoTag->flvdata[2] = 0x00; /* CompositionTime: CTS = 0, because no b'frame */
	ptVideoTag->flvdata[3] = 0x00; /* CompositionTime: CTS = 0, because no b'frame */
	ptVideoTag->flvdata[4] = 0x00; /* CompositionTime: CTS = 0, because no b'frame */

	ptVideoTag->flvdata[5] = (uint8_t)((naluDataLen >> 24 & 0xFF));
	ptVideoTag->flvdata[6] = (uint8_t)((naluDataLen >> 16 & 0xFF));
	ptVideoTag->flvdata[7] = (uint8_t)((naluDataLen >> 8 & 0xFF));
	ptVideoTag->flvdata[8] = (uint8_t)(naluDataLen & 0xFF);

	memcpy(&ptVideoTag->flvdata[9], pNaluData, naluDataLen);

	return 0;
}


static int generateAvcEndOfSequence(uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
	PT_FlvTag ptVideoTag = NULL;
	uint32_t dataSize = 0;

	if(!pOutData || !pOutDataLen)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	ptVideoTag = (PT_FlvTag)pOutData;
	dataSize = 5; /* fixed, flvdata[0~4] */
	*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;

	ptVideoTag->flvheader.TagType = FLVTAG_TYPE_VIDEO;
	ptVideoTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
	ptVideoTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
	ptVideoTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
	ptVideoTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
	ptVideoTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
	ptVideoTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
	ptVideoTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
	ptVideoTag->flvheader.StreamID[0] = 0;
	ptVideoTag->flvheader.StreamID[1] = 0;
	ptVideoTag->flvheader.StreamID[2] = 0;

	ptVideoTag->flvdata[0] = ((VIDEOTAG_FRAMETYPE_KEYFRAME << 4) |\
							  (VIDEOTAG_CODECID_AVC)); /* 0x17, keyframe, avc */
	/* next for AVCVIDEOPACKET */
	ptVideoTag->flvdata[1] = AVC_PACKET_TYPE_END_OF_SEQUENCE; /* AVCPacketType: 2, AVC end of sequence header */
	ptVideoTag->flvdata[2] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
	ptVideoTag->flvdata[3] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */
	ptVideoTag->flvdata[4] = 0x00; /* CompositionTime: AVCPacketType != 1, so it is 0, otherwise data[2~4] is CTS */

	return 0;
}


/*
 * Build the audio tag that carries the AAC sequence header (2-byte config
 * built from profile, sampling-frequency index and channel configuration).
 *
 * timestamp_ms: tag timestamp in milliseconds.
 * adtsInfo:     parsed ADTS header; only sampling_freq_index and
 *               channel_configuration are read. Must not be NULL.
 * pOutData:     destination for tag header + data.
 * pOutDataLen:  receives the total number of bytes produced.
 *
 * Returns 0 on success, -1 on NULL parameters or when the sample rate is not
 * 44100/22050/11025 Hz or the stream is neither mono nor stereo.
 *
 * NOTE(review): the profile is hard-coded to AAC_LC rather than taken from
 * adtsInfo; confirm the inputs are always AAC-LC.
 */
static int generateAacSequenceHeader(uint32_t timestamp_ms, PT_AdtsHeader adtsInfo, uint8_t *pOutData, uint32_t *pOutDataLen)
{
	PT_FlvTag ptAudioTag = NULL;
	uint32_t dataSize = 0;
	soundSamplerateIndex sri;
	soundTypeIndex sti;
	aacSamplerateIndex asi;
	aacChannelIndex aci;

	/* adtsInfo is dereferenced below, so it is validated together with the outputs. */
	if(!adtsInfo || !pOutData || !pOutDataLen)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	switch(adtsInfo->sampling_freq_index)
	{
		case SFI_44100: sri = SRI_44KHZ;asi = AAC_44_1KHz; 	break;
		case SFI_22050: sri = SRI_22KHZ;asi = AAC_22_05KHz; break;
		case SFI_11025: sri = SRI_11KHZ;asi = AAC_11_025KHz;break;
		default:
			printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
			return -1;
	}
	switch(adtsInfo->channel_configuration)
	{
		case 1: sti = STI_MONO; aci = AAC_CHANNEL_MONO; break;
		case 2: sti = STI_STEREO; aci = AAC_CHANNEL_STEREO; break;
		default:
			printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
			return -1;
	}

	ptAudioTag = (PT_FlvTag)pOutData;
	dataSize = 4; /* fixed, flvdata[0~3] */
	*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;

	ptAudioTag->flvheader.TagType = FLVTAG_TYPE_AUDIO;
	ptAudioTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
	ptAudioTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
	ptAudioTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
	ptAudioTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
	ptAudioTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
	ptAudioTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
	ptAudioTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
	ptAudioTag->flvheader.StreamID[0] = 0;
	ptAudioTag->flvheader.StreamID[1] = 0;
	ptAudioTag->flvheader.StreamID[2] = 0;

	/* byte 0: format(4 bits) | rate(2 bits) | size(1 bit) | type(1 bit) */
	ptAudioTag->flvdata[0] = (SFI_AAC << 4) |\
							 (sri << 2) |\
							 (SSI_16BIT << 1) |\
							 (sti);
	ptAudioTag->flvdata[1] = AAC_PACKET_TYPE_SEQUENCE_HEADER;
	/* bytes 2-3: profile(5 bits) | sampling index(4 bits) | channel config(4 bits) | 3 zero bits */
	ptAudioTag->flvdata[2] = (AAC_LC << 3) |\
							 ((asi >> 1) & 0x7);
	ptAudioTag->flvdata[3] = ((asi & 0x1) << 7) |\
							 (aci << 3);

	return 0;
}


/*
 * Build an audio tag that carries one raw AAC frame.
 *
 * pAtdsRawData: AAC payload, i.e. the ADTS frame with its header already skipped.
 * pAdtsInfo:    parsed ADTS header of that frame; aac_frame_length is the
 *               full frame length including the ADTS header.
 * timestamp_ms: tag timestamp in milliseconds.
 * pOutData:     destination for tag header + data.
 * pOutDataLen:  receives the total number of bytes produced.
 *
 * Returns 0 on success, -1 on NULL parameters, an unsupported sample rate or
 * channel layout, or a frame length shorter than the ADTS header.
 */
static int generateAacRaw(uint8_t *pAtdsRawData, PT_AdtsHeader pAdtsInfo, uint32_t timestamp_ms, uint8_t *pOutData, uint32_t *pOutDataLen)
{
	PT_FlvTag ptAudioTag = NULL;
	uint32_t dataSize = 0;
	uint32_t rawLen = 0;
	soundSamplerateIndex sri;
	soundTypeIndex sti;

	if(!pAtdsRawData || !pAdtsInfo || !pOutData || !pOutDataLen)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	/* A corrupt length would make the subtraction below wrap and the memcpy run away. */
	if(pAdtsInfo->aac_frame_length < AAC_ADTS_HEADER_SIZE)
	{
		printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
		return -1;
	}
	rawLen = pAdtsInfo->aac_frame_length - AAC_ADTS_HEADER_SIZE;

	switch(pAdtsInfo->sampling_freq_index)
	{
		case SFI_44100: sri = SRI_44KHZ; break;
		case SFI_22050: sri = SRI_22KHZ; break;
		case SFI_11025: sri = SRI_11KHZ; break;
		default:
			printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
			return -1;
	}
	switch(pAdtsInfo->channel_configuration)
	{
		case 1: sti = STI_MONO;   break;
		case 2: sti = STI_STEREO; break;
		default:
			printf("[%s:%d]: Params invaild!\n", __FUNCTION__, __LINE__);
			return -1;
	}

	ptAudioTag = (PT_FlvTag)pOutData;
	dataSize = 2 + rawLen; /* 2 = audio info byte + AACPacketType byte */
	*pOutDataLen = SIZE_FLV_TAG_HEADER + dataSize;

	ptAudioTag->flvheader.TagType = FLVTAG_TYPE_AUDIO;
	ptAudioTag->flvheader.DataSize[0] = (uint8_t)((dataSize >> 16 & 0xFF));
	ptAudioTag->flvheader.DataSize[1] = (uint8_t)((dataSize >> 8 & 0xFF));
	ptAudioTag->flvheader.DataSize[2] = (uint8_t)(dataSize & 0xFF);
	ptAudioTag->flvheader.Timestamp[0] = (uint8_t)((timestamp_ms >> 16) & 0xFF);
	ptAudioTag->flvheader.Timestamp[1] = (uint8_t)((timestamp_ms >> 8) & 0xFF);
	ptAudioTag->flvheader.Timestamp[2] = (uint8_t)((timestamp_ms) & 0xFF);
	ptAudioTag->flvheader.TimestampExtended = (uint8_t)((timestamp_ms >> 24) & 0xFF);
	ptAudioTag->flvheader.StreamID[0] = 0;
	ptAudioTag->flvheader.StreamID[1] = 0;
	ptAudioTag->flvheader.StreamID[2] = 0;

	/* byte 0: format(4 bits) | rate(2 bits) | size(1 bit) | type(1 bit) */
	ptAudioTag->flvdata[0] = (SFI_AAC << 4) |\
							 (sri << 2) |\
							 (SSI_16BIT << 1) |\
							 (sti);
	ptAudioTag->flvdata[1] = AAC_PACKET_TYPE_RAW;
	memcpy(&ptAudioTag->flvdata[2], pAtdsRawData, rawLen);

	return 0;
}


int flv_mux_h264_aac(char *h264FileName, uint32_t vFps, char *aacFileName, char *flvFileName)
{
	FILE *fpH264 = NULL;
	FILE *fpAAC  = NULL;
	FILE *fpFLV  = NULL;
	uint8_t *h264Buf = NULL;
	uint8_t *aacBuf = NULL;
	uint8_t *flvBuf = NULL;
	uint32_t flvBufLen = 0;
	uint64_t timeStamp_ms = 0; // the timestamp is start from 0 in flv file.
	uint8_t spsBuf[1024] = {0};  // note!!!: it maybe happen the 'Segmentation fault', as 1024 is too long for 'sps', but it maybe save the 'sei' or other unused data if first nalu isn't 'sps'.
	uint8_t ppsBuf[64] = {0};
	uint32_t previousTagSize = 0;
	uint32_t videoFps = vFps;
	uint32_t audioFps = -1; // calcurate bihind.
	int ret = -1;
	T_NaluInfo spsNaluInfo = {};
	T_NaluInfo ppsNaluInfo = {};
	T_NaluInfo naluInfo = {};
	T_AdtsHeader adtsHeader = {};

	if(!h264FileName || !vFps || !aacFileName || !flvFileName)
	{
		printf("[%s:%d] Params invalid!\n", __FUNCTION__, __LINE__);
		return -1;
	}

	/* open file */
	fpH264 = fopen(h264FileName, "rb");
	if (!fpH264)
	{
		printf("open %s error!\n", h264FileName);
		goto exit;
	}
	fpAAC  = fopen(aacFileName,  "rb");
	if (!fpAAC)
	{
		printf("open %s error!\n", aacFileName);
		goto exit;
	}
	fpFLV  = fopen(flvFileName,  "wb");
	if (!fpFLV)
	{
		printf("open %s error!\n", flvFileName);
		goto exit;
	}


	/* alloc memory */
	h264Buf = (uint8_t *)malloc(MAX_NALU_SIZE);
	if (!h264Buf)
	{
		printf("malloc error!\n");
		goto exit;
	}
	aacBuf = (uint8_t *)malloc(MAX_ADTS_SIZE);
	if (!aacBuf)
	{
		printf("malloc error!\n");
		goto exit;
	}
	flvBuf = (uint8_t *)malloc(MAX_FLV_BUF_SIZE);
	if (!flvBuf)
	{
		printf("malloc error!\n");
		goto exit;
	}


	/* parse AAC-ADTS */
	ret = getAdtsFrame(fpAAC, aacBuf, &adtsHeader);
	if(!ret)
	{
		fseek(fpAAC, 0, SEEK_SET); // reset
		switch(adtsHeader.sampling_freq_index)
		{
			case SFI_96000: audioFps = 1000.0/(1024*1000/96000); break;
			case SFI_88200: audioFps = 1000.0/(1024*1000/88200); break;
			case SFI_64000: audioFps = 1000.0/(1024*1000/64000); break;
			case SFI_48000: audioFps = 1000.0/(1024*1000/48000); break;
			case SFI_44100: audioFps = 1000.0/(1024*1000/44100); break;
			case SFI_32000: audioFps = 1000.0/(1024*1000/32000); break;
			case SFI_24000: audioFps = 1000.0/(1024*1000/24000); break;
			case SFI_22050: audioFps = 1000.0/(1024*1000/22050); break;
			case SFI_16000: audioFps = 1000.0/(1024*1000/16000); break;
			case SFI_12000: audioFps = 1000.0/(1024*1000/12000); break;
			case SFI_11025: audioFps = 1000.0/(1024*1000/11025); break;
			case SFI_8000:  audioFps = 1000.0/(1024*1000/ 8000); break;
			case SFI_7350:  audioFps = 1000.0/(1024*1000/ 7350); break;
			default:        audioFps = -1; break;
		}
		DEBUG("AAC Info:\n"
				"\t id: %d\n"
				"\t profile: %d\n"
				"\t freq index: %d\n"
				"\t fps: %d\n"
				"\t channels: %d\n",
				adtsHeader.id, adtsHeader.profile,
				adtsHeader.sampling_freq_index, audioFps,
				adtsHeader.channel_configuration);
	}


	/* part 1/7: FLV Header */
	generateFlvHeader(1, 1, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

	previousTagSize = 0;  // 0, because there's no tag before
	generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);


#if 1 //todo
	/* part 2/7: Script Tag */
	generateScriptTag(flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

	previousTagSize = flvBufLen;
	generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);
#endif


	/* part 3/7: Video Tag (AVC sequence header) */
	while(1)
	{

		DEBUG("find sps now!\n");
		ret = getOneH264Nalu(fpH264, spsBuf, &spsNaluInfo);
		if (ret == -1)
		{
			printf("get h264 nalu failed!\n");
			goto exit;
		}

		DEBUG("type = 0x%02x\n", spsBuf[spsNaluInfo.startcode_len]);
		if(spsNaluInfo.nalu_type == NALU_TYPE_SPS)
		{
			DEBUG("had found sps, now find pps!\n");
			ret = getOneH264Nalu(fpH264, ppsBuf, &ppsNaluInfo);
			if (ret == -1)
			{
				printf("get h264 nalu failed!\n");
				goto exit;
			}

			DEBUG("ppsBuf[%d] = 0x%02x\n", ppsNaluInfo.startcode_len, ppsBuf[ppsNaluInfo.startcode_len]);
			if(ppsNaluInfo.nalu_type == NALU_TYPE_PPS)
			{
				DEBUG("had found pps\n");
				break;
			}
		}
		else
		{
			DEBUG("sps not found, continue!\n");
			/* find next NALU and judge */
			continue;
		}

	}
	generateAvcSequenceHeader(spsBuf + spsNaluInfo.startcode_len, spsNaluInfo.data_len - spsNaluInfo.startcode_len,
							  ppsBuf + ppsNaluInfo.startcode_len, ppsNaluInfo.data_len - ppsNaluInfo.startcode_len,
							  timeStamp_ms/* =0 */, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

	previousTagSize = flvBufLen;
	generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);


	/* part 4/7: Audio Tag (Audio sequence header) */
	generateAacSequenceHeader(timeStamp_ms/* =0 */, &adtsHeader, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

	previousTagSize = flvBufLen;
	generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);


#if 0
	/* just for debug !!! */
	ret = getOneH264Nalu(fpH264, h264Buf, &naluInfo);
	if (ret < 0)
	{
		printf("getOneH264Nalu error!\n");
	}
	generateAvcNALU(h264Buf + naluInfo.startcode_len, naluInfo.data_len - naluInfo.startcode_len, 1, timeStamp_ms, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

	previousTagSize = flvBufLen;
	generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);


	ret = getAdtsFrame(fpAAC, aacBuf, &adtsHeader);
	if (ret < 0)
	{
		printf("getAdtsFrame error!\n");
	}

	generateAacRaw(aacBuf+AAC_ADTS_HEADER_SIZE, &adtsHeader, timeStamp_ms, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

	previousTagSize = flvBufLen;
	generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);
#else
	while(1)
	{
		if (timeStamp_ms % (1000/videoFps) == 0)
		{
			/* part 5/7: Video Tag (AVC NALU) */
			do
			{
				ret = getOneH264Nalu(fpH264, h264Buf, &naluInfo);
				if (ret < 0)
				{
					if(ret == -2)
						DEBUG("h264 file end!\n");
					else
						printf(RED"getOneH264Nalu error!\n"COLOR_END);
					goto mux_end;
				}
				DEBUG(GREEN"[video] get one H.264 NALU(0x%02X) with length: %d\n"COLOR_END, h264Buf[naluInfo.startcode_len], naluInfo.data_len);

			}while((naluInfo.nalu_type != NALU_TYPE_IDR) &&\
				   (naluInfo.nalu_type != NALU_TYPE_SLICE));

			if (naluInfo.nalu_type == NALU_TYPE_IDR)
			{
				generateAvcNALU(h264Buf + naluInfo.startcode_len, naluInfo.data_len - naluInfo.startcode_len, 1, timeStamp_ms, flvBuf, &flvBufLen);
				fwrite(flvBuf, flvBufLen, 1, fpFLV);
			}
			else
			{
				generateAvcNALU(h264Buf + naluInfo.startcode_len, naluInfo.data_len - naluInfo.startcode_len, 0, timeStamp_ms, flvBuf, &flvBufLen);
				fwrite(flvBuf, flvBufLen, 1, fpFLV);
			}

			previousTagSize = flvBufLen;
			generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
			fwrite(flvBuf, flvBufLen, 1, fpFLV);
		}

		if (timeStamp_ms % (1000/audioFps) == 0)
		{
			/* part 6/7: Audio Tag (AAC raw) */
			ret = getAdtsFrame(fpAAC, aacBuf, &adtsHeader);
			if (ret < 0)
			{
				if(ret == -2)
					DEBUG("aac file end!\n");
				else
					printf(RED"getAdtsFrame error!\n"COLOR_END);
				goto mux_end;
			}
			DEBUG(RED"[audio] get one AAC-ADTS frame with length: %d\n"COLOR_END, adtsHeader.aac_frame_length);

			generateAacRaw(aacBuf+AAC_ADTS_HEADER_SIZE, &adtsHeader, timeStamp_ms, flvBuf, &flvBufLen);
			fwrite(flvBuf, flvBufLen, 1, fpFLV);

			previousTagSize = flvBufLen;
			generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
			fwrite(flvBuf, flvBufLen, 1, fpFLV);
		}
		timeStamp_ms++;
	}
#endif

mux_end:
	/* part 7/7: Video Tag (AVC end of sequence) */
	generateAvcEndOfSequence(timeStamp_ms, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

	previousTagSize = flvBufLen;
	generatePreviousTagSize(previousTagSize, flvBuf, &flvBufLen);
	fwrite(flvBuf, flvBufLen, 1, fpFLV);

exit:
	if(flvBuf) free(flvBuf);
	if(h264Buf) free(h264Buf);
	if(aacBuf) free(aacBuf);
	if(fpH264) fclose(fpH264);
	if(fpAAC)  fclose(fpAAC);
	if(fpFLV)  {fflush(fpFLV); fclose(fpFLV);}

	return 0;
}
main.c
c 复制代码
#include <stdio.h>
#include <stdlib.h>

#include "flv.h"



/*
 * Command line entry point.
 *
 * Usage: <prog> <in.h264> <fps> <in.aac> <out.flv>
 *
 * Returns 0 when muxing succeeded, -1 on wrong usage, an invalid fps
 * argument, or a muxing failure.
 */
int main(int argc, char *argv[])
{
	const char *progName = (argc > 0 && argv[0]) ? argv[0] : "flv_mux_h264_aac";
	char *endPtr = NULL;
	unsigned long fps = 0;

	/* All four arguments are required; anything else would read past argv. */
	if(argc != 5)
	{
		printf("Usage: \n"
			   "   %s avfile/test1_856x480_24fps.h264 24 avfile/test1_44100_stereo.aac out1.flv\n"
			   "   %s avfile/test2_960x544_25fps.h264 25 avfile/test2_44100_mono.aac   out2.flv\n",
			   progName, progName);
		return -1;
	}

	/* Timestamps have millisecond resolution, so frame rates above 1000 are rejected. */
	fps = strtoul(argv[2], &endPtr, 10);
	if(endPtr == argv[2] || *endPtr != '\0' || fps == 0 || fps > 1000)
	{
		printf("invalid fps: %s\n", argv[2]);
		return -1;
	}

	if(flv_mux_h264_aac(argv[1], (uint32_t)fps, argv[3], argv[4]) != 0)
	{
		printf("\033[31mFailed!\n\033[0m");
		return -1;
	}

	printf("\033[32mSuccess!\n\033[0m");

	return 0;
}

3、demo下载地址(任选一个)

相关推荐
广东数字化转型19 小时前
JT808,JT1078 —— AAC编码 —— 部标机语音对讲Java实现
aac·h264·h265·g711a·部标机
課代表4 天前
Visual Basic.NET 的特性
多态·继承·封装·overload·vb·重载·visual basic
Sam Xiao5 天前
JT808,JT1078 —— AAC编码 —— 部标机语音对讲Java实现
aac·h264·h265·g711a·metro·部标机
DogDaoDao9 天前
OpenCV音视频编解码器详解
人工智能·opencv·音视频·视频编解码·h264·h265·音视频编解码
小狮子安度因2 个月前
AAC ADTS格式分析
网络·ffmpeg·aac
却道天凉_好个秋2 个月前
音视频学习(六十三):AVCC和HVCC
音视频·h264·h265·avcc·hvcc
一阵没来由的风3 个月前
拒绝造轮子(C#篇)ZLG CAN卡驱动封装应用
c#·can·封装·zlg·基础封装·轮子
DogDaoDao3 个月前
WebRTC音视频编码模块深度解析:从编解码器到自适应码率控制(2025技术实践)
音视频·webrtc·实时音视频·视频编解码·h264·vp9·svc编码
程序员老舅3 个月前
C++音视频开发:基础面试题
c++·ffmpeg·音视频·视频编码·h264·音视频编解码·视频解码
SY.ZHOU4 个月前
AAC编解码
音视频·aac