本文基于 MediaCodec + AudioRecord 实现一套可直接上线的 Android 底层音频编码框架,适用于一对一通话、会议、直播、教育硬件对讲等场景。核心能力:PCM 采集 → 回声消除 / 降噪 / 自动增益 → AAC 硬编码 → 输出带 ADTS 头的 AAC 裸流。
一、整体架构说明
这套 BaseAudioEncoder 是音频编码基类,采用标准音视频采集编码流程:
- AudioRecord 采集麦克风 PCM 数据
- 系统音效预处理(AEC 回声消除 + NS 噪声抑制 + AGC 自动增益)
- MediaCodec 硬编码为 AAC
- 添加 ADTS 头(必须!否则播放器无法解码)
- 回调输出 AAC 帧,供推流 / 传输 / 播放
二、核心成员变量解释
kotlin
// Single-threaded executor so encoding tasks run serially (avoids out-of-order frames)
private var mAudioExecutor = Executors.newSingleThreadExecutor()
// The three platform audio effects used for voice calls
private var mAcousticEchoCanceler: AcousticEchoCanceler? = null // acoustic echo cancellation
private var mNoiseSuppressor: NoiseSuppressor? = null // noise suppression
private var mAutomaticGainControl: AutomaticGainControl? = null // automatic gain control
// Capture and encoding parameters
private var mMicSampleRateInHz = -1
private val mAudioEncodeParam = AudioEncodeParam()
private var bufferSizeInBytes = 0
private var mAudioRecord: AudioRecord? = null // microphone capture
private var mAudioCodec: MediaCodec? = null // AAC encoder
// State control
@Volatile private var stopEncode = false // stop flag; @Volatile so a write is visible to the encode thread
open val mAudioSampleRate = 16000 // capture sample rate in Hz (16 kHz)
private val mAudioBitRate = 48000 // AAC bit rate in bits per second (48 kbps)
private var isMuted = false // mute switch
三、启动流程:startAudio ()
kotlin
// Entry point: prepares the encoder parameters, builds the capture/encode pipeline, then starts it.
override fun startAudio() {
initAudioEncodeParam() // set up the AAC encoding parameters
initAudio() // create AudioRecord + audio effects + MediaCodec
startRecord() // start the capture + encode loop
}
1)初始化麦克风参数(标准通话配置)
kotlin
// Builds the microphone capture configuration: 16-bit PCM, mono, 16 kHz, MIC source.
open fun initMicParam(): MicParam {
    val micParam = MicParam()
    micParam.audioFormat = ENCODING_PCM_16BIT // 16-bit PCM samples
    micParam.channelConfig = CHANNEL_IN_MONO // single input channel
    micParam.sampleRateInHz = 16000 // 16 kHz capture rate
    micParam.audioSource = MediaRecorder.AudioSource.MIC
    return micParam
}
2)初始化 AudioRecord + 三大音效
- AEC 回声消除:消除对方声音从喇叭传回麦克风的回声
- NS 噪声抑制:降低环境底噪、电流声、风声
- AGC 自动增益:声音小自动放大、声音大不爆音
kotlin
mAudioRecord = AudioRecord(
micParam.audioSource,
micParam.sampleRateInHz,
micParam.channelConfig,
micParam.audioFormat,
bufferSizeInBytes
).apply {
initAEC(audioSessionId) // 回声消除
initNoiseSuppressor(audioSessionId) // 降噪
initAGC(audioSessionId) // 自动音量
}
3)初始化 MediaCodec AAC 编码器
kotlin
mAudioCodec = MediaCodec.createEncoderByType(MediaFormat.MIMETYPE_AUDIO_AAC)
配置标准格式:
- 采样率:16000
- 声道:1(单声道)
- 码率:48kbps
- AAC Profile:LC(标准低复杂度,全平台兼容)
kotlin
val mAudioFormat = MediaFormat.createAudioFormat(
MIMETYPE_AUDIO_AAC,
sampleRateInHz,
channelCount
).apply {
setInteger(KEY_BIT_RATE, 48000)
setInteger(KEY_AAC_PROFILE, AACObjectLC)
}
四、核心编码循环:startRecord ()
这是真正干活的死循环,运行在单线程线程池。
步骤 1:读取 PCM 数据
kotlin
val readSize = audioRecord.read(inputBuffer, bufferSizeInBytes)
步骤 2:静音处理(填 0)
kotlin
if (isMuted && readSize > 0) {
for (i in 0 until readSize) inputBuffer.put(i, 0.toByte())
}
步骤 3:送入编码器
kotlin
mAudioCodec.queueInputBuffer(...)
步骤 4:取出编码后的 AAC 数据
kotlin
val audioOutputBufferIndex = mAudioCodec.dequeueOutputBuffer(audioInfo, 0)
五、最关键:ADTS 头添加
AAC 裸流无法播放,必须加 7 字节 ADTS 头!
kotlin
/**
 * Writes a 7-byte ADTS header into the first seven bytes of [packet].
 *
 * @param packet buffer whose bytes 0..6 receive the header; the AAC payload is expected from index 7
 * @param packetLen total frame length in bytes, header included
 * @throws IllegalStateException if the microphone sample rate has no ADTS sampling-frequency index
 */
private fun addADTStoPacket(packet: ByteArray, packetLen: Int) {
    val profile = 2 // AAC LC (MPEG-4 audio object type); the header stores profile - 1
    val chanCfg = 1 // mono
    // Fail with a descriptive message instead of a bare NullPointerException on an unknown rate.
    val freqIdx = checkNotNull(samplingFrequencyIndexMap[mMicSampleRateInHz]) {
        "No ADTS sampling frequency index for sample rate $mMicSampleRateInHz"
    }
    packet[0] = 0xFF.toByte() // syncword, high 8 bits
    packet[1] = 0xF9.toByte() // syncword low 4 bits, ID = 1, layer = 0, protection_absent = 1
    packet[2] = (((profile - 1) shl 6) + (freqIdx shl 2) + (chanCfg shr 2)).toByte()
    packet[3] = (((chanCfg and 3) shl 6) + (packetLen shr 11)).toByte()
    packet[4] = ((packetLen and 0x7FF) shr 3).toByte()
    packet[5] = (((packetLen and 7) shl 5) + 0x1F).toByte() // low 3 length bits + buffer fullness high bits
    packet[6] = 0xFC.toByte() // buffer fullness low bits, one raw data block per frame
}
作用:告诉解码器这一帧的采样率、声道、长度,没有它就会:
- 无声
- 杂音
- 爆破音
- 播放器无法识别
六、停止与释放(非常重要,避免崩溃 / 占用)
kotlin
// Stops the encode loop and releases the recorder, the codec and all three audio effects.
private fun release() {
    stopEncode = true
    mAudioExecutor.shutdown()
    mAudioRecord?.stop()
    mAudioRecord?.release()
    mAudioCodec?.stop()
    mAudioCodec?.release()
    // Release every audio effect that was created; the noise suppressor was previously leaked.
    mAcousticEchoCanceler?.release()
    mNoiseSuppressor?.release()
    mAutomaticGainControl?.release()
}
七、亮点
- 单线程编码:避免 PCM 数据乱序、爆音、卡顿
- 标准 16k / 单声道 / 48kbps:通话行业标准
- AEC+NS+AGC 全套预处理:通话体验接近微信 / 钉钉
- ADTS 头自动添加:输出可直接播放、推流、传输
- 支持实时静音:静音填 0,不中断编码
- 完全基于系统 API:无第三方库;AEC / NS / AGC 通过 isAvailable() 判断,设备不支持时自动跳过,不影响采集与编码
- 可继承扩展:基类封装,子类可重写采样率(mAudioSampleRate)与麦克风参数(initMicParam);码率与声道数目前在基类中固定为 48kbps / 单声道
kotlin
/**
 * Base audio encoder: captures PCM from the microphone with [AudioRecord], applies the platform
 * audio effects that the device reports as available (AEC / NS / AGC), encodes to AAC-LC with
 * [MediaCodec] and delivers every encoded frame, prefixed with a 7-byte ADTS header, to the
 * registered [OnEncodeListener].
 *
 * Lifecycle: [startAudio] builds the pipeline and starts the encode loop on a single worker
 * thread; [stopRecord] only ends the loop; [stopAudio] ends the loop and releases every resource.
 * After [stopAudio] the encoder can be started again with [startAudio].
 */
open class BaseAudioEncoder : IAudioEncoder {
    /** Worker that runs the encode loop; created on demand by [startRecord], shut down by [release]. */
    private var mAudioExecutor: java.util.concurrent.ExecutorService? = null
    private var mAcousticEchoCanceler: AcousticEchoCanceler? = null // acoustic echo canceler
    private var mNoiseSuppressor: NoiseSuppressor? = null // noise suppressor
    private var mAutomaticGainControl: AutomaticGainControl? = null // automatic gain control
    private var mMicSampleRateInHz = -1 // sample rate the microphone was opened with
    private val mAudioEncodeParam = AudioEncodeParam() // encoder parameters (mime type, bit rate, ...)
    private var bufferSizeInBytes = 0 // AudioRecord buffer size: twice the platform minimum
    private var mAudioRecord: AudioRecord? = null
    private var mAudioCodec: MediaCodec? = null

    /** Set to true to make the encode loop exit; written by callers, read by the worker thread. */
    @Volatile
    private var stopEncode = false
    open val mAudioSampleRate = 16000
    private val mAudioBitRate = 48000

    /** Written by [setMuted] on the caller's thread and read by the worker thread. */
    @Volatile
    private var isMuted = false
    private var seq = 0L // sequence number handed to the listener, incremented per delivered frame

    @Volatile
    private var mEncodeListener: OnEncodeListener? = null

    /** Initializes the encode parameters and the capture/encode pipeline, then starts encoding. */
    override fun startAudio() {
        initAudioEncodeParam()
        initAudio()
        startRecord()
    }

    /** Stops encoding and releases the recorder, the codec, the audio effects and the worker. */
    override fun stopAudio() {
        release()
    }

    /**
     * Builds the microphone parameters: 16-bit PCM, mono, [mAudioSampleRate], MIC source.
     * Subclasses may override to capture with a different configuration.
     */
    open fun initMicParam(): MicParam {
        val mMicParam = MicParam().apply {
            audioFormat = android.media.AudioFormat.ENCODING_PCM_16BIT
            channelConfig = android.media.AudioFormat.CHANNEL_IN_MONO
            sampleRateInHz = mAudioSampleRate
            audioSource = MediaRecorder.AudioSource.MIC
        }
        LogUtil.d(TAG, "initMicParam 初始化mic麦克风参数 mMicParam =$mMicParam")
        return mMicParam
    }

    /** Fills [mAudioEncodeParam] with the AAC mime type and the configured bit rate. */
    private fun initAudioEncodeParam() {
        mAudioEncodeParam.apply {
            audioMimeType = MediaFormat.MIMETYPE_AUDIO_AAC
            bitRate = mAudioBitRate
        }
        LogUtil.d(TAG, "initAudioEncodeParam 初始化音频编码参数 mAudioEncodeParam =$mAudioEncodeParam")
    }

    /**
     * Creates the [AudioRecord], enables the available audio effects on its session and creates
     * and configures the AAC [MediaCodec].
     *
     * If any step fails, everything created so far is released so no half-built pipeline is left
     * behind; [startRecord] then finds no codec and does nothing. An [IOException] from codec
     * creation is logged and swallowed; other exceptions propagate to the caller after cleanup.
     *
     * @param micParam microphone parameters, by default the result of [initMicParam]
     */
    private fun initAudio(micParam: MicParam = initMicParam()) {
        mMicSampleRateInHz = micParam.sampleRateInHz
        bufferSizeInBytes =
            2 * AudioRecord.getMinBufferSize(micParam.sampleRateInHz, micParam.channelConfig, micParam.audioFormat)
        LogUtil.d(TAG, "initAudioRecord bufferSizeInBytes =$bufferSizeInBytes")
        var ready = false
        try {
            val record = AudioRecord(
                micParam.audioSource,
                micParam.sampleRateInHz,
                micParam.channelConfig,
                micParam.audioFormat,
                bufferSizeInBytes
            )
            // Publish immediately so the finally block can release it on any later failure.
            mAudioRecord = record
            if (record.state != AudioRecord.STATE_INITIALIZED) {
                LogUtil.d(TAG, "initAudio AudioRecord is not initialized, state=${record.state}")
                return
            }
            val sessionId = record.audioSessionId
            initAEC(sessionId)
            initNoiseSuppressor(sessionId)
            initAGC(sessionId)
            LogUtil.d(TAG, msg = "initAudio 创建编码器${mAudioEncodeParam.audioMimeType}音频编码器,解码类型详见 MediaFormat")
            val codec = MediaCodec.createEncoderByType(mAudioEncodeParam.audioMimeType)
            mAudioCodec = codec
            codec.configure(
                initAudioFormat(mAudioEncodeParam.audioMimeType, CHANNEL_COUNT, mMicSampleRateInHz),
                null,
                null,
                MediaCodec.CONFIGURE_FLAG_ENCODE
            )
            ready = true
        } catch (e: IOException) {
            e.printStackTrace()
        } finally {
            if (!ready) {
                releaseAudioResources()
            }
        }
    }

    /**
     * Builds the encoder [MediaFormat].
     *
     * @param audioMimeType encoder mime type, e.g. [MediaFormat.MIMETYPE_AUDIO_AAC]
     * @param channelCount number of audio channels: 1 = mono, 2 = stereo
     * @param sampleRateInHz sample rate of the PCM fed to the encoder
     * @return format carrying the bit rate, the AAC-LC profile and the max input size
     */
    private fun initAudioFormat(audioMimeType: String, channelCount: Int, sampleRateInHz: Int): MediaFormat {
        val mAudioFormat = MediaFormat.createAudioFormat(audioMimeType, sampleRateInHz, channelCount).apply {
            setInteger(MediaFormat.KEY_BIT_RATE, mAudioEncodeParam.bitRate)
            setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC)
            setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, mAudioEncodeParam.maxInputSize)
        }
        LogUtil.d(TAG, "initAudioFormat 初始化音频格式 mAudioFormat =$mAudioFormat")
        return mAudioFormat
    }

    /**
     * Enables the acoustic echo canceler on the given audio session when the device offers one.
     *
     * @param audioSessionId session id of the recording [AudioRecord]
     */
    private fun initAEC(audioSessionId: Int) {
        if (!AcousticEchoCanceler.isAvailable()) {
            LogUtil.d(TAG, "initAEC 当前设备不支持回音消除")
            return
        }
        if (mAcousticEchoCanceler == null) {
            mAcousticEchoCanceler = AcousticEchoCanceler.create(audioSessionId)
        }
        mAcousticEchoCanceler?.enabled = true
        LogUtil.d(TAG, "initAEC 开启回音消除")
    }

    /**
     * Enables the noise suppressor on the given audio session when the device offers one.
     *
     * @param audioSessionId session id of the recording [AudioRecord]
     */
    private fun initNoiseSuppressor(audioSessionId: Int) {
        if (!NoiseSuppressor.isAvailable()) {
            LogUtil.d(TAG, "initNoiseSuppressor 当前设备不支持噪声抑制")
            return
        }
        if (mNoiseSuppressor == null) {
            mNoiseSuppressor = NoiseSuppressor.create(audioSessionId)
        }
        mNoiseSuppressor?.enabled = true
        LogUtil.d(TAG, "initNoiseSuppressor 开启噪声抑制")
    }

    /**
     * Enables automatic gain control on the given audio session when the device offers it.
     *
     * @param audioSessionId session id of the recording [AudioRecord]
     */
    private fun initAGC(audioSessionId: Int) {
        if (!AutomaticGainControl.isAvailable()) {
            LogUtil.d(TAG, "initAGC 当前设备不支持自动增益控制")
            return
        }
        if (mAutomaticGainControl == null) {
            mAutomaticGainControl = AutomaticGainControl.create(audioSessionId)
        }
        mAutomaticGainControl?.enabled = true
        LogUtil.d(TAG, "initAGC 开启自动增益控制")
    }

    /** While muted, captured PCM is overwritten with zeros before encoding; encoding continues. */
    open fun setMuted(muted: Boolean) {
        isMuted = muted
    }

    /** Registers the receiver of encoded frames; pass null to clear it. */
    fun setOnEncodeListener(listener: OnEncodeListener?) {
        mEncodeListener = listener
    }

    /**
     * Submits the capture/encode loop to the worker thread. Does nothing when the pipeline has
     * not been initialized (no codec or no recorder).
     */
    override fun startRecord() {
        val codec = mAudioCodec ?: return
        val record = mAudioRecord ?: return
        // Clear the flag before submitting so a stopRecord() issued right after this call is not
        // overwritten when the task eventually starts running.
        stopEncode = false
        val executor = mAudioExecutor?.takeUnless { it.isShutdown }
            ?: Executors.newSingleThreadExecutor().also { mAudioExecutor = it }
        executor.submit(Runnable { runEncodeLoop(codec, record) })
    }

    /**
     * Worker-thread body: starts the recorder and the codec, then alternates between feeding PCM
     * and draining AAC until [stopEncode] is set, a read fails, or an exception ends the loop.
     */
    private fun runEncodeLoop(codec: MediaCodec, record: AudioRecord) {
        try {
            record.startRecording()
            codec.start()
            val bufferInfo = MediaCodec.BufferInfo()
            while (!stopEncode) {
                if (!feedEncoder(codec, record)) {
                    break
                }
                drainEncoder(codec, bufferInfo)
            }
        } catch (e: Exception) {
            e.printStackTrace()
        }
    }

    /**
     * Reads one chunk of PCM from [record] into a codec input buffer and queues it.
     *
     * The dequeued input buffer is always handed back to the codec, even when the read fails,
     * so the codec never runs out of input buffers.
     *
     * @return false when [AudioRecord.read] reported an error and the loop should end
     */
    private fun feedEncoder(codec: MediaCodec, record: AudioRecord): Boolean {
        val inputIndex = codec.dequeueInputBuffer(INPUT_DEQUEUE_TIMEOUT_US)
        if (inputIndex < 0) {
            return true // no input buffer free yet; go drain the output side
        }
        val inputBuffer = codec.getInputBuffer(inputIndex)
        var readSize = -1
        if (inputBuffer != null) {
            // Never request more than the codec buffer can hold.
            readSize = record.read(inputBuffer, minOf(bufferSizeInBytes, inputBuffer.remaining()))
            if (isMuted && readSize > 0) {
                for (i in 0 until readSize) {
                    inputBuffer.put(i, 0.toByte())
                }
            }
        }
        // A negative size means the read failed: queue an empty buffer to return it to the codec.
        codec.queueInputBuffer(inputIndex, 0, maxOf(readSize, 0), System.nanoTime() / 1000, 0)
        if (readSize < 0) {
            LogUtil.d(TAG, "startRecord AudioRecord.read failed, readSize=$readSize")
            return false
        }
        return true
    }

    /**
     * Drains every output buffer that is ready right now and forwards AAC frames to
     * [addADTStoPacket]. Codec-config buffers and buffers of two bytes or fewer are skipped.
     */
    private fun drainEncoder(codec: MediaCodec, bufferInfo: MediaCodec.BufferInfo) {
        var outputIndex = codec.dequeueOutputBuffer(bufferInfo, 0)
        LogUtil.i(TAG, "startRecord audioOutputBufferIndex===$outputIndex")
        while (outputIndex >= 0) {
            try {
                val isCodecConfig = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0
                if (!isCodecConfig && bufferInfo.size > 2) {
                    codec.getOutputBuffer(outputIndex)?.let { outputBuffer ->
                        // Set limit before position so position never exceeds the current limit.
                        outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
                        outputBuffer.position(bufferInfo.offset)
                        addADTStoPacket(outputBuffer)
                    }
                }
            } finally {
                // Always return the buffer to the codec, even if the listener throws.
                codec.releaseOutputBuffer(outputIndex, false)
            }
            outputIndex = codec.dequeueOutputBuffer(bufferInfo, 0)
        }
    }

    /**
     * Copies the remaining bytes of [outputBuffer] behind a fresh ADTS header and hands the
     * frame to the encode listener. Nothing is delivered when the buffer is null or encoding has
     * been stopped.
     *
     * @param outputBuffer encoded AAC payload, positioned and limited to the frame bytes
     */
    private fun addADTStoPacket(outputBuffer: ByteBuffer?) {
        if (outputBuffer == null || stopEncode) {
            return
        }
        val payloadSize = outputBuffer.remaining()
        val dataBytes = ByteArray(ADTS_HEADER_SIZE + payloadSize)
        // Copy the payload directly behind the header slot, avoiding an intermediate array.
        outputBuffer.get(dataBytes, ADTS_HEADER_SIZE, payloadSize)
        addADTStoPacket(dataBytes, dataBytes.size)
        LogUtil.i(TAG, "startRecord audioEncoder dataBytes size===${dataBytes.size}")
        val listener = mEncodeListener
        if (listener == null) {
            LogUtil.d(TAG, "Encode listener is null, please set encode listener.")
            return
        }
        listener.onAudioEncoded(dataBytes, System.currentTimeMillis(), seq)
        seq++
    }

    /**
     * Writes the 7-byte ADTS header into the first seven bytes of [packet].
     *
     * @param packet frame buffer; bytes 0..6 receive the header, the AAC payload follows
     * @param packetLen total frame length in bytes, header included
     * @throws IllegalStateException if the microphone sample rate has no ADTS frequency index
     */
    private fun addADTStoPacket(packet: ByteArray, packetLen: Int) {
        // MPEG-4 audio object type for AAC LC; the header stores (object type - 1).
        val profile = AAC_LC_OBJECT_TYPE
        // Channel configuration 1 = single channel (mono), matching the encoder channel count.
        val chanCfg = CHANNEL_COUNT
        val freqIdx = checkNotNull(samplingFrequencyIndexMap[mMicSampleRateInHz]) {
            "No ADTS sampling frequency index for sample rate $mMicSampleRateInHz"
        }
        packet[0] = 0xFF.toByte() // syncword, high 8 bits
        packet[1] = 0xF9.toByte() // syncword low 4 bits, ID = 1, layer = 0, protection_absent = 1
        packet[2] = (((profile - 1) shl 6) + (freqIdx shl 2) + (chanCfg shr 2)).toByte()
        packet[3] = (((chanCfg and 3) shl 6) + (packetLen shr 11)).toByte()
        packet[4] = ((packetLen and 0x7FF) shr 3).toByte()
        packet[5] = (((packetLen and 7) shl 5) + 0x1F).toByte() // low 3 length bits + fullness high bits
        packet[6] = 0xFC.toByte() // buffer fullness low bits, one raw data block per frame
    }

    /**
     * Asks the encode loop to exit. The recorder and the codec are left as they are; they are
     * stopped and released by [stopAudio].
     */
    override fun stopRecord() {
        stopEncode = true
    }

    /** Stops the encode loop, shuts the worker down and releases every audio resource. */
    private fun release() {
        LogUtil.d(TAG, "release...")
        tryCatch<BaseAudioEncoder> {
            stopEncode = true
            mAudioExecutor?.let { executor ->
                // Drop the reference so the next startRecord() creates a fresh worker.
                mAudioExecutor = null
                releaseStep("executor.shutdown") { executor.shutdown() }
            }
            releaseAudioResources()
        }
    }

    /**
     * Stops and releases the recorder, the codec and all three audio effects, clearing each
     * field. Every step is guarded on its own so one failing call cannot skip the rest.
     */
    private fun releaseAudioResources() {
        mAudioRecord?.let { record ->
            mAudioRecord = null
            releaseStep("AudioRecord.stop") { record.stop() }
            releaseStep("AudioRecord.release") { record.release() }
        }
        mAudioCodec?.let { codec ->
            mAudioCodec = null
            releaseStep("MediaCodec.stop") { codec.stop() }
            releaseStep("MediaCodec.release") { codec.release() }
        }
        mAcousticEchoCanceler?.let { effect ->
            mAcousticEchoCanceler = null
            releaseStep("AcousticEchoCanceler.disable") { effect.enabled = false }
            releaseStep("AcousticEchoCanceler.release") { effect.release() }
        }
        mNoiseSuppressor?.let { effect ->
            mNoiseSuppressor = null
            releaseStep("NoiseSuppressor.disable") { effect.enabled = false }
            releaseStep("NoiseSuppressor.release") { effect.release() }
        }
        mAutomaticGainControl?.let { effect ->
            mAutomaticGainControl = null
            releaseStep("AutomaticGainControl.disable") { effect.enabled = false }
            releaseStep("AutomaticGainControl.release") { effect.release() }
        }
    }

    /** Runs one cleanup [action] and logs, instead of propagating, any exception it throws. */
    private fun releaseStep(what: String, action: () -> Unit) {
        try {
            action()
        } catch (e: Exception) {
            LogUtil.d(TAG, "release $what failed: $e")
        }
    }

    /** Session id of the current recorder, or [AudioManager.AUDIO_SESSION_ID_GENERATE] if none. */
    fun getAudioSessionId(): Int = mAudioRecord?.audioSessionId ?: AudioManager.AUDIO_SESSION_ID_GENERATE

    companion object {
        private const val TAG = "BaseAudioEncoder"

        /** Length of an ADTS header without CRC, in bytes. */
        private const val ADTS_HEADER_SIZE = 7

        /** MPEG-4 audio object type of AAC LC. */
        private const val AAC_LC_OBJECT_TYPE = 2

        /** Channel count used for both the encoder format and the ADTS channel configuration. */
        private const val CHANNEL_COUNT = 1

        /** How long to wait for a free codec input buffer, in microseconds. */
        private const val INPUT_DEQUEUE_TIMEOUT_US = 10_000L

        /**
         * Sample rate in Hz mapped to the MPEG-4 sampling frequency index used in ADTS headers.
         */
        val samplingFrequencyIndexMap: MutableMap<Int, Int> = hashMapOf(
            96000 to 0,
            88200 to 1,
            64000 to 2,
            48000 to 3,
            44100 to 4,
            32000 to 5,
            24000 to 6,
            22050 to 7,
            16000 to 8,
            12000 to 9,
            11025 to 10,
            8000 to 11,
            7350 to 12
        )
    }
}