iOS - 音频: Core Audio - 播放

环境

iOS 18

Xcode 16.3

swift-driver version: 1.120.5 Apple Swift version 6.1 (swiftlang-6.1.0.110.21 clang-1700.0.13.3) Target: x86_64-apple-macosx15.0

Core Audio 的架构

声音数据的描述

sample : 一个声道采样的值,采样率定义了每秒从连续 信号中提取并组成离散 信号的采样个数,它用赫兹(Hz)来表示。

frame : 一个时间常量内的sample 的集合 packet: 一个或多个连续的帧

AudioStreamBasicDescription

c 复制代码
// CoreAudioBaseTypes.h
// 音频的格式信息
struct AudioStreamBasicDescription
{
    Float64             mSampleRate;  // 采样率
    AudioFormatID       mFormatID;    // 采样格式类型
    AudioFormatFlags    mFormatFlags; // 采样格式标志位
    UInt32              mBytesPerPacket;   // 每个packet的字节数
    // 每个packet的frame数,在未压缩的音频中 mFramesPerPacket == 1 
    UInt32              mFramesPerPacket;
    UInt32              mBytesPerFrame;    // 每个frame的字节数
    UInt32              mChannelsPerFrame; // 每个frame的声道数
    UInt32              mBitsPerChannel;   // 每个通道的比特数
    UInt32              mReserved;         // 保留
};
typedef struct AudioStreamBasicDescription  AudioStreamBasicDescription;
c 复制代码
CF_ENUM(AudioFormatID)
{
    kAudioFormatLinearPCM               = 'lpcm',
    kAudioFormatAC3                     = 'ac-3',
    kAudioFormat60958AC3                = 'cac3',
    kAudioFormatAppleIMA4               = 'ima4',
    kAudioFormatMPEG4AAC                = 'aac',
    kAudioFormatMPEG4CELP               = 'celp',
    kAudioFormatMPEG4HVXC               = 'hvxc',
    kAudioFormatMPEG4TwinVQ             = 'twvq',
    kAudioFormatMACE3                   = 'MAC3',
    kAudioFormatMACE6                   = 'MAC6',
    kAudioFormatULaw                    = 'ulaw',
    kAudioFormatALaw                    = 'alaw',
    kAudioFormatQDesign                 = 'QDMC',
    kAudioFormatQDesign2                = 'QDM2',
    kAudioFormatQUALCOMM                = 'Qclp',
    kAudioFormatMPEGLayer1              = '.mp1',
    kAudioFormatMPEGLayer2              = '.mp2',
    kAudioFormatMPEGLayer3              = '.mp3',
    kAudioFormatTimeCode                = 'time',
    kAudioFormatMIDIStream              = 'midi',
    kAudioFormatParameterValueStream    = 'apvs',
    kAudioFormatAppleLossless           = 'alac',
    kAudioFormatMPEG4AAC_HE             = 'aach',
    kAudioFormatMPEG4AAC_LD             = 'aacl',
    kAudioFormatMPEG4AAC_ELD            = 'aace',
    kAudioFormatMPEG4AAC_ELD_SBR        = 'aacf',
    kAudioFormatMPEG4AAC_ELD_V2         = 'aacg',
    kAudioFormatMPEG4AAC_HE_V2          = 'aacp',
    kAudioFormatMPEG4AAC_Spatial        = 'aacs',
    kAudioFormatMPEGD_USAC              = 'usac',
    kAudioFormatAMR                     = 'samr',
    kAudioFormatAMR_WB                  = 'sawb',
    kAudioFormatAudible                 = 'AUDB',
    kAudioFormatiLBC                    = 'ilbc',
    kAudioFormatDVIIntelIMA             = 0x6D730011,
    kAudioFormatMicrosoftGSM            = 0x6D730031,
    kAudioFormatAES3                    = 'aes3',
    kAudioFormatEnhancedAC3             = 'ec-3',
    kAudioFormatFLAC                    = 'flac',
    kAudioFormatOpus                    = 'opus',
    kAudioFormatAPAC                    = 'apac',
};
c 复制代码
CF_ENUM(AudioFormatFlags)
{
    kAudioFormatFlagIsFloat                     = (1U << 0),     // 0x1
    kAudioFormatFlagIsBigEndian                 = (1U << 1),     // 0x2
    kAudioFormatFlagIsSignedInteger             = (1U << 2),     // 0x4
    kAudioFormatFlagIsPacked                    = (1U << 3),     // 0x8
    kAudioFormatFlagIsAlignedHigh               = (1U << 4),     // 0x10
    kAudioFormatFlagIsNonInterleaved            = (1U << 5),     // 0x20
    kAudioFormatFlagIsNonMixable                = (1U << 6),     // 0x40
    kAudioFormatFlagsAreAllClear                = 0x80000000,
	
    kLinearPCMFormatFlagIsFloat                 = kAudioFormatFlagIsFloat,
    kLinearPCMFormatFlagIsBigEndian             = kAudioFormatFlagIsBigEndian,
    kLinearPCMFormatFlagIsSignedInteger         = kAudioFormatFlagIsSignedInteger,
    kLinearPCMFormatFlagIsPacked                = kAudioFormatFlagIsPacked,
    kLinearPCMFormatFlagIsAlignedHigh           = kAudioFormatFlagIsAlignedHigh,
    kLinearPCMFormatFlagIsNonInterleaved        = kAudioFormatFlagIsNonInterleaved,
    kLinearPCMFormatFlagIsNonMixable            = kAudioFormatFlagIsNonMixable,
    kLinearPCMFormatFlagsSampleFractionShift    = 7,
    kLinearPCMFormatFlagsSampleFractionMask     = (0x3F << kLinearPCMFormatFlagsSampleFractionShift),
    kLinearPCMFormatFlagsAreAllClear            = kAudioFormatFlagsAreAllClear,
	
    kAppleLosslessFormatFlag_16BitSourceData    = 1,
    kAppleLosslessFormatFlag_20BitSourceData    = 2,
    kAppleLosslessFormatFlag_24BitSourceData    = 3,
    kAppleLosslessFormatFlag_32BitSourceData    = 4
};

About the name: Although this data type has "stream" in its name, you use it in every instance where you need to represent an audio data format in Core Audio---including in non-streamed, standard files. You could think of it as the "audio format basic description" data type. The "stream" in the name refers to the fact that audio formats come into play whenever you need to move (that is, stream) audio data around in hardware or software.

类型中的stream只是表示播放的概念,这里定义的就是iOS音频格式的描述

获取声音的格式信息
swift 复制代码
public func audioFormat() -> AudioStreamBasicDescription? {
        var size: UInt32 = 0
        var audioFile: AudioFileID? // 使用可选类型
        var status = AudioFileOpenURL(self.path as CFURL, .readPermission, 0, &audioFile)
        guard status == noErr else {
            print("*** Error *** filePath: \(self.path) -- code: \(status)")
            return nil
        }
        
        // 获取音频数据格式
        var dataFormat = AudioStreamBasicDescription()
        size = UInt32(MemoryLayout<AudioStreamBasicDescription>.size)
        
        status = AudioFileGetProperty(
            audioFile!,
            kAudioFilePropertyDataFormat,
            &size,
            &dataFormat
        )
        
        // 可选:检查属性获取是否成功
        guard status == noErr else {
            print("Failed to get data format: \(status)")
            AudioFileClose(audioFile!) // 关闭文件防止泄漏
            return nil
        }
        
        return dataFormat
}

我加载的是一个44.1kHz,双声道的mp3文件 778924083 = 0x2E6D7033 => ".mp3"

AudioStreamPacketDescription

c 复制代码
// CoreAudioBaseTypes.h
// 表示每个音频packet的数据
struct  AudioStreamPacketDescription {
    // For example, if the data buffer contains 5 bytes of data, with one byte per packet, then the start offset for the last packet is 4. There are 4 bytes in the buffer before the start of the last packet.
    // 偏移量
    // 比如缓冲区有5字节的数据,当每个packet包含1个字节,那最后一个包的这个值就是4
    SInt64  mStartOffset;
    UInt32  mVariableFramesInPacket; // frame的个数,如果是格式是CBR按常量编码的,这个值设置为0
    UInt32  mDataByteSize; // 字节数
};
typedef struct AudioStreamPacketDescription AudioStreamPacketDescription;

Proxy Objects,Properties, Scopes, Elements

名称 说明
Proxy Objects 音频文件,流的代码抽象
Properties 管理对象的状态或行为
Scopes 作用域:输入/输出
Elements 具体的设备,比如蓝牙,有线...

Properties的key一般是个常量值,比如 kAudioFilePropertyFileFormat或者kAudioQueueDeviceProperty_NumberChannels.

回调函数: 与Core Audio交互

Core Audio接口通过回调函数与应用交互,一般有下面几种场景

  • 音频数据传送到应用程序(例如用于录制;然后通过回调将新数据写入磁盘)
  • 从应用程序请求音频数据(例如用于播放;回调从磁盘/内存读取并提供数据)。
  • 通知应用程序音频对象已更改状态(回调会执行适当操作,比如更新界面)。

回调函数实现分两步: 注册 && 回调

相关框架

名称 说明
AVFoundation 提供AVAudioPlayer类,这是一个用于音频播放的简化的原生接口,还提供AVAudioEngine类,用于更复杂的音频处理
AudioToolbox 为Core Audio中的高级服务提供接口,包括Audio Session服务,用于在iOS管理应用程序的音频行为,允许应用程序使用音频插件,包括音频单元和编解码器
OpenAL 提供了与OpenAL配合使用的接口
CoreAudioTypes 提供Core Audio使用的数据类型以及低级服务的接口。

示例

播放音乐

使用AudioToolbox.framework

swift 复制代码
func play() {
        var format = self.audioFormat()
        guard format != nil else {
            return;
        }
        
        // 打开音频文件
        var status = AudioFileOpenURL(path as CFURL, .readPermission, 0, &audioFile)
        guard status == noErr else {
            return
        }
        
        let context = UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque())
        /**
        Audio Queue(音频队列)是 iOS 或 Mac OS X 中用来 录音 或 播放音频 的软件对象。它由 AudioQueueRef 数据类型表示,在 AudioQueue.h 中声明
        音频队列的工作是:

        * 连接音频硬件
        * 管理内存
        * 根据需要,为压缩音频格式使用编解码器
        * 调解录音或回放
        */
        // 创建输出队列,注册回调函数
        status = AudioQueueNewOutput(&format!, playbackCallback, context, nil, nil, 0, &queue)
        guard status == noErr else {
            print("Create new output queue error")
            return
        }
        
        status = AudioFileGetProperty(audioFile!, kAudioFilePropertyPacketSizeUpperBound, &size, &maxPacketSize)
        guard status == noErr else {
            print("Get maxPacketSize property error")
            return
        }
        
        // 计算缓冲区大小
        // For uncompressed audio, the value is 1. For variable bit-rate formats, the value is a larger fixed number, such as 1024 for AAC. For formats with a variable number of frames per packet, such as Ogg Vorbis, set this field to 0.
        // MP3的值是1152
        if format!.mFramesPerPacket != 0 {
            // 每秒的包的数量 = 采样率/每个包的帧数
            // 比如MP3每秒采样44100次,每个包有1152帧,两者相除就是每秒的包数量
            let numPacketsPerSecond = format!.mSampleRate / Double(format!.mFramesPerPacket)
            // 每秒的包的数量 * 包大小的理论峰值, 即1s内占用内存的峰值
            outBufferSize = UInt32(numPacketsPerSecond * Double(maxPacketSize))
        } else {
            outBufferSize = max(maxBufferSize, maxPacketSize)
        }
        
        // 调整缓冲区大小
        if outBufferSize > maxBufferSize && outBufferSize > maxPacketSize {
            outBufferSize = maxBufferSize
        } else if outBufferSize < minBufferSize {
            outBufferSize = minBufferSize
        }
        
        // 计算包数量并分配包描述数组
        // 每次读取的包的数量 = 缓冲区大小 / 包大小的理论峰值
        numPacketsToRead = outBufferSize / maxPacketSize
        packetDescs = UnsafeMutablePointer<AudioStreamPacketDescription>.allocate(capacity: Int(numPacketsToRead))
        packetDescs?.initialize(repeating: AudioStreamPacketDescription(), count: Int(numPacketsToRead))
        
        // 分配3个缓冲区
        for i in 0..<maxBufferNum {
            var buffer: AudioQueueBufferRef?
            /**
             Once allocated, the pointer to the buffer and the buffer's size are fixed and cannot be
             changed. The mAudioDataByteSize field in the audio queue buffer structure,
             AudioQueueBuffer, is initially set to 0.
             
             @param      inAQ
             // 分配缓冲区的音频队列
             The audio queue you want to allocate a buffer.
             @param      inBufferByteSize
             // 缓冲区大小
             The desired size of the new buffer, in bytes. An appropriate buffer size depends on the
             processing you will perform on the data as well as on the audio data format.
             @param      outBuffer
             // 指向新创建的缓冲区
             On return, points to the newly created audio buffer. The mAudioDataByteSize field in the
             audio queue buffer structure, AudioQueueBuffer, is initially set to 0.
             @result     An OSStatus result code.
             */
            AudioQueueAllocateBuffer(queue!, outBufferSize, &buffer)
            buffers[i] = buffer
            // 预先读maxBufferNum个缓冲区
            audioQueueOutput(audioQueue: queue!, queueBuffer: buffer!)
        }
        
        // 启动队列
        AudioQueueStart(queue!, nil)
    }

在回调方法中读取音频文件并将数据入队供系统的队列播放

swift 复制代码
private let playbackCallback: AudioQueueOutputCallback = {
        inUserData,          // UnsafeMutableRawPointer?
        inAQ,                // AudioQueueRef
        buffer               // AudioQueueBufferRef
        in
        
        // 1. 保护
        guard let inUserData = inUserData else { return }
        
        // 2. 将指针转换为对象实例
        let player:AudioToolboxTest = Unmanaged<AudioToolboxTest>.fromOpaque(inUserData).takeUnretainedValue()
        
        // 3. 实际处理逻辑
        // 处理音频缓冲区
        player.audioQueueOutput(audioQueue: inAQ, queueBuffer: buffer)
    }
swift 复制代码
public func audioQueueOutput(audioQueue: AudioQueueRef, queueBuffer audioQueueBuffer: AudioQueueBufferRef) {
        var ioNumBytes = outBufferSize
        var ioNumPackets = numPacketsToRead
        
        // 读取音频文件
        let status = AudioFileReadPacketData(
            audioFile!,  // 音频文件
            false, // 是否缓存
            // on input the size of outBuffer in bytes.
            // on output, the number of bytes actually returned.
            // 输入场景等于缓存大小
            // 输出场景:比如播放根据文件的实际情况
            &ioNumBytes, // 包的长度
            packetDescs, // 音频包信息
            packetIndex, // 索引值
            &ioNumPackets, // 包的个数
            audioQueueBuffer.pointee.mAudioData // 读取到的数据
        )
        
        guard status == noErr else {
            print("Error reading packet data: \(status)")
            return
        }
        
        if ioNumPackets > 0 {
            // 设置音频缓冲区数据大小
            audioQueueBuffer.pointee.mAudioDataByteSize = ioNumBytes
            
            // 将音频数据入队列让系统去播放
            AudioQueueEnqueueBuffer(
                audioQueue,
                audioQueueBuffer,
                ioNumPackets,
                packetDescs
            )
            
            // 更新包的索引值
            self.packetIndex += Int64(ioNumPackets)
        }
    }

示例工程

参考

  1. Apple Core Audio Doc
  2. AudioFileOpenURL(: :::)
  3. iOS音频(1)------AudioToolbox
相关推荐
幸福回头3 天前
ms-swift 代码推理数据集
llm·swift
不二狗3 天前
每日算法 -【Swift 算法】Two Sum 问题:从暴力解法到最优解法的演进
开发语言·算法·swift
struggle20255 天前
适用于 iOS 的 开源Ultralytics YOLO:应用程序和 Swift 软件包,用于在您自己的 iOS 应用程序中运行 YOLO
yolo·ios·开源·app·swift
一丝晨光5 天前
数值溢出保护?数值溢出应该是多少?Swift如何让整数计算溢出不抛出异常?类型最大值和最小值?
java·javascript·c++·rust·go·c·swift
Swift社区5 天前
Swift实战:如何优雅地从二叉搜索树中挑出最接近的K个值
开发语言·ios·swift
fydw_7155 天前
大语言模型RLHF训练框架全景解析:OpenRLHF、verl、LLaMA-Factory与SWIFT深度对比
语言模型·swift·llama
文件夹__iOS6 天前
深入浅出 iOS 对象模型:isa 指针 与 Swift Metadata
ios·swift
I烟雨云渊T7 天前
iOS实名认证模块的具体实现过程(swift)
ios·cocoa·swift
Swift社区8 天前
LeetCode 270:在二叉搜索树中寻找最接近的值(Swift 实战解析)
算法·leetcode·swift
I烟雨云渊T8 天前
iOS瀑布流布局的实现(swift)
开发语言·ios·swift