Using a custom IAudioCaptureStream in Unreal to get real-time PCM data from the microphone
Unreal's UAudioCaptureComponent can only return the recorded PCM data in one batch after capturing for a while; it cannot provide the wave audio stream in real time.
With the IAudioCaptureStream interface you can get a real-time PCM audio stream from the microphone.
Step 1: Add the AudioCaptureCore module
Add the module dependency in Project.Build.cs:
```csharp
PublicDependencyModuleNames.AddRange(new string[] {
"Core",
"CoreUObject",
"Engine",
"InputCore",
...
"AudioCaptureCore"
});
```
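AudioCaptureCore itself only defines the capture interfaces; the concrete IAudioCaptureFactory implementations are registered by the platform backend modules of the AudioCapture plugin (the exact module names, such as AudioCaptureRtAudio or AudioCaptureWasapi on Windows, depend on platform and engine version and are assumptions here). If the factory list in the next step comes back empty, a minimal sketch of forcing such a backend to load before querying the modular features could look like this:

```cpp
#include "Modules/ModuleManager.h"

// Sketch only: "AudioCaptureRtAudio" is an assumed backend module name; check
// Engine/Plugins/Runtime/AudioCapture in your engine version for the modules you actually have.
static void EnsureAudioCaptureBackendLoaded()
{
	const FName BackendModule(TEXT("AudioCaptureRtAudio"));
	if (!FModuleManager::Get().IsModuleLoaded(BackendModule))
	{
		FModuleManager::Get().LoadModule(BackendModule);
	}
}
```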
Step 2: Create an IAudioCaptureStream
An IAudioCaptureStream is created through the IAudioCaptureFactory interface. IAudioCaptureFactory has per-platform implementations, but I did not find a convenient creation helper; the engine source contains a private creation path that is not publicly exposed, so the same factory lookup is done by hand here.
```cpp
TUniquePtr<IAudioCaptureStream> AudioCapture;
```
```cpp
IModularFeatures::Get().LockModularFeatureList();
TArray<IAudioCaptureFactory*> AudioCaptureStreamFactories = IModularFeatures::Get().GetModularFeatureImplementations<IAudioCaptureFactory>(IAudioCaptureFactory::GetModularFeatureName());
IModularFeatures::Get().UnlockModularFeatureList();
// For now, just return the first audio capture stream implemented. We can make this configurable at a later point.
if (AudioCaptureStreamFactories.Num() > 0 && AudioCaptureStreamFactories[0] != nullptr)
{
AudioCapture = AudioCaptureStreamFactories[0]->CreateNewAudioCaptureStream();
if (!AudioCapture.IsValid())
{
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("CreateNewAudioCaptureStream return null"));
}
}
else {
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("no Audio Capture Stream Factories"));
}
```
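Before opening the stream it can help to know what the default capture device actually provides. Depending on engine version, IAudioCaptureStream also exposes GetCaptureDeviceInfo (and GetInputDevicesAvailable); the following is a hedged sketch, assuming that method is available and that INDEX_NONE selects the default device:

```cpp
// Sketch, assuming GetCaptureDeviceInfo exists in your engine version.
if (AudioCapture.IsValid())
{
	Audio::FCaptureDeviceInfo DeviceInfo;
	if (AudioCapture->GetCaptureDeviceInfo(DeviceInfo, INDEX_NONE))
	{
		UE_LOG(LogTemp, Log, TEXT("Capture device: %s, %d channels @ %d Hz"),
			*DeviceInfo.DeviceName, DeviceInfo.InputChannels, DeviceInfo.PreferredSampleRate);
	}
}
```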
Step 3: Start recording the PCM audio stream
```cpp
bool bRecording;
```
```cpp
bool UFxAudioCaptureComponent::StartRecord()
{
if (AudioCapture.IsValid())
{
FAudioCaptureDeviceParams Params;
/*
* Requesting a sample rate or channel count the capture device does not support will
* make the stream fail to open, so do not override these here:
* Params.NumInputChannels = 1;
* Params.SampleRate = 16000;
*
* The PCM data format can be changed; the default is 32-bit float (FLOATING_POINT_32).
* Here it is switched to 32-bit integer (PCM_32).
*/
Params.PCMAudioEncoding = EPCMAudioEncoding::PCM_32;
// Wrap the member function in a TFunction
FOnAudioCaptureFunction OnCapture = [this](const void* AudioData, int32 NumFrames, int32 NumChannels, int32 SampleRate, double StreamTime, bool bOverFlow)
{
this->OnAudioCapture(AudioData, NumFrames, NumChannels, SampleRate, StreamTime, bOverFlow);
};
bool r = AudioCapture->OpenAudioCaptureStream(Params, MoveTemp(OnCapture), 1600);
if (r) {
r = AudioCapture->StartStream();
if (!r) {
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("StartStream return false"));
}
}
else {
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("OpenAudioCaptureStream return false"));
}
bRecording = r;
}
return bRecording;
}
void UFxAudioCaptureComponent::OnAudioCapture(const void* InAudio, int32 NumFrames, int32 NumChannels, int32 SampleRate, double StreamTime, bool bOverFlow)
{
// The callback reports the actual channel count and sample rate of the audio data; the sample format (bit depth) has to be remembered yourself from when the stream was opened.
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, FString::Printf(TEXT("OnAudioCapture - %d,%d,%d,%f"), NumFrames, NumChannels, SampleRate, (float)StreamTime));
}
```
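One thing the snippet above glosses over: the capture callback is invoked on the capture backend's own thread, not the game thread, so any buffer that the game thread or Blueprints also read should be protected. A minimal sketch using an FCriticalSection (CaptureGuard, CapturedPcm and AppendCapturedAudio are illustrative names, not engine API):

```cpp
#include "CoreMinimal.h"
#include "Misc/ScopeLock.h"

// Illustrative members: a lock plus the shared byte buffer.
FCriticalSection CaptureGuard;
TArray<uint8> CapturedPcm;

void AppendCapturedAudio(const void* InAudio, int32 NumFrames, int32 NumChannels)
{
	// PCM_32 was requested when opening the stream, so each sample is 4 bytes.
	const int32 NumBytes = NumFrames * NumChannels * sizeof(int32);
	FScopeLock Lock(&CaptureGuard); // released when Lock goes out of scope
	CapturedPcm.Append(static_cast<const uint8*>(InAudio), NumBytes);
}
```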
Full code
The code below captures the default microphone input and converts it to 16 kHz mono 16-bit PCM data, for use with speech-to-text.
```cpp
#pragma once
#include "CoreMinimal.h"
#include "Components/ActorComponent.h"
#include "AudioCaptureDeviceInterface.h"
#include "FxAudioCaptureComponent.generated.h"
using namespace Audio;
UCLASS(ClassGroup = (Custom), meta = (BlueprintSpawnableComponent))
class UFxAudioCaptureComponent : public UActorComponent
{
GENERATED_BODY()
public:
UFxAudioCaptureComponent();
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "XML")
int ID;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "XML")
float Intensity;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "XML")
int DataNum;
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "XML")
TArray<uint8> AudioData;
protected:
virtual void BeginPlay() override;
virtual void EndPlay(const EEndPlayReason::Type EndPlayReason) override;
TArray<uint8> ResampleAndConvert16KHzMono16Bit(const uint8_t* inputData, int dataNum, int inputChannels, int inputBitsPerSample, int inputSampleRate);
public:
UFUNCTION(BlueprintCallable, Category = "FxAudioCapture")
bool StartRecord(float seconds = 10.0f);
UFUNCTION(BlueprintCallable, Category = "FxAudioCapture")
void StopRecord();
UFUNCTION(BlueprintCallable, Category = "FxAudioCapture")
int RemoveRecordData(int Length);
UFUNCTION(BlueprintPure, Category = "FxAudioCapture")
bool IsRecording();
private:
bool bRecording;
float RecordSeconds;
TUniquePtr<IAudioCaptureStream> AudioCapture;
void OnAudioCapture(const void* InAudio, int32 NumFrames, int32 NumChannels, int32 SampleRate, double StreamTime, bool bOverFlow);
};
```
```cpp
#include "FxAudioCaptureComponent.h"
#include "AudioCaptureDeviceInterface.h"
#include "AudioCaptureCore.h"
#include "AudioMixer.h"
UFxAudioCaptureComponent::UFxAudioCaptureComponent()
: bRecording(false)
{
PrimaryComponentTick.bCanEverTick = true;
}
void UFxAudioCaptureComponent::BeginPlay()
{
Super::BeginPlay();
IModularFeatures::Get().LockModularFeatureList();
TArray<IAudioCaptureFactory*> AudioCaptureStreamFactories = IModularFeatures::Get().GetModularFeatureImplementations<IAudioCaptureFactory>(IAudioCaptureFactory::GetModularFeatureName());
IModularFeatures::Get().UnlockModularFeatureList();
// For now, just return the first audio capture stream implemented. We can make this configurable at a later point.
if (AudioCaptureStreamFactories.Num() > 0 && AudioCaptureStreamFactories[0] != nullptr)
{
AudioCapture = AudioCaptureStreamFactories[0]->CreateNewAudioCaptureStream();
if (!AudioCapture.IsValid())
{
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("CreateNewAudioCaptureStream return null"));
}
}
else {
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("no Audio Capture Stream Factories"));
}
}
void UFxAudioCaptureComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
{
Super::EndPlay(EndPlayReason);
StopRecord();
}
bool UFxAudioCaptureComponent::StartRecord(float seconds)
{
StopRecord();
RecordSeconds = seconds;
if (AudioCapture.IsValid())
{
FAudioCaptureDeviceParams Params;
/*
* Requesting a sample rate or channel count the capture device does not support will
* make the stream fail to open, so do not override these here:
* Params.NumInputChannels = 1;
* Params.SampleRate = 16000;
*
* The PCM data format can be changed; the default is 32-bit float (FLOATING_POINT_32).
* Here it is switched to 32-bit integer (PCM_32).
*/
Params.PCMAudioEncoding = EPCMAudioEncoding::PCM_32;
// Wrap the member function in a TFunction
FOnAudioCaptureFunction OnCapture = [this](const void* AudioData, int32 NumFrames, int32 NumChannels, int32 SampleRate, double StreamTime, bool bOverFlow)
{
this->OnAudioCapture(AudioData, NumFrames, NumChannels, SampleRate, StreamTime, bOverFlow);
};
bool r = AudioCapture->OpenAudioCaptureStream(Params, MoveTemp(OnCapture), 1600);
if (r) {
r = AudioCapture->StartStream();
if (!r) {
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("StartStream return false"));
}
}
else {
GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, TEXT("OpenAudioCaptureStream return false"));
}
bRecording = r;
}
return IsRecording();
}
void UFxAudioCaptureComponent::StopRecord()
{
if (bRecording && AudioCapture.IsValid())
{
AudioCapture->StopStream();
AudioCapture->CloseStream();
}
bRecording = false;
}
void UFxAudioCaptureComponent::OnAudioCapture(const void* InAudio, int32 NumFrames, int32 NumChannels, int32 SampleRate, double StreamTime, bool bOverFlow)
{
// GEngine->AddOnScreenDebugMessage(-1, 5.f, FColor::Red, FString::Printf(TEXT("OnAudioCapture - %d,%d,%d,%f"), NumFrames, NumChannels, SampleRate, (float)StreamTime));
const uint8* data = static_cast<const uint8*>(InAudio);
int32 bitsPerSample = 32; // PCM sample bit depth (the stream was opened as PCM_32)
int32 sampleSizeInBytes = bitsPerSample / 8; // 4 bytes per sample
DataNum = NumFrames * NumChannels * sampleSizeInBytes;
if (16 == bitsPerSample) {
const int16* p = static_cast<const int16*>(InAudio);
float val = 0;
for (int i = 0; i < NumFrames; ++i) {
int id = i * NumChannels;
val += FMath::Abs((float)(p[id]) / 32768.0f);
}
Intensity = val / NumFrames;
}
else if (32 == bitsPerSample) {
const int32* p = static_cast<const int32*>(InAudio);
float val = 0;
for (int i = 0; i < NumFrames; ++i) {
int id = i * NumChannels;
val += FMath::Abs((float)(p[id]) / 2147483647.0f);
}
Intensity = val / NumFrames;
}
// Resample the data to 16 kHz mono
TArray<uint8> Array = ResampleAndConvert16KHzMono16Bit(data, DataNum, NumChannels, bitsPerSample, SampleRate);
// Append the converted data
AudioData.Append(Array.GetData(), Array.Num());
// AudioData holds the resampled 16 kHz mono 16-bit PCM, so stop recording automatically once RecordSeconds worth of it has been buffered
int maxBuffSize = static_cast<int>(2 /*bytes per sample*/ * 16000 * RecordSeconds);
if (AudioData.Num() >= maxBuffSize) {
StopRecord();
}
}
int UFxAudioCaptureComponent::RemoveRecordData(int Length)
{
if (Length > AudioData.Num()) {
Length = AudioData.Num();
}
if (Length > 0) {
AudioData.RemoveAt(0, Length);
}
return Length;
}
bool UFxAudioCaptureComponent::IsRecording() {
if (!AudioCapture.IsValid()) {
return false;
}
if (!bRecording) {
return false;
}
return true;
}
TArray<uint8> UFxAudioCaptureComponent::ResampleAndConvert16KHzMono16Bit(const uint8_t* inputData, int dataNum, int inputChannels, int inputBitsPerSample, int inputSampleRate) {
int targetSampleRate = 16000;
int bytesPerSample = inputBitsPerSample / 8; // bytes per input sample
int numSamples = dataNum / (bytesPerSample * inputChannels);
// Resampling step: number of source samples per output sample
double resampleRate = static_cast<double>(inputSampleRate) / targetSampleRate;
// Temporary storage for the mono samples
std::vector<int32_t> monoSamples;
for (int i = 0; i < numSamples; ++i) {
int32_t sampleValue = 0;
// If multi-channel, mix down to mono
if (inputChannels > 1) {
int64 monoValue = 0;
for (int ch = 0; ch < inputChannels; ++ch) {
int32_t channelValue = 0;
for (int j = 0; j < bytesPerSample; ++j) {
channelValue |= (inputData[i * bytesPerSample * inputChannels + ch * bytesPerSample + j] << (j * 8));
}
monoValue += channelValue;
}
// Average the channels to avoid overflow
sampleValue = monoValue / inputChannels;
}
else {
// Assemble a multi-byte sample (little-endian)
for (int j = 0; j < bytesPerSample; ++j) {
sampleValue |= (inputData[i * bytesPerSample * inputChannels + j] << (j * 8));
}
}
monoSamples.push_back(sampleValue);
}
// Resample
std::vector<int32_t> resampledSamples;
int targetNumSamples = static_cast<int>(numSamples / resampleRate);
for (int i = 0; i < targetNumSamples; ++i) {
double srcIndex = i * resampleRate;
int srcIndexInt = static_cast<int>(srcIndex);
double frac = srcIndex - srcIndexInt;
// Linear interpolation
int32_t sample1 = monoSamples[srcIndexInt];
int32_t sample2 = monoSamples[std::min(srcIndexInt + 1, numSamples - 1)];
int32_t resampledValue = static_cast<int32_t>((1.0 - frac) * sample1 + frac * sample2);
resampledSamples.push_back(resampledValue);
}
TArray<uint8> Array;
if (32 == inputBitsPerSample) {
for (int32_t sample : resampledSamples) {
// Convert the 32-bit sample to 16 bits by keeping the high word
int16_t sample16Bit = static_cast<int16_t>(sample >> 16);
// Store the 16-bit sample as little-endian bytes
Array.Push(static_cast<uint8_t>(sample16Bit & 0xFF));
Array.Push(static_cast<uint8_t>((sample16Bit >> 8) & 0xFF));
}
}
else if (16 == inputBitsPerSample) {
for (int32_t sample : resampledSamples) {
// The sample already fits in 16 bits; just truncate
int16_t sample16Bit = static_cast<int16_t>(sample);
// Store the 16-bit sample as little-endian bytes
Array.Push(static_cast<uint8_t>(sample & 0xFF));
Array.Push(static_cast<uint8_t>((sample >> 8) & 0xFF));
}
}
return Array;
}
```
Blueprint usage
TArray<uint8> AudioData stores the 16 kHz mono 16-bit audio data. After each chunk of PCM data has been processed, call RemoveRecordData(int Length) to remove the data that was already handled; looping like this lets you process the audio stream continuously in real time.
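In C++ terms that Blueprint loop amounts to the sketch below: drain AudioData in fixed-size chunks, hand each chunk to your consumer, then remove what was processed. SendToSpeechToText is a hypothetical downstream function, and 3200 bytes corresponds to 100 ms of 16 kHz mono 16-bit audio (16000 samples/s * 2 bytes * 0.1 s):

```cpp
// Sketch of a per-tick consumer; SendToSpeechToText is hypothetical.
void DrainCapturedAudio(UFxAudioCaptureComponent* Capture)
{
	constexpr int32 ChunkBytes = 3200; // 100 ms of 16 kHz mono 16-bit PCM
	while (Capture && Capture->AudioData.Num() >= ChunkBytes)
	{
		TArray<uint8> Chunk(Capture->AudioData.GetData(), ChunkBytes);
		SendToSpeechToText(Chunk);             // hypothetical consumer
		Capture->RemoveRecordData(ChunkBytes); // drop the bytes that were just handled
	}
}
```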