Spleeter 工程地址
https://github.com/deezer/spleeter.git
Deezer source separation library including pretrained models.
https://research.deezer.com/projects/spleeter.html
在Unity中的实现
csharp
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using UnityEngine;
/// <summary>
/// Wraps one ONNX Runtime <see cref="InferenceSession"/> and converts between
/// 4-D jagged arrays and the dense tensors the session consumes and produces.
/// </summary>
public class OnnxModel : IDisposable
{
    private InferenceSession _session;
    private string _modelPath;

    /// <summary>
    /// Loads the model stored at <paramref name="modelPath"/> and logs the name and
    /// declared shape of each of its inputs and outputs.
    /// </summary>
    /// <param name="modelPath">Path of the .onnx file to load.</param>
    /// <exception cref="Exception">
    /// Any failure while creating the session is logged and rethrown unchanged.
    /// </exception>
    public OnnxModel(string modelPath)
    {
        try
        {
            _modelPath = modelPath;

            // SessionOptions is IDisposable; release it once the session has been built from it.
            using (var sessionOptions = new SessionOptions())
            {
                sessionOptions.InterOpNumThreads = 4;
                sessionOptions.IntraOpNumThreads = 4;
                sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
                _session = new InferenceSession(modelPath, sessionOptions);
            }

            Debug.Log($"---------- 模型加载: {modelPath} ----------");
            foreach (var input in _session.InputMetadata)
            {
                Debug.Log($"输入: {input.Key}, 形状: [{string.Join(", ", input.Value.Dimensions)}]");
            }
            foreach (var output in _session.OutputMetadata)
            {
                Debug.Log($"输出: {output.Key}, 形状: [{string.Join(", ", output.Value.Dimensions)}]");
            }
            Debug.Log("--------------------");
        }
        catch (Exception ex)
        {
            // Do not leave a half-constructed object holding a live session.
            _session?.Dispose();
            _session = null;
            Debug.LogError($"模型加载失败: {ex.Message}");
            throw;
        }
    }

    /// <summary>
    /// Runs inference on a rectangular 4-D jagged array and returns the first model
    /// output as a 4-D jagged array. The tensor shape is taken from the array itself;
    /// the callers in this file pass (2, num_splits, 512, 1024).
    /// </summary>
    /// <param name="input">Rectangular jagged array fed to the model's first input.</param>
    /// <returns>The model's first output, copied into a new jagged array.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="input"/> is empty or ragged.</exception>
    public float[][][][] Run(float[][][][] input)
    {
        try
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }
            if (input.Length == 0 || input[0].Length == 0 || input[0][0].Length == 0)
            {
                throw new ArgumentException("输入张量不能为空", nameof(input));
            }

            int[] shape =
            {
                input.Length,
                input[0].Length,
                input[0][0].Length,
                input[0][0][0].Length
            };
            var inputTensor = new DenseTensor<float>(Flatten4DArray(input), shape);
            var inputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor(_session.InputMetadata.Keys.First(), inputTensor)
            };

            using (var results = _session.Run(inputs))
            {
                var outputName = _session.OutputMetadata.Keys.First();
                var outputTensor = results.First(r => r.Name == outputName).AsTensor<float>();
                // Copy out before the result collection is disposed.
                return Tensor4DToJagged(outputTensor);
            }
        }
        catch (Exception ex)
        {
            Debug.LogError($"推理失败: {ex.Message}");
            throw;
        }
    }

    /// <summary>
    /// Copies a rectangular 4-D jagged array into one row-major flat array.
    /// </summary>
    /// <exception cref="ArgumentException">A sub-array's length differs from the first one's.</exception>
    private float[] Flatten4DArray(float[][][][] input)
    {
        int dim0 = input.Length;
        int dim1 = input[0].Length;
        int dim2 = input[0][0].Length;
        int dim3 = input[0][0][0].Length;

        float[] flattened = new float[dim0 * dim1 * dim2 * dim3];
        int offset = 0;
        for (int i = 0; i < dim0; i++)
        {
            if (input[i].Length != dim1)
            {
                throw new ArgumentException("输入张量各维长度不一致", nameof(input));
            }
            for (int j = 0; j < dim1; j++)
            {
                if (input[i][j].Length != dim2)
                {
                    throw new ArgumentException("输入张量各维长度不一致", nameof(input));
                }
                for (int k = 0; k < dim2; k++)
                {
                    float[] row = input[i][j][k];
                    if (row.Length != dim3)
                    {
                        throw new ArgumentException("输入张量各维长度不一致", nameof(input));
                    }
                    // The innermost rows are contiguous, so copy them in bulk.
                    Array.Copy(row, 0, flattened, offset, dim3);
                    offset += dim3;
                }
            }
        }
        return flattened;
    }

    /// <summary>
    /// Copies a 4-D tensor into a newly allocated jagged array of the same shape.
    /// </summary>
    private float[][][][] Tensor4DToJagged(Tensor<float> tensor)
    {
        var dims = tensor.Dimensions;
        int d0 = dims[0];
        int d1 = dims[1];
        int d2 = dims[2];
        int d3 = dims[3];

        // One reusable index array: calling tensor[i, j, k, l] would allocate a
        // fresh params int[] for every single element.
        int[] index = new int[4];

        float[][][][] result = new float[d0][][][];
        for (int i = 0; i < d0; i++)
        {
            index[0] = i;
            result[i] = new float[d1][][];
            for (int j = 0; j < d1; j++)
            {
                index[1] = j;
                result[i][j] = new float[d2][];
                for (int k = 0; k < d2; k++)
                {
                    index[2] = k;
                    float[] row = new float[d3];
                    for (int l = 0; l < d3; l++)
                    {
                        index[3] = l;
                        row[l] = tensor[index];
                    }
                    result[i][j][k] = row;
                }
            }
        }
        return result;
    }

    /// <summary>
    /// Releases the underlying inference session. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        _session?.Dispose();
        _session = null;
    }
}
/// <summary>
/// Result of a short-time Fourier transform of one channel.
/// </summary>
public struct StftResult
{
// Real parts of the spectrum. ComputeStftOptimized fills this as a flat,
// frame-major array of NumFrames * (N_FFT / 2 + 1) values.
public float[] Real;
// Imaginary parts, laid out exactly like Real.
public float[] Imag;
// Number of analysis frames stored in Real and Imag.
public int NumFrames;
}
/// <summary>
/// Minimal single-precision complex number with the arithmetic operators the FFT code needs.
/// </summary>
public struct Complex
{
    public float Real;
    public float Imag;

    public Complex(float real, float imag)
    {
        Real = real;
        Imag = imag;
    }

    /// <summary>The additive identity, 0 + 0i.</summary>
    public static Complex Zero => new Complex(0, 0);

    /// <summary>Component-wise sum.</summary>
    public static Complex operator +(Complex a, Complex b)
        => new Complex(a.Real + b.Real, a.Imag + b.Imag);

    /// <summary>Component-wise difference.</summary>
    public static Complex operator -(Complex a, Complex b)
        => new Complex(a.Real - b.Real, a.Imag - b.Imag);

    /// <summary>Scales both components; scalar on the left.</summary>
    public static Complex operator *(float scalar, Complex c)
        => new Complex(scalar * c.Real, scalar * c.Imag);

    /// <summary>Scales both components; scalar on the right.</summary>
    public static Complex operator *(Complex c, float scalar)
        => new Complex(c.Real * scalar, c.Imag * scalar);

    /// <summary>Complex product: (a + bi)(c + di) = (ac - bd) + (ad + bc)i.</summary>
    public static Complex operator *(Complex a, Complex b)
    {
        float re = a.Real * b.Real - a.Imag * b.Imag;
        float im = a.Real * b.Imag + a.Imag * b.Real;
        return new Complex(re, im);
    }
}
csharp
using System;
using System.Collections.Generic;
using System.IO;
using UnityEngine;
/// <summary>
/// 超级优化版本 - 修复逆STFT性能问题
/// 主要优化:
/// 1. 替换低效的逆FFT为预计算三角函数表
/// 2. 使用矩阵运算替代嵌套循环
/// 3. 减少临时数组分配
/// </summary>
public class AudioSeparator : MonoBehaviour
{
private OnnxModel _vocalsModel;
private OnnxModel _accompanimentModel;
private const int N_FFT = 4096;
private const int HOP_LENGTH = 1024;
private const int N_BINS = 1024;
private const int STFT_HEIGHT = 512;
private const int STFT_WIDTH = 1024;
private const float EPSILON = 1e-10f;
private int _sampleRate = 44100;
// 性能优化:预分配和预计算
private float[] _windowBuffer;
private Complex[] _fftBuffer;
private float[] _ifftRealBuffer;
private float[] _ifftImagBuffer;
private float[] _frameBuffer;
// ✓ 新增:预计算的三角函数表
private float[] _cosTable;
private float[] _sinTable;
/// <summary>
/// Loads both separation models and allocates the working buffers and lookup tables.
/// May be called again to reload: the previously loaded models are disposed only
/// after the new pair has loaded successfully.
/// </summary>
/// <param name="vocalsModelPath">Path of the vocals .onnx model.</param>
/// <param name="accompanimentModelPath">Path of the accompaniment .onnx model.</param>
/// <exception cref="Exception">Any initialization failure is logged and rethrown unchanged.</exception>
public void Initialize(string vocalsModelPath, string accompanimentModelPath)
{
    OnnxModel vocals = null;
    OnnxModel accompaniment = null;
    try
    {
        vocals = new OnnxModel(vocalsModelPath);
        accompaniment = new OnnxModel(accompanimentModelPath);

        // Pre-allocated scratch buffers shared by the STFT / ISTFT routines.
        _windowBuffer = CreateHannWindow(N_FFT);
        _fftBuffer = new Complex[N_FFT];
        _ifftRealBuffer = new float[N_FFT];
        _ifftImagBuffer = new float[N_FFT];
        _frameBuffer = new float[N_FFT];

        PrecomputeTrigonometricTables();

        // Everything succeeded: publish the new models, then release the old ones.
        OnnxModel previousVocals = _vocalsModel;
        OnnxModel previousAccompaniment = _accompanimentModel;
        _vocalsModel = vocals;
        _accompanimentModel = accompaniment;
        vocals = null;          // ownership has moved to the fields
        accompaniment = null;
        previousVocals?.Dispose();
        previousAccompaniment?.Dispose();

        Debug.Log("分离器初始化成功");
    }
    catch (Exception ex)
    {
        // Release whatever was loaded before the failure so no session leaks.
        vocals?.Dispose();
        accompaniment?.Dispose();
        Debug.LogError($"初始化失败: {ex.Message}");
        throw;
    }
}
/// <summary>
/// Fills <c>_cosTable</c> and <c>_sinTable</c> with cos and sin of 2π·k·n / N_FFT
/// for every bin k in [0, N_FFT/2] and every sample n in [0, N_FFT). The entry for
/// (k, n) is stored at index k * N_FFT + n, which is how the inverse STFT reads it.
/// </summary>
private void PrecomputeTrigonometricTables()
{
    Debug.Log("预计算三角函数表...");

    int numBins = N_FFT / 2 + 1;
    _cosTable = new float[N_FFT * numBins];
    _sinTable = new float[N_FFT * numBins];

    double radiansPerStep = 2.0 * Math.PI / N_FFT;
    int idx = 0;
    for (int k = 0; k < numBins; k++)
    {
        for (int n = 0; n < N_FFT; n++)
        {
            // The phase is periodic in N_FFT, so reduce k*n first. Multiplying the
            // raw product (up to about 8.4 million) by a float step would give an
            // angle of thousands of radians, where a float only resolves ~1e-3 rad.
            int phase = (k * n) % N_FFT;
            double angle = radiansPerStep * phase;
            _cosTable[idx] = (float)Math.Cos(angle);
            _sinTable[idx] = (float)Math.Sin(angle);
            idx++;
        }
    }

    Debug.Log($"预计算完成: {_cosTable.Length} 个三角函数值");
}
/// <summary>
/// Loads a WAV file and separates it.
/// </summary>
/// <param name="audioPath">Path of the WAV file to read.</param>
/// <returns>The dictionary produced by <see cref="Separate"/>.</returns>
/// <exception cref="Exception">Any failure is logged with its stack trace and rethrown unchanged.</exception>
public Dictionary<string, float[]> SeparateFromFile(string audioPath)
{
    try
    {
        return Separate(LoadWavFile(audioPath));
    }
    catch (Exception ex)
    {
        Debug.LogError($"文件分离失败: {ex.Message}\n{ex.StackTrace}");
        throw;
    }
}
/// <summary>
/// Separates an interleaved stereo waveform into "vocals" and "accompaniment".
/// Pipeline: de-interleave, STFT per channel, magnitude extraction, padding to a
/// multiple of 512 frames, model inference, ratio-mask computation, then masked
/// inverse STFT for each stem.
/// </summary>
/// <param name="waveform">Samples interleaved as L, R, L, R, ...</param>
/// <returns>Interleaved stereo samples keyed by "vocals" and "accompaniment".</returns>
/// <exception cref="InvalidOperationException">The models have not been loaded.</exception>
/// <exception cref="Exception">Any processing failure is logged and rethrown unchanged.</exception>
public Dictionary<string, float[]> Separate(float[] waveform)
{
    bool modelsLoaded = _vocalsModel != null && _accompanimentModel != null;
    if (!modelsLoaded)
    {
        throw new InvalidOperationException("分离器未初始化");
    }

    try
    {
        var totalTimer = System.Diagnostics.Stopwatch.StartNew();

        // Step 1: split the interleaved buffer into left and right channels.
        int numSamples = waveform.Length / 2;
        float[] left = new float[numSamples];
        float[] right = new float[numSamples];
        for (int dst = 0, src = 0; dst < numSamples; dst++, src += 2)
        {
            left[dst] = waveform[src];
            right[dst] = waveform[src + 1];
        }
        Debug.Log("[1] 立体声分离完成");

        // Step 2: STFT of each channel.
        StftResult[] stftResults = new StftResult[2];
        stftResults[0] = ComputeStftOptimized(left);
        stftResults[1] = ComputeStftOptimized(right);
        Debug.Log($"[2] STFT计算完成 - {stftResults[0].NumFrames} 帧");

        // Step 3: magnitudes, padded so the frame count is a multiple of 512.
        float[][][] magnitudes = ExtractStftMagnitude(stftResults);
        int numFrames = magnitudes[0].Length;
        int padding = (512 - (numFrames % 512)) % 512;
        if (padding > 0)
        {
            magnitudes = PadStftData(magnitudes, padding);
        }
        Debug.Log($"[3] 幅度谱提取完成,填充 {padding} 帧, 总帧数: {magnitudes[0].Length}");

        // Step 4: cut into fixed-height splits for the model.
        float[][][][] modelInput = ReshapeForModel(magnitudes);
        Debug.Log($"[4] 模型输入转换完成 - 形状: (2, {modelInput[0].Length}, {STFT_HEIGHT}, {STFT_WIDTH})");

        // Step 5: one inference pass per stem.
        float[][][][] vocalsSpec = _vocalsModel.Run(modelInput);
        float[][][][] accompanimentSpec = _accompanimentModel.Run(modelInput);
        Debug.Log($"[5] 模型推理完成 - 输出形状: (2, {vocalsSpec[0].Length}, {vocalsSpec[0][0].Length}, {vocalsSpec[0][0][0].Length})");

        // Step 6: ratio masks, each stem against the other.
        float[][][][] vocalsRatio = ComputeMask(vocalsSpec, accompanimentSpec);
        float[][][][] accompanimentRatio = ComputeMask(accompanimentSpec, vocalsSpec);
        Debug.Log("[6] 掩码计算完成");

        // Step 7: rebuild audio using the original, unpadded frame count.
        var stems = new Dictionary<string, float[]>();
        Debug.Log("[7] 开始重构音频...");
        var vocalsTimer = System.Diagnostics.Stopwatch.StartNew();
        stems["vocals"] = ReconstructAudioOptimized(vocalsRatio, stftResults, numFrames);
        vocalsTimer.Stop();
        Debug.Log($"[7] 音频重构完成,耗时: {vocalsTimer.ElapsedMilliseconds}ms");
        stems["accompaniment"] = ReconstructAudioOptimized(accompanimentRatio, stftResults, numFrames);

        totalTimer.Stop();
        float audioDuration = numSamples / (float)_sampleRate;
        // Real-time factor: processing time divided by audio duration.
        float rtf = totalTimer.ElapsedMilliseconds / 1000f / audioDuration;
        Debug.Log("✓ 分离完成!");
        Debug.Log($" 耗时: {totalTimer.ElapsedMilliseconds}ms");
        Debug.Log($" RTF: {rtf:F3} (越小越好)");
        return stems;
    }
    catch (Exception ex)
    {
        Debug.LogError($"分离过程错误: {ex.Message}\n{ex.StackTrace}");
        throw;
    }
}
/// <summary>
/// Computes the Hann-windowed STFT of one channel with frame size N_FFT and hop
/// HOP_LENGTH. Only the N_FFT / 2 + 1 non-redundant bins of each frame are kept.
/// </summary>
/// <param name="signal">Mono samples of one channel.</param>
/// <returns>Frame-major real and imaginary parts plus the number of frames.</returns>
/// <exception cref="ArgumentNullException"><paramref name="signal"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="signal"/> is too short to produce even one frame.
/// </exception>
private StftResult ComputeStftOptimized(float[] signal)
{
    if (signal == null)
    {
        throw new ArgumentNullException(nameof(signal));
    }

    int numFrames = (signal.Length - N_FFT) / HOP_LENGTH + 1;
    if (numFrames <= 0)
    {
        // Without this guard a short clip ends in a negative array size or an
        // empty model input further down the pipeline.
        throw new ArgumentException($"音频过短,无法计算STFT: {signal.Length} 个采样点", nameof(signal));
    }

    int numBins = N_FFT / 2 + 1;
    float[] realPart = new float[numFrames * numBins];
    float[] imagPart = new float[numFrames * numBins];

    for (int frameIdx = 0; frameIdx < numFrames; frameIdx++)
    {
        int offset = frameIdx * HOP_LENGTH;

        // Window the samples that exist. A signal slightly shorter than N_FFT
        // still yields one frame, whose tail is zero-filled.
        int available = Math.Min(N_FFT, signal.Length - offset);
        for (int i = 0; i < available; i++)
        {
            _frameBuffer[i] = signal[offset + i] * _windowBuffer[i];
        }
        if (available < N_FFT)
        {
            Array.Clear(_frameBuffer, available, N_FFT - available);
        }

        Complex[] spectrum = FastFFT(_frameBuffer);

        // Keep bins 0..N_FFT/2 only.
        int baseIdx = frameIdx * numBins;
        for (int k = 0; k < numBins; k++)
        {
            realPart[baseIdx + k] = spectrum[k].Real;
            imagPart[baseIdx + k] = spectrum[k].Imag;
        }
    }

    return new StftResult
    {
        Real = realPart,
        Imag = imagPart,
        NumFrames = numFrames
    };
}
/// <summary>
/// Forward transform dispatcher: inputs of at most 256 samples go to the direct
/// DFT, longer ones to the recursive Cooley-Tukey routine.
/// </summary>
private Complex[] FastFFT(float[] input)
{
    return input.Length <= 256
        ? SimpleFFT(input)
        : CooleyTukeyFFT(input);
}
/// <summary>
/// Recursive radix-2 decimation-in-time FFT of a real input. Lengths that are not
/// a power of two are handed to the direct DFT instead.
/// </summary>
private Complex[] CooleyTukeyFFT(float[] input)
{
    int length = input.Length;
    if (length == 1)
    {
        return new Complex[] { new Complex(input[0], 0) };
    }

    bool isPowerOfTwo = (length & (length - 1)) == 0;
    if (!isPowerOfTwo)
    {
        return SimpleFFT(input);
    }

    // Split into even- and odd-indexed samples.
    int half = length / 2;
    float[] evenSamples = new float[half];
    float[] oddSamples = new float[half];
    for (int i = 0; i < half; i++)
    {
        evenSamples[i] = input[2 * i];
        oddSamples[i] = input[2 * i + 1];
    }

    Complex[] evenSpectrum = CooleyTukeyFFT(evenSamples);
    Complex[] oddSpectrum = CooleyTukeyFFT(oddSamples);

    // Butterfly: combine the two half-size spectra.
    Complex[] spectrum = new Complex[length];
    for (int k = 0; k < half; k++)
    {
        float angle = -2f * Mathf.PI * k / length;
        Complex rotated = new Complex(Mathf.Cos(angle), Mathf.Sin(angle)) * oddSpectrum[k];
        spectrum[k] = evenSpectrum[k] + rotated;
        spectrum[k + half] = evenSpectrum[k] - rotated;
    }
    return spectrum;
}
/// <summary>
/// Direct O(n²) discrete Fourier transform of a real input:
/// X[k] = Σ x[m] · e^(-i·2π·k·m / n).
/// </summary>
/// <param name="input">Real samples; any length.</param>
/// <returns>All n complex bins.</returns>
private Complex[] SimpleFFT(float[] input)
{
    int n = input.Length;
    Complex[] result = new Complex[n];
    if (n == 0)
    {
        return result;
    }

    double radiansPerStep = -2.0 * Math.PI / n;
    for (int k = 0; k < n; k++)
    {
        float sumReal = 0f;
        float sumImag = 0f;
        for (int m = 0; m < n; m++)
        {
            // The phase is periodic in n: reduce k*m before scaling so the angle
            // stays within one turn. The product is formed in 64 bits so it
            // cannot overflow for large n.
            long phase = ((long)k * m) % n;
            double angle = radiansPerStep * phase;
            sumReal += input[m] * (float)Math.Cos(angle);
            sumImag += input[m] * (float)Math.Sin(angle);
        }
        result[k] = new Complex(sumReal, sumImag);
    }
    return result;
}
/// <summary>
/// Builds a Hann window of <paramref name="size"/> points:
/// w[i] = 0.5 · (1 - cos(2π·i / (size - 1))).
/// </summary>
private float[] CreateHannWindow(int size)
{
    float angularStep = 2f * Mathf.PI / (size - 1);
    float[] coefficients = new float[size];
    int i = 0;
    while (i < size)
    {
        coefficients[i] = 0.5f * (1 - Mathf.Cos(angularStep * i));
        i++;
    }
    return coefficients;
}
/// <summary>
/// Computes the magnitude of the first N_BINS bins of every frame for both channels.
/// </summary>
/// <returns>Magnitudes indexed as [channel][frame][bin].</returns>
private float[][][] ExtractStftMagnitude(StftResult[] stftResults)
{
    // Each stored frame holds N_FFT / 2 + 1 bins; only the first N_BINS are read.
    int storedBinsPerFrame = N_FFT / 2 + 1;

    float[][][] magnitudes = new float[2][][];
    for (int ch = 0; ch < 2; ch++)
    {
        StftResult channel = stftResults[ch];
        float[][] frames = new float[channel.NumFrames][];
        for (int frame = 0; frame < frames.Length; frame++)
        {
            int frameStart = frame * storedBinsPerFrame;
            float[] row = new float[N_BINS];
            for (int bin = 0; bin < N_BINS; bin++)
            {
                float re = channel.Real[frameStart + bin];
                float im = channel.Imag[frameStart + bin];
                row[bin] = Mathf.Sqrt(re * re + im * im);
            }
            frames[frame] = row;
        }
        magnitudes[ch] = frames;
    }
    return magnitudes;
}
/// <summary>
/// Appends <paramref name="padding"/> all-zero frames to both channels. The
/// existing frame arrays are reused by reference, not copied.
/// </summary>
private float[][][] PadStftData(float[][][] data, int padding)
{
    int existingFrames = data[0].Length;
    int totalFrames = existingFrames + padding;

    float[][][] result = new float[2][][];
    for (int ch = 0; ch < 2; ch++)
    {
        float[][] frames = new float[totalFrames][];
        System.Array.Copy(data[ch], 0, frames, 0, existingFrames);
        int next = existingFrames;
        while (next < totalFrames)
        {
            frames[next] = new float[N_BINS];
            next++;
        }
        result[ch] = frames;
    }
    return result;
}
/// <summary>
/// Cuts [channel][frame][bin] data into consecutive splits of STFT_HEIGHT frames,
/// giving [channel][split][frame][bin] with rows STFT_WIDTH wide.
/// </summary>
private float[][][][] ReshapeForModel(float[][][] data)
{
    int splitCount = data[0].Length / STFT_HEIGHT;

    float[][][][] shaped = new float[2][][][];
    for (int ch = 0; ch < 2; ch++)
    {
        float[][][] splits = new float[splitCount][][];
        for (int s = 0; s < splitCount; s++)
        {
            float[][] rows = new float[STFT_HEIGHT][];
            int firstFrame = s * STFT_HEIGHT;
            for (int r = 0; r < STFT_HEIGHT; r++)
            {
                // A new array is already zero-filled, so any columns beyond
                // N_BINS need no explicit clearing.
                float[] row = new float[STFT_WIDTH];
                System.Array.Copy(data[ch][firstFrame + r], 0, row, 0, N_BINS);
                rows[r] = row;
            }
            splits[s] = rows;
        }
        shaped[ch] = splits;
    }
    return shaped;
}
/// <summary>
/// Builds a ratio mask for <paramref name="source"/> against <paramref name="other"/>:
/// mask = (source² + EPSILON / 2) / (source² + other² + EPSILON), element by element.
/// </summary>
/// <returns>Mask indexed as [channel][split][frame][bin].</returns>
private float[][][][] ComputeMask(float[][][][] source, float[][][][] other)
{
    float[][][][] mask = new float[2][][][];
    for (int ch = 0; ch < 2; ch++)
    {
        int splitCount = source[ch].Length;
        float[][][] channelMask = new float[splitCount][][];
        for (int s = 0; s < splitCount; s++)
        {
            float[][] rows = new float[STFT_HEIGHT][];
            for (int r = 0; r < STFT_HEIGHT; r++)
            {
                float[] sourceRow = source[ch][s][r];
                float[] otherRow = other[ch][s][r];
                float[] maskRow = new float[STFT_WIDTH];
                for (int bin = 0; bin < STFT_WIDTH; bin++)
                {
                    float sourcePower = sourceRow[bin] * sourceRow[bin];
                    float otherPower = otherRow[bin] * otherRow[bin];
                    float total = sourcePower + otherPower + EPSILON;
                    maskRow[bin] = (sourcePower + EPSILON / 2f) / total;
                }
                rows[r] = maskRow;
            }
            channelMask[s] = rows;
        }
        mask[ch] = channelMask;
    }
    return mask;
}
/// <summary>
/// Applies a mask to the mixture STFT of both channels, inverts each channel back
/// to the time domain and interleaves the result as stereo.
/// </summary>
/// <param name="mask">Mask indexed as [channel][split][frame][bin].</param>
/// <param name="stftResults">Mixture STFT, one entry per channel.</param>
/// <param name="originalNumFrames">Frame count before padding; later frames are ignored.</param>
/// <returns>Interleaved stereo samples (L, R, L, R, ...).</returns>
private float[] ReconstructAudioOptimized(float[][][][] mask, StftResult[] stftResults, int originalNumFrames)
{
    int binsPerFrame = N_FFT / 2 + 1;
    float[][] channels = new float[2][];

    for (int ch = 0; ch < 2; ch++)
    {
        float[][][] channelMask = mask[ch];
        StftResult mixture = stftResults[ch];
        float[] maskedReal = new float[originalNumFrames * binsPerFrame];
        float[] maskedImag = new float[originalNumFrames * binsPerFrame];

        int frameLimit = Mathf.Min(originalNumFrames, channelMask.Length * STFT_HEIGHT);
        for (int frame = 0; frame < frameLimit; frame++)
        {
            int split = frame / STFT_HEIGHT;
            int row = frame % STFT_HEIGHT;
            if (split >= channelMask.Length || row >= channelMask[split].Length)
            {
                break;
            }

            float[] maskRow = channelMask[split][row];
            int frameStart = frame * binsPerFrame;

            // The bins the model covers are scaled by the mask.
            int maskedBins = Mathf.Min(N_BINS, maskRow.Length);
            for (int bin = 0; bin < maskedBins; bin++)
            {
                float gain = maskRow[bin];
                maskedReal[frameStart + bin] = gain * mixture.Real[frameStart + bin];
                maskedImag[frameStart + bin] = gain * mixture.Imag[frameStart + bin];
            }

            // Bins from N_BINS upward have no mask and are copied through from
            // the mixture unchanged.
            int passThroughBins = binsPerFrame - N_BINS;
            if (passThroughBins > 0)
            {
                System.Array.Copy(mixture.Real, frameStart + N_BINS, maskedReal, frameStart + N_BINS, passThroughBins);
                System.Array.Copy(mixture.Imag, frameStart + N_BINS, maskedImag, frameStart + N_BINS, passThroughBins);
            }
        }

        StftResult masked = new StftResult
        {
            Real = maskedReal,
            Imag = maskedImag,
            NumFrames = originalNumFrames
        };
        channels[ch] = ComputeIstftSuperOptimized(masked);
    }

    // Interleave the two channels.
    int samplesPerChannel = channels[0].Length;
    float[] interleaved = new float[samplesPerChannel * 2];
    for (int n = 0, dst = 0; n < samplesPerChannel; n++, dst += 2)
    {
        interleaved[dst] = channels[0][n];
        interleaved[dst + 1] = channels[1][n];
    }
    return interleaved;
}
/// <summary>
/// Inverse STFT: turns each one-sided spectrum frame back into N_FFT real samples
/// using the precomputed cos/sin tables, applies the synthesis window, and
/// overlap-adds the frames at HOP_LENGTH spacing.
/// </summary>
/// <param name="stftResult">Frame-major one-sided spectrum (N_FFT / 2 + 1 bins per frame).</param>
/// <returns>A signal of (NumFrames - 1) * HOP_LENGTH + N_FFT samples.</returns>
/// <remarks>
/// NOTE(review): this is a direct inverse DFT costing on the order of N_FFT² operations
/// per frame; an inverse FFT would be far cheaper.
/// </remarks>
private float[] ComputeIstftSuperOptimized(StftResult stftResult)
{
    // Floor for the window-energy divisor so the nearly silent edges of the
    // signal are faded in rather than amplified.
    const float MinWindowEnergy = 1e-3f;

    int numFrames = stftResult.NumFrames;
    int numBins = N_FFT / 2 + 1;
    int nyquistBin = N_FFT / 2;
    int signalLength = (numFrames - 1) * HOP_LENGTH + N_FFT;

    float[] signal = new float[signalLength];
    // Accumulated analysis-window × synthesis-window weight at each output sample.
    float[] windowEnergy = new float[signalLength];
    float invN = 1f / N_FFT;

    for (int frameIdx = 0; frameIdx < numFrames; frameIdx++)
    {
        int offset = frameIdx * HOP_LENGTH;
        Array.Clear(_ifftRealBuffer, 0, N_FFT);

        for (int k = 0; k < numBins; k++)
        {
            int specIdx = frameIdx * numBins + k;

            // Only bins 0..N_FFT/2 are stored. For a real signal, each interior
            // bin has a conjugate twin at N_FFT - k that contributes the same
            // real part, so it counts twice. DC and Nyquist have no twin.
            float weight = (k == 0 || k == nyquistBin) ? 1f : 2f;
            float real = weight * stftResult.Real[specIdx];
            float imag = weight * stftResult.Imag[specIdx];

            int trigIdx = k * N_FFT; // start of bin k's row in the tables
            for (int n = 0; n < N_FFT; n++)
            {
                _ifftRealBuffer[n] += real * _cosTable[trigIdx + n] - imag * _sinTable[trigIdx + n];
            }
        }

        // Scale by 1/N, apply the synthesis window, and overlap-add.
        for (int i = 0; i < N_FFT; i++)
        {
            float w = _windowBuffer[i];
            signal[offset + i] += _ifftRealBuffer[i] * invN * w;
            windowEnergy[offset + i] += w * w;
        }
    }

    // Each sample was windowed once on analysis and once on synthesis, and the
    // overlapping squared windows do not sum to one. Divide that weight out.
    for (int n = 0; n < signalLength; n++)
    {
        signal[n] /= Mathf.Max(windowEnergy[n], MinWindowEnergy);
    }
    return signal;
}
/// <summary>
/// Reads a 16-bit PCM WAV file and returns its samples as interleaved stereo
/// floats in [-1, 1). Also stores the file's sample rate in <c>_sampleRate</c>.
/// The RIFF chunk list is walked, so files with extra chunks before "data"
/// (for example "LIST") are handled. Mono files are duplicated onto both
/// channels because the separation pipeline expects interleaved stereo.
/// </summary>
/// <param name="path">Path of the WAV file.</param>
/// <returns>Samples interleaved as L, R, L, R, ...</returns>
/// <exception cref="InvalidDataException">The file is not a RIFF/WAVE file or lacks a fmt or data chunk.</exception>
/// <exception cref="NotSupportedException">The audio is not 16-bit PCM, or has more than two channels.</exception>
private float[] LoadWavFile(string path)
{
    byte[] fileBytes = File.ReadAllBytes(path);

    bool isRiffWave = fileBytes.Length >= 12
        && System.Text.Encoding.ASCII.GetString(fileBytes, 0, 4) == "RIFF"
        && System.Text.Encoding.ASCII.GetString(fileBytes, 8, 4) == "WAVE";
    if (!isRiffWave)
    {
        throw new InvalidDataException($"不是有效的WAV文件: {path}");
    }

    int audioFormat = 0;
    int channels = 0;
    int sampleRate = 0;
    int bitsPerSample = 0;
    int dataOffset = -1;
    int dataSize = 0;

    // Walk the chunk list: 4-byte id, 4-byte size, body, optional pad byte.
    long pos = 12;
    while (pos + 8 <= fileBytes.Length)
    {
        string chunkId = System.Text.Encoding.ASCII.GetString(fileBytes, (int)pos, 4);
        long chunkSize = BitConverter.ToUInt32(fileBytes, (int)pos + 4);
        long bodyStart = pos + 8;

        if (chunkId == "fmt ")
        {
            if (chunkSize < 16 || bodyStart + 16 > fileBytes.Length)
            {
                throw new InvalidDataException($"WAV文件的fmt块不完整: {path}");
            }
            audioFormat = BitConverter.ToUInt16(fileBytes, (int)bodyStart);
            channels = BitConverter.ToInt16(fileBytes, (int)bodyStart + 2);
            sampleRate = BitConverter.ToInt32(fileBytes, (int)bodyStart + 4);
            bitsPerSample = BitConverter.ToInt16(fileBytes, (int)bodyStart + 14);
        }
        else if (chunkId == "data")
        {
            dataOffset = (int)bodyStart;
            // Never trust the declared size beyond what is actually in the file.
            dataSize = (int)Math.Min(chunkSize, fileBytes.Length - bodyStart);
        }

        if (dataOffset >= 0 && channels > 0)
        {
            break;
        }

        // Chunk bodies are padded to an even number of bytes.
        pos = bodyStart + chunkSize + (chunkSize & 1);
    }

    if (channels <= 0 || dataOffset < 0)
    {
        throw new InvalidDataException($"WAV文件缺少fmt或data块: {path}");
    }

    // 1 = PCM, 0xFFFE = WAVE_FORMAT_EXTENSIBLE.
    bool isPcm16 = (audioFormat == 1 || audioFormat == 0xFFFE) && bitsPerSample == 16;
    if (!isPcm16)
    {
        throw new NotSupportedException($"仅支持16位PCM WAV (格式: {audioFormat}, 位深: {bitsPerSample})");
    }
    if (channels > 2)
    {
        throw new NotSupportedException($"仅支持单声道或立体声WAV (声道数: {channels})");
    }

    _sampleRate = sampleRate;

    int bytesPerFrame = channels * sizeof(short);
    int frameCount = dataSize / bytesPerFrame;
    float[] samples = new float[frameCount * 2];
    for (int frame = 0; frame < frameCount; frame++)
    {
        int frameOffset = dataOffset + frame * bytesPerFrame;
        short left = BitConverter.ToInt16(fileBytes, frameOffset);
        // Mono: reuse the single sample for the right channel.
        short right = channels == 2
            ? BitConverter.ToInt16(fileBytes, frameOffset + sizeof(short))
            : left;
        samples[frame * 2] = left / 32768f;
        samples[frame * 2 + 1] = right / 32768f;
    }
    return samples;
}
/// <summary>
/// Writes every entry of <paramref name="sources"/> to
/// <paramref name="outputDir"/> as "&lt;key&gt;.wav", creating the directory if needed.
/// </summary>
/// <param name="sources">Interleaved stereo samples keyed by stem name.</param>
/// <param name="outputDir">Destination directory.</param>
/// <exception cref="Exception">Any failure is logged and rethrown unchanged.</exception>
public void SaveToFile(Dictionary<string, float[]> sources, string outputDir)
{
    try
    {
        bool directoryMissing = !Directory.Exists(outputDir);
        if (directoryMissing)
        {
            Directory.CreateDirectory(outputDir);
        }

        foreach (KeyValuePair<string, float[]> stem in sources)
        {
            string fileName = $"{stem.Key}.wav";
            string outputPath = Path.Combine(outputDir, fileName);
            SaveWavFile(outputPath, stem.Value, _sampleRate);
            Debug.Log($"已保存: {outputPath}");
        }
    }
    catch (Exception ex)
    {
        Debug.LogError($"保存失败: {ex.Message}");
        throw;
    }
}
/// <summary>
/// Writes interleaved stereo samples as a 16-bit PCM WAV file with a 44-byte header.
/// </summary>
/// <param name="path">Destination file; overwritten if it exists.</param>
/// <param name="samples">Interleaved stereo samples, nominally in [-1, 1].</param>
/// <param name="sampleRate">Sample rate written into the header.</param>
private void SaveWavFile(string path, float[] samples, int sampleRate)
{
    const int channels = 2;
    const int bytesPerSample = 2;

    int framesPerChannel = samples.Length / channels;
    int dataBytes = framesPerChannel * channels * bytesPerSample;
    int byteRate = sampleRate * channels * bytesPerSample;

    using (var writer = new BinaryWriter(File.Create(path)))
    {
        // RIFF container header.
        writer.Write("RIFF".ToCharArray());
        writer.Write(36 + dataBytes);
        writer.Write("WAVE".ToCharArray());

        // Format chunk: 16-byte body, PCM.
        writer.Write("fmt ".ToCharArray());
        writer.Write(16);
        writer.Write((short)1);
        writer.Write((short)channels);
        writer.Write(sampleRate);
        writer.Write(byteRate);
        writer.Write((short)(channels * bytesPerSample));
        writer.Write((short)16);

        // Data chunk.
        writer.Write("data".ToCharArray());
        writer.Write(dataBytes);
        for (int i = 0; i < samples.Length; i++)
        {
            // Scale to the 16-bit range and clamp so overshoots saturate instead of wrapping.
            float scaled = Mathf.Clamp(samples[i] * 32767f, -32768, 32767);
            writer.Write((short)scaled);
        }
    }
}
/// <summary>
/// Disposes both ONNX models, if they were loaded.
/// </summary>
public void Dispose()
{
    if (_vocalsModel != null)
    {
        _vocalsModel.Dispose();
    }
    if (_accompanimentModel != null)
    {
        _accompanimentModel.Dispose();
    }
}
/// <summary>
/// Releases the models when this component is destroyed.
/// </summary>
private void OnDestroy() => Dispose();
}
csharp
using UnityEngine;
using System.Collections;
public class SeparatorExample : MonoBehaviour
{
private AudioSeparator separator;
private bool isProcessing = false;
/// <summary>
/// Adds an <c>AudioSeparator</c> component, checks that both model files exist,
/// loads the models on a background thread through Loom, and then starts the
/// separation coroutine from the main thread.
/// </summary>
public void Start()
{
    // Make sure the thread dispatcher exists.
    if (Loom.Current == null)
    {
        Loom.Initialize();
    }

    separator = gameObject.AddComponent<AudioSeparator>();

    string modelPath1 = $"{Application.streamingAssetsPath}/2stems/vocals.onnx";
    string modelPath2 = $"{Application.streamingAssetsPath}/2stems/accompaniment.onnx";

    Debug.Log("=== 开始初始化分离器 ===");
    Debug.Log($"模型路径1: {modelPath1}");
    Debug.Log($"模型路径2: {modelPath2}");

    // Stop at the first model file that is missing.
    foreach (string candidate in new[] { modelPath1, modelPath2 })
    {
        if (!System.IO.File.Exists(candidate))
        {
            Debug.LogError($"❌ 模型文件不存在: {candidate}");
            return;
        }
    }
    Debug.Log("✓ 模型文件存在");

    // Load the models off the main thread.
    Loom.RunAsync(() =>
    {
        try
        {
            Debug.Log(">> [后台线程] 开始加载模型...");
            separator.Initialize(modelPath1, modelPath2);
            Debug.Log(">> [后台线程] 模型加载完成");

            // The coroutine is started from the main thread.
            Loom.QueueOnMainThread(() =>
            {
                Debug.Log("<< [主线程] 准备执行音频分离");
                StartCoroutine(PerformSeparation());
            });
        }
        catch (System.Exception loadError)
        {
            Debug.LogError($"❌ [后台线程] 初始化失败: {loadError.Message}\n{loadError.StackTrace}");
            Loom.QueueOnMainThread(() =>
            {
                Debug.LogError("分离器初始化失败,请检查模型文件和ONNX运行时");
            });
        }
    });
}
/// <summary>
/// Coroutine that separates one fixed input file on a background task, polls once
/// per second until the task finishes, then saves the resulting stems.
/// </summary>
private IEnumerator PerformSeparation()
{
    if (isProcessing)
    {
        Debug.LogWarning("⚠ 正在处理中,请等待...");
        yield break;
    }
    isProcessing = true;

    string audioPath = $"{Application.dataPath}/qi-feng-le-zh.wav";
    string outputDir = $"{Application.dataPath}/SeparatedAudio/";
    Debug.Log("\n=== 开始音频分离 ===");
    Debug.Log($"输入文件: {audioPath}");
    Debug.Log($"输出目录: {outputDir}");

    bool inputExists = System.IO.File.Exists(audioPath);
    if (!inputExists)
    {
        Debug.LogError($"❌ 音频文件不存在: {audioPath}");
        isProcessing = false;
        yield break;
    }
    Debug.Log("✓ 音频文件存在");

    // The heavy work runs on a worker thread. A failure is logged there and
    // reported to this coroutine as a null result.
    var worker = System.Threading.Tasks.Task.Run(() =>
    {
        try
        {
            Debug.Log(">> [后台线程] 开始加载音频文件...");
            var stems = separator.SeparateFromFile(audioPath);
            Debug.Log($">> [后台线程] 分离完成,获得 {stems.Count} 个音频源");
            return stems;
        }
        catch (System.Exception workerError)
        {
            Debug.LogError($"❌ [后台线程] 分离失败: {workerError.Message}\n{workerError.StackTrace}");
            return null;
        }
    });

    // Poll once per second until the worker is done.
    for (; ; )
    {
        if (worker.IsCompleted)
        {
            break;
        }
        yield return new WaitForSeconds(1f);
        Debug.Log("⏳ 正在处理音频... (这可能需要几分钟)");
    }

    var separated = worker.Result;
    if (separated != null)
    {
        // Saving happens here, on the coroutine's thread.
        try
        {
            Debug.Log("<< [主线程] 开始保存文件");
            separator.SaveToFile(separated, outputDir);
            Debug.Log($"✓ 分离完成!文件已保存至: {outputDir}");
        }
        catch (System.Exception saveError)
        {
            Debug.LogError($"❌ [主线程] 保存失败: {saveError.Message}\n{saveError.StackTrace}");
        }
    }

    isProcessing = false;
}
}
模型文件
https://github.com/deezer/spleeter/issues/937

效果
工程中已经放置了2个示例音频文件,可以分离完之后听一下


最后是工程地址
https://github.com/xue-fei/spleeter-unity.git
2 stems and 4 stems models have high performances on the musdb dataset. Spleeter is also very fast as it can perform separation of audio files to 4 stems 100x faster than real-time when run on a GPU.
当前仅跑在CPU,所以极慢......