MATLAB 多窗谱谱减法语音去噪

多窗谱谱减法 (Multi-taper Spectral Subtraction) 语音去噪实现，这是一种比传统谱减法更鲁棒的语音增强方法。

MATLAB 实现

2 主程序：多窗谱谱减法语音去噪

matlab 复制代码

%% 多窗谱谱减法语音去噪
% 方法：Multi-taper Spectral Subtraction
% 特点：减少音乐噪声，提高语音质量
% 适用：非平稳噪声环境下的语音增强

clear; clc; close all;
warning off;

%% 参数设置
frame_len = 256;        % 帧长 (16kHz采样率下约16ms)
frame_shift = 128;      % 帧移 (50%重叠)
fs = 16000;             % 采样率
n_tapers = 4;           % 多窗谱使用的Slepian窗数量
alpha = 2.5;            % 过减因子 (通常2-4)
beta = 0.02;            % 谱下限参数 (防止音乐噪声)
gamma = 0.8;            % 平滑因子
noise_frames = 10;      % 用于噪声估计的初始静音帧数

%% 读取语音文件
[noisy_speech, fs] = audioread('noisy_speech.wav');
if size(noisy_speech, 2) > 1
    noisy_speech = mean(noisy_speech, 2); % 转为单声道
end

% 如果有纯净语音用于评估
try
    [clean_speech, ~] = audioread('clean_speech.wav');
    has_clean = true;
catch
    has_clean = false;
end

fprintf('=== 多窗谱谱减法语音去噪 ===\n');
fprintf('采样率: %d Hz\n', fs);
fprintf('帧长: %d 样本 (%.1f ms)\n', frame_len, frame_len/fs*1000);
fprintf('多窗谱窗数: %d\n', n_tapers);
fprintf('过减因子 α: %.1f\n', alpha);
fprintf('谱下限 β: %.3f\n', beta);

%% 多窗谱谱减法处理
enhanced_speech = multi_taper_spectral_subtraction(...
    noisy_speech, fs, frame_len, frame_shift, ...
    n_tapers, alpha, beta, gamma, noise_frames);

%% 评估指标计算
if has_clean
    % 计算SNR改善
    snr_before = 10*log10(sum(clean_speech.^2)/sum((noisy_speech-clean_speech).^2));
    snr_after = 10*log10(sum(clean_speech.^2)/sum((enhanced_speech-clean_speech).^2));
    
    fprintf('\n=== 客观评估 ===\n');
    fprintf('输入SNR: %.2f dB\n', snr_before);
    fprintf('输出SNR: %.2f dB\n', snr_after);
    fprintf('SNR改善: %.2f dB\n', snr_after - snr_before);
    
    % 计算PESQ分数（需要Voicebox工具箱）
    try
        pesq_score = pesq(clean_speech, enhanced_speech, fs);
        fprintf('PESQ分数: %.3f\n', pesq_score);
    catch
        fprintf('PESQ分数: 未计算（需要Voicebox工具箱）\n');
    end
end

%% 保存结果
audiowrite('enhanced_speech.wav', enhanced_speech, fs);
fprintf('增强语音已保存为: enhanced_speech.wav\n');

%% 可视化结果
visualize_results(noisy_speech, enhanced_speech, clean_speech, fs, has_clean);

%% ========== 核心函数 ==========

function enhanced = multi_taper_spectral_subtraction(...
    noisy, fs, frame_len, frame_shift, n_tapers, alpha, beta, gamma, noise_frames)
    % 多窗谱谱减法主函数
    
    % 分帧
    frames = enframe(noisy, frame_len, frame_shift);
    n_frames = size(frames, 1);
    
    % 生成Slepian窗（多窗谱的核心）
    slepian_windows = dpss(frame_len, n_tapers);
    
    % 初始化
    enhanced_frames = zeros(size(frames));
    noise_psd = zeros(frame_len, 1);
    prev_gain = ones(frame_len, 1);
    
    % 噪声估计（使用前几帧）
    for i = 1:noise_frames
        frame = frames(i, :)';
        noise_psd = noise_psd + abs(fft(frame)).^2;
    end
    noise_psd = noise_psd / noise_frames;
    
    % 处理每一帧
    for i = 1:n_frames
        frame = frames(i, :)';
        
        % 多窗谱估计
        multi_taper_spec = zeros(frame_len, 1);
        for taper = 1:n_tapers
            windowed_frame = frame .* slepian_windows(taper, :)';
            spec = fft(windowed_frame);
            multi_taper_spec = multi_taper_spec + abs(spec).^2;
        end
        multi_taper_spec = multi_taper_spec / n_tapers;
        
        % 计算增益函数（谱减法）
        gain = max(1 - alpha * sqrt(noise_psd ./ (multi_taper_spec + eps)), beta);
        
        % 时间平滑
        gain = gamma * prev_gain + (1 - gamma) * gain;
        prev_gain = gain;
        
        % 应用增益
        enhanced_spec = gain .* fft(frame);
        
        % IFFT得到时域信号
        enhanced_frame = real(ifft(enhanced_spec));
        enhanced_frames(i, :) = enhanced_frame';
        
        % 更新噪声估计（语音活动检测）
        if i > noise_frames
            noise_psd = 0.95 * noise_psd + 0.05 * multi_taper_spec;
        end
    end
    
    % 重叠相加重构语音
    enhanced = overlap_add(enhanced_frames, frame_shift);
end

function frames = enframe(x, win_len, shift_len)
    % 分帧函数
    n_frames = floor((length(x) - win_len) / shift_len) + 1;
    frames = zeros(n_frames, win_len);
    
    for i = 1:n_frames
        start_idx = (i-1)*shift_len + 1;
        end_idx = start_idx + win_len - 1;
        frames(i, :) = x(start_idx:end_idx);
    end
end

function x = overlap_add(frames, shift_len)
    % 重叠相加重构
    [n_frames, frame_len] = size(frames);
    x_len = (n_frames-1)*shift_len + frame_len;
    x = zeros(x_len, 1);
    
    for i = 1:n_frames
        start_idx = (i-1)*shift_len + 1;
        end_idx = start_idx + frame_len - 1;
        x(start_idx:end_idx) = x(start_idx:end_idx) + frames(i, :)';
    end
end

function windows = dpss(N, K)
    % 生成离散扁长球序列（DPSS）窗
    % N: 窗长度
    % K: 窗数量
    % 返回: K×N 的窗矩阵
    
    % 时间带宽积
    NW = 3;
    
    % 计算DPSS
    [windows, ~] = dpss(N, NW, K);
    
    % 归一化
    for i = 1:K
        windows(i, :) = windows(i, :) / norm(windows(i, :));
    end
end

%% ========== 可视化函数 ==========
function visualize_results(noisy, enhanced, clean, fs, has_clean)
    % 可视化结果
    
    % 时间轴
    t = (0:length(noisy)-1)/fs;
    
    figure('Position', [100, 100, 1200, 800]);
    
    % 波形图
    subplot(3, 2, 1);
    plot(t, noisy, 'b', 'LineWidth', 1);
    xlabel('时间 (s)'); ylabel('幅度');
    title('带噪语音波形');
    grid on; axis tight;
    
    subplot(3, 2, 2);
    plot(t, enhanced, 'r', 'LineWidth', 1);
    xlabel('时间 (s)'); ylabel('幅度');
    title('增强语音波形');
    grid on; axis tight;
    
    if has_clean
        subplot(3, 2, 3);
        plot(t, clean, 'g', 'LineWidth', 1);
        xlabel('时间 (s)'); ylabel('幅度');
        title('纯净语音波形');
        grid on; axis tight;
    end
    
    % 语谱图
    subplot(3, 2, 4);
    spectrogram(noisy, hamming(256), 128, 512, fs, 'yaxis');
    title('带噪语音语谱图');
    colorbar;
    
    subplot(3, 2, 5);
    spectrogram(enhanced, hamming(256), 128, 512, fs, 'yaxis');
    title('增强语音语谱图');
    colorbar;
    
    if has_clean
        subplot(3, 2, 6);
        spectrogram(clean, hamming(256), 128, 512, fs, 'yaxis');
        title('纯净语音语谱图');
        colorbar;
    end
    
    % 频谱对比
    figure('Position', [100, 100, 800, 600]);
    
    % 计算平均频谱
    noisy_spec = abs(fft(enframe(noisy, 1024, 512)));
    enhanced_spec = abs(fft(enframe(enhanced, 1024, 512)));
    
    freq = (0:size(noisy_spec, 2)-1)*fs/size(noisy_spec, 2);
    
    subplot(2, 1, 1);
    plot(freq(1:512), mean(noisy_spec(:, 1:512), 1), 'b', 'LineWidth', 1.5);
    hold on;
    plot(freq(1:512), mean(enhanced_spec(:, 1:512), 1), 'r', 'LineWidth', 1.5);
    xlabel('频率 (Hz)'); ylabel('幅度');
    title('平均频谱对比');
    legend('带噪语音', '增强语音');
    grid on; axis tight;
    
    % 频谱差异
    subplot(2, 1, 2);
    diff_spec = mean(enhanced_spec(:, 1:512), 1) - mean(noisy_spec(:, 1:512), 1);
    plot(freq(1:512), diff_spec, 'g', 'LineWidth', 1.5);
    xlabel('频率 (Hz)'); ylabel('幅度差');
    title('频谱差异 (增强 - 带噪)');
    grid on; axis tight;
    
    sgtitle('多窗谱谱减法语音增强结果');
end

2 改进的变参数多窗谱谱减法

matlab 复制代码

%% 改进版：自适应多窗谱谱减法
function enhanced = adaptive_multi_taper_ss(noisy, fs)
    % 自适应参数的多窗谱谱减法
    
    % 参数
    frame_len = 256;
    frame_shift = 128;
    n_tapers = 4;
    
    % 分帧
    frames = enframe(noisy, frame_len, frame_shift);
    n_frames = size(frames, 1);
    
    % 生成Slepian窗
    slepian_windows = dpss(frame_len, n_tapers);
    
    % 初始化
    enhanced_frames = zeros(size(frames));
    noise_psd = zeros(frame_len, 1);
    prev_gain = ones(frame_len, 1);
    
    % 初始噪声估计
    noise_est_frames = min(10, round(n_frames/10));
    for i = 1:noise_est_frames
        frame = frames(i, :)';
        noise_psd = noise_psd + abs(fft(frame)).^2;
    end
    noise_psd = noise_psd / noise_est_frames;
    
    % 处理每一帧
    for i = 1:n_frames
        frame = frames(i, :)';
        
        % 多窗谱估计
        multi_taper_spec = zeros(frame_len, 1);
        for taper = 1:n_tapers
            windowed_frame = frame .* slepian_windows(taper, :)';
            spec = fft(windowed_frame);
            multi_taper_spec = multi_taper_spec + abs(spec).^2;
        end
        multi_taper_spec = multi_taper_spec / n_tapers;
        
        % 计算信噪比
        snr_est = 10*log10(mean(multi_taper_spec) / mean(noise_psd));
        
        % 自适应过减因子
        if snr_est > 20
            alpha = 1.0;  % 高信噪比，少减
        elseif snr_est > 10
            alpha = 2.0;
        elseif snr_est > 0
            alpha = 3.0;
        else
            alpha = 4.0;  % 低信噪比，多减
        end
        
        % 自适应谱下限
        beta = 0.01 * (1 - exp(-snr_est/10));
        
        % 计算增益函数
        gain = max(1 - alpha * sqrt(noise_psd ./ (multi_taper_spec + eps)), beta);
        
        % 时间平滑
        gamma = 0.7 + 0.2 * exp(-snr_est/5);  % 自适应平滑因子
        gain = gamma * prev_gain + (1 - gamma) * gain;
        prev_gain = gain;
        
        % 应用增益
        enhanced_spec = gain .* fft(frame);
        
        % IFFT
        enhanced_frame = real(ifft(enhanced_spec));
        enhanced_frames(i, :) = enhanced_frame';
        
        % 更新噪声估计（语音活动检测）
        vad_threshold = 0.1;
        if mean(multi_taper_spec) < mean(noise_psd) * (1 + vad_threshold)
            noise_psd = 0.99 * noise_psd + 0.01 * multi_taper_spec;
        end
    end
    
    % 重构语音
    enhanced = overlap_add(enhanced_frames, frame_shift);
end

3 多窗谱谱减法与传统方法对比

matlab 复制代码

%% 多窗谱谱减法与传统方法对比
function compare_methods(noisy, fs)
    % 对比传统谱减法、维纳滤波和多窗谱谱减法
    
    frame_len = 256;
    frame_shift = 128;
    
    % 方法1：传统谱减法
    enhanced_ss = traditional_spectral_subtraction(noisy, fs, frame_len, frame_shift);
    
    % 方法2：维纳滤波
    enhanced_wiener = wiener_filtering(noisy, fs, frame_len, frame_shift);
    
    % 方法3：多窗谱谱减法
    enhanced_mtss = multi_taper_spectral_subtraction(...
        noisy, fs, frame_len, frame_shift, 4, 2.5, 0.02, 0.8, 10);
    
    % 可视化对比
    figure('Position', [100, 100, 1400, 800]);
    
    t = (0:length(noisy)-1)/fs;
    
    % 波形对比
    subplot(2, 2, 1);
    plot(t, noisy, 'b', 'LineWidth', 1);
    title('带噪语音');
    xlabel('时间 (s)'); ylabel('幅度');
    grid on;
    
    subplot(2, 2, 2);
    plot(t, enhanced_ss, 'r', 'LineWidth', 1);
    title('传统谱减法');
    xlabel('时间 (s)'); ylabel('幅度');
    grid on;
    
    subplot(2, 2, 3);
    plot(t, enhanced_wiener, 'g', 'LineWidth', 1);
    title('维纳滤波');
    xlabel('时间 (s)'); ylabel('幅度');
    grid on;
    
    subplot(2, 2, 4);
    plot(t, enhanced_mtss, 'm', 'LineWidth', 1);
    title('多窗谱谱减法');
    xlabel('时间 (s)'); ylabel('幅度');
    grid on;
    
    % 语谱图对比
    figure('Position', [100, 100, 1400, 800]);
    
    subplot(2, 2, 1);
    spectrogram(noisy, hamming(256), 128, 512, fs, 'yaxis');
    title('带噪语音');
    
    subplot(2, 2, 2);
    spectrogram(enhanced_ss, hamming(256), 128, 512, fs, 'yaxis');
    title('传统谱减法');
    
    subplot(2, 2, 3);
    spectrogram(enhanced_wiener, hamming(256), 128, 512, fs, 'yaxis');
    title('维纳滤波');
    
    subplot(2, 2, 4);
    spectrogram(enhanced_mtss, hamming(256), 128, 512, fs, 'yaxis');
    title('多窗谱谱减法');
    
    % 计算客观指标
    fprintf('\n=== 方法对比 ===\n');
    
    % 计算音乐噪声（高频能量比）
    music_noise_ss = calculate_music_noise(enhanced_ss, fs);
    music_noise_wiener = calculate_music_noise(enhanced_wiener, fs);
    music_noise_mtss = calculate_music_noise(enhanced_mtss, fs);
    
    fprintf('音乐噪声指标（越小越好）：\n');
    fprintf('传统谱减法: %.4f\n', music_noise_ss);
    fprintf('维纳滤波: %.4f\n', music_noise_wiener);
    fprintf('多窗谱谱减法: %.4f\n', music_noise_mtss);
end

function music_noise = calculate_music_noise(signal, fs)
    % 计算音乐噪声指标（高频能量比）
    [frames, ~] = buffer(signal, 256, 128, 'nodelay');
    music_noise = 0;
    
    for i = 1:size(frames, 2)
        frame = frames(:, i);
        spec = abs(fft(frame));
        freq = (0:length(spec)-1)*fs/length(spec);
        
        % 高频能量（>4kHz）与总能量比
        high_freq_energy = sum(spec(freq > 4000));
        total_energy = sum(spec);
        music_noise = music_noise + high_freq_energy / (total_energy + eps);
    end
    
    music_noise = music_noise / size(frames, 2);
end

4 实际应用示例

matlab 复制代码

%% 实际应用示例：处理真实录音
function practical_example()
    % 录制或加载语音
    fs = 16000;
    duration = 5;  % 录制5秒
    
    fprintf('请说话...（录制%d秒）\n', duration);
    recObj = audiorecorder(fs, 16, 1);
    recordblocking(recObj, duration);
    noisy = getaudiodata(recObj, 'double');
    
    % 添加模拟噪声（白噪声）
    noise_level = 0.05;
    noisy = noisy + noise_level * randn(size(noisy));
    
    % 应用多窗谱谱减法
    enhanced = multi_taper_spectral_subtraction(...
        noisy, fs, 256, 128, 4, 2.5, 0.02, 0.8, 10);
    
    % 播放结果
    fprintf('播放带噪语音...\n');
    sound(noisy, fs);
    pause(duration + 1);
    
    fprintf('播放增强语音...\n');
    sound(enhanced, fs);
    
    % 保存结果
    audiowrite('recorded_noisy.wav', noisy, fs);
    audiowrite('recorded_enhanced.wav', enhanced, fs);
    
    fprintf('录音已保存\n');
end

%% 批量处理文件夹中的音频文件
function batch_process(folder_path)
    files = dir(fullfile(folder_path, '*.wav'));
    
    for i = 1:length(files)
        filename = fullfile(folder_path, files(i).name);
        [noisy, fs] = audioread(filename);
        
        % 增强处理
        enhanced = multi_taper_spectral_subtraction(...
            noisy, fs, 256, 128, 4, 2.5, 0.02, 0.8, 10);
        
        % 保存增强后的文件
        [path, name, ext] = fileparts(filename);
        enhanced_filename = fullfile(path, [name '_enhanced' ext]);
        audiowrite(enhanced_filename, enhanced, fs);
        
        fprintf('处理完成: %s -> %s\n', files(i).name, [name '_enhanced' ext]);
    end
end

参考代码多窗谱谱减法，语音去噪 www.youwenfan.com/contentcst/77736.html

参数调优指南

参数	推荐值	作用	调整建议
`n_tapers`	3-5	多窗谱窗数量	增加可减少频谱估计方差，但增加计算量
`alpha`	2.0-4.0	过减因子	噪声越大，值应越大
`beta`	0.01-0.05	谱下限	防止音乐噪声，太小会产生残留噪声
`gamma`	0.7-0.9	时间平滑因子	越大越平滑，但可能丢失瞬态信息
`noise_frames`	5-20	噪声估计帧数	根据静音段长度调整

算法优势

减少音乐噪声：多窗谱估计降低频谱估计的方差
更好的语音保留：自适应参数调整
计算效率：相比传统谱减法增加有限计算量
鲁棒性：对非平稳噪声有更好的适应性

性能评估指标

matlab 复制代码

%% 综合评估函数
function evaluate_performance(clean, noisy, enhanced, fs)
    % 计算多种客观指标
    
    % SNR改善
    snr_before = 10*log10(sum(clean.^2)/sum((noisy-clean).^2));
    snr_after = 10*log10(sum(clean.^2)/sum((enhanced-clean).^2));
    
    % 分段信噪比 (SegSNR)
    seg_snr_before = segmental_snr(clean, noisy, fs);
    seg_snr_after = segmental_snr(clean, enhanced, fs);
    
    % 对数谱距离 (LSD)
    lsd = log_spectral_distance(clean, enhanced, fs);
    
    % 语音质量感知评估 (PESQ)
    try
        pesq_score = pesq(clean, enhanced, fs);
    catch
        pesq_score = NaN;
    end
    
    fprintf('\n=== 性能评估 ===\n');
    fprintf('输入SNR: %.2f dB\n', snr_before);
    fprintf('输出SNR: %.2f dB\n', snr_after);
    fprintf('SNR改善: %.2f dB\n', snr_after - snr_before);
    fprintf('分段SNR改善: %.2f dB\n', seg_snr_after - seg_snr_before);
    fprintf('对数谱距离: %.2f\n', lsd);
    if ~isnan(pesq_score)
        fprintf('PESQ分数: %.3f\n', pesq_score);
    end
end

function seg_snr = segmental_snr(clean, enhanced, fs)
    % 计算分段信噪比
    frame_len = 256;
    frame_shift = 128;
    
    frames_clean = enframe(clean, frame_len, frame_shift);
    frames_enhanced = enframe(enhanced, frame_len, frame_shift);
    
    seg_snr = 0;
    n_frames = size(frames_clean, 1);
    
    for i = 1:n_frames
        signal_power = mean(frames_clean(i, :).^2);
        noise_power = mean((frames_enhanced(i, :) - frames_clean(i, :)).^2);
        
        if noise_power > 0
            seg_snr = seg_snr + 10*log10(signal_power/noise_power);
        end
    end
    
    seg_snr = seg_snr / n_frames;
end

function lsd = log_spectral_distance(clean, enhanced, fs)
    % 计算对数谱距离
    frame_len = 256;
    frame_shift = 128;
    
    frames_clean = enframe(clean, frame_len, frame_shift);
    frames_enhanced = enframe(enhanced, frame_len, frame_shift);
    
    lsd = 0;
    n_frames = size(frames_clean, 1);
    
    for i = 1:n_frames
        spec_clean = abs(fft(frames_clean(i, :)'));
        spec_enhanced = abs(fft(frames_enhanced(i, :)'));
        
        % 避免零值
        spec_clean = max(spec_clean, eps);
        spec_enhanced = max(spec_enhanced, eps);
        
        % 对数谱距离
        lsd = lsd + sqrt(mean((10*log10(spec_clean) - 10*log10(spec_enhanced)).^2));
    end
    
    lsd = lsd / n_frames;
end

使用建议

对于强噪声环境 ：增加 alpha 到 3.5-4.0
对于弱噪声环境 ：减少 alpha 到 1.5-2.0
对于音乐噪声敏感 ：减小 beta 到 0.005-0.01
对于语音质量要求高：使用自适应版本
实时应用 ：减少 n_tapers 到 2-3 以降低计算复杂度

这个多窗谱谱减法实现可以有效减少传统谱减法产生的音乐噪声，同时保持较好的语音质量。