MATLAB近红外光谱预处理代码,包含移动窗口平滑(MSV)、一阶求导、二阶求导等功能,以及详细的可视化分析。
一、MATLAB预处理代码
1.1 主预处理脚本 (nir_preprocessing.m)
matlab
%% NIR spectral preprocessing: smoothing and differentiation
% Driver script: generates simulated spectra, applies moving-window
% smoothing, Savitzky-Golay smoothing, first/second SG derivatives and SNV
% normalization, then plots, analyses and saves the results.
clc; clear; close all;
%% 1. Load the NIR spectral data
% Expected layout: rows = samples, columns = wavelength points.
% No real data is read here; simulated spectra are generated instead.
[N, wavelengths] = generate_sample_nir_data();
fprintf('光谱数据维度: %d 样本 × %d 波长点\n', size(N,1), size(N,2));
%% 2. Preprocessing parameters
params.window_size = 11; % window length in points (odd)
params.poly_order = 2; % polynomial order of the local fit
params.derivative_order = 1; % derivative order (1 or 2); not read by any later line of this script
%% 3. Run each preprocessing method
fprintf('\n=== 开始光谱预处理 ===\n');
% 3.1 Keep a copy of the raw spectra under a descriptive name
original_spectra = N;
fprintf('✓ 原始光谱已加载\n');
% 3.2 Moving-window (moving-average) smoothing
smoothed_spectra = moving_window_smoothing(N, params.window_size);
fprintf('✓ 移动窗口平滑完成 (窗口大小=%d)\n', params.window_size);
% 3.3 Savitzky-Golay smoothing (local polynomial fit)
sg_smoothed = savitzky_golay_smoothing(N, params.window_size, params.poly_order);
fprintf('✓ Savitzky-Golay平滑完成\n');
% 3.4 First derivative via Savitzky-Golay; the order is passed as the
% literal 1 rather than params.derivative_order
first_deriv = savitzky_golay_derivative(N, params.window_size, params.poly_order, 1);
fprintf('✓ 一阶导数计算完成\n');
% 3.5 Second derivative via Savitzky-Golay
second_deriv = savitzky_golay_derivative(N, params.window_size, params.poly_order, 2);
fprintf('✓ 二阶导数计算完成\n');
% 3.6 Normalization (optional); the result is not passed to the plotting,
% analysis or saving steps below
normalized_spectra = normalize_spectra(N);
fprintf('✓ 光谱标准化完成\n');
%% 4. Visualize the results
visualize_preprocessing_results(original_spectra, smoothed_spectra, ...
sg_smoothed, first_deriv, second_deriv, wavelengths);
%% 5. Spectral feature analysis
analyze_spectral_features(original_spectra, first_deriv, second_deriv, wavelengths);
%% 6. Save the preprocessing results
% Note: the moving-window result is saved; sg_smoothed is not passed on.
save_preprocessing_results(original_spectra, smoothed_spectra, first_deriv, ...
second_deriv, params, wavelengths);
fprintf('\n=== 预处理完成!===\n');
1.2 移动窗口平滑函数 (moving_window_smoothing.m)
matlab
function smoothed = moving_window_smoothing(spectra, window_size)
% MOVING_WINDOW_SMOOTHING  Centered moving-average smoothing of each spectrum.
%
%   smoothed = moving_window_smoothing(spectra, window_size)
%
% Inputs:
%   spectra     - spectra matrix [n_samples x n_wavelengths]; every row is
%                 smoothed independently along the wavelength axis.
%   window_size - window length in points (positive scalar). An even value
%                 is increased by one so the window is symmetric about the
%                 current point.
% Output:
%   smoothed    - smoothed spectra, same size as spectra.
%
% Near both ends the window is truncated to the points that exist, so the
% edge values are averages over fewer points (no zero padding).
validateattributes(spectra, {'numeric'}, {'2d'}, ...
    'moving_window_smoothing', 'spectra', 1);
validateattributes(window_size, {'numeric'}, ...
    {'scalar', 'real', 'finite', 'positive'}, ...
    'moving_window_smoothing', 'window_size', 2);
n_samples = size(spectra, 1);
% Force an odd window length
if mod(window_size, 2) == 0
    window_size = window_size + 1;
end
half_window = floor(window_size / 2);
% movmean with a [before after] window and its default 'shrink' endpoint
% rule averages exactly the samples max(1,i-h):min(n,i+h), which is what
% the explicit per-point loop used to compute.
smoothed = movmean(spectra, [half_window, half_window], 2);
fprintf(' 移动窗口平滑: 窗口大小=%d, 样本数=%d\n', window_size, n_samples);
end
1.3 Savitzky-Golay平滑函数 (savitzky_golay_smoothing.m)
matlab
function smoothed = savitzky_golay_smoothing(spectra, window_size, poly_order)
% SAVITZKY_GOLAY_SMOOTHING  Savitzky-Golay smoothing of each spectrum.
%
%   smoothed = savitzky_golay_smoothing(spectra, window_size, poly_order)
%
% Inputs:
%   spectra     - spectra matrix [n_samples x n_wavelengths].
%   window_size - filter window length in points.
%   poly_order  - polynomial order of the local fit.
% Output:
%   smoothed    - filtered spectra, same size as spectra.
%
% The filter weights come from compute_sg_coefficients(window_size,
% poly_order, 0); the effective window length is the length of the vector
% it returns. Each spectrum is extended at both ends by odd (point)
% reflection before filtering, so the ends are not pulled toward zero the
% way a zero-padded 'same' convolution would pull them.
%
% Errors:
%   savitzky_golay_smoothing:windowTooLarge when a spectrum has too few
%   points to build the reflected extension.
n_wavelengths = size(spectra, 2);
coeffs = compute_sg_coefficients(window_size, poly_order, 0);
coeffs = coeffs(:);
half_window = (numel(coeffs) - 1) / 2;
if isempty(spectra)
    smoothed = zeros(size(spectra));
elseif n_wavelengths <= half_window
    error('savitzky_golay_smoothing:windowTooLarge', ...
        'Window of %d points needs more than %d wavelength points, got %d.', ...
        numel(coeffs), half_window, n_wavelengths);
else
    % Odd reflection about the end points: x(1-k) = 2*x(1) - x(1+k)
    left_pad = 2 * spectra(:, 1) - spectra(:, half_window+1:-1:2);
    right_pad = 2 * spectra(:, end) - spectra(:, end-1:-1:end-half_window);
    padded = [left_pad, spectra, right_pad];
    % conv2 reverses its kernel; flipping first makes coeffs(k) weight the
    % k-th point of the window in natural (left-to-right) order.
    kernel = flipud(coeffs).';
    % One row-wise convolution over the whole matrix; 'valid' drops
    % exactly the padded columns.
    smoothed = conv2(padded, kernel, 'valid');
end
fprintf(' SG平滑: 窗口=%d, 多项式阶数=%d\n', window_size, poly_order);
end
1.4 Savitzky-Golay求导函数 (savitzky_golay_derivative.m)
matlab
function derivative = savitzky_golay_derivative(spectra, window_size, poly_order, deriv_order, delta_lambda)
% SAVITZKY_GOLAY_DERIVATIVE  Savitzky-Golay derivative of each spectrum.
%
%   derivative = savitzky_golay_derivative(spectra, window_size, ...
%                                          poly_order, deriv_order)
%   derivative = savitzky_golay_derivative(..., delta_lambda)
%
% Inputs:
%   spectra      - spectra matrix [n_samples x n_wavelengths].
%   window_size  - filter window length in points.
%   poly_order   - polynomial order of the local fit.
%   deriv_order  - derivative order (1 or 2).
%   delta_lambda - (optional) spacing between adjacent wavelength points,
%                  assumed uniform. Defaults to 1, i.e. the derivative is
%                  taken with respect to the point index.
% Output:
%   derivative   - derivative spectra, same size as spectra.
%
% The weights come from compute_sg_coefficients(window_size, poly_order,
% deriv_order) and are applied as a correlation, so weight k multiplies
% the k-th point of the window in left-to-right order. A plain conv()
% would reverse the antisymmetric first-derivative weights and flip the
% sign of the result. Spectra are extended by odd (point) reflection at
% both ends so the edges do not see an artificial step down to zero.
%
% Errors:
%   savitzky_golay_derivative:windowTooLarge when a spectrum has too few
%   points to build the reflected extension.
if nargin < 5 || isempty(delta_lambda)
    delta_lambda = 1;
end
n_wavelengths = size(spectra, 2);
coeffs = compute_sg_coefficients(window_size, poly_order, deriv_order);
coeffs = coeffs(:);
half_window = (numel(coeffs) - 1) / 2;
if isempty(spectra)
    derivative = zeros(size(spectra));
elseif n_wavelengths <= half_window
    error('savitzky_golay_derivative:windowTooLarge', ...
        'Window of %d points needs more than %d wavelength points, got %d.', ...
        numel(coeffs), half_window, n_wavelengths);
else
    % Odd reflection about the end points: x(1-k) = 2*x(1) - x(1+k).
    % It continues a linear trend exactly, so the slope is kept at the edges.
    left_pad = 2 * spectra(:, 1) - spectra(:, half_window+1:-1:2);
    right_pad = 2 * spectra(:, end) - spectra(:, end-1:-1:end-half_window);
    padded = [left_pad, spectra, right_pad];
    % conv2 reverses its kernel, so flip first to obtain a correlation.
    kernel = flipud(coeffs).';
    derivative = conv2(padded, kernel, 'valid');
    % Convert from per-point to per-wavelength units
    derivative = derivative / (delta_lambda ^ deriv_order);
end
fprintf(' SG求导%d阶: 窗口=%d, 多项式阶数=%d\n', deriv_order, window_size, poly_order);
end
1.5 SG系数计算函数 (compute_sg_coefficients.m)
matlab
function coeffs = compute_sg_coefficients(window_size, poly_order, deriv_order)
% COMPUTE_SG_COEFFICIENTS  Savitzky-Golay filter weights for one window.
%
%   coeffs = compute_sg_coefficients(window_size, poly_order, deriv_order)
%
% Inputs:
%   window_size - window length in points; an even value is increased by 1.
%   poly_order  - order of the polynomial fitted by least squares to the
%                 window; must be smaller than the (odd) window length.
%   deriv_order - derivative order to evaluate at the window center
%                 (0 = smoothing); must not exceed poly_order.
% Output:
%   coeffs      - column vector, one weight per window point, ordered from
%                 offset -half_window to +half_window. The dot product of
%                 coeffs with the window samples gives the deriv_order-th
%                 derivative of the fitted polynomial at the center, per
%                 unit point spacing.
%
% The weights are returned unscaled: smoothing weights already sum to 1,
% and rescaling by sum(abs(coeffs)) would lower the smoothing gain (some
% weights are negative) and destroy the physical scale of derivatives.
%
% Reference: Numerical Recipes in C, Press et al.
if mod(window_size, 2) == 0
    window_size = window_size + 1;
end
half_window = floor(window_size / 2);
if poly_order >= window_size
    error('compute_sg_coefficients:polyOrderTooHigh', ...
        'poly_order (%d) must be smaller than the window length (%d).', ...
        poly_order, window_size);
end
if deriv_order < 0 || deriv_order > poly_order || deriv_order ~= floor(deriv_order)
    error('compute_sg_coefficients:badDerivOrder', ...
        'deriv_order must be an integer between 0 and poly_order (%d).', ...
        poly_order);
end
% Vandermonde matrix: row i holds the powers 0..poly_order of offset x_i
offsets = (-half_window:half_window).';
V = offsets .^ (0:poly_order);
% Row k+1 of the pseudo-inverse maps the window samples to the fitted
% polynomial coefficient a_k.
V_pinv = pinv(V);
% The k-th derivative of sum(a_j * x^j) at x = 0 is k! * a_k
coeffs = factorial(deriv_order) * V_pinv(deriv_order + 1, :).';
end
1.6 光谱标准化函数 (normalize_spectra.m)
matlab
function normalized = normalize_spectra(spectra)
% NORMALIZE_SPECTRA  Standard Normal Variate (SNV) transform, row by row.
%
% Each spectrum (row) has its own mean subtracted and is then divided by
% its own standard deviation. A row whose standard deviation is not
% positive is only mean-centered.
%
% Input:
%   spectra    - spectra matrix [n_samples x n_wavelengths].
% Output:
%   normalized - transformed spectra, same size as spectra.
[row_count, col_count] = size(spectra);
normalized = zeros(row_count, col_count);
parfor r = 1:row_count
    row = spectra(r, :);
    centered = row - mean(row);
    spread = std(row);
    % Skip the scaling for constant rows to avoid dividing by zero
    if spread > 0
        centered = centered / spread;
    end
    normalized(r, :) = centered;
end
fprintf(' SNV标准化完成\n');
end
1.7 可视化函数 (visualize_preprocessing_results.m)
matlab
function visualize_preprocessing_results(orig, msv_smooth, sg_smooth, first_deriv, second_deriv, wavelengths)
% VISUALIZE_PREPROCESSING_RESULTS  3x3 overview figure for one spectrum.
%
% Plots the first sample (row 1) of every input matrix against wavelengths:
% raw spectrum, moving-window and Savitzky-Golay smoothing (each over the
% raw spectrum), first and second derivative, both smoothers together,
% both derivatives together, a zoom on 1300-1600 nm, and a bar chart of
% estimate_snr for the raw, SG-smoothed and first-derivative spectrum.
%
% Inputs:
%   orig, msv_smooth, sg_smooth, first_deriv, second_deriv
%               - matrices with one spectrum per row.
%   wavelengths - wavelength axis used as the x data of every line plot.
row = 1; % representative sample shown in every panel
figure('Position', [100, 100, 1400, 900], 'Name', '近红外光谱预处理结果');
raw_row = orig(row, :);
msv_row = msv_smooth(row, :);
sg_row = sg_smooth(row, :);
d1_row = first_deriv(row, :);
d2_row = second_deriv(row, :);

% Panel 1: raw spectrum
subplot(3, 3, 1);
plot(wavelengths, raw_row, 'k-', 'LineWidth', 1.5);
style_axes('吸光度', '原始光谱', false);

% Panel 2: moving-window smoothing over the raw spectrum
subplot(3, 3, 2);
plot(wavelengths, raw_row, 'k:', 'LineWidth', 1, 'DisplayName', '原始');
hold on;
plot(wavelengths, msv_row, 'b-', 'LineWidth', 1.5, 'DisplayName', 'MSV平滑');
style_axes('吸光度', '移动窗口平滑 (MSV)', true);

% Panel 3: Savitzky-Golay smoothing over the raw spectrum
subplot(3, 3, 3);
plot(wavelengths, raw_row, 'k:', 'LineWidth', 1, 'DisplayName', '原始');
hold on;
plot(wavelengths, sg_row, 'r-', 'LineWidth', 1.5, 'DisplayName', 'SG平滑');
style_axes('吸光度', 'Savitzky-Golay平滑', true);

% Panel 4: first derivative
subplot(3, 3, 4);
plot(wavelengths, d1_row, 'g-', 'LineWidth', 1.5);
style_axes('一阶导数', '一阶导数光谱', false);

% Panel 5: second derivative
subplot(3, 3, 5);
plot(wavelengths, d2_row, 'm-', 'LineWidth', 1.5);
style_axes('二阶导数', '二阶导数光谱', false);

% Panel 6: both smoothing methods against the raw spectrum
subplot(3, 3, 6);
plot(wavelengths, raw_row, 'k:', 'LineWidth', 1, 'DisplayName', '原始');
hold on;
plot(wavelengths, msv_row, 'b-', 'LineWidth', 1.2, 'DisplayName', 'MSV');
plot(wavelengths, sg_row, 'r-', 'LineWidth', 1.2, 'DisplayName', 'SG');
style_axes('吸光度', '平滑方法对比', true);

% Panel 7: first and second derivative together
subplot(3, 3, 7);
plot(wavelengths, d1_row, 'g-', 'LineWidth', 1.5, 'DisplayName', '一阶导');
hold on;
plot(wavelengths, d2_row, 'm-', 'LineWidth', 1.5, 'DisplayName', '二阶导');
style_axes('导数值', '导数光谱对比', true);

% Panel 8: zoom on the 1300-1600 nm window
subplot(3, 3, 8);
zoom_window = [1300, 1600];
in_window = wavelengths >= zoom_window(1) & wavelengths <= zoom_window(2);
plot(wavelengths(in_window), raw_row(in_window), 'k-', 'LineWidth', 1.5, 'DisplayName', '原始');
hold on;
plot(wavelengths(in_window), sg_row(in_window), 'r-', 'LineWidth', 1.5, 'DisplayName', 'SG平滑');
style_axes('吸光度', '关键吸收区域放大 (1300-1600 nm)', true);
xlim(zoom_window);

% Panel 9: SNR estimate for raw, SG-smoothed and first-derivative spectrum
subplot(3, 3, 9);
snr_values = [estimate_snr(raw_row), estimate_snr(sg_row), estimate_snr(d1_row)];
bar(snr_values);
set(gca, 'XTickLabel', {'原始', 'SG平滑', '一阶导'});
ylabel('信噪比 (SNR)');
title('信噪比改善评估');
grid on; box on;

sgtitle('近红外光谱预处理综合分析', 'FontSize', 16, 'FontWeight', 'bold');
end

function style_axes(y_text, title_text, show_legend)
% Apply the shared wavelength x-label, the given y-label and title, an
% optional legend, and grid/box styling to the current axes.
xlabel('波长 (nm)'); ylabel(y_text);
title(title_text);
if show_legend
    legend('Location', 'best');
end
grid on; box on;
end
1.8 光谱特征分析函数 (analyze_spectral_features.m)
matlab
function analyze_spectral_features(orig, first_deriv, second_deriv, wavelengths)
% ANALYZE_SPECTRAL_FEATURES  Print summary statistics of raw and derivative spectra.
%
% Prints four sections to the console:
%   1. mean and standard deviation over all elements of orig, first_deriv
%      and second_deriv;
%   2. for the nominal bands at 1450, 1720 and 2180 nm, the nearest
%      wavelength grid point and the sample-mean value of first_deriv there;
%   3. estimate_baseline_drift for orig and first_deriv and their relative
%      difference;
%   4. estimate_resolution for orig and first_deriv and their ratio.
%
% Inputs:
%   orig, first_deriv, second_deriv - matrices with one spectrum per row.
%   wavelengths                     - wavelength axis of the columns.
fprintf('\n=== 光谱特征分析 ===\n');
% 1. Summary statistics
fprintf('\n1. 统计特征:\n');
fprintf(' 原始光谱: 均值=%.4f, 标准差=%.4f\n', mean(orig(:)), std(orig(:)));
fprintf(' 一阶导数: 均值=%.4f, 标准差=%.4f\n', mean(first_deriv(:)), std(first_deriv(:)));
fprintf(' 二阶导数: 均值=%.4f, 标准差=%.4f\n', mean(second_deriv(:)), std(second_deriv(:)));
% 2. Key absorption bands: water, fat, protein
fprintf('\n2. 关键吸收峰分析:\n');
band_names = {'水', '脂肪', '蛋白质'};
band_centers = [1450, 1720, 2180];
for k = 1:numel(band_centers)
    % NOTE(review): the reported value is read from first_deriv, which is
    % close to zero at an absorption maximum; confirm that orig is not
    % what was intended here.
    peak = find_nearest_peak(wavelengths, first_deriv, band_centers(k));
    % The grid wavelength is generally not an integer; with %d MATLAB
    % would fall back to exponent notation, so print it as a float.
    fprintf(' %s吸收峰 (%d nm): 位置=%.1f nm, 强度=%.4f\n', ...
        band_names{k}, band_centers(k), peak.wavelength, peak.intensity);
end
% 3. Baseline drift
fprintf('\n3. 基线漂移评估:\n');
baseline_orig = estimate_baseline_drift(orig);
baseline_deriv = estimate_baseline_drift(first_deriv);
fprintf(' 原始光谱基线漂移: %.4f\n', baseline_orig);
fprintf(' 一阶导数基线漂移: %.4f\n', baseline_deriv);
fprintf(' 改善程度: %.1f%%\n', (baseline_orig-baseline_deriv)/baseline_orig*100);
% 4. Resolution
fprintf('\n4. 分辨率增强:\n');
resolution_orig = estimate_resolution(orig, wavelengths);
resolution_deriv = estimate_resolution(first_deriv, wavelengths);
fprintf(' 原始光谱分辨率: %.4f nm⁻¹\n', resolution_orig);
fprintf(' 一阶导数分辨率: %.4f nm⁻¹\n', resolution_deriv);
fprintf(' 分辨率提升: %.1f倍\n', resolution_deriv/resolution_orig);
end
function peak = find_nearest_peak(wavelengths, spectra, target_wavelength)
% FIND_NEAREST_PEAK  Look up the grid point closest to a target wavelength.
%
% No peak search is performed: the function picks the element of
% wavelengths nearest to target_wavelength and averages that column of
% spectra over all rows.
%
% Output:
%   peak.wavelength - the selected grid wavelength.
%   peak.intensity  - mean of spectra(:, idx) over all samples.
distance = abs(wavelengths - target_wavelength);
[~, nearest] = min(distance);
peak = struct('wavelength', wavelengths(nearest), ...
              'intensity', mean(spectra(:, nearest)));
end
function baseline = estimate_baseline_drift(spectra)
% ESTIMATE_BASELINE_DRIFT  Mean absolute end-to-end offset of the spectra.
%
% For every row the absolute difference between its last and first value
% is taken; the result is the average of these differences over all rows.
% It is a difference, not a slope: the wavelength span is not used.
end_to_end = abs(spectra(:, end) - spectra(:, 1));
baseline = sum(end_to_end) / size(spectra, 1);
end
function resolution = estimate_resolution(spectra, wavelengths)
% ESTIMATE_RESOLUTION  Mean reciprocal full width at half maximum (FWHM).
%
% For every row the global maximum is located, and the nearest points on
% either side whose value is at or below half of that maximum define the
% FWHM on the wavelength axis. The result is the average of 1/FWHM over
% the rows for which a non-zero width was found, or 0 if there is none.
%
% Inputs:
%   spectra     - matrix with one spectrum per row.
%   wavelengths - wavelength axis of the columns (either direction).
% Output:
%   resolution  - mean of 1/FWHM, in reciprocal wavelength units.
%
% NOTE(review): half maximum is measured from zero, not from the spectrum's
% own baseline, so a spectrum riding on a large offset may never cross it
% and is then skipped - confirm this is the intended definition.
n_samples = size(spectra, 1);
inv_fwhm_sum = 0;
n_valid = 0;
for i = 1:n_samples
    spectrum = spectra(i, :);
    [peak_value, max_idx] = max(spectrum);
    half_max = peak_value / 2;
    % Nearest half-maximum crossing on each side of the peak
    left_idx = find(spectrum(1:max_idx) <= half_max, 1, 'last');
    right_idx = find(spectrum(max_idx:end) <= half_max, 1, 'first') + max_idx - 1;
    if isempty(left_idx) || isempty(right_idx)
        continue;
    end
    % abs() keeps the width positive for a descending wavelength axis
    fwhm = abs(wavelengths(right_idx) - wavelengths(left_idx));
    % A zero width (e.g. a non-positive maximum) would contribute Inf
    if fwhm > 0
        inv_fwhm_sum = inv_fwhm_sum + 1 / fwhm;
        n_valid = n_valid + 1;
    end
end
% Average only over spectra that produced a width, so skipped rows do not
% drag the estimate toward zero.
if n_valid > 0
    resolution = inv_fwhm_sum / n_valid;
else
    resolution = 0;
end
end
1.9 信噪比估计函数 (estimate_snr.m)
matlab
function snr = estimate_snr(spectrum)
% ESTIMATE_SNR  Rough signal-to-noise estimate of a single spectrum.
%
% The signal is the maximum of the spectrum. The noise is the standard
% deviation of the first and last tenth of the points taken together.
%
% Input:
%   spectrum - vector of spectral values (row or column).
% Output:
%   snr      - max(spectrum) / std(edge points), or Inf when that standard
%              deviation is not positive (including spectra shorter than
%              5 points, for which the edge regions are empty).
% Work on a row vector so the two edge segments concatenate into one
% vector for column input as well.
spectrum = spectrum(:).';
signal = max(spectrum);
n_points = numel(spectrum);
edge_len = round(n_points / 10);
noise_region = [spectrum(1:edge_len), spectrum(end-edge_len+1:end)];
noise_std = std(noise_region);
if noise_std > 0
    snr = signal / noise_std;
else
    snr = inf;
end
end
1.10 数据生成函数 (generate_sample_nir_data.m)
matlab
function [spectra, wavelengths] = generate_sample_nir_data()
% GENERATE_SAMPLE_NIR_DATA  Simulated NIR spectra for demonstration.
%
% Outputs:
%   spectra     - [50 x 1000] matrix, one simulated spectrum per row.
%   wavelengths - [1 x 1000] axis, linearly spaced from 1000 to 2500 nm.
%
% Every spectrum is the sum of a random constant offset (0.5 + 0.1*randn),
% three Gaussian bands centered at 1450, 1720 and 2180 nm, Gaussian noise
% with standard deviation 0.02, and a linear drift rising from 0 to 0.05
% across the axis. The random generator is not seeded here.
n_samples = 50;
n_wavelengths = 1000;
wavelength_start = 1000; % nm
wavelength_end = 2500; % nm
wavelengths = linspace(wavelength_start, wavelength_end, n_wavelengths);

% Deterministic components are the same for every sample
gaussian_band = @(height, center, width) height * exp(-((wavelengths - center) / width).^2);
water_band = gaussian_band(0.3, 1450, 50);
fat_band = gaussian_band(0.2, 1720, 40);
protein_band = gaussian_band(0.15, 2180, 60);
drift = 0.05 * (wavelengths - wavelength_start) / (wavelength_end - wavelength_start);

spectra = zeros(n_samples, n_wavelengths);
for s = 1:n_samples
    % Draw order per sample: offset first, then the noise vector
    offset = 0.5 + 0.1 * randn(1);
    clean = offset + water_band + fat_band + protein_band;
    jitter = 0.02 * randn(size(wavelengths));
    spectra(s, :) = clean + jitter + drift;
end
fprintf('生成模拟近红外光谱数据: %d 样本 × %d 波长点\n', n_samples, n_wavelengths);
end
1.11 结果保存函数 (save_preprocessing_results.m)
matlab
function save_preprocessing_results(orig, msv_smooth, first_deriv, second_deriv, params, wavelengths)
% SAVE_PREPROCESSING_RESULTS  Write preprocessing results to disk.
%
% Creates the folder preprocessed_data in the current directory if needed
% and writes, all tagged with the same timestamp:
%   - NIR_preprocessed_<timestamp>.mat (v7.3) holding the variables orig,
%     msv_smooth, first_deriv, second_deriv, params and wavelengths;
%   - NIR_original_<timestamp>.csv, NIR_first_derivative_<timestamp>.csv
%     and NIR_second_derivative_<timestamp>.csv.
%
% The CSV files are written at full double precision. csvwrite is not
% used because it keeps only five significant digits, which truncates
% small derivative values.
out_dir = 'preprocessed_data';
if ~exist(out_dir, 'dir')
    mkdir(out_dir);
end
timestamp = datestr(now, 'yyyymmdd_HHMMSS');
% save() looks the variables up by name in this workspace, so the input
% argument names double as the variable names stored in the MAT-file.
filename = sprintf('NIR_preprocessed_%s.mat', timestamp);
save(fullfile(out_dir, filename), ...
    'orig', 'msv_smooth', 'first_deriv', 'second_deriv', ...
    'params', 'wavelengths', '-v7.3');
% CSV copies for use in other software
csv_exports = {'original', orig; ...
               'first_derivative', first_deriv; ...
               'second_derivative', second_deriv};
for k = 1:size(csv_exports, 1)
    csv_filename = fullfile(out_dir, ...
        sprintf('NIR_%s_%s.csv', csv_exports{k, 1}, timestamp));
    write_csv_full_precision(csv_filename, csv_exports{k, 2});
end
fprintf('结果已保存到 preprocessed_data/ 文件夹\n');
fprintf(' MAT文件: %s\n', filename);
fprintf(' CSV文件: NIR_*.csv\n');
end

function write_csv_full_precision(csv_filename, data)
% Write a numeric matrix as comma-separated text without losing digits.
if exist('writematrix', 'file')
    writematrix(data, csv_filename);
else
    % Older releases: dlmwrite with an explicit precision
    dlmwrite(csv_filename, data, 'delimiter', ',', 'precision', '%.15g');
end
end
二、高级预处理功能
2.1 多尺度平滑比较 (multi_scale_smoothing.m)
matlab
function multi_scale_smoothing(spectra, wavelengths)
% MULTI_SCALE_SMOOTHING  Compare moving-window smoothing at several window sizes.
%
% Smooths spectra with window sizes 5, 11, 21, 31 and 51 and draws a 2x3
% figure for the first sample: one panel per window size (raw dotted,
% smoothed solid) and a sixth panel overlaying all smoothed curves.
%
% Inputs:
%   spectra     - spectra matrix, one spectrum per row.
%   wavelengths - wavelength axis of the columns.
%
% Each window size is smoothed once and the displayed row is reused for
% the overlay panel, so moving_window_smoothing runs (and prints its
% status line) once per window size.
window_sizes = [5, 11, 21, 31, 51];
sample_idx = 1;
n_scales = length(window_sizes);
smoothed_rows = zeros(n_scales, size(spectra, 2));
figure('Position', [200, 200, 1200, 800], 'Name', '多尺度平滑比较');
for i = 1:n_scales
    window_size = window_sizes(i);
    smoothed = moving_window_smoothing(spectra, window_size);
    smoothed_rows(i, :) = smoothed(sample_idx, :);
    subplot(2, 3, i);
    plot(wavelengths, spectra(sample_idx, :), 'k:', 'LineWidth', 1);
    hold on;
    plot(wavelengths, smoothed_rows(i, :), 'b-', 'LineWidth', 1.5);
    xlabel('波长 (nm)'); ylabel('吸光度');
    title(sprintf('窗口大小 = %d', window_size));
    grid on; box on;
end
% Overlay of all scales
subplot(2, 3, 6);
colors = lines(n_scales);
hold on;
for i = 1:n_scales
    plot(wavelengths, smoothed_rows(i, :), 'Color', colors(i,:), ...
        'LineWidth', 1.5, 'DisplayName', sprintf('%d点', window_sizes(i)));
end
xlabel('波长 (nm)'); ylabel('吸光度');
title('多尺度平滑对比');
legend('Location', 'best');
grid on; box on;
sgtitle('移动窗口平滑的多尺度比较', 'FontSize', 14, 'FontWeight', 'bold');
end
2.2 导数光谱特征提取 (extract_derivative_features.m)
matlab
function features = extract_derivative_features(first_deriv, second_deriv, wavelengths)
% EXTRACT_DERIVATIVE_FEATURES  Per-sample features of derivative spectra.
%
% Inputs:
%   first_deriv  - first-derivative spectra, one spectrum per row.
%   second_deriv - second-derivative spectra, one spectrum per row.
%   wavelengths  - wavelength axis of the columns.
% Output (struct fields):
%   peak_positions   - wavelength of the maximum of each first_deriv row.
%   peak_intensities - that maximum value, per row.
%   peak_areas       - sum of abs(first_deriv) along each row.
%   inflexion_points - 1 x n_samples cell; entry i holds the wavelengths
%                      after which second_deriv(i, :) changes sign.
%   mean, std, skewness, kurtosis - row-wise moments of first_deriv.
%
% skewness and kurtosis are Statistics and Machine Learning Toolbox
% functions.
features = struct();
% 1-2. Location and height of the row-wise maximum
[peak_values, max_idx] = max(first_deriv, [], 2);
features.peak_positions = wavelengths(max_idx);
features.peak_intensities = peak_values;
% 3. Area under the absolute first derivative
features.peak_areas = sum(abs(first_deriv), 2);
% 4. Inflexion points: sign changes of the second derivative
n_samples = size(second_deriv, 1);
features.inflexion_points = cell(1, n_samples);
for i = 1:n_samples
    zero_crossings = find(diff(sign(second_deriv(i, :))) ~= 0);
    features.inflexion_points{i} = wavelengths(zero_crossings);
end
% 5. Row-wise moments. For std, skewness and kurtosis the second argument
% is a weight/bias flag, not the dimension; the dimension comes third.
features.mean = mean(first_deriv, 2);
features.std = std(first_deriv, 0, 2);
features.skewness = skewness(first_deriv, 1, 2);
features.kurtosis = kurtosis(first_deriv, 1, 2);
fprintf('导数光谱特征提取完成\n');
end
参考代码 matlab 近红外光谱图 www.youwenfan.com/contentcsv/135809.html
三、使用说明
3.1 基本使用步骤
matlab
% 1. Run the main script
run('nir_preprocessing.m');
% 2. To use your own data, replace the data-loading part.
% load returns a struct (here my_spectra); this example reads its fields
% 'wavelengths' and 'intensity', so the MAT-file must contain variables
% with those names.
my_spectra = load('your_nir_data.mat');
wavelengths = my_spectra.wavelengths;
spectra = my_spectra.intensity;
% 3. Call the preprocessing functions
params.window_size = 15;
params.poly_order = 3;
smoothed = savitzky_golay_smoothing(spectra, params.window_size, params.poly_order);
first_deriv = savitzky_golay_derivative(spectra, params.window_size, params.poly_order, 1);
second_deriv = savitzky_golay_derivative(spectra, params.window_size, params.poly_order, 2);
% 4. Visualize. The SG result is passed for both the moving-window and the
% SG argument, so the two smoothing curves in the figure coincide.
visualize_preprocessing_results(spectra, smoothed, smoothed, first_deriv, second_deriv, wavelengths);
3.2 参数选择建议
| 参数 | 建议值 | 说明 |
|---|---|---|
| 窗口大小 | 11-21 | 奇数,根据光谱分辨率调整 |
| 多项式阶数 | 2-4 | 必须小于窗口大小;过高会保留噪声并可能产生振荡,过低会过度平滑、压低峰形 |
| 求导阶数 | 1或2 | 一阶消除常数基线偏移,二阶进一步消除线性基线并增强重叠峰的分辨率 |
3.3 常见问题解决
| 问题 | 解决方法 |
|---|---|
| 光谱出现振荡 | 减小多项式阶数或增大窗口大小 |
| 平滑不足 | 增大窗口大小 |
| 导数光谱噪声大 | 先平滑再求导,或使用更大窗口 |
| 边缘效应明显 | 使用对称填充或忽略边缘点 |