如何在多变光照条件下实现准确的颜色识别
在计算机视觉领域,颜色分类看似简单,实则充满挑战。一个红色的苹果在阳光下、阴影中、黄昏时分呈现出的颜色差异巨大。如何让机器像人眼一样,在不同光照条件下准确识别颜色?
为什么颜色分类如此具有挑战性?
颜色感知受光照影响极大,这被称为颜色恒常性问题。人类视觉系统能够在一定程度上"自动校正"光照变化,但机器需要明确的算法来处理这一问题。
主要挑战包括:
- 光照强度变化:强光会使颜色过曝,弱光则使颜色暗淡
- 色温变化:自然光与人工光源的色温差异显著
- 阴影和反射:环境光反射会导致颜色失真
- 设备差异:不同摄像头的色彩响应特性不同
系统架构概览
解决方案采用模块化设计:
图像输入 → 预处理 → 特征提取 → SVM分类 → 颜色识别结果
一、鲁棒的颜色特征提取
1.1 颜色空间的选择
RGB空间的局限性
传统的RGB颜色空间对光照变化极其敏感。当光照强度改变时,R、G、B三个通道的值会同步变化,导致颜色识别不稳定。
解决方案:使用感知均匀的颜色空间
python
import cv2
import numpy as np
def convert_to_perceptual_spaces(image):
"""
转换到更符合人类视觉感知的颜色空间
"""
# HSV: 色调(H)、饱和度(S)、明度(V)分离
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# LAB: 亮度(L)与颜色信息(A,B)分离,设备无关
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
# YCrCb: 亮度(Y)与色度分离,广泛用于图像处理
ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)
return hsv, lab, ycrcb
1.2 多特征融合策略
基于特征提取的思想,我们采用多层次特征提取:
python
class RobustColorFeatureExtractor:
def __init__(self, histogram_bins=8):
self.bins = histogram_bins
def extract_histogram_features(self, image):
"""提取颜色直方图特征"""
hsv, lab, ycrcb = convert_to_perceptual_spaces(image)
features = []
# HSV直方图 - 对色调特别敏感
h_hist = self._calc_normalized_histogram(hsv[:,:,0], 180, self.bins) # H通道
s_hist = self._calc_normalized_histogram(hsv[:,:,1], 256, self.bins) # S通道
features.extend([h_hist, s_hist])
# LAB直方图 - 忽略亮度通道
a_hist = self._calc_normalized_histogram(lab[:,:,1], 256, self.bins) # A通道
b_hist = self._calc_normalized_histogram(lab[:,:,2], 256, self.bins) # B通道
features.extend([a_hist, b_hist])
return np.concatenate(features)
def extract_color_moments(self, image):
"""提取颜色矩特征 - 对光照变化鲁棒"""
hsv, lab, _ = convert_to_perceptual_spaces(image)
def calculate_moments(channel):
"""计算均值、标准差、偏度"""
mean = np.mean(channel)
std = np.std(channel)
# 偏度衡量分布不对称性
skew = np.mean((channel - mean) ** 3) / (std ** 3 + 1e-8)
return [mean, std, skew]
moments = []
# 对每个颜色通道计算矩
for i in range(3): # HSV各通道
moments.extend(calculate_moments(hsv[:,:,i]))
for i in [1, 2]: # LAB的A、B通道
moments.extend(calculate_moments(lab[:,:,i]))
return np.array(moments)
def extract_autocorrelation_features(self, image):
"""提取颜色自相关特征 - 专利技术改进"""
_, lab, _ = convert_to_perceptual_spaces(image)
def spatial_autocorrelation(channel, max_distance=3):
"""计算空间自相关特征"""
correlations = []
height, width = channel.shape
for d in range(1, max_distance + 1):
# 水平自相关
if d < width:
corr_h = np.corrcoef(channel[:, :-d].flatten(),
channel[:, d:].flatten())[0, 1]
correlations.append(corr_h if not np.isnan(corr_h) else 0)
# 垂直自相关
if d < height:
corr_v = np.corrcoef(channel[:-d, :].flatten(),
channel[d:, :].flatten())[0, 1]
correlations.append(corr_v if not np.isnan(corr_v) else 0)
return correlations
# 对LAB的A、B通道计算自相关
features = []
features.extend(spatial_autocorrelation(lab[:,:,1])) # A通道
features.extend(spatial_autocorrelation(lab[:,:,2])) # B通道
return np.array(features)
def extract_all_features(self, image):
"""提取所有特征并融合"""
histogram_feat = self.extract_histogram_features(image)
moment_feat = self.extract_color_moments(image)
autocorr_feat = self.extract_autocorrelation_features(image)
# 特征融合
all_features = np.concatenate([
histogram_feat,
moment_feat,
autocorr_feat
])
# L2归一化
norm = np.linalg.norm(all_features)
return all_features / (norm + 1e-8)
def _calc_normalized_histogram(self, channel, range_max, bins):
"""计算归一化直方图"""
hist = cv2.calcHist([channel], [0], None, [bins], [0, range_max])
return hist.flatten() / (np.sum(hist) + 1e-8)
二、SVM分类器的精心设计
2.1 为什么选择SVM?
支持向量机(SVM)特别适合我们的颜色分类任务:
- 小样本学习:在颜色分类中,标注数据通常有限
- 高维处理:我们的特征维度较高(50+维)
- 非线性分类:RBF核可以处理复杂的颜色决策边界
python
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score
import matplotlib.pyplot as plt
import seaborn as sns
class ColorSVMClassifier:
def __init__(self):
self.pipeline = None
self.feature_extractor = RobustColorFeatureExtractor()
self.classes_ = None
def create_optimized_pipeline(self):
"""创建优化的SVM管道"""
return Pipeline([
('scaler', StandardScaler()), # 特征标准化
('svm', SVC(
kernel='rbf',
class_weight='balanced', # 处理类别不平衡
probability=True, # 启用概率预测
random_state=42
))
])
def hyperparameter_tuning(self, X, y):
"""超参数优化"""
param_grid = {
'svm__C': [0.1, 1, 10, 100], # 正则化参数
'svm__gamma': ['scale', 'auto', 0.001, 0.01, 0.1] # RBF核参数
}
grid_search = GridSearchCV(
self.pipeline, param_grid,
cv=5, scoring='accuracy', n_jobs=-1, verbose=1
)
grid_search.fit(X, y)
print("最佳参数:", grid_search.best_params_)
print("最佳交叉验证分数: {:.3f}".format(grid_search.best_score_))
return grid_search
def plot_learning_curve(self, X, y):
"""绘制学习曲线评估模型"""
from sklearn.model_selection import learning_curve
train_sizes, train_scores, test_scores = learning_curve(
self.pipeline, X, y, cv=5, n_jobs=-1,
train_sizes=np.linspace(0.1, 1.0, 10)
)
plt.figure(figsize=(10, 6))
plt.plot(train_sizes, np.mean(train_scores, axis=1), 'o-', label="训练分数")
plt.plot(train_sizes, np.mean(test_scores, axis=1), 'o-', label="验证分数")
plt.xlabel("训练样本数")
plt.ylabel("准确率")
plt.title("学习曲线")
plt.legend()
plt.grid(True)
plt.show()
三、数据增强与预处理策略
3.1 光照不变性增强
python
class IlluminationAugmentation:
@staticmethod
def apply_illumination_changes(image):
"""应用光照变化增强"""
augmented = [image]
# 亮度变化
for alpha in [0.3, 0.6, 1.5, 2.0]:
bright = cv2.convertScaleAbs(image, alpha=alpha, beta=0)
augmented.append(bright)
# 对比度变化
for beta in [-50, -20, 20, 50]:
contrast = cv2.convertScaleAbs(image, alpha=1.0, beta=beta)
augmented.append(contrast)
# 伽马校正模拟不同光照条件
for gamma in [0.3, 0.7, 1.5, 2.0]:
inv_gamma = 1.0 / gamma
table = np.array([((i / 255.0) ** inv_gamma) * 255
for i in range(256)]).astype("uint8")
gamma_img = cv2.LUT(image, table)
augmented.append(gamma_img)
return augmented
@staticmethod
def white_balance_gray_world(image):
"""灰度世界白平衡算法"""
# 转换为LAB空间进行白平衡
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)
# 灰度世界假设:平均颜色应该是灰色
avg_a = np.mean(a)
avg_b = np.mean(b)
a = cv2.addWeighted(a, 1, a, 0, 128 - avg_a)
b = cv2.addWeighted(b, 1, b, 0, 128 - avg_b)
balanced_lab = cv2.merge([l, a, b])
return cv2.cvtColor(balanced_lab, cv2.COLOR_LAB2BGR)
四、完整系统实现与评估
4.1 端到端颜色分类系统
python
class ComprehensiveColorClassifier:
def __init__(self):
self.feature_extractor = RobustColorFeatureExtractor()
self.classifier = ColorSVMClassifier()
self.is_trained = False
self.label_encoder = {}
self.reverse_encoder = {}
def prepare_training_data(self, image_paths, labels, augment=True):
"""准备训练数据"""
all_features = []
all_encoded_labels = []
# 创建标签编码
unique_labels = list(set(labels))
self.label_encoder = {label: idx for idx, label in enumerate(unique_labels)}
self.reverse_encoder = {idx: label for label, idx in self.label_encoder.items()}
print("提取特征中...")
for path, label in zip(image_paths, labels):
image = cv2.imread(path)
if image is None:
continue
if augment:
# 数据增强
augmented_images = IlluminationAugmentation.apply_illumination_changes(image)
else:
augmented_images = [image]
for aug_img in augmented_images:
# 白平衡预处理
balanced_img = IlluminationAugmentation.white_balance_gray_world(aug_img)
# 提取特征
features = self.feature_extractor.extract_all_features(balanced_img)
all_features.append(features)
all_encoded_labels.append(self.label_encoder[label])
return np.array(all_features), np.array(all_encoded_labels)
def train(self, image_paths, labels, optimize_hyperparams=True):
"""训练分类器"""
X, y = self.prepare_training_data(image_paths, labels)
print(f"训练数据形状: {X.shape}")
print(f"类别分布: {np.bincount(y)}")
if optimize_hyperparams:
# 超参数优化
self.classifier.pipeline = self.classifier.create_optimized_pipeline()
grid_search = self.classifier.hyperparameter_tuning(X, y)
self.classifier.pipeline = grid_search.best_estimator_
else:
self.classifier.pipeline = self.classifier.create_optimized_pipeline()
self.classifier.pipeline.fit(X, y)
self.is_trained = True
# 绘制学习曲线
self.classifier.plot_learning_curve(X, y)
def predict(self, image, return_probability=True):
"""预测图像颜色"""
if not self.is_trained:
raise ValueError("分类器尚未训练")
# 预处理
balanced_image = IlluminationAugmentation.white_balance_gray_world(image)
# 特征提取
features = self.feature_extractor.extract_all_features(balanced_image)
features = features.reshape(1, -1)
if return_probability:
probabilities = self.classifier.pipeline.predict_proba(features)[0]
pred_class_idx = np.argmax(probabilities)
confidence = probabilities[pred_class_idx]
pred_class = self.reverse_encoder[pred_class_idx]
return pred_class, confidence, probabilities
else:
pred_class_idx = self.classifier.pipeline.predict(features)[0]
return self.reverse_encoder[pred_class_idx]
def evaluate_on_test_set(self, test_image_paths, test_labels):
"""在测试集上评估性能"""
correct = 0
total = len(test_image_paths)
confidence_scores = []
for path, true_label in zip(test_image_paths, test_labels):
image = cv2.imread(path)
if image is None:
continue
pred_label, confidence, _ = self.predict(image)
if pred_label == true_label:
correct += 1
confidence_scores.append(confidence)
accuracy = correct / total
avg_confidence = np.mean(confidence_scores)
print(f"测试准确率: {accuracy:.3f}")
print(f"平均置信度: {avg_confidence:.3f}")
return accuracy, avg_confidence
五、实战演示与结果分析
5.1 实际应用示例
python
def demo_color_classification():
"""演示颜色分类器的使用"""
# 我们有以下训练数据
train_images = [
"data/red/red1.jpg", "data/red/red2.jpg",
"data/blue/blue1.jpg", "data/blue/blue2.jpg",
"data/green/green1.jpg", "data/green/green2.jpg",
"data/yellow/yellow1.jpg", "data/yellow/yellow2.jpg"
]
train_labels = ["red", "red", "blue", "blue", "green", "green", "yellow", "yellow"]
# 创建并训练分类器
classifier = ComprehensiveColorClassifier()
classifier.train(train_images, train_labels)
# 测试新图像
test_image = cv2.imread("test_image.jpg")
predicted_color, confidence, probabilities = classifier.predict(test_image)
print(f"预测颜色: {predicted_color}")
print(f"置信度: {confidence:.3f}")
# 可视化预测结果
plt.figure(figsize=(10, 4))
# 显示原图
plt.subplot(1, 2, 1)
plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
plt.title(f"输入图像 - 预测: {predicted_color}")
plt.axis('off')
# 显示概率分布
plt.subplot(1, 2, 2)
colors = list(classifier.reverse_encoder.values())
plt.bar(colors, probabilities)
plt.title('颜色分类概率分布')
plt.ylabel('概率')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
# 运行演示
if __name__ == "__main__":
demo_color_classification()