# Random-forest sliding-window object detector (Python example script).
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from skimage.measure import regionprops
import os
from typing import List, Tuple
import matplotlib.pyplot as plt
class RandomForestObjectDetector:
    """Sliding-window object detector backed by a random-forest classifier.

    Every window is described by a hand-crafted feature vector (intensity
    statistics, LBP histogram, GLCM texture properties, edge density,
    gradient statistics and simple shape descriptors) and classified as
    target (label 1) or background (label 0).
    """

    def __init__(self, window_size=(64, 64), step_size=16, n_estimators=100):
        """Create an untrained detector.

        Args:
            window_size: Sliding-window size as ``(width, height)`` in pixels.
            step_size: Stride of the sliding window, used on both axes.
            n_estimators: Number of trees in the random forest.
        """
        # Stored as a tuple so a list argument behaves exactly like a tuple.
        self.window_size = tuple(window_size)
        self.step_size = step_size
        self.rf_classifier = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=42,
            max_depth=10,
            min_samples_split=5,
        )
        self.is_trained = False

    def extract_features(self, image_patch: np.ndarray) -> np.ndarray:
        """Compute the fixed-length feature vector of one image patch.

        Args:
            image_patch: Image patch. A 3-dimensional array is converted from
                BGR to grayscale; any other array is copied and used as-is.
                Presumably 8-bit data, as the OpenCV edge/threshold calls
                below are applied to it directly (confirm against callers).

        Returns:
            1-D array of 42 values: 6 intensity statistics, 26 LBP histogram
            bins, 5 GLCM properties, 1 edge density, 2 gradient statistics
            and 2 shape descriptors.
        """
        if image_patch.ndim == 3:
            gray = cv2.cvtColor(image_patch, cv2.COLOR_BGR2GRAY)
        else:
            gray = image_patch.copy()

        # 1. Basic intensity statistics.
        features = [
            np.mean(gray),
            np.std(gray),
            np.median(gray),
            np.min(gray),
            np.max(gray),
            np.var(gray),
        ]

        # 2. Texture: histogram of uniform local binary patterns.
        radius = 3
        n_points = 8 * radius
        lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
        lbp_hist, _ = np.histogram(
            lbp.ravel(),
            bins=n_points + 2,
            range=(0, n_points + 2),
            density=True,
        )
        features.extend(lbp_hist)

        # 3. Texture: gray-level co-occurrence matrix properties, averaged
        #    over the four directions.
        try:
            glcm = graycomatrix(
                gray,
                distances=[1],
                # skimage expects radians; these are 0, 45, 90 and 135 degrees.
                angles=[0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                levels=256,
                symmetric=True,
                normed=True,
            )
            glcm_features = [
                graycoprops(glcm, prop).mean()
                for prop in (
                    'contrast',
                    'dissimilarity',
                    'homogeneity',
                    'energy',
                    'correlation',
                )
            ]
        except (ValueError, TypeError):
            # Best-effort: unsupported input (e.g. float data or values
            # outside the 256 levels) falls back to neutral values so the
            # feature vector keeps its length.
            glcm_features = [0, 0, 0, 0, 0]
        features.extend(glcm_features)

        # 4. Edge density: fraction of pixels marked by the Canny detector.
        edges = cv2.Canny(gray, 50, 150)
        features.append(np.count_nonzero(edges) / edges.size)

        # 5. Gradient magnitude statistics (Sobel).
        grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        grad_magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)
        features.extend([np.mean(grad_magnitude), np.std(grad_magnitude)])

        # 6. Shape descriptors of the largest Otsu-thresholded region.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Index -2 is the contour list for both the 2-value (OpenCV 4) and
        # the 3-value (OpenCV 3) return signature of findContours.
        contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) > 0:
            largest_contour = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(largest_contour)
            perimeter = cv2.arcLength(largest_contour, True)
            circularity = 4 * np.pi * area / (perimeter ** 2) if perimeter > 0 else 0
            features.extend([area / (gray.shape[0] * gray.shape[1]), circularity])
        else:
            features.extend([0, 0])

        return np.array(features)

    def sliding_window(self, image: np.ndarray) -> List[Tuple]:
        """Enumerate all full-size windows of the image in row-major order.

        Args:
            image: Input image; only its first two dimensions (rows, columns)
                are used for positioning.

        Returns:
            List of ``((x, y), patch)`` pairs where ``(x, y)`` is the top-left
            corner and ``patch`` is the corresponding slice of ``image``.
            Empty when the image is smaller than the window.
        """
        win_w, win_h = self.window_size
        img_h, img_w = image.shape[:2]

        windows = []
        for y in range(0, img_h - win_h + 1, self.step_size):
            for x in range(0, img_w - win_w + 1, self.step_size):
                patch = image[y:y + win_h, x:x + win_w]
                # NumPy shapes are (rows, cols) = (height, width), whereas
                # window_size is (width, height).
                if patch.shape[:2] == (win_h, win_w):
                    windows.append(((x, y), patch))
        return windows

    def prepare_training_data(self, positive_samples: List[np.ndarray],
                              negative_samples: List[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Turn labelled image patches into a feature matrix and label vector.

        Args:
            positive_samples: Patches that contain the target (label 1).
            negative_samples: Background patches (label 0).

        Returns:
            ``(X, y)`` where the rows of ``X`` are the feature vectors of the
            positive samples followed by those of the negative samples, and
            ``y`` holds the matching 1/0 labels.
        """
        print("提取正样本特征...")
        positive_features = [self.extract_features(sample) for sample in positive_samples]
        print("提取负样本特征...")
        negative_features = [self.extract_features(sample) for sample in negative_samples]

        features = positive_features + negative_features
        labels = [1] * len(positive_features) + [0] * len(negative_features)
        return np.array(features), np.array(labels, dtype=int)

    def train(self, positive_samples: List[np.ndarray],
              negative_samples: List[np.ndarray]):
        """Fit the random forest and print a validation report.

        The samples are split 80/20 (stratified by label); the classifier is
        fitted on the 80% part and evaluated on the remaining 20%.

        Args:
            positive_samples: Patches that contain the target.
            negative_samples: Background patches.
        """
        print("准备训练数据...")
        X, y = self.prepare_training_data(positive_samples, negative_samples)
        print(f"训练数据形状: {X.shape}, 标签分布: {np.bincount(y)}")

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        print("训练随机森林分类器...")
        self.rf_classifier.fit(X_train, y_train)

        val_pred = self.rf_classifier.predict(X_val)
        print("\n验证集性能:")
        print(classification_report(y_val, val_pred))

        self.is_trained = True
        print("训练完成!")

    def detect(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Tuple]:
        """Classify every sliding window and return the positive ones.

        Args:
            image: Image to scan.
            confidence_threshold: Minimum positive-class probability for a
                window to be reported.

        Returns:
            List of ``(x, y, w, h, confidence)`` tuples for windows that are
            predicted as class 1 with at least the requested confidence.

        Raises:
            ValueError: If the detector has not been trained yet.
        """
        if not self.is_trained:
            raise ValueError("模型尚未训练,请先调用train()方法")

        windows = self.sliding_window(image)
        print(f"处理 {len(windows)} 个窗口...")
        if not windows:
            return []

        # One batched forest evaluation instead of two calls per window.
        feature_matrix = np.array([self.extract_features(patch) for _, patch in windows])
        probabilities = self.rf_classifier.predict_proba(feature_matrix)

        # Locate the positive-class column instead of assuming it is column 1.
        classes = self.rf_classifier.classes_
        positive_columns = np.flatnonzero(classes == 1)
        if positive_columns.size == 0:
            return []
        positive_column = positive_columns[0]

        # Same decision rule as ``predict``: the most probable class wins.
        predicted = classes[np.argmax(probabilities, axis=1)]

        win_w, win_h = self.window_size
        detections = []
        for ((x, y), _), label, row in zip(windows, predicted, probabilities):
            confidence = float(row[positive_column])
            if label == 1 and confidence >= confidence_threshold:
                detections.append((x, y, win_w, win_h, confidence))
        return detections

    def non_max_suppression(self, detections: List[Tuple],
                            overlap_threshold: float = 0.3) -> List[Tuple]:
        """Greedy non-maximum suppression.

        Detections are visited from highest to lowest confidence; one is kept
        only if its IoU with every already-kept detection is below the
        threshold.

        Args:
            detections: ``(x, y, w, h, confidence)`` tuples.
            overlap_threshold: IoU at or above which the lower-confidence
                detection is discarded.

        Returns:
            The surviving detections, ordered by descending confidence.
        """
        if not detections:
            return []

        keep = []
        for candidate in sorted(detections, key=lambda det: det[4], reverse=True):
            if all(self.calculate_iou(kept, candidate) < overlap_threshold for kept in keep):
                keep.append(candidate)
        return keep

    @staticmethod
    def calculate_iou(box1: Tuple, box2: Tuple) -> float:
        """Return the intersection-over-union of two ``(x, y, w, h, _)`` boxes."""
        x1, y1, w1, h1, _ = box1
        x2, y2, w2, h2, _ = box2

        left = max(x1, x2)
        top = max(y1, y2)
        right = min(x1 + w1, x2 + w2)
        bottom = min(y1 + h1, y2 + h2)
        if right <= left or bottom <= top:
            return 0.0

        intersection = (right - left) * (bottom - top)
        union = w1 * h1 + w2 * h2 - intersection
        return intersection / union if union > 0 else 0.0

    def visualize_detections(self, image: np.ndarray, detections: List[Tuple],
                             title: str = "检测结果"):
        """Draw the detections on a copy of the image and show it.

        Args:
            image: Original image, either BGR or single-channel grayscale.
            detections: ``(x, y, w, h, confidence)`` tuples.
            title: Figure title prefix.
        """
        img_vis = image.copy()
        if img_vis.ndim == 2:
            # Boxes are drawn in colour and the final BGR->RGB conversion
            # needs three channels.
            img_vis = cv2.cvtColor(img_vis, cv2.COLOR_GRAY2BGR)

        box_color = (0, 255, 0)
        for x, y, w, h, confidence in detections:
            cv2.rectangle(img_vis, (x, y), (x + w, y + h), box_color, 2)
            # Keep the label inside the canvas for boxes near the top edge.
            label_y = max(y - 10, 12)
            cv2.putText(img_vis, f"{confidence:.2f}", (x, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)

        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
        plt.title(f"{title} - 检测到 {len(detections)} 个目标")
        plt.axis('off')
        plt.show()
# Usage example: synthetic training data
def create_sample_data(n_positive=100, n_negative=200, seed=None):
    """Create synthetic 64x64 grayscale training patches.

    Positive patches are a mid-gray random background with a bright filled
    rectangle plus Gaussian noise; negative patches are a dark random
    background plus Gaussian noise.

    Args:
        n_positive: Number of positive (target) patches to generate.
        n_negative: Number of negative (background) patches to generate.
        seed: Optional integer seed for reproducible data. With ``None`` the
            global NumPy random state is used.

    Returns:
        ``(positive_samples, negative_samples)``: two lists of ``uint8``
        arrays of shape ``(64, 64)``.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    patch_shape = (64, 64)

    positive_samples = []
    for _ in range(n_positive):
        # Simulated structure: textured background with a bright rectangle.
        sample = rng.randint(50, 100, patch_shape, dtype=np.uint8)
        # Filled rectangle with corners (x=10, y=20) and (x=50, y=40),
        # both corners inclusive.
        sample[20:41, 10:51] = 150
        noise = rng.normal(0, 10, sample.shape)
        positive_samples.append(np.clip(sample + noise, 0, 255).astype(np.uint8))

    negative_samples = []
    for _ in range(n_negative):
        # Dark random background texture.
        sample = rng.randint(0, 50, patch_shape, dtype=np.uint8)
        noise = rng.normal(0, 15, sample.shape)
        negative_samples.append(np.clip(sample + noise, 0, 255).astype(np.uint8))

    return positive_samples, negative_samples
# Complete end-to-end usage example
if __name__ == "__main__":
    # End-to-end demo: train on synthetic patches, scan a synthetic image,
    # suppress overlapping hits and display the result.

    # 1. Create the detector.
    detector = RandomForestObjectDetector(window_size=(64, 64), step_size=32)

    # 2. Prepare training data.
    print("创建示例数据...")
    positive_samples, negative_samples = create_sample_data()

    # 3. Train the model.
    detector.train(positive_samples, negative_samples)

    # 4. Build a test image: dark random background with two bright targets.
    test_image = np.random.randint(0, 50, (300, 400), dtype=np.uint8)
    cv2.rectangle(test_image, (50, 50), (114, 114), 150, -1)
    cv2.rectangle(test_image, (200, 150), (264, 214), 150, -1)

    # 5. Run detection.
    print("进行目标检测...")
    detections = detector.detect(test_image, confidence_threshold=0.6)

    # 6. Apply non-maximum suppression.
    filtered_detections = detector.non_max_suppression(detections, overlap_threshold=0.3)
    print(f"原始检测数量: {len(detections)}")
    print(f"NMS后检测数量: {len(filtered_detections)}")

    # 7. Visualize the result (converted to BGR so boxes can be drawn in colour).
    if filtered_detections:
        detector.visualize_detections(
            cv2.cvtColor(test_image, cv2.COLOR_GRAY2BGR),
            filtered_detections,
        )
    else:
        print("未检测到目标")