使用随机森林实现目标检测

核心实现思路

滑动窗口策略：在图像上滑动固定大小的窗口，对每个窗口进行分类
多维特征提取：结合统计特征、纹理特征、边缘特征、形状特征等
随机森林分类：训练二分类器判断窗口是否包含目标
后处理优化：使用非极大值抑制减少重复检测
特征工程的重要性

LBP纹理特征：捕捉局部纹理模式
灰度共生矩阵：描述纹理的统计特性
边缘密度：反映目标边界信息
形状描述符：圆形度、面积比等几何特征
实际应用建议

数据收集：收集大量正负样本进行训练
特征优化：根据具体目标调整特征提取策略
参数调优：调整窗口大小、步长、置信度阈值等
多尺度检测：使用不同尺寸的窗口检测不同大小的目标
适用场景

计算资源受限的嵌入式设备
目标具有明显纹理或形状特征的场景
需要快速部署和调试的原型系统
传统图像处理流程的补充
Python 代码实现如下：
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from skimage.measure import regionprops
import os
from typing import List, Tuple
import matplotlib.pyplot as plt

class RandomForestObjectDetector:
    """Sliding-window object detector backed by a random forest classifier.

    Every window cut from the input image is turned into a hand-crafted
    feature vector (intensity statistics, LBP histogram, GLCM texture
    properties, edge density, gradient statistics and simple shape
    descriptors) and classified as target (1) or background (0).
    """

    def __init__(self, window_size=(64, 64), step_size=16, n_estimators=100):
        """Create an untrained detector.

        Args:
            window_size: Sliding window size as ``(width, height)`` in pixels.
            step_size: Stride of the sliding window in pixels (both axes).
            n_estimators: Number of trees in the random forest.
        """
        # Stored as a tuple so a list argument behaves the same as a tuple.
        self.window_size = tuple(window_size)
        self.step_size = step_size
        self.rf_classifier = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=42,
            max_depth=10,
            min_samples_split=5
        )
        self.is_trained = False

    def extract_features(self, image_patch: np.ndarray) -> np.ndarray:
        """Compute the feature vector of one image patch.

        A 3-dimensional patch is converted from BGR to grayscale first; any
        other patch is used as-is. The resulting vector has 42 entries:
        6 intensity statistics, 26 LBP histogram bins, 5 GLCM properties,
        1 edge density, 2 gradient statistics and 2 shape descriptors.

        Args:
            image_patch: Grayscale or BGR patch.
                NOTE(review): the OpenCV/GLCM steps presumably require uint8
                data - confirm against callers.

        Returns:
            1-D float feature vector.
        """
        features = []

        # Work on a single-channel image.
        if len(image_patch.shape) == 3:
            gray = cv2.cvtColor(image_patch, cv2.COLOR_BGR2GRAY)
        else:
            gray = image_patch.copy()

        # 1. Basic intensity statistics.
        features.extend([
            np.mean(gray),
            np.std(gray),
            np.median(gray),
            np.min(gray),
            np.max(gray),
            np.var(gray)
        ])

        # 2. Texture: histogram of uniform local binary patterns.
        radius = 3
        n_points = 8 * radius
        lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
        # Uniform LBP codes fall in 0..n_points + 1, hence n_points + 2 bins.
        lbp_hist, _ = np.histogram(lbp.ravel(), bins=n_points + 2,
                                   range=(0, n_points + 2), density=True)
        features.extend(lbp_hist)

        # 3. Texture: gray-level co-occurrence matrix properties.
        # graycomatrix expects angles in radians, so 0/45/90/135 degrees
        # must be written as multiples of pi/4.
        glcm_angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
        glcm_properties = ('contrast', 'dissimilarity', 'homogeneity',
                           'energy', 'correlation')
        try:
            glcm = graycomatrix(gray, distances=[1], angles=glcm_angles,
                                levels=256, symmetric=True, normed=True)
            features.extend(
                graycoprops(glcm, prop).mean() for prop in glcm_properties
            )
        except (ValueError, TypeError):
            # Best effort: unsupported patch data yields neutral texture
            # values, keeping the vector length constant.
            features.extend([0] * len(glcm_properties))

        # 4. Edge density: fraction of pixels marked by the Canny detector.
        edges = cv2.Canny(gray, 50, 150)
        features.append(np.sum(edges > 0) / edges.size)

        # 5. Gradient magnitude statistics from Sobel derivatives.
        grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        grad_magnitude = np.sqrt(grad_x**2 + grad_y**2)
        features.extend([
            np.mean(grad_magnitude),
            np.std(grad_magnitude)
        ])

        # 6. Shape descriptors of the largest external contour after Otsu
        #    thresholding: relative area and circularity.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            largest_contour = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(largest_contour)
            perimeter = cv2.arcLength(largest_contour, True)

            # Circularity is 4*pi*A / P^2; guard against a zero perimeter.
            if perimeter > 0:
                circularity = 4 * np.pi * area / (perimeter ** 2)
            else:
                circularity = 0

            features.extend([area / (gray.shape[0] * gray.shape[1]), circularity])
        else:
            features.extend([0, 0])

        return np.array(features)

    def sliding_window(self, image: np.ndarray) -> List[Tuple]:
        """Cut the image into windows of ``self.window_size``.

        Args:
            image: Input image; only its first two dimensions (rows, columns)
                are used for positioning.

        Returns:
            List of ``((x, y), patch)`` pairs in row-major order, where
            ``(x, y)`` is the top-left corner of the window. The list is
            empty when the image is smaller than the window.
        """
        # window_size is (width, height) while array shapes are
        # (height, width); unpack explicitly so non-square windows work.
        win_w, win_h = self.window_size
        img_h, img_w = image.shape[:2]

        # The range bounds guarantee every slice is a full-size window, so
        # no per-window shape check is required.
        return [
            ((x, y), image[y:y + win_h, x:x + win_w])
            for y in range(0, img_h - win_h + 1, self.step_size)
            for x in range(0, img_w - win_w + 1, self.step_size)
        ]

    def prepare_training_data(self, positive_samples: List[np.ndarray],
                            negative_samples: List[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Turn labelled patches into a feature matrix and a label vector.

        Args:
            positive_samples: Patches containing the target (label 1).
            negative_samples: Background patches (label 0).

        Returns:
            ``(features, labels)`` with positive rows first, followed by the
            negative rows.
        """
        print("提取正样本特征...")
        features = [self.extract_features(sample) for sample in positive_samples]

        print("提取负样本特征...")
        features.extend(self.extract_features(sample) for sample in negative_samples)

        labels = [1] * len(positive_samples) + [0] * len(negative_samples)
        return np.array(features), np.array(labels)

    def train(self, positive_samples: List[np.ndarray],
              negative_samples: List[np.ndarray]):
        """Fit the random forest and print a validation report.

        The data is split 80/20 (stratified by label); the forest is fitted
        on the larger part and evaluated on the held-out part.

        Args:
            positive_samples: Patches containing the target.
            negative_samples: Background patches.
        """
        print("准备训练数据...")
        X, y = self.prepare_training_data(positive_samples, negative_samples)

        print(f"训练数据形状: {X.shape}, 标签分布: {np.bincount(y)}")

        # Hold out 20% of the samples for validation.
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        print("训练随机森林分类器...")
        self.rf_classifier.fit(X_train, y_train)

        # Report performance on the held-out split.
        val_pred = self.rf_classifier.predict(X_val)
        print("\n验证集性能:")
        print(classification_report(y_val, val_pred))

        self.is_trained = True
        print("训练完成!")

    def detect(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Tuple]:
        """Detect targets in an image.

        Args:
            image: Input image.
            confidence_threshold: Minimum positive-class probability for a
                window to be reported.

        Returns:
            List of ``(x, y, w, h, confidence)`` tuples, one per window that
            is classified as positive with sufficient confidence.

        Raises:
            ValueError: If ``train()`` has not been called yet.
        """
        if not self.is_trained:
            raise ValueError("模型尚未训练，请先调用train()方法")

        windows = self.sliding_window(image)
        print(f"处理 {len(windows)} 个窗口...")
        if not windows:
            return []

        # Classify all windows in one batch instead of issuing two model
        # calls per window; per-row results are unchanged.
        feature_matrix = np.vstack(
            [self.extract_features(patch) for _, patch in windows]
        )
        predictions = self.rf_classifier.predict(feature_matrix)
        probabilities = self.rf_classifier.predict_proba(feature_matrix)

        # Locate the probability column of class 1 rather than assuming it
        # is column 1 (it is absent if training data held a single class).
        positive_columns = np.flatnonzero(self.rf_classifier.classes_ == 1)
        if positive_columns.size == 0:
            return []
        positive_scores = probabilities[:, positive_columns[0]]

        win_w, win_h = self.window_size
        return [
            (x, y, win_w, win_h, score)
            for ((x, y), _), label, score in zip(windows, predictions, positive_scores)
            if label == 1 and score >= confidence_threshold
        ]

    def non_max_suppression(self, detections: List[Tuple],
                          overlap_threshold: float = 0.3) -> List[Tuple]:
        """Greedy non-maximum suppression.

        Detections are visited in order of decreasing confidence; one is
        kept only if its IoU with every already-kept detection is below
        ``overlap_threshold``.

        Args:
            detections: List of ``(x, y, w, h, confidence)`` tuples.
            overlap_threshold: IoU at or above which a detection is dropped.

        Returns:
            Kept detections, sorted by decreasing confidence.
        """
        if not detections:
            return []

        ranked = sorted(detections, key=lambda det: det[4], reverse=True)

        keep = []
        for candidate in ranked:
            # A candidate survives only if no stronger kept box overlaps it.
            if all(self.calculate_iou(kept, candidate) < overlap_threshold
                   for kept in keep):
                keep.append(candidate)

        return keep

    @staticmethod
    def calculate_iou(box1: Tuple, box2: Tuple) -> float:
        """Return the intersection-over-union of two ``(x, y, w, h, conf)`` boxes."""
        x1, y1, w1, h1, _ = box1
        x2, y2, w2, h2, _ = box2

        # Corners of the intersection rectangle.
        xi1 = max(x1, x2)
        yi1 = max(y1, y2)
        xi2 = min(x1 + w1, x2 + w2)
        yi2 = min(y1 + h1, y2 + h2)

        # Disjoint or merely touching boxes do not overlap.
        if xi2 <= xi1 or yi2 <= yi1:
            return 0.0

        intersection = (xi2 - xi1) * (yi2 - yi1)
        union = w1 * h1 + w2 * h2 - intersection

        return intersection / union if union > 0 else 0.0

    def visualize_detections(self, image: np.ndarray, detections: List[Tuple],
                           title: str = "检测结果"):
        """Draw detections on a copy of the image and show it with matplotlib.

        Args:
            image: Original image, BGR or single-channel grayscale.
            detections: List of ``(x, y, w, h, confidence)`` tuples.
            title: Figure title prefix.
        """
        img_vis = image.copy()
        # Promote grayscale input to BGR so colored boxes can be drawn and
        # the BGR -> RGB conversion below is valid.
        if img_vis.ndim == 2:
            img_vis = cv2.cvtColor(img_vis, cv2.COLOR_GRAY2BGR)

        for x, y, w, h, confidence in detections:
            cv2.rectangle(img_vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{confidence:.2f}"
            # Put the label above the box, or just inside it when the box
            # touches the top edge and the text would fall off the image.
            label_y = y - 10 if y - 10 >= 12 else y + 15
            cv2.putText(img_vis, label, (x, label_y),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
        plt.title(f"{title} - 检测到 {len(detections)} 个目标")
        plt.axis('off')
        plt.show()

# 使用示例
def create_sample_data():
    """Generate synthetic 64x64 grayscale patches for the demo.

    Returns:
        ``(positive_samples, negative_samples)``: 100 patches showing a
        bright filled rectangle on a mid-gray random background, and 200
        dark random-texture background patches. All patches are uint8.
    """
    patch_shape = (64, 64)

    def _with_noise(patch, sigma):
        # Add zero-mean Gaussian noise and bring the result back to uint8.
        noisy = patch + np.random.normal(0, sigma, patch.shape)
        return np.clip(noisy, 0, 255).astype(np.uint8)

    # Positive patches: random background with a filled rectangle standing
    # in for the target structure.
    positive_samples = []
    for _ in range(100):
        base = np.random.randint(50, 100, patch_shape, dtype=np.uint8)
        cv2.rectangle(base, (10, 20), (50, 40), 150, -1)
        positive_samples.append(_with_noise(base, 10))

    # Negative patches: darker random texture without any structure.
    negative_samples = [
        _with_noise(np.random.randint(0, 50, patch_shape, dtype=np.uint8), 15)
        for _ in range(200)
    ]

    return positive_samples, negative_samples

# 完整使用示例
if __name__ == "__main__":
    # End-to-end demo: train on synthetic patches, then detect targets in a
    # synthetic test image.

    # Build the detector with a coarse stride to keep the demo fast.
    detector = RandomForestObjectDetector(window_size=(64, 64), step_size=32)

    # Generate the training patches and fit the classifier.
    print("创建示例数据...")
    positive_samples, negative_samples = create_sample_data()
    detector.train(positive_samples, negative_samples)

    # Dark random background with two bright filled squares as targets.
    test_image = np.random.randint(0, 50, (300, 400), dtype=np.uint8)
    for top_left, bottom_right in (((50, 50), (114, 114)),
                                   ((200, 150), (264, 214))):
        cv2.rectangle(test_image, top_left, bottom_right, 150, -1)

    # Run the sliding-window detector, then merge overlapping hits.
    print("进行目标检测...")
    detections = detector.detect(test_image, confidence_threshold=0.6)
    filtered_detections = detector.non_max_suppression(detections, overlap_threshold=0.3)

    print(f"原始检测数量: {len(detections)}")
    print(f"NMS后检测数量: {len(filtered_detections)}")

    # Show the surviving detections, if any.
    if not filtered_detections:
        print("未检测到目标")
    else:
        detector.visualize_detections(cv2.cvtColor(test_image, cv2.COLOR_GRAY2BGR),
                                    filtered_detections)