图像处理基础概念与常用操作

文章目录

- 图像处理基础概念与常用操作
  - 一、数字图像基础
    - [1.1 什么是数字图像？](#1.1 什么是数字图像？)
    - [1.2 图像的基本属性](#1.2 图像的基本属性)
  - 二、图像的读取、显示与保存
    - [2.1 使用OpenCV处理图像](#2.1 使用OpenCV处理图像)
    - [2.2 使用PIL/Pillow处理图像](#2.2 使用PIL/Pillow处理图像)
  - 三、图像基本操作
    - [3.1 图像裁剪、缩放、旋转](#3.1 图像裁剪、缩放、旋转)
    - [3.2 图像颜色空间转换](#3.2 图像颜色空间转换)
  - 四、图像增强技术
    - [4.1 亮度与对比度调整](#4.1 亮度与对比度调整)
    - [4.2 直方图均衡化](#4.2 直方图均衡化)
  - 五、图像滤波与噪声处理
    - [5.1 噪声类型](#5.1 噪声类型)
    - [5.2 图像滤波](#5.2 图像滤波)
  - 六、图像边缘检测
    - [6.1 Sobel算子](#6.1 Sobel算子)
  - 七、图像形态学操作
  - 八、图像变换
  - 九、完整实战案例
  - 十、总结与最佳实践

图像处理基础概念与常用操作

图像处理是计算机视觉的基础，也是AI技术的重要应用领域。本文将带你从零开始学习图像处理的核心概念，掌握常用的图像操作技术，为后续的深度学习和计算机视觉应用打下坚实基础。

一、数字图像基础

1.1 什么是数字图像？

数字图像是由像素组成的二维矩阵，每个像素代表图像中的一个点，包含颜色和亮度信息。

python 复制代码

import numpy as np
import matplotlib.pyplot as plt

# A minimal grayscale image: a 5x5 grid of 8-bit intensities (0-255).
gray_image = np.array(
    [
        [0, 50, 100, 150, 200],
        [50, 100, 150, 200, 255],
        [100, 150, 200, 255, 200],
        [150, 200, 255, 200, 150],
        [200, 255, 200, 150, 100],
    ],
    dtype=np.uint8,
)

print("灰度图像示例:")
print(gray_image)
print(
    f"图像尺寸: {gray_image.shape}",
    f"数据类型: {gray_image.dtype}",
    f"像素值范围: {gray_image.min()}-{gray_image.max()}",
    sep="\n",
)

# A minimal colour image: 3x3 pixels, each an (R, G, B) triple.
color_image = np.array(
    [
        [[255, 0, 0], [0, 255, 0], [0, 0, 255]],        # red, green, blue
        [[255, 255, 0], [0, 255, 255], [255, 0, 255]],  # yellow, cyan, magenta
        [[255, 255, 255], [128, 128, 128], [0, 0, 0]],  # white, gray, black
    ],
    dtype=np.uint8,
)

print(
    "\n彩色图像示例:",
    f"图像尺寸: {color_image.shape}",  # (height, width, channels)
    f"通道数: {color_image.shape[2]}",
    f"像素(0,0)的RGB值: {color_image[0, 0]}",  # the red pixel
    sep="\n",
)

1.2 图像的基本属性

python 复制代码

class ImageProperties:
    """Read-only view over the basic attributes of an image array.

    The wrapped object is used like a NumPy array: the properties below read
    its ``shape``, ``dtype`` and ``size`` attributes and call its ``min()``
    and ``max()`` methods.
    """

    def __init__(self, image):
        # Keep a reference only; the image is never copied or modified here.
        self.image = image

    @property
    def shape(self):
        """Full shape tuple of the wrapped image."""
        return self.image.shape

    @property
    def height(self):
        """Number of rows (length of the first axis)."""
        rows = self.image.shape[0]
        return rows

    @property
    def width(self):
        """Number of columns (length of the second axis)."""
        cols = self.image.shape[1]
        return cols

    @property
    def channels(self):
        """Channel count: 1 for a 2-D (grayscale) array, else the third axis length."""
        dims = self.image.shape
        return 1 if len(dims) == 2 else dims[2]

    @property
    def dtype(self):
        """Element data type of the wrapped image."""
        return self.image.dtype

    @property
    def size(self):
        """Total number of array elements (rows * columns * channels)."""
        return self.image.size

    @property
    def min_value(self):
        """Smallest element value in the image."""
        return self.image.min()

    @property
    def max_value(self):
        """Largest element value in the image."""
        return self.image.max()

    def get_info(self):
        """Return a dict summarising size, channels, dtype, element count and value range."""
        dimensions = f"{self.width}x{self.height}"  # reported as width x height
        summary = {'尺寸': dimensions}
        summary['通道数'] = self.channels
        summary['数据类型'] = str(self.dtype)
        summary['像素总数'] = self.size
        summary['值范围'] = f"{self.min_value}-{self.max_value}"
        return summary


# Demo: summarise the colour image defined earlier in the article.
props = ImageProperties(color_image)
info = props.get_info()

print("\n图像属性信息:", "=" * 50, sep="\n")
for key in info:
    value = info[key]
    print(f"{key}: {value}")

# Quick reference for the common channel layouts.
print(
    "\n图像类型说明:",
    "-" * 50,
    "灰度图像: 单通道，每个像素0-255（亮度）",
    "彩色图像: 三通道（RGB），每个通道0-255",
    "RGBA图像: 四通道，额外Alpha通道表示透明度",
    sep="\n",
)

二、图像的读取、显示与保存

2.1 使用OpenCV处理图像

python 复制代码

import cv2

# NOTE: running this section requires OpenCV:
#   pip install opencv-python

print("OpenCV图像处理:", "=" * 60, sep="\n")

# Reading: cv2.imread(path, flag), where flag is one of
#   cv2.IMREAD_COLOR (1, colour), cv2.IMREAD_GRAYSCALE (0, grayscale),
#   cv2.IMREAD_UNCHANGED (-1, keep the file as stored).
print(
    "\n读取图像:",
    "image = cv2.imread('image.jpg')",
    "gray_image = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)",
    sep="\n",
)

# Displaying
print(
    "\n显示图像:",
    "cv2.imshow('Window Name', image)",
    "cv2.waitKey(0)  # 等待按键",
    "cv2.destroyAllWindows()  # 关闭窗口",
    sep="\n",
)

# Saving
print(
    "\n保存图像:",
    "cv2.imwrite('output.jpg', image)",
    "cv2.imwrite('output.png', image)  # PNG格式支持透明度",
    sep="\n",
)

# Channel-order reminder (BGR in OpenCV vs RGB in Matplotlib)
print(
    "\n重要提示:",
    "- OpenCV使用BGR格式（蓝绿红）",
    "- Matplotlib使用RGB格式（红绿蓝）",
    "- 转换: cv2.cvtColor(image, cv2.COLOR_BGR2RGB)",
    sep="\n",
)

# Synthetic black canvas so the demo needs no image file on disk.
test_image = np.zeros(shape=(100, 100, 3), dtype=np.uint8)

# Draw on the canvas; colour tuples are in BGR order.
cv2.rectangle(test_image, (10, 10), (90, 90), (255, 0, 0), 2)  # blue outline, 2 px thick
cv2.circle(test_image, (50, 50), 20, (0, 255, 0), -1)  # green disc (-1 = filled)

print(
    f"\n创建测试图像: {test_image.shape}",
    f"图像中心像素BGR值: {test_image[50, 50]}",
    sep="\n",
)

# Reorder channels BGR -> RGB and show the same pixel again.
rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
print(f"转换后RGB值: {rgb_image[50, 50]}")

2.2 使用PIL/Pillow处理图像

python 复制代码

from PIL import Image

print("\n\nPillow图像处理:", "=" * 60, sep="\n")

# Create a solid-colour 200x200 image directly with Pillow.
pil_image = Image.new(mode='RGB', size=(200, 200), color='red')
print(f"创建图像: {pil_image.size}")

# Pillow image -> NumPy array
pil_array = np.array(pil_image)
print(f"NumPy数组形状: {pil_array.shape}")

# NumPy array -> Pillow image: fill every pixel with red, then wrap it.
np_image = np.zeros((100, 100, 3), dtype=np.uint8)
np_image[:] = (255, 0, 0)
img_from_array = Image.fromarray(np_image)

print(
    "\nPillow常用操作:",
    "from PIL import Image",
    "img = Image.open('image.jpg')  # 打开图像",
    "img.resize((width, height))    # 调整大小",
    "img.rotate(45)                  # 旋转",
    "img.crop((left, top, right, bottom))  # 裁剪",
    "img.save('output.jpg')          # 保存",
    sep="\n",
)

# Pillow mode strings and what they mean.
print("\n图像模式:")
modes = {
    'L': '灰度图像 (8-bit像素)',
    'RGB': '真彩色图像 (3x8-bit像素)',
    'RGBA': '带透明度的真彩色 (4x8-bit像素)',
    'CMYK': '印刷四色模式 (4x8-bit像素)',
    '1': '二值图像 (1-bit像素)',
}

print("\n".join(f"  {mode}: {desc}" for mode, desc in modes.items()))

三、图像基本操作

3.1 图像裁剪、缩放、旋转

python 复制代码

class ImageOperations:
    """Basic geometric operations on NumPy image arrays.

    Images are indexed as ``image[row, column]`` (optionally with a trailing
    channel axis), so ``y`` selects rows and ``x`` selects columns.
    """

    @staticmethod
    def crop(image, x, y, width, height):
        """Return the sub-image of size ``width`` x ``height`` starting at (x, y).

        Args:
            image: Source array of shape (H, W) or (H, W, C).
            x, y: Column and row of the top-left corner of the crop.
            width, height: Size of the crop in pixels.

        Returns:
            A slice (view) of ``image``; it is truncated at the image border
            following normal NumPy slicing rules.
        """
        return image[y:y + height, x:x + width]

    @staticmethod
    def resize(image, new_width, new_height):
        """Resize with nearest-neighbour sampling.

        Args:
            image: Source array of shape (H, W) or (H, W, C).
            new_width, new_height: Target size in pixels.

        Returns:
            A new array of shape (new_height, new_width[, C]) with the same
            dtype as ``image``.
        """
        h, w = image.shape[:2]

        # Source coordinate for every target row/column, truncated toward
        # zero exactly like int(i * scale) in a per-pixel loop would.
        src_rows = (np.arange(new_height) * (h / new_height)).astype(int)
        src_cols = (np.arange(new_width) * (w / new_width)).astype(int)

        # Broadcasting (rows as a column vector, cols as a row vector) gathers
        # the whole output grid in one indexing operation instead of a Python
        # double loop; a trailing channel axis is carried along unchanged.
        return image[src_rows[:, None], src_cols]

    @staticmethod
    def rotate(image, angle):
        """Rotate counter-clockwise by a multiple of 90 degrees.

        Args:
            image: Source array.
            angle: Rotation angle in degrees; must be a multiple of 90
                (e.g. 90, 180, 270, 360).

        Returns:
            The rotated array as produced by ``np.rot90``.

        Raises:
            ValueError: If ``angle`` is not a multiple of 90. Such angles
                would otherwise be silently rounded down to the previous
                multiple, returning a wrongly rotated image.
        """
        if angle % 90 != 0:
            raise ValueError(f"angle must be a multiple of 90, got {angle}")
        return np.rot90(image, angle // 90)

    @staticmethod
    def flip(image, direction='horizontal'):
        """Mirror the image.

        Args:
            image: Source array.
            direction: ``'horizontal'`` mirrors left-right; any other value
                (conventionally ``'vertical'``) mirrors top-bottom.

        Returns:
            The flipped array.
        """
        if direction == 'horizontal':
            return np.fliplr(image)
        return np.flipud(image)


# Synthetic test image: black canvas with two nested coloured squares.
test_image = np.zeros((200, 200, 3), dtype=np.uint8)
test_image[50:150, 50:150] = [255, 255, 0]  # outer square (yellow in RGB order)
test_image[70:130, 70:130] = [0, 255, 0]    # inner square (green)

print("图像基本操作:", "=" * 60, f"原始图像尺寸: {test_image.shape}", sep="\n")

# Crop the 100x100 region that holds the outer square.
cropped = ImageOperations.crop(test_image, x=50, y=50, width=100, height=100)
print(f"\n裁剪后尺寸: {cropped.shape}")

# Downscale to half size.
resized = ImageOperations.resize(test_image, new_width=100, new_height=100)
print(f"缩放后尺寸: {resized.shape}")

# Quarter-turn rotation.
rotated = ImageOperations.rotate(test_image, angle=90)
print(f"旋转90度后尺寸: {rotated.shape}")

# Left-right mirror.
flipped_h = ImageOperations.flip(test_image, direction='horizontal')
print("水平翻转完成")

# The equivalent OpenCV calls, printed as a cheat sheet.
print(
    "\n\n使用OpenCV的图像操作:",
    "=" * 60,
    "# 缩放",
    "resized = cv2.resize(image, (new_width, new_height))",
    "resized = cv2.resize(image, None, fx=0.5, fy=0.5)  # 按比例缩放",
    "\n# 旋转",
    "rotated = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)",
    "rotated = cv2.rotate(image, cv2.ROTATE_180)",
    "\n# 翻转",
    "flipped = cv2.flip(image, 1)  # 水平翻转",
    "flipped = cv2.flip(image, 0)  # 垂直翻转",
    "\n# 裁剪",
    "cropped = image[y:y+h, x:x+w]",
    sep="\n",
)

3.2 图像颜色空间转换

python 复制代码

# Colour-space overview demo.
print("颜色空间转换:", "=" * 60, sep="\n")

# 3x3 RGB swatch: primaries, secondaries and three gray levels.
test_rgb = np.array(
    [
        [[255, 0, 0], [0, 255, 0], [0, 0, 255]],
        [[255, 255, 0], [0, 255, 255], [255, 0, 255]],
        [[128, 128, 128], [64, 64, 64], [192, 192, 192]],
    ],
    dtype=np.uint8,
)

print(
    "RGB颜色空间:",
    "Red:   (255, 0, 0)",
    "Green: (0, 255, 0)",
    "Blue:  (0, 0, 255)",
    sep="\n",
)

print("\n常用颜色空间:")
# Name -> one-line description of each commonly used colour space.
color_spaces = {
    'RGB': '红绿蓝，显示设备常用',
    'BGR': '蓝绿红，OpenCV默认',
    'HSV': '色调、饱和度、亮度，适合颜色分割',
    'Lab': '亮度、a(绿-红)、b(蓝-黄)，接近人眼感知',
    'Grayscale': '灰度，单通道',
    'YCrCb': '亮度、色度，视频压缩常用',
}

print("\n".join(f"  {name}: {desc}" for name, desc in color_spaces.items()))

# RGB转灰度
def rgb_to_grayscale(rgb):
    """Convert an RGB image to 8-bit grayscale.

    Uses the luma weights ``Gray = 0.299*R + 0.587*G + 0.114*B``.

    Args:
        rgb: Array of shape (H, W, C) whose first three channels are R, G, B,
            or an already 2-D grayscale array.

    Returns:
        The input itself when it is 2-D; otherwise a new (H, W) ``uint8``
        array.
    """
    if len(rgb.shape) == 2:
        return rgb

    weights = np.array([0.299, 0.587, 0.114])
    gray = rgb[..., :3].astype(float) @ weights

    # Round to nearest before casting: a bare astype() truncates, which maps
    # pure green (149.685) to 149 and can turn white into 254 through
    # floating-point error.
    return np.clip(np.rint(gray), 0, 255).astype(np.uint8)

# Convert the RGB swatch and report the shape before and after.
gray_image = rgb_to_grayscale(test_rgb)
print("\nRGB转灰度: {} -> {}".format(test_rgb.shape, gray_image.shape))

# RGB转HSV
def rgb_to_hsv(rgb):
    """Convert an 8-bit RGB image to HSV (concept demonstration).

    Output channel ranges:
        H: hue in degrees, 0-359
        S: saturation in percent, 0-100
        V: value (brightness) in percent, 0-100

    Args:
        rgb: Array of shape (H, W, C) with 0-255 values whose first three
            channels are R, G, B.

    Returns:
        Array of shape (H, W, 3) with dtype ``uint16``. A 16-bit type is
        required because hue goes up to 359, which does not fit in ``uint8``
        (casting to ``uint8`` would wrap e.g. magenta's 300 to 44).
    """
    # Normalise to [0, 1].
    normalized = rgb[..., :3].astype(float) / 255.0
    r, g, b = normalized[..., 0], normalized[..., 1], normalized[..., 2]

    # Value is the largest channel; delta is the chroma.
    v = normalized.max(axis=2)
    delta = v - normalized.min(axis=2)

    # Saturation = chroma / value, defined as 0 for black pixels.
    s = np.zeros_like(v)
    np.divide(delta, v, out=s, where=v > 0)

    # Hue is undefined for gray pixels (delta == 0); report 0 there and use a
    # dummy divisor of 1 so no 0/0 -> NaN is ever produced.
    chromatic = delta > 0
    safe_delta = np.where(chromatic, delta, 1.0)

    # Pick the hue sector from whichever channel holds the maximum. When two
    # channels tie for the maximum, both formulas give the same hue, so the
    # priority order does not matter.
    sector = np.select(
        [~chromatic, v == r, v == g],
        [0.0, ((g - b) / safe_delta) % 6, (b - r) / safe_delta + 2],
        default=(r - g) / safe_delta + 4,
    )

    # Round to nearest; a hue that rounds up to 360 wraps back to 0.
    h_deg = np.rint(sector * 60.0) % 360
    s_pct = np.rint(s * 100)
    v_pct = np.rint(v * 100)

    return np.stack([h_deg, s_pct, v_pct], axis=2).astype(np.uint16)

# Cheat sheet: the OpenCV call for each colour-space conversion.
print("\n".join([
    "\n\nOpenCV颜色空间转换:",
    "=" * 60,
    "# RGB转灰度",
    "gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)",
    "\n# BGR转RGB",
    "rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)",
    "\n# RGB转HSV",
    "hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)",
    "\n# RGB转Lab",
    "lab = cv2.cvtColor(image, cv2.COLOR_RGB2Lab)",
    "\n# RGB转YCrCb",
    "ycrcb = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)",
]))

四、图像增强技术

4.1 亮度与对比度调整

python 复制代码

class ImageEnhancement:
    """Point-wise enhancement operations for images with a 0-255 value range.

    Every public method returns a new ``uint8`` array. Results are rounded to
    the nearest integer and saturated to [0, 255]; plain truncation would bias
    every result darker by up to one level and would not guarantee that a
    neutral parameter (factor 1.0, gamma 1.0) reproduces the input.
    """

    @staticmethod
    def _to_uint8(values):
        """Round to nearest, saturate to [0, 255] and cast to uint8."""
        return np.clip(np.rint(values), 0, 255).astype(np.uint8)

    @staticmethod
    def adjust_brightness(image, factor):
        """Scale every pixel by ``factor``.

        Args:
            image: Input array with values in 0-255.
            factor: Brightness multiplier; > 1 brightens, < 1 darkens.

        Returns:
            New ``uint8`` array of the same shape.
        """
        return ImageEnhancement._to_uint8(image.astype(float) * factor)

    @staticmethod
    def adjust_contrast(image, factor):
        """Stretch or compress pixel values around the global mean.

        Args:
            image: Input array with values in 0-255.
            factor: Contrast multiplier; > 1 increases, < 1 decreases contrast.

        Returns:
            New ``uint8`` array of the same shape.
        """
        # The mean over all pixels (and channels) is the pivot that stays fixed.
        mean = np.mean(image)
        stretched = (image.astype(float) - mean) * factor + mean
        return ImageEnhancement._to_uint8(stretched)

    @staticmethod
    def gamma_correction(image, gamma):
        """Apply the power-law transform ``out = 255 * (in / 255) ** gamma``.

        Args:
            image: Input array with values in 0-255.
            gamma: Exponent; > 1 darkens, < 1 brightens.

        Returns:
            New ``uint8`` array of the same shape.
        """
        # Normalise to [0, 1]; the clip keeps out-of-range inputs from
        # producing NaN when raised to a fractional power.
        normalized = np.clip(image.astype(float) / 255.0, 0.0, 1.0)
        corrected = np.power(normalized, gamma)
        return ImageEnhancement._to_uint8(corrected * 255.0)


def _print_stats(title, img):
    """Print a heading followed by the mean and standard deviation of ``img``."""
    print(title)
    print(f"  平均值: {img.mean():.2f}")
    print(f"  标准差: {img.std():.2f}")


# Random mid-range test image (values 50-199) so both brightening and
# darkening have headroom.
test_image = np.random.randint(50, 200, (100, 100, 3), dtype=np.uint8)

print("图像增强:", "=" * 60, sep="\n")
_print_stats("原始图像统计:", test_image)

# Brightness up / down
brightened = ImageEnhancement.adjust_brightness(test_image, 1.5)
_print_stats("\n调亮后 (factor=1.5):", brightened)

darkened = ImageEnhancement.adjust_brightness(test_image, 0.7)
_print_stats("\n调暗后 (factor=0.7):", darkened)

# Contrast boost
high_contrast = ImageEnhancement.adjust_contrast(test_image, 1.5)
_print_stats("\n高对比度 (factor=1.5):", high_contrast)

# Gamma < 1 brightens
gamma_corrected = ImageEnhancement.gamma_correction(test_image, 0.7)
_print_stats("\n伽马校正 (gamma=0.7):", gamma_corrected)

# The OpenCV equivalent, printed as a cheat sheet.
print(
    "\n\nOpenCV亮度对比度调整:",
    "=" * 60,
    "# 使用公式: new_image = alpha * image + beta",
    "# alpha: 对比度控制 (1.0-3.0)",
    "# beta: 亮度控制 (0-100)",
    "",
    "alpha = 1.5  # 增强对比度",
    "beta = 50    # 增加亮度",
    "enhanced = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)",
    sep="\n",
)

4.2 直方图均衡化

python 复制代码

def calculate_histogram(image):
    """Return the 256-bin intensity histogram of an image.

    A 3-D input is first reduced to grayscale with ``rgb_to_grayscale``.

    Returns:
        ``(hist, bins)`` as produced by ``np.histogram``: 256 counts and the
        257 bin edges spanning [0, 256].
    """
    gray = rgb_to_grayscale(image) if len(image.shape) == 3 else image
    counts, edges = np.histogram(gray.ravel(), bins=256, range=(0, 256))
    return counts, edges

def histogram_equalization(image):
    """Equalize the histogram of an 8-bit grayscale image.

    Uses the standard mapping
    ``out = round((cdf(v) - cdf_min) / (N - cdf_min) * 255)``, where
    ``cdf_min`` is the CDF value of the darkest level present and ``N`` is the
    pixel count. Subtracting ``cdf_min`` makes the darkest level map to 0 so
    the output spans the full range; without it a dark, low-contrast image is
    pushed towards white and can even lose contrast.

    Args:
        image: Integer array with values in 0-255, shape (H, W); a 3-D input
            is first converted with ``rgb_to_grayscale``.

    Returns:
        New ``uint8`` array of the same 2-D shape. An empty or single-valued
        image is returned unchanged (as a copy), since there is nothing to
        spread.
    """
    if len(image.shape) == 3:
        image = rgb_to_grayscale(image)

    # Histogram and cumulative distribution over the 256 intensity levels.
    hist, _ = np.histogram(image.ravel(), bins=256, range=(0, 256))
    cdf = hist.cumsum()
    total = cdf[-1]

    if total == 0:
        return image.astype(np.uint8)

    cdf_min = cdf[cdf > 0][0]
    if cdf_min == total:
        # Only one intensity level present: the mapping would divide by zero.
        return image.astype(np.uint8)

    # Build the 256-entry lookup table once, then map every pixel through it.
    lut = np.rint((cdf - cdf_min) * 255.0 / (total - cdf_min))
    lut = np.clip(lut, 0, 255).astype(np.uint8)
    return lut[image]


# Low-contrast test image: uniform 100 background with a slightly brighter
# (150) square in the middle.
low_contrast_image = np.full((100, 100), 100, dtype=np.uint8)
low_contrast_image[30:70, 30:70] = 150

print("直方图均衡化:", "=" * 60, sep="\n")
print(
    "原始图像统计:",
    f"  平均值: {low_contrast_image.mean():.2f}",
    f"  标准差: {low_contrast_image.std():.2f}",
    sep="\n",
)

# Apply the equalization implemented above.
equalized = histogram_equalization(low_contrast_image)

print(
    "\n均衡化后统计:",
    f"  平均值: {equalized.mean():.2f}",
    f"  标准差: {equalized.std():.2f}",
    sep="\n",
)

# The OpenCV equivalents, printed as a cheat sheet.
print(
    "\n\nOpenCV直方图均衡化:",
    "=" * 60,
    "# 灰度图像均衡化",
    "equalized = cv2.equalizeHist(gray_image)",
    "\n# 彩色图像均衡化（转换到YCrCb空间）",
    "ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)",
    "ycrcb[:,:,0] = cv2.equalizeHist(ycrcb[:,:,0])  # 只均衡化Y通道",
    "equalized = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2BGR)",
    "\n# CLAHE（对比度受限的自适应直方图均衡化）",
    "clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))",
    "equalized = clahe.apply(gray_image)",
    sep="\n",
)

五、图像滤波与噪声处理

5.1 噪声类型

python 复制代码

def add_noise(image, noise_type='gaussian', intensity=0.1):
    """Return a noisy ``uint8`` copy of ``image``.

    Args:
        image: Input array with values in 0-255; it is not modified.
        noise_type: One of
            ``'gaussian'``    - additive zero-mean noise with standard
                                deviation ``intensity * 255``;
            ``'salt_pepper'`` - each pixel is replaced with probability
                                ``intensity`` by 255 (salt) or 0 (pepper),
                                chosen independently per pixel;
            ``'speckle'``     - multiplicative noise
                                ``out = in + in * N(0, intensity)``.
        intensity: Noise strength, interpreted per ``noise_type`` as above.

    Returns:
        New ``uint8`` array of the same shape, saturated to [0, 255].

    Raises:
        ValueError: If ``noise_type`` is not one of the supported names
            (previously such a call silently returned the clean image).
    """
    # astype() already returns a fresh float copy, so the input stays intact.
    noisy = image.astype(float)

    if noise_type == 'gaussian':
        noisy += np.random.normal(0, intensity * 255, image.shape)

    elif noise_type == 'salt_pepper':
        corrupted = np.random.random(image.shape) < intensity
        # Draw salt-vs-pepper per pixel. A single coin flip for the whole
        # image would make every corrupted pixel white, or every one black.
        salt = np.random.random(image.shape) > 0.5
        noisy[corrupted & salt] = 255
        noisy[corrupted & ~salt] = 0

    elif noise_type == 'speckle':
        noisy += noisy * (np.random.randn(*image.shape) * intensity)

    else:
        raise ValueError(
            f"unknown noise_type {noise_type!r}; "
            "expected 'gaussian', 'salt_pepper' or 'speckle'"
        )

    return np.clip(noisy, 0, 255).astype(np.uint8)


# Clean reference image: black background with a bright (200) square.
clean_image = np.zeros((100, 100), dtype=np.uint8)
clean_image[30:70, 30:70] = 200

# Overview of the common noise models.
print(
    "图像噪声:",
    "=" * 60,
    "常见噪声类型:",
    "-" * 60,
    "1. 高斯噪声 (Gaussian Noise)",
    "   - 随机分布的亮度变化",
    "   - 符合正态分布",
    "   - 常见于传感器噪声",
    "\n2. 椒盐噪声 (Salt & Pepper Noise)",
    "   - 随机的白色和黑色像素",
    "   - 常见于传输错误",
    "\n3. 斑点噪声 (Speckle Noise)",
    "   - 乘性噪声",
    "   - 常见于医学影像",
    sep="\n",
)

# Corrupt the clean image with two different noise models.
gaussian_noise = add_noise(clean_image, noise_type='gaussian', intensity=0.05)
salt_pepper_noise = add_noise(clean_image, noise_type='salt_pepper', intensity=0.02)

print(
    f"\n原始图像平均值: {clean_image.mean():.2f}",
    f"高斯噪声图像平均值: {gaussian_noise.mean():.2f}",
    f"椒盐噪声图像平均值: {salt_pepper_noise.mean():.2f}",
    sep="\n",
)

5.2 图像滤波

python 复制代码

class ImageFilters:
    """Sliding-window smoothing filters (mean, Gaussian, median).

    All filters
      * accept (H, W) or (H, W, C) arrays and process channels independently,
      * replicate the edge pixels as padding, so the output has the same size
        as the input and border pixels are filtered too (rather than being
        left black),
      * return the input dtype; for integer images the result is rounded to
        nearest and saturated to the dtype's range instead of truncated.
    """

    @staticmethod
    def _filter(image, kernel_size, reduce_windows):
        """Apply ``reduce_windows`` to every k x k neighbourhood of ``image``.

        ``reduce_windows`` receives an array of shape (H, W, k, k) holding the
        neighbourhood of each pixel and must return an (H, W) array.
        """
        if kernel_size < 1 or kernel_size % 2 == 0:
            # An even size has no centre pixel to anchor the window on.
            raise ValueError(
                f"kernel_size must be a positive odd integer, got {kernel_size}"
            )
        if image.size == 0:
            return image.copy()

        if len(image.shape) == 3:
            channels = [
                ImageFilters._filter(image[:, :, c], kernel_size, reduce_windows)
                for c in range(image.shape[2])
            ]
            return np.stack(channels, axis=2)

        pad = kernel_size // 2
        padded = np.pad(image, pad, mode='edge')
        # Zero-copy view of all neighbourhoods: shape (H, W, k, k).
        windows = np.lib.stride_tricks.sliding_window_view(
            padded, (kernel_size, kernel_size)
        )
        result = reduce_windows(windows)

        if np.issubdtype(image.dtype, np.integer):
            limits = np.iinfo(image.dtype)
            result = np.clip(np.rint(result), limits.min, limits.max)
        return result.astype(image.dtype)

    @staticmethod
    def mean_filter(image, kernel_size=3):
        """Box blur: replace each pixel by the mean of its neighbourhood.

        Args:
            image: Array of shape (H, W) or (H, W, C).
            kernel_size: Odd window side length.

        Returns:
            Filtered array with the same shape and dtype as ``image``.

        Raises:
            ValueError: If ``kernel_size`` is not a positive odd integer.
        """
        return ImageFilters._filter(
            image,
            kernel_size,
            lambda windows: windows.mean(axis=(-2, -1), dtype=float),
        )

    @staticmethod
    def gaussian_filter(image, kernel_size=3, sigma=1.0):
        """Gaussian blur with a normalised ``kernel_size`` x ``kernel_size`` kernel.

        Args:
            image: Array of shape (H, W) or (H, W, C).
            kernel_size: Odd window side length.
            sigma: Standard deviation of the Gaussian, in pixels (> 0).

        Returns:
            Filtered array with the same shape and dtype as ``image``.

        Raises:
            ValueError: If ``kernel_size`` is not a positive odd integer or
                ``sigma`` is not positive.
        """
        if sigma <= 0:
            raise ValueError(f"sigma must be positive, got {sigma}")

        # Sample the 2-D Gaussian on integer offsets around the centre and
        # normalise so the weights sum to 1 (keeps overall brightness).
        ax = np.linspace(-(kernel_size // 2), kernel_size // 2, kernel_size)
        xx, yy = np.meshgrid(ax, ax)
        kernel = np.exp(-(xx**2 + yy**2) / (2 * sigma**2))
        kernel = kernel / np.sum(kernel)

        return ImageFilters._filter(
            image,
            kernel_size,
            lambda windows: (windows * kernel).sum(axis=(-2, -1)),
        )

    @staticmethod
    def median_filter(image, kernel_size=3):
        """Median filter; particularly effective against salt-and-pepper noise.

        Args:
            image: Array of shape (H, W) or (H, W, C).
            kernel_size: Odd window side length.

        Returns:
            Filtered array with the same shape and dtype as ``image``.

        Raises:
            ValueError: If ``kernel_size`` is not a positive odd integer.
        """
        return ImageFilters._filter(
            image,
            kernel_size,
            lambda windows: np.median(windows, axis=(-2, -1)),
        )


# Filter demo: denoise corrupted copies of the clean reference image.
print("\n\n图像滤波:", "=" * 60, sep="\n")

# Gaussian-noise input for the two averaging filters.
noisy_image = add_noise(clean_image, noise_type='gaussian', intensity=0.05)
print(f"噪声图像平均值: {noisy_image.mean():.2f}")

# Box blur
mean_filtered = ImageFilters.mean_filter(noisy_image, 3)
print(f"均值滤波后: {mean_filtered.mean():.2f}")

# Gaussian blur
gaussian_filtered = ImageFilters.gaussian_filter(noisy_image, 3, 1.0)
print(f"高斯滤波后: {gaussian_filtered.mean():.2f}")

# Median filter, exercised on salt-and-pepper noise where it works best.
sp_noisy_image = add_noise(clean_image, noise_type='salt_pepper', intensity=0.02)
median_filtered = ImageFilters.median_filter(sp_noisy_image, 3)
print(f"中值滤波后: {median_filtered.mean():.2f}")

# The OpenCV equivalents, printed as a cheat sheet.
print(
    "\n\nOpenCV滤波函数:",
    "=" * 60,
    "# 均值滤波",
    "blurred = cv2.blur(image, (5, 5))",
    "\n# 高斯滤波",
    "blurred = cv2.GaussianBlur(image, (5, 5), 1.0)",
    "\n# 中值滤波",
    "blurred = cv2.medianBlur(image, 5)",
    "\n# 双边滤波（保边去噪）",
    "blurred = cv2.bilateralFilter(image, 9, 75, 75)",
    "\n# 非局部均值去噪",
    "denoised = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)",
    sep="\n",
)

六、图像边缘检测

6.1 Sobel算子

python 复制代码

class EdgeDetection:
    """3x3 gradient-based edge detectors (Sobel and Laplacian).

    Both detectors compute the response for interior pixels only; the
    one-pixel border of the output is left at 0. 8-bit outputs are saturated
    at 255: raw responses can reach about 1442 (Sobel) or 1020 (Laplacian),
    and a bare cast to ``uint8`` would wrap them around so that the strongest
    edges show up as weak ones.
    """

    @staticmethod
    def _correlate3x3(image, kernel):
        """Correlate a 2-D image with a 3x3 kernel; border pixels stay 0."""
        source = image.astype(float)
        response = np.zeros(source.shape, dtype=float)
        h, w = source.shape
        if h < 3 or w < 3:
            return response  # no interior pixels to compute

        # Accumulate one shifted copy of the image per non-zero kernel tap.
        # ``interior`` is a view, so the in-place adds write into ``response``.
        interior = response[1:-1, 1:-1]
        for di in range(3):
            for dj in range(3):
                weight = kernel[di, dj]
                if weight != 0:
                    interior += weight * source[di:di + h - 2, dj:dj + w - 2]
        return response

    @staticmethod
    def sobel(image):
        """Sobel edge detection.

        Args:
            image: Array of shape (H, W); a 3-D input is first converted with
                ``rgb_to_grayscale``.

        Returns:
            ``(magnitude, direction)``: ``magnitude`` is a ``uint8`` array
            holding the gradient magnitude saturated to 255, ``direction`` a
            float array of gradient angles in radians
            (``arctan2(grad_y, grad_x)``).
        """
        if len(image.shape) == 3:
            image = rgb_to_grayscale(image)

        # Horizontal and vertical derivative kernels.
        sobel_x = np.array([[-1, 0, 1],
                            [-2, 0, 2],
                            [-1, 0, 1]])
        sobel_y = np.array([[-1, -2, -1],
                            [0, 0, 0],
                            [1, 2, 1]])

        grad_x = EdgeDetection._correlate3x3(image, sobel_x)
        grad_y = EdgeDetection._correlate3x3(image, sobel_y)

        magnitude = np.sqrt(grad_x**2 + grad_y**2)
        direction = np.arctan2(grad_y, grad_x)

        return np.clip(magnitude, 0, 255).astype(np.uint8), direction

    @staticmethod
    def laplacian(image):
        """Laplacian edge detection (4-neighbour kernel).

        Args:
            image: Array of shape (H, W); a 3-D input is first converted with
                ``rgb_to_grayscale``.

        Returns:
            ``uint8`` array with the absolute Laplacian response saturated to
            255.
        """
        if len(image.shape) == 3:
            image = rgb_to_grayscale(image)

        laplacian_kernel = np.array([[0, 1, 0],
                                     [1, -4, 1],
                                     [0, 1, 0]])

        edges = EdgeDetection._correlate3x3(image, laplacian_kernel)
        return np.clip(np.abs(edges), 0, 255).astype(np.uint8)


# Test image with sharp edges: a white square on a black background.
test_image = np.zeros((100, 100), dtype=np.uint8)
test_image[30:70, 30:70] = 255

print("边缘检测:", "=" * 60, sep="\n")

# Sobel: count pixels whose gradient magnitude exceeds 50.
sobel_edges, sobel_direction = EdgeDetection.sobel(test_image)
print("Sobel边缘检测完成")
print(f"边缘像素数: {np.count_nonzero(sobel_edges > 50)}")

# Laplacian: same threshold on the absolute response.
laplacian_edges = EdgeDetection.laplacian(test_image)
print("\n拉普拉斯边缘检测完成")
print(f"边缘像素数: {np.count_nonzero(laplacian_edges > 50)}")

# The OpenCV equivalents, printed as a cheat sheet.
print(
    "\n\nOpenCV边缘检测:",
    "=" * 60,
    "# Sobel边缘检测",
    "grad_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)",
    "grad_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)",
    "magnitude = np.sqrt(grad_x**2 + grad_y**2)",
    "\n# Canny边缘检测",
    "edges = cv2.Canny(image, threshold1, threshold2)",
    "edges = cv2.Canny(image, 100, 200)  # 常用阈值",
    "\n# 拉普拉斯边缘检测",
    "edges = cv2.Laplacian(image, cv2.CV_64F)",
    "\n# Canny边缘检测步骤:",
    "1. 高斯滤波去噪",
    "2. 计算梯度幅值和方向",
    "3. 非极大值抑制",
    "4. 双阈值检测和边缘连接",
    sep="\n",
)

七、图像形态学操作

python 复制代码

class Morphology:
    """Morphological operations with a square ``kernel_size`` x ``kernel_size``
    structuring element of ones.

    Images may be (H, W) or (H, W, C); channels are processed independently.
    The area outside the image is treated as neutral for each operation: it is
    padded with the dtype's minimum for dilation and with its maximum for
    erosion. Padding erosion with 0 would always eat away objects touching the
    image border and would let ``closing`` remove pixels from its input.
    """

    @staticmethod
    def _border_value(dtype, largest):
        """Return the largest (or smallest) value representable in ``dtype``."""
        if np.issubdtype(dtype, np.bool_):
            return bool(largest)
        if np.issubdtype(dtype, np.integer):
            limits = np.iinfo(dtype)
            return limits.max if largest else limits.min
        return np.inf if largest else -np.inf

    @staticmethod
    def _morph(image, kernel_size, combine, pad_with_largest):
        """Fold ``combine`` (np.maximum / np.minimum) over every window."""
        if kernel_size < 1:
            raise ValueError(f"kernel_size must be >= 1, got {kernel_size}")

        if len(image.shape) == 3:
            result = np.zeros_like(image)
            for c in range(image.shape[2]):
                result[:, :, c] = Morphology._morph(
                    image[:, :, c], kernel_size, combine, pad_with_largest
                )
            return result

        h, w = image.shape
        pad = kernel_size // 2
        padded = np.pad(
            image,
            pad,
            mode='constant',
            constant_values=Morphology._border_value(image.dtype, pad_with_largest),
        )

        # The window extremum equals the element-wise extremum over all
        # kernel_size**2 shifted copies of the padded image, which avoids a
        # per-pixel Python loop.
        result = padded[:h, :w].copy()
        for di in range(kernel_size):
            for dj in range(kernel_size):
                combine(result, padded[di:di + h, dj:dj + w], out=result)
        return result

    @staticmethod
    def dilate(image, kernel_size=3):
        """Dilation: each pixel becomes the maximum of its neighbourhood.

        Args:
            image: Array of shape (H, W) or (H, W, C).
            kernel_size: Side length of the square structuring element (>= 1).

        Returns:
            New array with the same shape and dtype as ``image``.

        Raises:
            ValueError: If ``kernel_size`` is smaller than 1.
        """
        return Morphology._morph(image, kernel_size, np.maximum, False)

    @staticmethod
    def erode(image, kernel_size=3):
        """Erosion: each pixel becomes the minimum of its neighbourhood.

        Args:
            image: Array of shape (H, W) or (H, W, C).
            kernel_size: Side length of the square structuring element (>= 1).

        Returns:
            New array with the same shape and dtype as ``image``.

        Raises:
            ValueError: If ``kernel_size`` is smaller than 1.
        """
        return Morphology._morph(image, kernel_size, np.minimum, True)

    @staticmethod
    def opening(image, kernel_size=3):
        """Opening: erosion followed by dilation.

        Removes bright specks smaller than the structuring element.
        """
        eroded = Morphology.erode(image, kernel_size)
        return Morphology.dilate(eroded, kernel_size)

    @staticmethod
    def closing(image, kernel_size=3):
        """Closing: dilation followed by erosion.

        Fills dark holes smaller than the structuring element.
        """
        dilated = Morphology.dilate(image, kernel_size)
        return Morphology.erode(dilated, kernel_size)


def _count_white(img):
    """Number of non-zero (white) pixels in ``img``."""
    return np.sum(img > 0)


# Binary test image: an inverted-L shape plus a small isolated blob.
binary_image = np.zeros((100, 100), dtype=np.uint8)
binary_image[30:40, 30:70] = 255  # horizontal bar
binary_image[30:70, 60:70] = 255  # vertical bar
binary_image[50:55, 50:55] = 255  # isolated 5x5 blob acting as noise

print("图像形态学操作:", "=" * 60, sep="\n")
print(f"原始图像白色像素数: {_count_white(binary_image)}")

# Dilation grows white regions.
dilated = Morphology.dilate(binary_image, 3)
print(f"膨胀后白色像素数: {_count_white(dilated)}")

# Erosion shrinks white regions.
eroded = Morphology.erode(binary_image, 3)
print(f"腐蚀后白色像素数: {_count_white(eroded)}")

# Opening with a 5x5 element.
opened = Morphology.opening(binary_image, 5)
print(f"开运算后白色像素数: {_count_white(opened)}")

# Closing with a 5x5 element.
closed = Morphology.closing(binary_image, 5)
print(f"闭运算后白色像素数: {_count_white(closed)}")

# The OpenCV equivalents, printed as a cheat sheet.
print(
    "\n\nOpenCV形态学操作:",
    "=" * 60,
    "# 创建结构元素",
    "kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))  # 矩形",
    "kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))  # 椭圆",
    "kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5, 5))  # 十字形",
    "\n# 膨胀",
    "dilated = cv2.dilate(image, kernel, iterations=1)",
    "\n# 腐蚀",
    "eroded = cv2.erode(image, kernel, iterations=1)",
    "\n# 开运算",
    "opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)",
    "\n# 闭运算",
    "closed = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)",
    "\n# 形态学梯度（膨胀-腐蚀）",
    "gradient = cv2.morphologyEx(image, cv2.MORPH_GRADIENT, kernel)",
    "\n# 顶帽（原图-开运算）",
    "tophat = cv2.morphologyEx(image, cv2.MORPH_TOPHAT, kernel)",
    "\n# 黑帽（闭运算-原图）",
    "blackhat = cv2.morphologyEx(image, cv2.MORPH_BLACKHAT, kernel)",
    sep="\n",
)

八、图像变换

python 复制代码

class ImageTransforms:
    """Geometric image transforms."""

    @staticmethod
    def translate(image, dx, dy):
        """Shift the image by (dx, dy) pixels, filling uncovered areas with 0.

        Args:
            image: Array of shape (H, W) or (H, W, C).
            dx: Horizontal shift in pixels; positive moves content right.
            dy: Vertical shift in pixels; positive moves content down.
                Fractional shifts are rounded down to whole pixels.

        Returns:
            New array with the same shape and dtype as ``image``. Content
            shifted outside the frame is discarded.
        """
        h, w = image.shape[:2]
        shift_x = int(np.floor(dx))
        shift_y = int(np.floor(dy))

        translated = np.zeros_like(image)
        if abs(shift_x) >= w or abs(shift_y) >= h:
            return translated  # everything moved out of the frame

        # Copy the overlapping rectangle in a single slice assignment instead
        # of visiting every pixel in Python.
        dst_rows = slice(max(0, shift_y), h + min(0, shift_y))
        dst_cols = slice(max(0, shift_x), w + min(0, shift_x))
        src_rows = slice(max(0, -shift_y), h - max(0, shift_y))
        src_cols = slice(max(0, -shift_x), w - max(0, shift_x))
        translated[dst_rows, dst_cols] = image[src_rows, src_cols]

        return translated

    @staticmethod
    def affine_transform(image):
        """Print an overview of affine transforms and the matching OpenCV calls.

        This is an explanatory example only: ``image`` is not used and
        nothing is returned.
        """
        print("仿射变换包括：")
        print("- 平移")
        print("- 旋转")
        print("- 缩放")
        print("- 错切")
        print("\nOpenCV仿射变换:")
        print("# 定义三个点对")
        print("pts1 = np.float32([[50,50],[200,50],[50,200]])")
        print("pts2 = np.float32([[10,100],[200,50],[100,250]])")
        print("# 计算变换矩阵")
        print("M = cv2.getAffineTransform(pts1, pts2)")
        print("# 应用变换")
        print("result = cv2.warpAffine(image, M, (w, h))")


print("\n\n图像变换:", "=" * 60, sep="\n")

# Translation test on a random 50x50 image: shift 10 px right, 5 px down.
test_image = np.random.randint(100, 200, (50, 50), dtype=np.uint8)
translated = ImageTransforms.translate(test_image, dx=10, dy=5)

# The original centre (row 25, col 25) should now sit at (row 30, col 35),
# provided that position is inside the frame.
if translated.shape[0] > 30 and translated.shape[1] > 35:
    moved_center = translated[30, 35]
else:
    moved_center = '超出范围'

print("平移变换完成")
print(f"原始中心点: (25, 25) -> 值={test_image[25, 25]}")
print(f"平移后中心点: (35, 30) -> 值={moved_center}")

# The OpenCV equivalents, printed as a cheat sheet.
print(
    "\n\nOpenCV图像变换:",
    "=" * 60,
    "# 平移",
    "M = np.float32([[1, 0, tx], [0, 1, ty]])",
    "translated = cv2.warpAffine(image, M, (w, h))",
    "\n# 旋转",
    "M = cv2.getRotationMatrix2D(center, angle, scale)",
    "rotated = cv2.warpAffine(image, M, (w, h))",
    "\n# 缩放",
    "resized = cv2.resize(image, (new_w, new_h))",
    "\n# 透视变换",
    "pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])",
    "pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])",
    "M = cv2.getPerspectiveTransform(pts1, pts2)",
    "result = cv2.warpPerspective(image, M, (300,300))",
    sep="\n",
)

九、完整实战案例

python 复制代码

class ImageProcessingPipeline:
    """An ordered chain of named image-processing steps.

    Each step is a callable that takes an image and returns the processed
    image; steps run in the order they were added.
    """

    def __init__(self):
        # List of (step_name, step_func) pairs in execution order.
        self.steps = []

    def add_step(self, step_name, step_func):
        """Append a step: ``step_func`` is stored under the label ``step_name``."""
        entry = (step_name, step_func)
        self.steps.append(entry)

    def process(self, image):
        """Run every step on a copy of ``image``.

        Returns:
            ``(result, history)`` where ``result`` is the output of the last
            step and ``history`` is a list of ``(label, snapshot)`` pairs: the
            untouched input first, then a copy of the image after each step.
        """
        current = image.copy()
        snapshots = [('原始图像', current.copy())]

        for label, transform in self.steps:
            current = transform(current)
            # Store a copy so later in-place edits cannot alter the record.
            snapshots.append((label, current.copy()))

        return current, snapshots


# Pipeline demo.
print("图像处理流水线:", "=" * 60, sep="\n")

# Example chain: corrupt the image, then denoise and enhance it.
pipeline = ImageProcessingPipeline()

for step_label, step_callable in (
    ('添加噪声', lambda img: add_noise(img, 'gaussian', 0.05)),
    ('高斯滤波', lambda img: ImageFilters.gaussian_filter(img, 3, 1.0)),
    ('调整亮度', lambda img: ImageEnhancement.adjust_brightness(img, 1.1)),
    ('调整对比度', lambda img: ImageEnhancement.adjust_contrast(img, 1.2)),
):
    pipeline.add_step(step_label, step_callable)

# Run the chain on a random 50x50 grayscale image.
original = np.random.randint(100, 200, (50, 50), dtype=np.uint8)
processed, history = pipeline.process(original)

print(f"原始图像: 平均值={original.mean():.2f}")
# history[0] is the untouched input, so report from the first real step on.
for step_name, result in history[1:]:
    print(f"{step_name}: 平均值={result.mean():.2f}")

# Typical real-world preprocessing chains.
print("\n\n实际应用场景:", "=" * 60, sep="\n")

applications = [
    {
        '场景': '文档扫描预处理',
        '流程': ['灰度化', '二值化', '降噪', '倾斜校正', '裁剪'],
    },
    {
        '场景': '人脸识别预处理',
        '流程': ['人脸检测', '对齐', '归一化', '光照归一化', '特征提取'],
    },
    {
        '场景': '车牌识别预处理',
        '流程': ['灰度化', '边缘检测', '形态学操作', '字符分割', '归一化'],
    },
    {
        '场景': '医学图像增强',
        '流程': ['去噪', '对比度增强', '直方图均衡化', '边缘锐化', '伪彩色显示'],
    },
    {
        '场景': '卫星图像处理',
        '流程': ['辐射校正', '几何校正', '大气校正', '图像融合', '分类'],
    },
]

for app in applications:
    scene, flow = app['场景'], app['流程']
    print(f"\n{scene}:")
    print("  流程: " + " -> ".join(flow))

十、总结与最佳实践

python 复制代码

# Key takeaways
print("图像处理总结:", "=" * 80, sep="\n")

key_points = [
    "理解图像的基本属性：尺寸、通道、数据类型、值范围",
    "掌握颜色空间转换：RGB、BGR、HSV、灰度等",
    "熟练使用图像基本操作：裁剪、缩放、旋转、翻转",
    "理解图像增强技术：亮度、对比度、直方图均衡化",
    "掌握噪声类型和滤波方法：高斯、椒盐、均值、中值滤波",
    "学习边缘检测算法：Sobel、Canny、拉普拉斯",
    "应用形态学操作：膨胀、腐蚀、开运算、闭运算",
    "选择合适的工具：OpenCV功能强大，Pillow简单易用",
]

# Numbered list, starting at 1.
print("\n".join(f"{i}. {point}" for i, point in enumerate(key_points, 1)))

# Best practices
print("\n\n最佳实践:", "=" * 80, sep="\n")

best_practices = [
    "数据类型：始终注意图像的数据类型（uint8, float32等）",
    "值范围：处理前归一化到[0,1]，处理后转换回[0,255]",
    "边界处理：注意卷积操作时的边界填充",
    "性能优化：使用OpenCV的内置函数而非自己实现循环",
    "内存管理：大图像处理时注意内存使用",
    "可视化：处理过程中保存中间结果便于调试",
    "参数调优：滤波器核大小、阈值等参数需要根据场景调整",
    "流水线设计：将多个操作组合成流水线，提高复用性",
]

print("\n".join(f"{i}. {practice}" for i, practice in enumerate(best_practices, 1)))

# OpenCV quick reference: (task, call signature) pairs.
print("\n\n常用OpenCV函数速查:", "=" * 80, sep="\n")

opencv_cheatsheet = [
    ("读取图像", "cv2.imread('path', flags)"),
    ("保存图像", "cv2.imwrite('path', image)"),
    ("显示图像", "cv2.imshow('name', image)"),
    ("调整大小", "cv2.resize(image, (w, h))"),
    ("颜色转换", "cv2.cvtColor(image, code)"),
    ("高斯模糊", "cv2.GaussianBlur(image, ksize, sigma)"),
    ("边缘检测", "cv2.Canny(image, thresh1, thresh2)"),
    ("阈值处理", "cv2.threshold(image, thresh, maxval, type)"),
    ("形态学操作", "cv2.morphologyEx(image, op, kernel)"),
    ("绘制图形", "cv2.rectangle/circle/line(...)"),
]

# Left-align the task name in a 12-character column.
print("\n".join(f"{func_name:<12}: {syntax}" for func_name, syntax in opencv_cheatsheet))

# Further reading
print(
    "\n\n学习资源:",
    "- OpenCV官方文档: https://docs.opencv.org/",
    "- Python图像处理教程",
    "- 计算机视觉课程（CS131, CS231n）",
    "- 实践项目：图像分类、目标检测、图像分割",
    sep="\n",
)

# Suggested next steps
print(
    "\n\n下一步学习:",
    "1. 深入学习卷积神经网络（CNN）",
    "2. 掌握目标检测算法（YOLO、Faster R-CNN）",
    "3. 学习图像分割技术（Semantic Segmentation）",
    "4. 了解图像生成模型（GAN、Stable Diffusion）",
    "5. 实践完整的计算机视觉项目",
    sep="\n",
)

本文系统地介绍了图像处理的基础知识和常用操作，从数字图像的基本概念到各种高级处理技术。通过大量的代码示例和实践案例，你可以掌握图像处理的核心技能，为后续深入学习计算机视觉和深度学习打下坚实基础。记住，理论结合实践是最好的学习方式，多动手操作，深入理解每个算法的原理和应用场景！