文章目录
- 图像处理基础概念与常用操作
  - 一、数字图像基础
    - 1.1 什么是数字图像?
    - 1.2 图像的基本属性
  - 二、图像的读取、显示与保存
    - 2.1 使用OpenCV处理图像
    - 2.2 使用PIL/Pillow处理图像
  - 三、图像基本操作
    - 3.1 图像裁剪、缩放、旋转
    - 3.2 图像颜色空间转换
  - 四、图像增强技术
    - 4.1 亮度与对比度调整
    - 4.2 直方图均衡化
  - 五、图像滤波与噪声处理
    - 5.1 噪声类型
    - 5.2 图像滤波
  - 六、图像边缘检测
    - 6.1 Sobel算子
  - 七、图像形态学操作
  - 八、图像变换
  - 九、完整实战案例
  - 十、总结与最佳实践
图像处理基础概念与常用操作
图像处理是计算机视觉的基础,也是AI技术的重要应用领域。本文将带你从零开始学习图像处理的核心概念,掌握常用的图像操作技术,为后续的深度学习和计算机视觉应用打下坚实基础。
一、数字图像基础
1.1 什么是数字图像?
数字图像是由像素组成的二维矩阵,每个像素代表图像中的一个点,包含颜色和亮度信息。
python
import numpy as np
import matplotlib.pyplot as plt
# Build a tiny grayscale image by hand.
# 5x5 single-channel array of uint8 intensities (0-255).
gray_image = np.array([
[0, 50, 100, 150, 200],
[50, 100, 150, 200, 255],
[100, 150, 200, 255, 200],
[150, 200, 255, 200, 150],
[200, 255, 200, 150, 100]
], dtype=np.uint8)
print("灰度图像示例:")
print(gray_image)
print(f"图像尺寸: {gray_image.shape}")
print(f"数据类型: {gray_image.dtype}")
print(f"像素值范围: {gray_image.min()}-{gray_image.max()}")
# Build a tiny colour image by hand.
# 3x3 array holding three values (R, G, B) per pixel.
color_image = np.array([
[[255, 0, 0], [0, 255, 0], [0, 0, 255]], # row 1: red, green, blue
[[255, 255, 0], [0, 255, 255], [255, 0, 255]], # row 2: yellow, cyan, magenta
[[255, 255, 255], [128, 128, 128], [0, 0, 0]] # row 3: white, gray, black
], dtype=np.uint8)
print("\n彩色图像示例:")
print(f"图像尺寸: {color_image.shape}") # (height, width, channels)
print(f"通道数: {color_image.shape[2]}")
print(f"像素(0,0)的RGB值: {color_image[0,0]}") # the red pixel
1.2 图像的基本属性
python
class ImageProperties:
    """Read-only accessors for the basic attributes of an image array.

    The wrapped object is used like a NumPy array: the properties read its
    ``shape``, ``dtype`` and ``size`` attributes and call ``min()`` /
    ``max()`` on it. The array is referenced, not copied.
    """

    def __init__(self, image):
        self.image = image

    @property
    def shape(self):
        """Shape tuple of the wrapped array."""
        return self.image.shape

    @property
    def height(self):
        """Length of the first axis (rows)."""
        return self.image.shape[0]

    @property
    def width(self):
        """Length of the second axis (columns)."""
        return self.image.shape[1]

    @property
    def channels(self):
        """Channel count: 1 for a 2-D array, otherwise the third axis length."""
        dims = self.image.shape
        return 1 if len(dims) == 2 else dims[2]

    @property
    def dtype(self):
        """Element dtype of the wrapped array."""
        return self.image.dtype

    @property
    def size(self):
        """Total element count (rows * columns * channels)."""
        return self.image.size

    @property
    def min_value(self):
        """Smallest element value."""
        return self.image.min()

    @property
    def max_value(self):
        """Largest element value."""
        return self.image.max()

    def get_info(self):
        """Return a summary dict of the image attributes.

        The keys are the Chinese labels printed by the demo script; the size
        entry is formatted as "<width>x<height>" and the value range as
        "<min>-<max>".
        """
        return {
            '尺寸': f"{self.width}x{self.height}",
            '通道数': self.channels,
            '数据类型': str(self.dtype),
            '像素总数': self.size,
            '值范围': f"{self.min_value}-{self.max_value}",
        }
# Demo: summarise the 3x3 RGB sample image built earlier in the article.
props = ImageProperties(color_image)
info = props.get_info()
print("\n图像属性信息:")
print("=" * 50)
for key, value in info.items():
    print(f"{key}: {value}")

# Short reminder of the common image layouts.
print("\n图像类型说明:")
print("-" * 50)
print("灰度图像: 单通道,每个像素0-255(亮度)")
print("彩色图像: 三通道(RGB),每个通道0-255")
print("RGBA图像: 四通道,额外Alpha通道表示透明度")
二、图像的读取、显示与保存
2.1 使用OpenCV处理图像
python
import cv2
# NOTE: running this section requires the opencv-python package.
# pip install opencv-python
print("OpenCV图像处理:")
print("=" * 60)
# Reading an image
# cv2.imread(file_path, read_mode)
# Read modes: cv2.IMREAD_COLOR (1, colour), cv2.IMREAD_GRAYSCALE (0, grayscale), cv2.IMREAD_UNCHANGED (-1, keep as stored)
# The calls below are only printed as text, so no image file is needed.
print("\n读取图像:")
print("image = cv2.imread('image.jpg')")
print("gray_image = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)")
# Displaying an image
print("\n显示图像:")
print("cv2.imshow('Window Name', image)")
print("cv2.waitKey(0) # 等待按键")
print("cv2.destroyAllWindows() # 关闭窗口")
# Saving an image
print("\n保存图像:")
print("cv2.imwrite('output.jpg', image)")
print("cv2.imwrite('output.png', image) # PNG格式支持透明度")
# Channel-order reminder
print("\n重要提示:")
print("- OpenCV使用BGR格式(蓝绿红)")
print("- Matplotlib使用RGB格式(红绿蓝)")
print("- 转换: cv2.cvtColor(image, cv2.COLOR_BGR2RGB)")
# Build a synthetic 100x100 black image for the live part of the demo
test_image = np.zeros((100, 100, 3), dtype=np.uint8)
# Draw some content (colours are given in BGR order)
cv2.rectangle(test_image, (10, 10), (90, 90), (255, 0, 0), 2) # blue outline, thickness 2
cv2.circle(test_image, (50, 50), 20, (0, 255, 0), -1) # green disc; thickness -1 fills the shape
print(f"\n创建测试图像: {test_image.shape}")
print(f"图像中心像素BGR值: {test_image[50, 50]}")
# Convert BGR to RGB and read the same pixel again
rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
print(f"转换后RGB值: {rgb_image[50, 50]}")
2.2 使用PIL/Pillow处理图像
python
from PIL import Image
print("\n\nPillow图像处理:")
print("=" * 60)

# Create a solid-colour image; Image.new takes the size as (width, height).
pil_image = Image.new('RGB', (200, 200), color='red')
print(f"创建图像: {pil_image.size}")

# Pillow image -> NumPy array.
pil_array = np.array(pil_image)
print(f"NumPy数组形状: {pil_array.shape}")

# NumPy array -> Pillow image.
np_image = np.zeros((100, 100, 3), dtype=np.uint8)
np_image[:, :] = [255, 0, 0]  # fill every pixel with red (RGB order)
img_from_array = Image.fromarray(np_image)

# The common Pillow calls are printed as text only; nothing is opened or saved.
print("\nPillow常用操作:")
print("from PIL import Image")
print("img = Image.open('image.jpg') # 打开图像")
print("img.resize((width, height)) # 调整大小")
print("img.rotate(45) # 旋转")
print("img.crop((left, top, right, bottom)) # 裁剪")
print("img.save('output.jpg') # 保存")

# Pillow image modes and what each one stores.
print("\n图像模式:")
modes = {
    'L': '灰度图像 (8-bit像素)',
    'RGB': '真彩色图像 (3x8-bit像素)',
    'RGBA': '带透明度的真彩色 (4x8-bit像素)',
    'CMYK': '印刷四色模式 (4x8-bit像素)',
    '1': '二值图像 (1-bit像素)'
}
for mode, desc in modes.items():
    print(f" {mode}: {desc}")
三、图像基本操作
3.1 图像裁剪、缩放、旋转
python
class ImageOperations:
    """Basic geometric operations on NumPy image arrays.

    Images are indexed as ``image[row, column]`` with an optional trailing
    channel axis, so ``y`` selects rows and ``x`` selects columns.
    """

    @staticmethod
    def crop(image, x, y, width, height):
        """Return the region of size ``width`` x ``height`` starting at (x, y).

        This is a plain slice: a region reaching past the image edge is
        truncated by the usual slicing rules rather than raising.
        """
        return image[y:y + height, x:x + width]

    @staticmethod
    def resize(image, new_width, new_height):
        """Resize with nearest-neighbour sampling.

        Each output pixel (i, j) copies the source pixel at
        ``(int(i * h / new_height), int(j * w / new_width))``. The lookup is
        done with index arrays instead of a per-pixel Python loop.

        Raises:
            ValueError: if ``new_width`` or ``new_height`` is not positive.
        """
        if new_width <= 0 or new_height <= 0:
            raise ValueError("new_width and new_height must be positive")
        h, w = image.shape[:2]
        # Truncation toward zero equals int() for these non-negative values.
        src_rows = (np.arange(new_height) * (h / new_height)).astype(int)
        src_cols = (np.arange(new_width) * (w / new_width)).astype(int)
        # Guard against a float round-up landing one past the last index.
        src_rows = np.minimum(src_rows, h - 1)
        src_cols = np.minimum(src_cols, w - 1)
        # Broadcasting a column of rows against a row of columns selects the
        # full (new_height, new_width) grid; trailing channels come along.
        return image[src_rows[:, None], src_cols]

    @staticmethod
    def rotate(image, angle):
        """Rotate by a multiple of 90 degrees using ``np.rot90``.

        ``np.rot90`` turns counter-clockwise for positive ``angle``.

        Raises:
            ValueError: if ``angle`` is not a multiple of 90 (previously such
                angles were silently floored to the lower multiple).
        """
        quarter_turns, remainder = divmod(angle, 90)
        if remainder:
            raise ValueError(f"angle must be a multiple of 90, got {angle}")
        return np.rot90(image, int(quarter_turns))

    @staticmethod
    def flip(image, direction='horizontal'):
        """Mirror the image.

        ``'horizontal'`` flips left-right; any other value flips up-down.
        """
        if direction == 'horizontal':
            return np.fliplr(image)
        return np.flipud(image)
# Build a 200x200 black test image
test_image = np.zeros((200, 200, 3), dtype=np.uint8)
# Paint two nested squares as visual markers
test_image[50:150, 50:150] = [255, 255, 0] # outer square: yellow when read as RGB
test_image[70:130, 70:130] = [0, 255, 0] # inner square: green
print("图像基本操作:")
print("=" * 60)
print(f"原始图像尺寸: {test_image.shape}")
# Crop the 100x100 region that starts at x=50, y=50
cropped = ImageOperations.crop(test_image, 50, 50, 100, 100)
print(f"\n裁剪后尺寸: {cropped.shape}")
# Resize down to 100x100
resized = ImageOperations.resize(test_image, 100, 100)
print(f"缩放后尺寸: {resized.shape}")
# Rotate by 90 degrees
rotated = ImageOperations.rotate(test_image, 90)
print(f"旋转90度后尺寸: {rotated.shape}")
# Mirror left-right
flipped_h = ImageOperations.flip(test_image, 'horizontal')
print(f"水平翻转完成")
# OpenCV equivalents (printed as text only, not executed)
print("\n\n使用OpenCV的图像操作:")
print("=" * 60)
print("# 缩放")
print("resized = cv2.resize(image, (new_width, new_height))")
print("resized = cv2.resize(image, None, fx=0.5, fy=0.5) # 按比例缩放")
print("\n# 旋转")
print("rotated = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)")
print("rotated = cv2.rotate(image, cv2.ROTATE_180)")
print("\n# 翻转")
print("flipped = cv2.flip(image, 1) # 水平翻转")
print("flipped = cv2.flip(image, 0) # 垂直翻转")
print("\n# 裁剪")
print("cropped = image[y:y+h, x:x+w]")
3.2 图像颜色空间转换
python
# Colour-space conversion demo.
print("颜色空间转换:")
print("=" * 60)

# 3x3 RGB sample: primaries, secondaries and three gray levels.
test_rgb = np.array([
    [[255, 0, 0], [0, 255, 0], [0, 0, 255]],
    [[255, 255, 0], [0, 255, 255], [255, 0, 255]],
    [[128, 128, 128], [64, 64, 64], [192, 192, 192]]
], dtype=np.uint8)

print("RGB颜色空间:")
print("Red: (255, 0, 0)")
print("Green: (0, 255, 0)")
print("Blue: (0, 0, 255)")

# Name -> one-line description of each colour space, printed below.
print("\n常用颜色空间:")
color_spaces = {
    'RGB': '红绿蓝,显示设备常用',
    'BGR': '蓝绿红,OpenCV默认',
    'HSV': '色调、饱和度、亮度,适合颜色分割',
    'Lab': '亮度、a(绿-红)、b(蓝-黄),接近人眼感知',
    'Grayscale': '灰度,单通道',
    'YCrCb': '亮度、色度,视频压缩常用'
}
for name, desc in color_spaces.items():
    print(f" {name}: {desc}")
# RGB转灰度
def rgb_to_grayscale(rgb):
    """Convert an RGB image to 8-bit grayscale.

    Uses the luma weights ``Gray = 0.299*R + 0.587*G + 0.114*B`` on the first
    three channels. A 2-D input is treated as already grayscale and is
    returned unchanged (same object).

    The weighted sum is rounded to the nearest integer before the uint8 cast.
    A plain cast truncates, which biases every pixel downwards (pure green
    would come out as 149 instead of 150).
    """
    if len(rgb.shape) == 2:
        return rgb
    weights = np.array([0.299, 0.587, 0.114])
    gray = rgb[:, :, :3].astype(float) @ weights
    return np.clip(np.rint(gray), 0, 255).astype(np.uint8)
# Convert the 3x3 RGB sample; the printed shapes show the channel axis being dropped.
# NOTE: this rebinds `gray_image`, replacing the 5x5 sample created earlier in the file.
gray_image = rgb_to_grayscale(test_rgb)
print(f"\nRGB转灰度: {test_rgb.shape} -> {gray_image.shape}")
# RGB转HSV
def rgb_to_hsv(rgb):
    """Convert an (H, W, 3) RGB image with 0-255 values to HSV.

    Returns an (H, W, 3) ``uint16`` array with
        H: hue in degrees, 0-359
        S: saturation in percent, 0-100
        V: value (brightness) in percent, 0-100

    Two defects of the earlier version are fixed here:
    * Hue was stored as uint8, which cannot hold 0-360 (magenta's 300
      overflowed), hence the uint16 result.
    * The divide-by-zero guard was applied to the channel maximum instead of
      ``delta``, so gray pixels divided 0 by 0. Achromatic pixels (including
      black) now get hue 0 and saturation 0.
    """
    normalized = rgb[:, :, :3].astype(float) / 255.0
    r, g, b = normalized[:, :, 0], normalized[:, :, 1], normalized[:, :, 2]

    v = normalized.max(axis=2)
    min_rgb = normalized.min(axis=2)
    delta = v - min_rgb

    # Saturation is delta / v, defined as 0 for black pixels (v == 0).
    lit = v > 0
    s = np.where(lit, delta / np.where(lit, v, 1.0), 0.0)

    # Hue is only defined when the channels differ; divide by a dummy 1
    # elsewhere so no 0/0 is ever evaluated.
    chromatic = delta > 0
    safe_delta = np.where(chromatic, delta, 1.0)
    # Position on the six-sector colour wheel, chosen by the largest channel.
    # When two channels tie for the maximum both formulas agree.
    sector = np.select(
        [~chromatic, r == v, g == v],
        [0.0, ((g - b) / safe_delta) % 6, (b - r) / safe_delta + 2],
        default=(r - g) / safe_delta + 4,
    )

    h = np.rint(sector * 60) % 360  # a hue that rounds up to 360 wraps to 0
    s = np.rint(s * 100)
    v = np.rint(v * 100)
    return np.stack([h, s, v], axis=2).astype(np.uint16)
# OpenCV colour-space conversions (printed as a text cheat sheet, not executed)
print("\n\nOpenCV颜色空间转换:")
print("=" * 60)
print("# RGB转灰度")
print("gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)")
print("\n# BGR转RGB")
print("rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)")
print("\n# RGB转HSV")
print("hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)")
print("\n# RGB转Lab")
print("lab = cv2.cvtColor(image, cv2.COLOR_RGB2Lab)")
print("\n# RGB转YCrCb")
print("ycrcb = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)")
四、图像增强技术
4.1 亮度与对比度调整
python
class ImageEnhancement:
    """Point-wise intensity adjustments for 8-bit images.

    Every method computes in floating point and returns a new ``uint8``
    array; the input is not modified.
    """

    @staticmethod
    def _to_uint8(values):
        """Round to the nearest integer, clamp to [0, 255] and cast to uint8.

        Rounding matters: a bare ``astype(np.uint8)`` truncates, so a float
        result such as 0.9999999 for an intended 1 would become 0.
        """
        return np.clip(np.rint(values), 0, 255).astype(np.uint8)

    @staticmethod
    def adjust_brightness(image, factor):
        """Scale every pixel by ``factor`` (>1 brightens, <1 darkens)."""
        return ImageEnhancement._to_uint8(image.astype(float) * factor)

    @staticmethod
    def adjust_contrast(image, factor):
        """Stretch pixel values around the global mean.

        ``factor`` > 1 increases contrast, < 1 reduces it. The mean is taken
        over the whole array (all channels together).
        """
        mean = np.mean(image)
        return ImageEnhancement._to_uint8((image.astype(float) - mean) * factor + mean)

    @staticmethod
    def gamma_correction(image, gamma):
        """Apply ``out = 255 * (in / 255) ** gamma``.

        ``gamma`` > 1 darkens, < 1 brightens.

        Raises:
            ValueError: if ``gamma`` is not positive (0 raised to a negative
                power is infinite, which has no meaningful uint8 value).
        """
        if gamma <= 0:
            raise ValueError(f"gamma must be positive, got {gamma}")
        normalized = image.astype(float) / 255.0
        return ImageEnhancement._to_uint8(np.power(normalized, gamma) * 255)
# Build a random test image with values drawn from [50, 200)
test_image = np.random.randint(50, 200, (100, 100, 3), dtype=np.uint8)
print("图像增强:")
print("=" * 60)
print(f"原始图像统计:")
print(f" 平均值: {test_image.mean():.2f}")
print(f" 标准差: {test_image.std():.2f}")
# Brightness: scale up, then scale down
brightened = ImageEnhancement.adjust_brightness(test_image, 1.5)
print(f"\n调亮后 (factor=1.5):")
print(f" 平均值: {brightened.mean():.2f}")
print(f" 标准差: {brightened.std():.2f}")
darkened = ImageEnhancement.adjust_brightness(test_image, 0.7)
print(f"\n调暗后 (factor=0.7):")
print(f" 平均值: {darkened.mean():.2f}")
print(f" 标准差: {darkened.std():.2f}")
# Contrast: stretch values around the mean
high_contrast = ImageEnhancement.adjust_contrast(test_image, 1.5)
print(f"\n高对比度 (factor=1.5):")
print(f" 平均值: {high_contrast.mean():.2f}")
print(f" 标准差: {high_contrast.std():.2f}")
# Gamma correction with gamma < 1
gamma_corrected = ImageEnhancement.gamma_correction(test_image, 0.7)
print(f"\n伽马校正 (gamma=0.7):")
print(f" 平均值: {gamma_corrected.mean():.2f}")
print(f" 标准差: {gamma_corrected.std():.2f}")
# OpenCV brightness/contrast adjustment (printed as text only, not executed)
print("\n\nOpenCV亮度对比度调整:")
print("=" * 60)
print("# 使用公式: new_image = alpha * image + beta")
print("# alpha: 对比度控制 (1.0-3.0)")
print("# beta: 亮度控制 (0-100)")
print()
print("alpha = 1.5 # 增强对比度")
print("beta = 50 # 增加亮度")
print("enhanced = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)")
4.2 直方图均衡化
python
def calculate_histogram(image):
    """Return the 256-bin intensity histogram of an image.

    A 3-D input is first converted with ``rgb_to_grayscale``. The result is
    the ``(hist, bins)`` pair from ``np.histogram``: 256 counts and 257 bin
    edges covering the range 0 to 256.
    """
    if len(image.shape) == 3:
        image = rgb_to_grayscale(image)
    hist, bins = np.histogram(image.ravel(), bins=256, range=(0, 256))
    return hist, bins
def histogram_equalization(image):
    """Equalize the histogram of an 8-bit grayscale image.

    A 3-D input is first converted with ``rgb_to_grayscale``. Each gray level
    ``k`` is remapped through the cumulative distribution function (CDF):

        out(k) = round((cdf(k) - cdf_min) / (N - cdf_min) * 255)

    where ``cdf_min`` is the CDF at the darkest level present and ``N`` is
    the pixel count. Subtracting ``cdf_min`` is what lets the darkest level
    map to 0; without it the output is squeezed into the upper part of the
    range and contrast can go down instead of up.

    An empty image, or one with a single gray level, is returned as a uint8
    copy because there is nothing to spread out.
    """
    if len(image.shape) == 3:
        image = rgb_to_grayscale(image)

    hist, _ = np.histogram(image.ravel(), bins=256, range=(0, 256))
    cdf = hist.cumsum()
    total = cdf[-1]

    occupied = np.flatnonzero(hist)
    if occupied.size == 0:
        return image.astype(np.uint8)
    cdf_min = cdf[occupied[0]]
    if total == cdf_min:
        return image.astype(np.uint8)

    # Levels below the darkest occupied one give negative values here; they
    # are never looked up, and the clip keeps the table inside uint8 range.
    lut = (cdf - cdf_min) * 255.0 / (total - cdf_min)
    lut = np.clip(np.rint(lut), 0, 255).astype(np.uint8)
    return lut[image]
# Build a low-contrast test image: background 100 with a 40x40 square of 150
low_contrast_image = np.ones((100, 100), dtype=np.uint8) * 100
low_contrast_image[30:70, 30:70] = 150
print("直方图均衡化:")
print("=" * 60)
print(f"原始图像统计:")
print(f" 平均值: {low_contrast_image.mean():.2f}")
print(f" 标准差: {low_contrast_image.std():.2f}")
# Equalize and compare the statistics
equalized = histogram_equalization(low_contrast_image)
print(f"\n均衡化后统计:")
print(f" 平均值: {equalized.mean():.2f}")
print(f" 标准差: {equalized.std():.2f}")
# OpenCV histogram equalization (printed as text only, not executed)
print("\n\nOpenCV直方图均衡化:")
print("=" * 60)
print("# 灰度图像均衡化")
print("equalized = cv2.equalizeHist(gray_image)")
print("\n# 彩色图像均衡化(转换到YCrCb空间)")
print("ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)")
print("ycrcb[:,:,0] = cv2.equalizeHist(ycrcb[:,:,0]) # 只均衡化Y通道")
print("equalized = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2BGR)")
print("\n# CLAHE(对比度受限的自适应直方图均衡化)")
print("clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))")
print("equalized = clahe.apply(gray_image)")
五、图像滤波与噪声处理
5.1 噪声类型
python
def add_noise(image, noise_type='gaussian', intensity=0.1):
    """Return a noisy uint8 copy of ``image``.

    Args:
        image: array of pixel values on a 0-255 scale.
        noise_type: ``'gaussian'``, ``'salt_pepper'`` or ``'speckle'``.
        intensity: noise strength.
            * gaussian: standard deviation as a fraction of 255.
            * salt_pepper: probability that an element is corrupted.
            * speckle: standard deviation of the multiplicative factor.

    Raises:
        ValueError: for an unknown ``noise_type`` (previously the image was
            returned without any noise and without warning).

    Random numbers come from the global ``np.random`` state, so call
    ``np.random.seed`` beforehand for reproducible output.
    """
    noisy = image.astype(float)  # astype always returns a new array
    if noise_type == 'gaussian':
        # Additive zero-mean noise.
        noisy = noisy + np.random.normal(0, intensity * 255, image.shape)
    elif noise_type == 'salt_pepper':
        corrupted = np.random.random(image.shape) < intensity
        # Decide salt vs pepper per element. A single coin flip for the whole
        # image would make every corrupted pixel the same colour.
        salt = np.random.random(image.shape) < 0.5
        noisy[corrupted & salt] = 255
        noisy[corrupted & ~salt] = 0
    elif noise_type == 'speckle':
        # Multiplicative noise: each pixel is scaled by (1 + n).
        noise = np.random.randn(*image.shape) * intensity
        noisy = noisy + noisy * noise
    else:
        raise ValueError(f"unknown noise_type: {noise_type!r}")
    return np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
# Build a clean test image: black background with a 40x40 square of value 200
clean_image = np.zeros((100, 100), dtype=np.uint8)
clean_image[30:70, 30:70] = 200
print("图像噪声:")
print("=" * 60)
print("常见噪声类型:")
print("-" * 60)
print("1. 高斯噪声 (Gaussian Noise)")
print(" - 随机分布的亮度变化")
print(" - 符合正态分布")
print(" - 常见于传感器噪声")
print("\n2. 椒盐噪声 (Salt & Pepper Noise)")
print(" - 随机的白色和黑色像素")
print(" - 常见于传输错误")
print("\n3. 斑点噪声 (Speckle Noise)")
print(" - 乘性噪声")
print(" - 常见于医学影像")
# Corrupt the clean image with two noise types and compare mean intensity
gaussian_noise = add_noise(clean_image, 'gaussian', 0.05)
salt_pepper_noise = add_noise(clean_image, 'salt_pepper', 0.02)
print(f"\n原始图像平均值: {clean_image.mean():.2f}")
print(f"高斯噪声图像平均值: {gaussian_noise.mean():.2f}")
print(f"椒盐噪声图像平均值: {salt_pepper_noise.mean():.2f}")
5.2 图像滤波
python
class ImageFilters:
    """Neighbourhood smoothing filters for 2-D or (H, W, C) images.

    All filters share one implementation strategy:
    * the image is padded by replicating its edge pixels, so border pixels
      are filtered too (they used to be left at 0);
    * the k x k neighbourhood of every pixel is gathered with shifted slices
      instead of a per-pixel Python loop;
    * for integer images the result is rounded and clamped before casting
      back, rather than truncated.
    The output has the same shape and dtype as the input.
    """

    @staticmethod
    def _require_odd(kernel_size):
        """Reject kernel sizes that have no centre pixel."""
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError(
                f"kernel_size must be a positive odd integer, got {kernel_size}"
            )

    @staticmethod
    def _neighborhoods(plane, kernel_size):
        """Return a (k*k, H, W) float stack of shifted copies of ``plane``.

        Slice ``dy * k + dx`` holds, for every pixel, the neighbour at offset
        (dy - k//2, dx - k//2), i.e. row-major kernel order.
        """
        pad = kernel_size // 2
        padded = np.pad(plane.astype(float), pad, mode='edge')
        h, w = plane.shape
        return np.stack([
            padded[dy:dy + h, dx:dx + w]
            for dy in range(kernel_size)
            for dx in range(kernel_size)
        ])

    @staticmethod
    def _restore_dtype(values, dtype):
        """Cast float results to ``dtype``, rounding/clamping integer types."""
        if np.issubdtype(dtype, np.integer):
            limits = np.iinfo(dtype)
            values = np.clip(np.rint(values), limits.min, limits.max)
        return values.astype(dtype)

    @staticmethod
    def _apply(image, kernel_size, reduce_stack):
        """Run ``reduce_stack`` over the neighbourhoods of each 2-D plane."""
        ImageFilters._require_odd(kernel_size)

        def run(plane):
            stack = ImageFilters._neighborhoods(plane, kernel_size)
            return ImageFilters._restore_dtype(reduce_stack(stack), plane.dtype)

        if len(image.shape) == 3:
            # Filter every channel independently.
            filtered = np.empty_like(image)
            for c in range(image.shape[2]):
                filtered[:, :, c] = run(image[:, :, c])
            return filtered
        return run(image)

    @staticmethod
    def mean_filter(image, kernel_size=3):
        """Box blur: replace each pixel by the mean of its k x k neighbourhood.

        Raises:
            ValueError: if ``kernel_size`` is not a positive odd integer.
        """
        return ImageFilters._apply(
            image, kernel_size, lambda stack: stack.mean(axis=0)
        )

    @staticmethod
    def gaussian_filter(image, kernel_size=3, sigma=1.0):
        """Gaussian blur with a normalised k x k kernel of spread ``sigma``.

        Raises:
            ValueError: if ``kernel_size`` is not a positive odd integer or
                ``sigma`` is not positive.
        """
        ImageFilters._require_odd(kernel_size)
        if sigma <= 0:
            raise ValueError(f"sigma must be positive, got {sigma}")
        half = kernel_size // 2
        ax = np.arange(-half, half + 1, dtype=float)
        xx, yy = np.meshgrid(ax, ax)
        kernel = np.exp(-(xx ** 2 + yy ** 2) / (2 * sigma ** 2))
        weights = (kernel / kernel.sum()).ravel()
        # Weighted sum over the neighbourhood axis of the stack.
        return ImageFilters._apply(
            image, kernel_size, lambda stack: np.tensordot(weights, stack, axes=1)
        )

    @staticmethod
    def median_filter(image, kernel_size=3):
        """Median filter; effective against salt & pepper noise.

        Raises:
            ValueError: if ``kernel_size`` is not a positive odd integer.
        """
        return ImageFilters._apply(
            image, kernel_size, lambda stack: np.median(stack, axis=0)
        )
# Exercise the filters on noisy versions of clean_image
print("\n\n图像滤波:")
print("=" * 60)
# Corrupt the image with Gaussian noise
noisy_image = add_noise(clean_image, 'gaussian', 0.05)
print(f"噪声图像平均值: {noisy_image.mean():.2f}")
# Mean (box) filter
mean_filtered = ImageFilters.mean_filter(noisy_image, kernel_size=3)
print(f"均值滤波后: {mean_filtered.mean():.2f}")
# Gaussian filter
gaussian_filtered = ImageFilters.gaussian_filter(noisy_image, kernel_size=3, sigma=1.0)
print(f"高斯滤波后: {gaussian_filtered.mean():.2f}")
# Median filter, applied to a salt & pepper version of the image
sp_noisy_image = add_noise(clean_image, 'salt_pepper', 0.02)
median_filtered = ImageFilters.median_filter(sp_noisy_image, kernel_size=3)
print(f"中值滤波后: {median_filtered.mean():.2f}")
# OpenCV filtering functions (printed as text only, not executed)
print("\n\nOpenCV滤波函数:")
print("=" * 60)
print("# 均值滤波")
print("blurred = cv2.blur(image, (5, 5))")
print("\n# 高斯滤波")
print("blurred = cv2.GaussianBlur(image, (5, 5), 1.0)")
print("\n# 中值滤波")
print("blurred = cv2.medianBlur(image, 5)")
print("\n# 双边滤波(保边去噪)")
print("blurred = cv2.bilateralFilter(image, 9, 75, 75)")
print("\n# 非局部均值去噪")
print("denoised = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)")
六、图像边缘检测
6.1 Sobel算子
python
class EdgeDetection:
    """Gradient-based edge detectors built on 3x3 kernels.

    3-D inputs are converted with ``rgb_to_grayscale`` first. The one-pixel
    image border is left at 0 because the kernels are only evaluated where
    the full 3x3 window fits.
    """

    SOBEL_X = ((-1, 0, 1),
               (-2, 0, 2),
               (-1, 0, 1))
    SOBEL_Y = ((-1, -2, -1),
               (0, 0, 0),
               (1, 2, 1))
    LAPLACIAN = ((0, 1, 0),
                 (1, -4, 1),
                 (0, 1, 0))

    @staticmethod
    def _correlate3x3(image, kernel):
        """Correlate a 2-D image with a 3x3 kernel; returns a float array.

        Each kernel weight is applied to a shifted slice of the whole image,
        which gives the same sums as a per-pixel window loop.
        """
        plane = image.astype(float)
        h, w = plane.shape
        response = np.zeros((h, w), dtype=float)
        if h < 3 or w < 3:
            return response  # no pixel has a complete 3x3 window
        inner = response[1:-1, 1:-1]  # view: in-place adds write into response
        for di in range(3):
            for dj in range(3):
                weight = kernel[di][dj]
                if weight:
                    inner += weight * plane[di:di + h - 2, dj:dj + w - 2]
        return response

    @staticmethod
    def _saturate(values):
        """Round and clamp to [0, 255] before the uint8 cast.

        Gradient magnitudes can far exceed 255 (a 0 -> 255 step gives 1020);
        casting them directly wraps around and can turn the strongest edges
        into near-zero values.
        """
        return np.clip(np.rint(values), 0, 255).astype(np.uint8)

    @staticmethod
    def sobel(image):
        """Sobel edge detection.

        Returns:
            (magnitude, direction): ``magnitude`` is the gradient magnitude
            saturated to uint8; ``direction`` is ``arctan2(grad_y, grad_x)``
            in radians as a float array.
        """
        if len(image.shape) == 3:
            image = rgb_to_grayscale(image)
        grad_x = EdgeDetection._correlate3x3(image, EdgeDetection.SOBEL_X)
        grad_y = EdgeDetection._correlate3x3(image, EdgeDetection.SOBEL_Y)
        magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)
        direction = np.arctan2(grad_y, grad_x)
        return EdgeDetection._saturate(magnitude), direction

    @staticmethod
    def laplacian(image):
        """Laplacian edge detection: absolute 4-neighbour response as uint8."""
        if len(image.shape) == 3:
            image = rgb_to_grayscale(image)
        edges = EdgeDetection._correlate3x3(image, EdgeDetection.LAPLACIAN)
        return EdgeDetection._saturate(np.abs(edges))
# Build a test image with sharp edges: a white 40x40 square on black
test_image = np.zeros((100, 100), dtype=np.uint8)
test_image[30:70, 30:70] = 255
print("边缘检测:")
print("=" * 60)
# Sobel: count pixels whose magnitude exceeds 50
sobel_edges, sobel_direction = EdgeDetection.sobel(test_image)
print(f"Sobel边缘检测完成")
print(f"边缘像素数: {np.sum(sobel_edges > 50)}")
# Laplacian: same threshold
laplacian_edges = EdgeDetection.laplacian(test_image)
print(f"\n拉普拉斯边缘检测完成")
print(f"边缘像素数: {np.sum(laplacian_edges > 50)}")
# OpenCV edge detection (printed as text only, not executed)
print("\n\nOpenCV边缘检测:")
print("=" * 60)
print("# Sobel边缘检测")
print("grad_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)")
print("grad_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)")
print("magnitude = np.sqrt(grad_x**2 + grad_y**2)")
print("\n# Canny边缘检测")
print("edges = cv2.Canny(image, threshold1, threshold2)")
print("edges = cv2.Canny(image, 100, 200) # 常用阈值")
print("\n# 拉普拉斯边缘检测")
print("edges = cv2.Laplacian(image, cv2.CV_64F)")
print("\n# Canny边缘检测步骤:")
print("1. 高斯滤波去噪")
print("2. 计算梯度幅值和方向")
print("3. 非极大值抑制")
print("4. 双阈值检测和边缘连接")
七、图像形态学操作
python
class Morphology:
    """Morphological operations with a square k x k structuring element.

    Works on 2-D images and, channel by channel, on (H, W, C) images. The
    output has the same shape and dtype as the input.
    """

    @staticmethod
    def _sliding_extreme(image, kernel_size, use_max):
        """Return the per-pixel max (dilate) or min (erode) over a k x k window.

        The image is padded with a value that can never win the max/min: the
        image minimum for dilation, the image maximum for erosion. Padding
        erosion with 0 would eat away any foreground touching the border.
        """
        if kernel_size < 1:
            raise ValueError(f"kernel_size must be >= 1, got {kernel_size}")
        if len(image.shape) == 3:
            result = np.empty_like(image)
            for c in range(image.shape[2]):
                result[:, :, c] = Morphology._sliding_extreme(
                    image[:, :, c], kernel_size, use_max
                )
            return result
        if image.size == 0:
            return image.copy()

        h, w = image.shape
        pad = kernel_size // 2
        neutral = image.min() if use_max else image.max()
        padded = np.pad(image, pad, mode='constant', constant_values=neutral)
        combine = np.maximum if use_max else np.minimum
        # Fold every shifted copy of the image into the running extreme; this
        # visits the same k*k neighbours as a per-pixel window would.
        result = padded[0:h, 0:w].copy()
        for dy in range(kernel_size):
            for dx in range(kernel_size):
                combine(result, padded[dy:dy + h, dx:dx + w], out=result)
        return result

    @staticmethod
    def dilate(image, kernel_size=3):
        """Dilation: each pixel becomes the maximum of its neighbourhood."""
        return Morphology._sliding_extreme(image, kernel_size, use_max=True)

    @staticmethod
    def erode(image, kernel_size=3):
        """Erosion: each pixel becomes the minimum of its neighbourhood."""
        return Morphology._sliding_extreme(image, kernel_size, use_max=False)

    @staticmethod
    def opening(image, kernel_size=3):
        """Opening (erode, then dilate); removes small bright specks."""
        return Morphology.dilate(Morphology.erode(image, kernel_size), kernel_size)

    @staticmethod
    def closing(image, kernel_size=3):
        """Closing (dilate, then erode); fills small dark holes."""
        return Morphology.erode(Morphology.dilate(image, kernel_size), kernel_size)
# Build a binary test image
binary_image = np.zeros((100, 100), dtype=np.uint8)
binary_image[30:40, 30:70] = 255 # horizontal bar
binary_image[30:70, 60:70] = 255 # vertical bar
# Add a small blob acting as noise
binary_image[50:55, 50:55] = 255 # isolated 5x5 speck
print("图像形态学操作:")
print("=" * 60)
print(f"原始图像白色像素数: {np.sum(binary_image > 0)}")
# Dilation
dilated = Morphology.dilate(binary_image, kernel_size=3)
print(f"膨胀后白色像素数: {np.sum(dilated > 0)}")
# Erosion
eroded = Morphology.erode(binary_image, kernel_size=3)
print(f"腐蚀后白色像素数: {np.sum(eroded > 0)}")
# Opening with a 5x5 element
opened = Morphology.opening(binary_image, kernel_size=5)
print(f"开运算后白色像素数: {np.sum(opened > 0)}")
# Closing with a 5x5 element
closed = Morphology.closing(binary_image, kernel_size=5)
print(f"闭运算后白色像素数: {np.sum(closed > 0)}")
# OpenCV morphology (printed as text only, not executed)
print("\n\nOpenCV形态学操作:")
print("=" * 60)
print("# 创建结构元素")
print("kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) # 矩形")
print("kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) # 椭圆")
print("kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5, 5)) # 十字形")
print("\n# 膨胀")
print("dilated = cv2.dilate(image, kernel, iterations=1)")
print("\n# 腐蚀")
print("eroded = cv2.erode(image, kernel, iterations=1)")
print("\n# 开运算")
print("opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)")
print("\n# 闭运算")
print("closed = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)")
print("\n# 形态学梯度(膨胀-腐蚀)")
print("gradient = cv2.morphologyEx(image, cv2.MORPH_GRADIENT, kernel)")
print("\n# 顶帽(原图-开运算)")
print("tophat = cv2.morphologyEx(image, cv2.MORPH_TOPHAT, kernel)")
print("\n# 黑帽(闭运算-原图)")
print("blackhat = cv2.morphologyEx(image, cv2.MORPH_BLACKHAT, kernel)")
八、图像变换
python
class ImageTransforms:
    """Simple geometric transforms on NumPy image arrays."""

    @staticmethod
    def translate(image, dx, dy):
        """Shift the image by ``dx`` columns and ``dy`` rows.

        Positive ``dx`` moves content right, positive ``dy`` moves it down.
        Pixels shifted outside the frame are dropped and uncovered areas are
        filled with 0. Fractional offsets are floored, which is the net
        effect the earlier per-pixel loop had as well.

        Returns a new array with the same shape and dtype as ``image``.
        """
        h, w = image.shape[:2]
        translated = np.zeros_like(image)
        shift_x = int(np.floor(dx))
        shift_y = int(np.floor(dy))

        # Size of the region that remains visible after the shift.
        span_x = w - abs(shift_x)
        span_y = h - abs(shift_y)
        if span_x <= 0 or span_y <= 0:
            return translated  # everything moved out of frame

        src_x, dst_x = max(0, -shift_x), max(0, shift_x)
        src_y, dst_y = max(0, -shift_y), max(0, shift_y)
        # One block copy replaces the per-pixel loop; trailing channel axes
        # (if any) are carried along by the slice.
        translated[dst_y:dst_y + span_y, dst_x:dst_x + span_x] = \
            image[src_y:src_y + span_y, src_x:src_x + span_x]
        return translated

    @staticmethod
    def affine_transform(image):
        """Print an overview of affine transforms and the OpenCV calls.

        This is a text-only walkthrough: ``image`` is not used and nothing is
        returned.
        """
        print("仿射变换包括:")
        print("- 平移")
        print("- 旋转")
        print("- 缩放")
        print("- 错切")
        print("\nOpenCV仿射变换:")
        print("# 定义三个点对")
        print("pts1 = np.float32([[50,50],[200,50],[50,200]])")
        print("pts2 = np.float32([[10,100],[200,50],[100,250]])")
        print("# 计算变换矩阵")
        print("M = cv2.getAffineTransform(pts1, pts2)")
        print("# 应用变换")
        print("result = cv2.warpAffine(image, M, (w, h))")
print("\n\n图像变换:")
print("=" * 60)
# Translation test on a random 50x50 image with values in [100, 200)
test_image = np.random.randint(100, 200, (50, 50), dtype=np.uint8)
translated = ImageTransforms.translate(test_image, dx=10, dy=5)
print(f"平移变换完成")
print(f"原始中心点: (25, 25) -> 值={test_image[25, 25]}")
# The pixel at (x=25, y=25) should now be at (x=35, y=30), i.e. translated[30, 35];
# the inline guard prints a fallback message if that index is outside the array.
print(f"平移后中心点: (35, 30) -> 值={translated[30, 35] if translated.shape[0] > 30 and translated.shape[1] > 35 else '超出范围'}")
# OpenCV geometric transforms (printed as text only, not executed)
print("\n\nOpenCV图像变换:")
print("=" * 60)
print("# 平移")
print("M = np.float32([[1, 0, tx], [0, 1, ty]])")
print("translated = cv2.warpAffine(image, M, (w, h))")
print("\n# 旋转")
print("M = cv2.getRotationMatrix2D(center, angle, scale)")
print("rotated = cv2.warpAffine(image, M, (w, h))")
print("\n# 缩放")
print("resized = cv2.resize(image, (new_w, new_h))")
print("\n# 透视变换")
print("pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])")
print("pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])")
print("M = cv2.getPerspectiveTransform(pts1, pts2)")
print("result = cv2.warpPerspective(image, M, (300,300))")
九、完整实战案例
python
class ImageProcessingPipeline:
    """An ordered chain of named image-processing steps.

    Each step is a callable that takes an image and returns the processed
    image; steps run in the order they were added.
    """

    def __init__(self):
        # List of (step_name, step_func) pairs in execution order.
        self.steps = []

    def add_step(self, step_name, step_func):
        """Append a processing step.

        Args:
            step_name: label recorded next to the step's output in the history.
            step_func: callable ``image -> image``.

        Returns:
            The pipeline itself, so calls can be chained.

        Raises:
            TypeError: if ``step_func`` is not callable. Failing here points
                at the faulty registration instead of a later ``process``.
        """
        if not callable(step_func):
            raise TypeError(f"step_func for {step_name!r} must be callable")
        self.steps.append((step_name, step_func))
        return self

    def process(self, image):
        """Run every step on a copy of ``image``.

        Returns:
            (result, history): ``result`` is the output of the last step;
            ``history`` is a list of ``(name, snapshot)`` pairs starting with
            the untouched input under the label '原始图像', followed by one
            entry per step. Snapshots are copies, so later steps cannot alter
            earlier entries.
        """
        result = image.copy()
        history = [('原始图像', result.copy())]
        for step_name, step_func in self.steps:
            result = step_func(result)
            history.append((step_name, result.copy()))
        return result, history
# Build and run a small processing pipeline.
print("图像处理流水线:")
print("=" * 60)

# Example: noise -> blur -> brightness -> contrast.
pipeline = ImageProcessingPipeline()
pipeline.add_step('添加噪声', lambda img: add_noise(img, 'gaussian', 0.05))
pipeline.add_step('高斯滤波', lambda img: ImageFilters.gaussian_filter(img, 3, 1.0))
pipeline.add_step('调整亮度', lambda img: ImageEnhancement.adjust_brightness(img, 1.1))
pipeline.add_step('调整对比度', lambda img: ImageEnhancement.adjust_contrast(img, 1.2))

# Run it on a random 50x50 image and report the mean after every step.
original = np.random.randint(100, 200, (50, 50), dtype=np.uint8)
processed, history = pipeline.process(original)
print(f"原始图像: 平均值={original.mean():.2f}")
for step_name, result in history[1:]:  # history[0] is the untouched input
    print(f"{step_name}: 平均值={result.mean():.2f}")

# Typical real-world preprocessing flows, printed as "a -> b -> c".
print("\n\n实际应用场景:")
print("=" * 60)
applications = [
    {
        '场景': '文档扫描预处理',
        '流程': ['灰度化', '二值化', '降噪', '倾斜校正', '裁剪']
    },
    {
        '场景': '人脸识别预处理',
        '流程': ['人脸检测', '对齐', '归一化', '光照归一化', '特征提取']
    },
    {
        '场景': '车牌识别预处理',
        '流程': ['灰度化', '边缘检测', '形态学操作', '字符分割', '归一化']
    },
    {
        '场景': '医学图像增强',
        '流程': ['去噪', '对比度增强', '直方图均衡化', '边缘锐化', '伪彩色显示']
    },
    {
        '场景': '卫星图像处理',
        '流程': ['辐射校正', '几何校正', '大气校正', '图像融合', '分类']
    }
]
for app in applications:
    print(f"\n{app['场景']}:")
    print(f" 流程: {' -> '.join(app['流程'])}")
十、总结与最佳实践
python
# Key takeaways, printed as a numbered list.
print("图像处理总结:")
print("=" * 80)
key_points = [
    "理解图像的基本属性:尺寸、通道、数据类型、值范围",
    "掌握颜色空间转换:RGB、BGR、HSV、灰度等",
    "熟练使用图像基本操作:裁剪、缩放、旋转、翻转",
    "理解图像增强技术:亮度、对比度、直方图均衡化",
    "掌握噪声类型和滤波方法:高斯、椒盐、均值、中值滤波",
    "学习边缘检测算法:Sobel、Canny、拉普拉斯",
    "应用形态学操作:膨胀、腐蚀、开运算、闭运算",
    "选择合适的工具:OpenCV功能强大,Pillow简单易用"
]
for i, point in enumerate(key_points, 1):
    print(f"{i}. {point}")

# Best practices, printed as a numbered list.
print("\n\n最佳实践:")
print("=" * 80)
best_practices = [
    "数据类型:始终注意图像的数据类型(uint8, float32等)",
    "值范围:处理前归一化到[0,1],处理后转换回[0,255]",
    "边界处理:注意卷积操作时的边界填充",
    "性能优化:使用OpenCV的内置函数而非自己实现循环",
    "内存管理:大图像处理时注意内存使用",
    "可视化:处理过程中保存中间结果便于调试",
    "参数调优:滤波器核大小、阈值等参数需要根据场景调整",
    "流水线设计:将多个操作组合成流水线,提高复用性"
]
for i, practice in enumerate(best_practices, 1):
    print(f"{i}. {practice}")

# Quick reference: (task, OpenCV call) pairs; the task name is left-aligned to 12 columns.
print("\n\n常用OpenCV函数速查:")
print("=" * 80)
opencv_cheatsheet = [
    ("读取图像", "cv2.imread('path', flags)"),
    ("保存图像", "cv2.imwrite('path', image)"),
    ("显示图像", "cv2.imshow('name', image)"),
    ("调整大小", "cv2.resize(image, (w, h))"),
    ("颜色转换", "cv2.cvtColor(image, code)"),
    ("高斯模糊", "cv2.GaussianBlur(image, ksize, sigma)"),
    ("边缘检测", "cv2.Canny(image, thresh1, thresh2)"),
    ("阈值处理", "cv2.threshold(image, thresh, maxval, type)"),
    ("形态学操作", "cv2.morphologyEx(image, op, kernel)"),
    ("绘制图形", "cv2.rectangle/circle/line(...)"),
]
for func_name, syntax in opencv_cheatsheet:
    print(f"{func_name:<12}: {syntax}")

# Further reading and suggested next topics.
print("\n\n学习资源:")
print("- OpenCV官方文档: https://docs.opencv.org/")
print("- Python图像处理教程")
print("- 计算机视觉课程(CS131, CS231n)")
print("- 实践项目:图像分类、目标检测、图像分割")
print("\n\n下一步学习:")
print("1. 深入学习卷积神经网络(CNN)")
print("2. 掌握目标检测算法(YOLO、Faster R-CNN)")
print("3. 学习图像分割技术(Semantic Segmentation)")
print("4. 了解图像生成模型(GAN、Stable Diffusion)")
print("5. 实践完整的计算机视觉项目")
本文系统地介绍了图像处理的基础知识和常用操作,从数字图像的基本概念到各种高级处理技术。通过大量的代码示例和实践案例,你可以掌握图像处理的核心技能,为后续深入学习计算机视觉和深度学习打下坚实基础。记住,理论结合实践是最好的学习方式,多动手操作,深入理解每个算法的原理和应用场景!