复习日
作业:
kaggle找到一个图像数据集,用cnn网络进行训练并且用grad-cam做可视化
进阶:将代码拆分成多个文件
1. 数据集准备
在 Kaggle 上搜索并下载 "Cats VS. Dogs" 数据集
2. 项目结构
CNN_CatDog/
├── data/ # 数据集目录
│ ├── train/ # 训练集(每个类别一个子目录,如 cat/、dog/)
│ └── test/ # 测试集(同样按类别分子目录,如 test/dog/10002.jpg)
├── models/ # 模型保存目录
├── utils/ # 工具文件夹
│ ├── data_loader.py # 数据加载工具
│ ├── model.py # 模型定义
│ └── grad_cam.py # Grad-CAM 实现
├── train.py # 训练脚本
├── test.py # 测试脚本
└── visualize.py # 可视化脚本
3. 数据预处理(utils/data_loader.py)
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
def load_data(train_dir, test_dir, img_height, img_width, batch_size):
    """Build the training, validation and test image generators.

    Training images are rescaled to [0, 1] and randomly augmented. The
    validation subset is read from the same directory but is only rescaled,
    so validation metrics are measured on unmodified images.

    Args:
        train_dir: Directory with one sub-directory per class; 80% of it is
            used for training and 20% for validation.
        test_dir: Directory with one sub-directory per class, used for testing.
        img_height: Height every image is resized to.
        img_width: Width every image is resized to.
        batch_size: Number of images per batch for all three generators.

    Returns:
        A tuple ``(train_generator, validation_generator, test_generator)``.
    """
    validation_fraction = 0.2

    # Augmentation is applied to the training subset only.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=validation_fraction,
    )
    # Same split fraction, no augmentation: drawing the validation subset
    # from the augmenting generator would randomly distort validation images
    # and make val_loss / val_accuracy noisy.
    validation_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_fraction,
    )
    test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='binary',
        subset='training',
    )
    validation_generator = validation_datagen.flow_from_directory(
        train_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='binary',
        subset='validation',
    )
    # shuffle=False keeps the batch order stable for the test generator.
    test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=False,
    )
    return train_generator, validation_generator, test_generator
4. 模型定义(utils/model.py)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
def create_cnn_model(img_height, img_width):
    """Create and compile the binary-classification CNN.

    The network stacks three Conv2D -> BatchNormalization -> MaxPooling2D
    stages (32, 64 and 128 filters), followed by a 512-unit dense layer with
    dropout and a single sigmoid output unit.

    Args:
        img_height: Height of the input images.
        img_width: Width of the input images.

    Returns:
        The compiled Sequential model (Adam optimizer, binary cross-entropy
        loss, accuracy metric).
    """
    stack = []
    for stage_index, filters in enumerate((32, 64, 128)):
        conv_kwargs = {'activation': 'relu'}
        if stage_index == 0:
            # Only the first layer declares the input shape (3 channels).
            conv_kwargs['input_shape'] = (img_height, img_width, 3)
        stack.append(Conv2D(filters, (3, 3), **conv_kwargs))
        stack.append(BatchNormalization())
        stack.append(MaxPooling2D((2, 2)))

    # Classifier head.
    stack.append(Flatten())
    stack.append(Dense(512, activation='relu'))
    stack.append(Dropout(0.5))
    stack.append(Dense(1, activation='sigmoid'))

    model = Sequential(stack)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model
5. Grad-CAM 实现(utils/grad_cam.py)
import numpy as np
import tensorflow as tf
import cv2
def get_last_conv_layer(model):
    """Return the last Conv2D layer of ``model``, or None if it has none.

    The layers are scanned from the end of ``model.layers`` towards the
    start, and the first Conv2D instance encountered is returned.
    """
    conv_layers_from_end = (
        candidate
        for candidate in reversed(model.layers)
        if isinstance(candidate, tf.keras.layers.Conv2D)
    )
    return next(conv_layers_from_end, None)
def compute_grad_cam(model, img_array, pred_class, last_conv_layer):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        model: Keras model to explain.
        img_array: Batch holding the preprocessed image; axes 1 and 2 are
            used as the output height and width of the heatmap.
        pred_class: Index of the class to explain. For a model with a single
            sigmoid output unit, 1 means the positive class and any other
            value means the negative class.
        last_conv_layer: Layer of ``model`` whose output feature maps are
            weighted to build the heatmap.

    Returns:
        A 2-D NumPy array with values in [0, 1], resized to the spatial size
        of ``img_array``.
    """
    # Model mapping the input to both the conv feature maps and the prediction.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [last_conv_layer.output, model.output],
    )

    with tf.GradientTape() as tape:
        conv_output, preds = grad_model(img_array)
        if preds.shape[-1] == 1:
            # A single sigmoid unit only has column 0: it is the score of
            # class 1, and (1 - score) is the score of class 0. Indexing
            # column `pred_class` directly would go out of range for class 1.
            positive_score = preds[:, 0]
            class_score = positive_score if pred_class == 1 else 1.0 - positive_score
        else:
            class_score = preds[:, pred_class]

    # Gradient of the class score w.r.t. the feature maps, averaged per channel.
    grads = tape.gradient(class_score, conv_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each channel by its pooled gradient and collapse the channels.
    weighted_maps = tf.multiply(conv_output[0], pooled_grads)
    # Convert to NumPy once here so every later step works on an ndarray.
    heatmap = tf.reduce_mean(weighted_maps, axis=-1).numpy()

    # ReLU, then normalize to [0, 1]; guard against an all-zero map.
    heatmap = np.maximum(heatmap, 0)
    max_heat = np.max(heatmap)
    if max_heat == 0:
        max_heat = 1e-10
    heatmap = heatmap / max_heat

    # cv2.resize expects the target size as (width, height).
    heatmap = cv2.resize(heatmap, (img_array.shape[2], img_array.shape[1]))
    return heatmap
6. 训练脚本(train.py)
import os
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from utils.data_loader import load_data
from utils.model import create_cnn_model
if __name__ == "__main__":
    # Training settings.
    img_height = 150
    img_width = 150
    batch_size = 32
    epochs = 50

    # Load the data; the test generator is not needed for training.
    train_generator, validation_generator, _ = load_data(
        'data/train',
        'data/test',
        img_height,
        img_width,
        batch_size
    )

    # Build the model.
    model = create_cnn_model(img_height, img_width)

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs('models', exist_ok=True)

    # Keep the model with the best validation accuracy, and stop once the
    # validation loss has not improved for 5 epochs.
    checkpoint = ModelCheckpoint('models/best_model.h5', monitor='val_accuracy', save_best_only=True, mode='max')
    early_stop = EarlyStopping(monitor='val_loss', patience=5, mode='min')

    # len(generator) counts the final partial batch too, so no sample is
    # dropped and the step count never becomes 0 for small datasets
    # (samples // batch_size is 0 when samples < batch_size).
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
        callbacks=[checkpoint, early_stop]
    )
7. 测试脚本(test.py)
import os
import numpy as np
from tensorflow.keras.models import load_model
from utils.data_loader import load_data
if __name__ == "__main__":
    # Evaluation settings; the image size matches the training script.
    img_height = 150
    img_width = 150
    batch_size = 32

    # Load the data; only the test generator is used here.
    _, _, test_generator = load_data(
        'data/train',
        'data/test',
        img_height,
        img_width,
        batch_size
    )

    # Load the checkpoint written by the training script.
    model = load_model('models/best_model.h5')

    # Evaluate on every test image: len(test_generator) includes the final
    # partial batch, whereas samples // batch_size would skip it (and would
    # be 0 when there are fewer samples than batch_size).
    test_loss, test_acc = model.evaluate(
        test_generator,
        steps=len(test_generator),
        verbose=2
    )
    print(f'\nTest accuracy: {test_acc:.4f}')
    print(f'Test loss: {test_loss:.4f}')
8. 可视化脚本(visualize.py)
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from utils.grad_cam import get_last_conv_layer, compute_grad_cam
if __name__ == "__main__":
    # Imported locally: the overlay code below uses OpenCV, but this
    # script's import block does not bring cv2 into scope.
    import cv2

    # Image size; same values as in the training script.
    img_height = 150
    img_width = 150

    # Load the model and locate its last convolutional layer.
    model = load_model('models/best_model.h5')
    last_conv_layer = get_last_conv_layer(model)
    if last_conv_layer is None:
        raise ValueError('No Conv2D layer found in the loaded model; Grad-CAM needs one.')

    # Load and preprocess the image (scaled to [0, 1], batch axis added).
    img_path = 'data/test/dog/10002.jpg'  # replace with the image to explain
    img = image.load_img(img_path, target_size=(img_height, img_width))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0) / 255.0

    # Predicted class of the binary classifier: threshold the score at 0.5.
    pred = model.predict(img_array)
    pred_class = 1 if float(pred[0][0]) >= 0.5 else 0

    # Compute the Grad-CAM heatmap.
    heatmap = compute_grad_cam(model, img_array, pred_class, last_conv_layer)

    # Show the raw heatmap.
    plt.matshow(heatmap)
    plt.title('Grad-CAM Heatmap')
    plt.show()

    # Overlay the heatmap on the original image.
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError(f'Could not read image: {img_path}')
    # cv2.resize takes the target size as (width, height).
    img = cv2.resize(img, (img_width, img_height))
    heatmap = cv2.resize(heatmap, (img_width, img_height))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    # The weighted sum is float64 and can exceed 255; clip and convert back
    # to uint8 so imwrite, cvtColor and imshow all receive a valid 8-bit image.
    superimposed_img = np.clip(heatmap * 0.4 + img, 0, 255).astype(np.uint8)
    cv2.imwrite('grad_cam_result.jpg', superimposed_img)
    plt.imshow(cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB))
    plt.title('Grad-CAM Result')
    plt.show()
总结
以上代码实现了使用 CNN 网络对猫狗图像数据集进行训练,并通过 Grad-CAM 对模型进行可视化。代码被拆分成多个文件,每个文件负责不同的功能模块,便于管理和维护。你可以根据实际需求调整网络结构、超参数和数据集路径等。
通过训练脚本 train.py 进行模型训练,并通过回调函数保存最佳模型;使用 test.py 进行模型测试,评估模型性能;最后通过 visualize.py 使用 Grad-CAM 对模型的预测结果进行可视化,生成热图并叠加到原图上,帮助理解模型的决策依据。