深度学习案例：DenseNet + SE-Net

本文为🔗365天深度学习训练营内部文章

原作者：K同学啊

一、回顾DenseNet算法

DenseNet（Densely Connected Convolutional Networks）是一种深度卷积神经网络架构，其核心思想是将每一层与前面所有层直接相连，从而极大地增强信息和梯度的流动。在传统的卷积神经网络（CNN）结构中，每一层的输入仅来自前一层，而DenseNet让每一层的输入包含前面所有层的输出，形成了更密集的连接。这样的设计能够缓解梯度消失问题，促进特征复用，提高模型的表现力和学习效率。

DenseNet的优势主要体现在两个方面。首先，由于密集连接的特点，它在同等参数量下比传统的卷积网络能够学习到更丰富的特征，提升了网络的性能。其次，由于每层都接收前面层的特征图，DenseNet有效缓解了深度神经网络中训练难度较大的问题，特别是在处理深层网络时，可以显著提高梯度的传递效率，减少了对大规模数据集的需求。通过这些优点，DenseNet在图像分类、目标检测等任务中表现出色。
通道注意力机制（SE-Net）已在上文介绍，此处不再赘述。以下是DenseNet+SE-Net的代码：

python 复制代码

'''
SE模块实现
'''
import tensorflow as tf
from keras.models import Model
from keras import layers
from keras import backend

class Squeeze_excitation_layer(tf.keras.Model):
    """Squeeze-and-Excitation (SE) channel-attention block.

    The input is global-average-pooled, passed through two Dense layers
    (ReLU, then sigmoid) and the resulting per-channel weights are
    multiplied back onto the input.

    Assumes a 4-D channels-last input (batch, height, width, channels):
    the last axis of the input shape is used as the channel count and the
    weights are reshaped to (1, 1, channels) before the multiplication.

    Args:
        filter_sq: Number of units in the first (ReLU) Dense layer.
    """

    def __init__(self, filter_sq):
        super().__init__()
        self.filter_sq = filter_sq
        self.avepool = tf.keras.layers.GlobalAveragePooling2D()

    def build(self, input_shape):
        """Create the sub-layers whose sizes depend on the input channels."""
        channels = input_shape[-1]
        self.dense1 = tf.keras.layers.Dense(self.filter_sq, activation='relu')
        self.dense2 = tf.keras.layers.Dense(channels, activation='sigmoid')
        # Created once here instead of instantiating a new Reshape layer on
        # every forward pass inside call().
        self.reshape_layer = tf.keras.layers.Reshape((1, 1, channels))

    def call(self, inputs):
        """Return `inputs` rescaled channel-wise by the learned weights."""
        squeeze = self.avepool(inputs)
        excitation = self.dense1(squeeze)
        excitation = self.dense2(excitation)
        # (batch, channels) -> (batch, 1, 1, channels) so the weights
        # broadcast over the two middle axes of `inputs`.
        excitation = self.reshape_layer(excitation)
        return inputs * excitation



def dense_block(x, blocks, name, growth_rate=32):
    """Stack `blocks` conv blocks, each concatenating new features onto `x`.

    Args:
        x: Input tensor.
        blocks: Number of conv blocks to apply in sequence.
        name: Prefix for layer names; block i is named `<name>_block<i>`
            with i starting at 1.
        growth_rate: Growth rate forwarded to every `conv_block`.
            Defaults to 32, the value that was previously hard-coded.

    Returns:
        The output tensor of the last conv block (or `x` unchanged when
        `blocks` is 0).
    """
    for index in range(1, blocks + 1):
        x = conv_block(x, growth_rate, name=name + '_block' + str(index))
    return x

def conv_block(x, growth_rate, name):
    """Apply one BN-ReLU-Conv(1x1) / BN-ReLU-Conv(3x3) unit and concatenate.

    Args:
        x: Input tensor; axis 3 is used as the channel axis.
        growth_rate: Number of filters of the 3x3 convolution; the 1x1
            convolution before it uses four times as many.
        name: Prefix for the names of all layers created here.

    Returns:
        `x` concatenated with the newly computed features along axis 3.
    """
    channel_axis = 3

    # The layers of the new-feature branch, in application order.
    branch_layers = [
        layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1.001e-5,
                                  name=name + '_0_bn'),
        layers.Activation('relu', name=name + '_0_relu'),
        layers.Conv2D(4 * growth_rate, 1, use_bias=False,
                      name=name + '_1_conv'),
        layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1.001e-5,
                                  name=name + '_1_bn'),
        layers.Activation('relu', name=name + '_1_relu'),
        layers.Conv2D(growth_rate, 3, padding='same', use_bias=False,
                      name=name + '_2_conv'),
    ]

    new_features = x
    for layer in branch_layers:
        new_features = layer(new_features)

    merge = layers.Concatenate(axis=channel_axis, name=name + '_concat')
    return merge([x, new_features])

def transition_block(x, reduction, name):
    """Compress the channels with a 1x1 convolution, then average-pool by 2.

    Args:
        x: Input tensor; axis 3 is used as the channel axis.
        reduction: Factor applied to the channel count to get the number
            of filters of the 1x1 convolution (result truncated by `int`).
        name: Prefix for the names of all layers created here.

    Returns:
        The tensor after BN, ReLU, the 1x1 convolution and 2x2 average
        pooling with stride 2.
    """
    channel_axis = 3

    normalized = layers.BatchNormalization(axis=channel_axis,
                                           epsilon=1.001e-5,
                                           name=name + '_bn')(x)
    activated = layers.Activation('relu', name=name + '_relu')(normalized)

    in_channels = backend.int_shape(activated)[channel_axis]
    out_channels = int(in_channels * reduction)

    compressed = layers.Conv2D(out_channels, 1, use_bias=False,
                               name=name + '_conv')(activated)
    return layers.AveragePooling2D(2, strides=2, name=name + '_pool')(compressed)

def DenseNet(blocks,input_shape=None,classes=1000,**kwargs):
    """Build a DenseNet classifier with an SE block after the last dense block.

    Args:
        blocks: Sequence of four integers, the number of conv blocks in
            each of the four dense blocks.
        input_shape: Shape passed to `layers.Input` (without the batch
            axis). The shape comments below assume (224, 224, 3).
        classes: Number of units of the final softmax layer.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        A Keras `Model`. It is named 'densenet121', 'densenet169' or
        'densenet201' when `blocks` matches one of the configurations
        listed in the lookup table at the end, otherwise 'densenet'.
    """
    img_input = layers.Input(shape=input_shape)

    bn_axis = 3

    # Stem: 224,224,3 -> 112,112,64
    x = layers.ZeroPadding2D(padding=((3,3),(3,3)))(img_input)
    x = layers.Conv2D(64,7,strides=2,use_bias=False,name='conv1/conv')(x)
    x = layers.BatchNormalization(axis=bn_axis,epsilon=1.001e-5,name='conv1/bn')(x)
    x = layers.Activation('relu',name='conv1/relu')(x)

    # 112,112,64 -> 56,56,64
    x = layers.ZeroPadding2D(padding=((1,1),(1,1)))(x)
    x = layers.MaxPooling2D(3,strides=2,name='pool1')(x)

    # 56,56,64 -> 56,56,64+32*blocks[0]
    # DenseNet121: 56,56,64 -> 56,56,64+32*6 == 56,56,256
    x = dense_block(x,blocks[0],name='conv2')

    # 56,56,64+32*blocks[0] -> 28,28,32+16*blocks[0]
    # DenseNet121: 56,56,256 -> 28,28,128
    x = transition_block(x,0.5,name='pool2')

    # 28,28,32+16*blocks[0] -> 28,28,32+16*blocks[0]+32*blocks[1]
    # DenseNet121: 28,28,128 -> 28,28,128+32*12 == 28,28,512
    x = dense_block(x,blocks[1],name='conv3')

    # DenseNet121: 28,28,512 -> 14,14,256
    x = transition_block(x,0.5,name='pool3')

    # DenseNet121: 14,14,256 -> 14,14,256+32*24 == 14,14,1024
    x = dense_block(x,blocks[2],name='conv4')

    # DenseNet121: 14,14,1024 -> 7,7,512
    x = transition_block(x,0.5,name='pool4')

    # DenseNet121: 7,7,512 -> 7,7,512+32*16 == 7,7,1024
    x = dense_block(x,blocks[3],name='conv5')

    # Add the SE channel-attention block on the final feature map.
    x = Squeeze_excitation_layer(16)(x)

    x = layers.BatchNormalization(axis=bn_axis,epsilon=1.001e-5,name='bn')(x)
    x = layers.Activation('relu',name='relu')(x)

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(classes,activation='softmax',name='fc1000')(x)

    # Keyed on a tuple so that `blocks` given as a list or as a tuple both
    # resolve to the same model name.
    known_names = {
        (6,12,24,16): 'densenet121',
        (6,12,32,32): 'densenet169',
        (6,12,48,32): 'densenet201',
    }
    model_name = known_names.get(tuple(blocks),'densenet')
    return Model(img_input,x,name=model_name)

def DenseNet121(input_shape=(224,224,3),classes=3,**kwargs):
    """Build the DenseNet variant with dense-block sizes 6, 12, 24, 16.

    The default `input_shape` is a tuple rather than a list so that the
    default value is not a shared mutable object.
    """
    return DenseNet([6,12,24,16],input_shape,classes,**kwargs)

def DenseNet169(input_shape=(224,224,3),classes=3,**kwargs):
    """Build the DenseNet variant with dense-block sizes 6, 12, 32, 32.

    The default `input_shape` is a tuple rather than a list so that the
    default value is not a shared mutable object.
    """
    return DenseNet([6,12,32,32],input_shape,classes,**kwargs)

def DenseNet201(input_shape=(224,224,3),classes=3,**kwargs):
    """Build the DenseNet variant with dense-block sizes 6, 12, 48, 32.

    The default `input_shape` is a tuple rather than a list so that the
    default value is not a shared mutable object.
    """
    return DenseNet([6,12,48,32],input_shape,classes,**kwargs)

from tensorflow.keras.optimizers import Adam

# Instantiate the model, specifying the input shape and the number of classes.
model = DenseNet201(input_shape=[224,224,3], classes=2)
model.summary()

python 复制代码

# Configure the optimizer.
# NOTE(review): 1e-7 is a very small learning rate for Adam — confirm that
# this value is intended before relying on the training results.
opt = tf.keras.optimizers.Adam(learning_rate=1e-7)

# Pass the configured optimizer instance. The string 'adam' would make Keras
# build a separate Adam optimizer with default settings, leaving `opt` (and
# its learning rate) unused.
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

epochs = 25

# `train_ds` and `val_ds` are expected to be defined before this point.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

# Number of epochs actually recorded in the training history.
actual_epochs = len(history.history['accuracy'])

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(actual_epochs)

plt.figure(figsize=(12, 4))

# Left subplot: training and validation accuracy per epoch.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right subplot: training and validation loss per epoch.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()

总结：DenseNet与SE-Net（Squeeze-and-Excitation Networks）结合后，能够进一步增强模型的表现力和效率。DenseNet通过密集连接每一层，促进了特征的复用和梯度的流动，而SE-Net通过引入通道注意力机制，能够自动学习每个特征通道的重要性，调整通道的权重。将这两者结合起来，DenseNet负责加强特征之间的关联性和信息流动，而SE-Net则提升了特征通道的自适应能力，使得网络能够在不同任务中更加精准地利用最有用的特征。这样的结合使得模型在保持高效的同时，能够更加聚焦于有价值的特征，从而提升了性能，尤其在处理复杂的视觉任务时，表现尤为出色。

深度学习案例：DenseNet + SE-Net

一 回顾DenseNet算法

一回顾DenseNet算法