1. 环境和数据集
| 配置项 | 规格 |
|---------------|---------------------------------------------------|
| GPU型号 | RTX 4090 |
| 显存 | 24GB |
| GPU数量 | 1卡 |
| CPU | 16核 Intel(R) Xeon(R) Platinum 8352V CPU @ 2.10GHz |
| 内存 | 120GB |
| 硬盘 | 50GB |
| 操作系统 | Ubuntu22.04 |
深度学习框架
TensorFlow:版本为 2.6.0
Keras:版本 2.6.0
数据处理与分析
Pandas:版本 1.2.0
NumPy:版本 1.19.5
图像处理相关
OpenCV-Python: 版本 4.10.0.84
Scikit-Image: 版本 0.18.0
Pillow:版本 10.4.0
其他常用工具库
Scikit-Learn:版本 1.3.2
Tqdm:版本 4.67.0
YAML:版本 0.2.5
数据集划分
训练集有138张图片以及标注掩码,测试集有59张图片以及标注掩码。
2. 模型介绍
2.1. 模型发展历程
CNN
特点:自动提取特征
结构:卷积层+激活层+池化层
FCN(全卷积网络)
特点:图像分割
结构:没有那些全连接的层,可以处理任意大小的图像输入,为编解码器架构在图像分割领域的应用打开了大门。
U-Net
特点:编码器提取特征,解码器把提取的特征还原成我们想要的分割结果
结构:编码器+解码器
IterLUNet
特点:信息循环,会把解码器和瓶颈部分的信息再送回编码器
结构:INITIALBLOCK+SE BLOCK+INTERMEDIATEBLOCK+ITERLBLOCK
2.2. INITIALBLOCK 初始块
python
# Used in the first encoder of every iteration; produces the requested
# number of feature maps.
def initial_conv2d_bn(x, filters, num_row, num_col, padding='same', strides=(1, 1), activation='relu', name=None):
    """Conv2D -> BatchNormalization -> optional activation.

    Args:
        x: input tensor.
        filters: number of output feature maps.
        num_row, num_col: kernel height and width.
        padding: Conv2D padding mode.
        strides: Conv2D strides.
        activation: activation name, or None to return the pre-activation tensor.
        name: optional name for the activation layer.

    Returns:
        The transformed tensor.
    """
    # Convolution; bias is omitted because BatchNorm immediately follows.
    x = Conv2D(filters, (num_row, num_col), strides=strides,
               kernel_initializer="he_normal", padding=padding, use_bias=False)(x)
    # Batch normalization over the channel axis (channels_last layout assumed).
    x = BatchNormalization(axis=3, scale=False)(x)
    # Fixed: compare against None with `is`, not `==`.
    if activation is None:
        return x
    # ReLU (or other named) activation.
    x = Activation(activation, name=name)(x)
    return x
2.3. SE BLOCK挤压和激励块
python
def squeeze_excite_block(input, ratio=16):
    """Channel squeeze-and-excitation (SE) block.

    Globally pools the feature map, learns per-channel gates via a
    bottleneck MLP (reduction factor `ratio`), and rescales the input.

    Args:
        input: 4-D feature tensor.
        ratio: channel-reduction factor of the bottleneck Dense layer.

    Returns:
        The input tensor reweighted channel-wise.
    """
    init = input
    # NOTE(review): the extracted source read ".image_data_format()" and
    # "init.[channel_axis]"; restored as K.image_data_format() and
    # init.shape[channel_axis] — assumes `from tensorflow.keras import backend as K`
    # at the top of the file; confirm against the original module.
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    filters = init.shape[channel_axis]
    se_shape = (1, 1, filters)
    # Squeeze: global spatial average per channel.
    se = GlobalAveragePooling2D()(init)
    se = Reshape(se_shape)(se)
    # Excite: bottleneck MLP ending in sigmoid gates.
    se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
    se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)
    # Move channels back to the front for channels_first layouts.
    if K.image_data_format() == 'channels_first':
        se = Permute((3, 1, 2))(se)
    x = multiply([init, se])
    return x
2.4. csSE BLOCK并发信道和空间SE块
python
def spatial_squeeze_excite_block(input):
    """Spatial squeeze-and-excitation (sSE).

    A 1x1 sigmoid convolution produces a single-channel, per-pixel gate
    that rescales every channel of the input at that spatial location.
    """
    gate = Conv2D(1, (1, 1), activation='sigmoid', use_bias=False,
                  kernel_initializer='he_normal')(input)
    return multiply([input, gate])
def channel_spatial_squeeze_excite(input, ratio=16):
    """Concurrent channel and spatial SE (csSE).

    Sums the channel-wise SE recalibration and the spatial SE
    recalibration of the same input tensor.
    """
    return add([
        squeeze_excite_block(input, ratio),   # channel attention (cSE)
        spatial_squeeze_excite_block(input),  # spatial attention (sSE)
    ])
2.5. INTERMEDIATEBLOCK中间块
python
def depthwise_convblock(inputs, filters, num_row, num_col, alpha=1, depth_multiplier=1, strides=(1,1), block_id=1, SE=False):
    """Depthwise-separable convolution block with optional csSE attention.

    DepthwiseConv2D -> BN -> ELU -> pointwise Conv2D -> BN -> ELU,
    optionally followed by channel+spatial squeeze-excite.

    Args:
        inputs: input feature tensor.
        filters: base number of pointwise output filters (scaled by alpha).
        num_row, num_col: depthwise kernel height and width.
        alpha: width multiplier for the pointwise convolution.
        depth_multiplier: depthwise channel multiplier.
        strides: depthwise convolution strides.
        block_id: kept for interface compatibility (unused in the body).
        SE: when True, apply channel_spatial_squeeze_excite to the output.

    Returns:
        The transformed tensor.
    """
    # NOTE(review): the extracted source read ".image_data_format()";
    # restored as K.image_data_format() — assumes the file imports the
    # Keras backend as K.
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(filters * alpha)
    # Depthwise spatial filtering.
    x = DepthwiseConv2D((num_row, num_col), padding='same',
                        depth_multiplier=depth_multiplier, strides=strides,
                        kernel_initializer='he_normal', use_bias=False)(inputs)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('elu')(x)
    # Pointwise (1x1) channel mixing.
    x = Conv2D(pointwise_conv_filters, (1, 1), padding='same',
               kernel_initializer='he_normal', use_bias=False, strides=(1, 1))(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('elu')(x)
    # Optional concurrent channel/spatial attention; the duplicated
    # `return x` of the original is collapsed into a single exit point.
    if SE:
        x = channel_spatial_squeeze_excite(x)
    return x
2.6. ITERLBLOCK迭代循环块
python
def iterLBlock(x, filters, name=None):
    """Inception-style multi-branch block with SE recalibration.

    Four parallel branches (1x1, factorized 3x3, factorized 5x5, and
    average-pool projection) are concatenated, passed through a channel
    SE block, then batch-normalized and ReLU-activated.

    Args:
        x: input feature tensor.
        filters: total filter budget split across the branches.
        name: optional name for the concatenation layer.

    Returns:
        The block's output tensor.
    """
    # Branch widths derived from the requested filter budget.
    n_1x1 = filters // 16
    n_3x3_reduce = filters // 16
    n_3x3 = filters // 4
    n_5x5_reduce = filters // 16
    n_5x5 = filters // 8
    n_pool = filters // 16
    # Branch 1: plain 1x1 convolution.
    branch_1x1 = initial_conv2d_bn(x, n_1x1, 1, 1, padding='same', activation='relu')
    # Branch 2: 1x1 reduction, then 3x3 factorized into 3x1 and 1x3.
    reduce_3x3 = initial_conv2d_bn(x, n_3x3_reduce, 1, 1, padding='same', activation='relu')
    branch_3x1 = conv2d_bn(reduce_3x3, n_3x3 // 2, 3, 1)
    branch_1x3 = conv2d_bn(reduce_3x3, n_3x3 // 2, 1, 3)
    # Branch 3: 1x1 reduction, a 3x3, then another factorized 3x1/1x3 pair.
    reduce_5x5 = initial_conv2d_bn(x, n_5x5_reduce, 1, 1, padding='same', activation='relu')
    mid_5x5 = conv2d_bn(reduce_5x5, n_5x5, 3, 3)
    branch_5x5_3x1 = conv2d_bn(mid_5x5, n_5x5 // 2, 3, 1)
    branch_5x5_1x3 = conv2d_bn(mid_5x5, n_5x5 // 2, 1, 3)
    # Branch 4: average pooling followed by a 1x1 projection.
    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = initial_conv2d_bn(branch_pool, n_pool, 1, 1, padding='same', activation='relu')
    # Fuse branches (order preserved), recalibrate channels, normalize, activate.
    merged = concatenate([branch_1x1, branch_3x1, branch_1x3,
                          branch_5x5_3x1, branch_5x5_1x3, branch_pool],
                         axis=3, name=name)
    out = squeeze_excite_block(merged)
    out = BatchNormalization(axis=3)(out)
    return Activation('relu')(out)
2.7. 总体结构
python
def IterLUNet(input_filters, height, width, n_channels):
    """Build the IterLUNet segmentation model.

    Three progressively deeper encoder/decoder iterations share
    information: each iteration re-encodes the raw input, concatenates it
    with the previous iteration's decoder output, and reuses the previous
    bottleneck/decoder features inside its encoder path.

    Args:
        input_filters: base channel width; deeper stages scale it up.
        height, width: spatial size of the input images.
        n_channels: number of input image channels.

    Returns:
        A Keras Model producing a single-channel sigmoid mask
        (binary segmentation).
    """
    inputs = Input((height, width, n_channels))
    filters = input_filters
    # Iteration 1: single-level encoder -> bottleneck -> decoder.
    block1 = initial_conv2d_bn(inputs, filters*1, 3, 3, padding='same', activation='relu')
    pool1 = MaxPooling2D(pool_size=(2, 2))(block1)
    bottleneck1 = iterLBlock(pool1, filters*2, name = 'iterLBlock1')
    # Upsample and fuse with the skip connection from block1.
    up1 = concatenate([Conv2DTranspose(
        filters*1, (2, 2), strides=(2, 2), padding='same')(bottleneck1), block1], axis=3)
    level1 = iterLBlock(up1, filters*1,name = 'iterLBlock2' )
    # Iteration 2: two-level encoder/decoder; reuses level1 and bottleneck1.
    encoder2 = initial_conv2d_bn(inputs, filters*1, 3, 3, padding='same', activation='relu')
    # Feed iteration-1 decoder output back into the iteration-2 encoder.
    inter1 = concatenate([encoder2, level1], axis=3)
    inter1 = depthwise_convblock(inter1, filters, 3,3, depth_multiplier=1, SE=True)
    block2 = iterLBlock(inter1, filters*2, name = 'iterLBlock3')
    pool2 = MaxPooling2D(pool_size=(2, 2))(block2)
    # Reuse the iteration-1 bottleneck at the matching resolution.
    inter21 = concatenate([pool2, bottleneck1], axis=3)
    inter21 = depthwise_convblock(inter21, filters*2, 3,3, depth_multiplier=1, SE=True)
    block21 = iterLBlock(inter21, filters*4,name = 'iterLBlock4')
    pool21 = MaxPooling2D(pool_size=(2, 2))(block21)
    bottleneck2 = iterLBlock(pool21, filters*8, name = 'iterLBlock5')
    # Decoder of iteration 2 with skip connections.
    up21 = concatenate([Conv2DTranspose(
        filters*4, (2, 2), strides=(2, 2), padding='same')(bottleneck2), block21], axis=3)
    block22 = iterLBlock(up21, filters*4, name = 'iterLBlock6')
    up22 = concatenate([Conv2DTranspose(
        filters*2, (2, 2), strides=(2, 2), padding='same')(block22), block2], axis=3)
    level2 = iterLBlock(up22, filters*2, name = 'iterLBlock7')
    # Iteration 3: three-level encoder/decoder; reuses level2, block22, bottleneck2.
    encoder3 = initial_conv2d_bn(inputs, filters*2, 3, 3, padding='same', activation='relu')
    inter3 = concatenate([encoder3, level2], axis=3)
    inter3 = depthwise_convblock(inter3, filters*2, 3,3, depth_multiplier=1, SE=True)
    block3 = iterLBlock(inter3, filters*2, name = 'iterLBlock8')
    pool3 = MaxPooling2D(pool_size=(2, 2))(block3)
    inter31 = concatenate([pool3, block22], axis=3)
    inter31 = depthwise_convblock(inter31, filters*4, 3,3, depth_multiplier=1, SE=True)
    block31 = iterLBlock(inter31, filters*4, name = 'iterLBlock9')
    pool31 = MaxPooling2D(pool_size=(2, 2))(block31)
    inter32 = concatenate([pool31, bottleneck2], axis=3)
    inter32 = depthwise_convblock(inter32, filters*8, 3,3, depth_multiplier=1, SE=True)
    block32 = iterLBlock(inter32, filters*8, name = 'iterLBlock10')
    pool32 = MaxPooling2D(pool_size=(2, 2))(block32)
    bottleneck3 = iterLBlock(pool32, filters*16, name = 'iterLBlock11')
    # Decoder of iteration 3 with skip connections at each level.
    up3 = concatenate([Conv2DTranspose(
        filters*8, (2, 2), strides=(2, 2), padding='same')(bottleneck3), block32], axis=3)
    block33 = iterLBlock(up3, filters*8, name = 'iterLBlock12')
    up31 = concatenate([Conv2DTranspose(
        filters*4, (2, 2), strides=(2, 2), padding='same')(block33), block31], axis=3)
    block34 = iterLBlock(up31, filters*4, name = 'iterLBlock13')
    up32 = concatenate([Conv2DTranspose(
        filters*2, (2, 2), strides=(2, 2), padding='same')(block34), block3], axis=3)
    level3 = iterLBlock(up32, filters*2, name = 'iterLBlock14')
    # 1x1 sigmoid head -> single-channel probability mask.
    out = Conv2D(1, (1, 1), padding="same", activation="sigmoid", name='visualized_layer')(level3)
    model = Model(inputs=[inputs], outputs=[out])
    return model
3. 训练过程
3.1. 参数设置
python
# Experiment configuration. Integer-valued options (image geometry, model
# width, training schedule, loader workers) are registered from a table;
# registration order matches the original flag order.
for _flag, _default in [
    ('--img_width', 256),
    ('--img_height', 256),
    ('--img_ch', 3),
    ('--output_ch', 1),
    ('--input_filters', 64),
    ('--num_epochs', 150),
    ('--batch_size', 4),
    ('--num_workers', 8),
]:
    parser.add_argument(_flag, type=int, default=_default)
# Optimization settings.
parser.add_argument('--lr', type=float, default=2e-3)
parser.add_argument('--optimizer', type=str, default='Adam')
parser.add_argument('--loss_function', type=str, default='focal_tversky_loss')
parser.add_argument('--beta1', type=float, default=0.9)  # Adam first-moment decay
parser.add_argument('--beta2', type=float, default=0.9)  # Adam second-moment decay
# Model selection and output/input paths.
parser.add_argument('--model_type', type=str, default='iterlunet', help='unet/multiresunet/attentionunet/nestedunet/iterlunet')
parser.add_argument('--model_path', type=str, default='./models/iterlunet')
parser.add_argument('--graph_path', type=str, default='./models/focal_tversky_loss_metric_graphs')
parser.add_argument('--result_path', type=str, default='./results/')
parser.add_argument('--train_valid_path', type=str, default='./dataset/experiment_1/train/')
parser.add_argument('--test_path', type=str, default='./dataset/experiment_1/test/')
# Train/validation split fraction and RNG seed.
parser.add_argument('--valid_perc', type=float, default=0.2)
parser.add_argument('--seed', type=int, default=2021)