EfficientNetV1(pytorch)

之前的研究探索的是单个因素改变的影响,这篇论文采用网络搜索机制同时探索3个因素的影响。

该图是同时在深度(+layer),宽度(+channel),分辨率(+h,+w)进行增加,探索影响。

这是一个示意图,EfficientNet的基础模块是MBConv,它同样也是MobileNetV3的基础模块。

整体网络设计相关

MBConv模块:

注意力机制:

这个和MobileNetV3有些不同,第一层全连接长度不是升维后的3*3DW卷积维度的1/4,而是PW升维前的输入通道的1/4。

B0的参数表:

model.py

读模型代码我喜欢从最下面的 def efficientnet_b2(num_classes=1000): 模型定义时候开始看,从传参调用一个一个去看模型的整个结构,包括所有MBConv模块参数的构建,所有层的构建,小模块的构建等等细节。从整体到局部的看。

python 复制代码
import math
import copy
from functools import partial
from collections import OrderedDict
from typing import Optional, Callable

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import functional as F


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    # 内存友好,对输入通道调整到最接近的8的倍数的通道数
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Stochastic Depth: randomly zero out whole samples on a residual branch.

    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf
    Adapted from rwightman:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140
    """
    if not training or drop_prob == 0.:
        return x
    keep_prob = 1 - drop_prob
    # One Bernoulli draw per sample; shape broadcasts over all remaining dims,
    # so this works for any tensor rank, not just 2D ConvNets.
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = torch.rand(mask_shape, dtype=x.dtype, device=x.device).add_(keep_prob).floor_()
    # Scale surviving samples by 1/keep_prob so the expected value is unchanged.
    return x.div(keep_prob) * mask


class DropPath(nn.Module):
    """
    Module wrapper around :func:`drop_path` (Stochastic Depth per sample,
    applied on the main path of residual blocks).

    "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf
    """

    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # Delegate to the functional form; active only while self.training is True.
        return drop_path(x, self.drop_prob, self.training)


class ConvBNActivation(nn.Sequential):
    """Conv2d -> BatchNorm -> activation, packaged as one sequential unit.

    Padding is derived from the kernel size, giving "same"-style padding
    for odd kernels. Defaults: BatchNorm2d and SiLU (a.k.a. Swish, torch>=1.7).
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        activation_layer = nn.SiLU if activation_layer is None else activation_layer
        conv = nn.Conv2d(in_channels=in_planes,
                         out_channels=out_planes,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=(kernel_size - 1) // 2,
                         groups=groups,
                         bias=False)  # bias is redundant in front of BatchNorm
        super().__init__(conv, norm_layer(out_planes), activation_layer())


class SqueezeExcitation(nn.Module):
    """Squeeze-and-Excitation channel-attention block.

    Note: unlike MobileNetV3, the squeeze width is derived from the MBConv
    *input* channels (input_c // squeeze_factor), not from the expanded
    channels produced by the 1x1 expansion.
    """

    def __init__(self,
                 input_c: int,   # channels entering the MBConv block
                 expand_c: int,  # channels after the 1x1 expansion (= SE input)
                 squeeze_factor: int = 4):
        super().__init__()
        squeeze_c = input_c // squeeze_factor
        # Attribute names fc1/ac1/fc2/ac2 are kept for checkpoint compatibility.
        self.fc1 = nn.Conv2d(expand_c, squeeze_c, 1)
        self.ac1 = nn.SiLU()  # alias Swish
        self.fc2 = nn.Conv2d(squeeze_c, expand_c, 1)
        self.ac2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        # Squeeze: global average pool to 1x1; excite: two 1x1 convs;
        # finally rescale the feature map channel-wise.
        weights = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        weights = self.ac1(self.fc1(weights))
        weights = self.ac2(self.fc2(weights))
        return weights * x


class InvertedResidualConfig:
    """Hyper-parameters of one MBConv block, pre-scaled by the width multiplier.

    Field order mirrors the B0 architecture table:
    kernel_size, in_channel, out_channel, exp_ratio, stride, use_SE,
    drop_connect_rate, plus a block index string such as "1a", "2b", ...
    """

    def __init__(self,
                 kernel: int,          # depthwise kernel size: 3 or 5
                 input_c: int,
                 out_c: int,
                 expanded_ratio: int,  # 1 or 6
                 stride: int,          # 1 or 2
                 use_se: bool,         # always True for EfficientNet
                 drop_rate: float,
                 index: str,           # 1a, 2a, 2b, ...
                 width_coefficient: float):
        self.kernel = kernel
        self.input_c = self.adjust_channels(input_c, width_coefficient)
        self.out_c = self.adjust_channels(out_c, width_coefficient)
        # Expansion applies to the already width-scaled input channels.
        self.expanded_c = self.input_c * expanded_ratio
        self.stride = stride
        self.use_se = use_se
        self.drop_rate = drop_rate
        self.index = index

    @staticmethod
    def adjust_channels(channels: int, width_coefficient: float):
        """Scale a channel count by the width multiplier, rounded to a multiple of 8."""
        return _make_divisible(channels * width_coefficient, 8)


class InvertedResidual(nn.Module):
    """One MBConv block: 1x1 expand -> depthwise conv -> SE -> 1x1 project.

    Built from a single InvertedResidualConfig entry plus a normalization
    layer factory. A residual shortcut is used only when stride == 1 and the
    input/output channel counts match.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if cnf.stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        # Shortcut only when spatial size and channel count are preserved.
        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        swish = nn.SiLU  # alias Swish
        # Named sub-modules collected in order; the names become the keys of
        # the resulting nn.Sequential (important for loading checkpoints).
        layers = OrderedDict()

        # 1x1 pointwise expansion — skipped when the expand ratio is 1.
        if cnf.expanded_c != cnf.input_c:
            layers["expand_conv"] = ConvBNActivation(cnf.input_c,
                                                     cnf.expanded_c,
                                                     kernel_size=1,
                                                     norm_layer=norm_layer,
                                                     activation_layer=swish)

        # Depthwise conv (groups == channels).
        layers["dwconv"] = ConvBNActivation(cnf.expanded_c,
                                            cnf.expanded_c,
                                            kernel_size=cnf.kernel,
                                            stride=cnf.stride,
                                            groups=cnf.expanded_c,
                                            norm_layer=norm_layer,
                                            activation_layer=swish)

        # Every EfficientNet MBConv uses SE; note the squeeze width comes
        # from the block's input channels, which differs from MobileNetV3.
        if cnf.use_se:
            layers["se"] = SqueezeExcitation(cnf.input_c, cnf.expanded_c)

        # 1x1 pointwise projection back down — linear, i.e. no activation.
        layers["project_conv"] = ConvBNActivation(cnf.expanded_c,
                                                  cnf.out_c,
                                                  kernel_size=1,
                                                  norm_layer=norm_layer,
                                                  activation_layer=nn.Identity)

        self.block = nn.Sequential(layers)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

        # Stochastic depth only makes sense on the residual path.
        if self.use_res_connect and cnf.drop_rate > 0:
            self.dropout = DropPath(cnf.drop_rate)
        else:
            self.dropout = nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        out = self.dropout(self.block(x))
        if self.use_res_connect:
            out += x
        return out


class EfficientNet(nn.Module):
    """EfficientNetV1 backbone + classifier.

    The B0 architecture is hard-coded in ``default_cnf`` below; larger
    variants are obtained via compound scaling: ``width_coefficient``
    scales channel counts and ``depth_coefficient`` scales stage depths.
    """
    def __init__(self,
                 width_coefficient: float,
                 depth_coefficient: float,
                 num_classes: int = 1000,
                 dropout_rate: float = 0.2,
                 drop_connect_rate: float = 0.2,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None
                 ):
        # dropout_rate: dropout probability of the final fully-connected classifier.
        # drop_connect_rate: stochastic-depth (DropPath) rate of the MBConv blocks;
        # the per-block rate ramps up linearly from 0 towards this value.
        super(EfficientNet, self).__init__()

        # B0 baseline configuration (compare with the architecture table);
        # the last column "repeats" is the MBConv repeat count of that stage.
        # kernel_size, in_channel, out_channel, exp_ratio, strides, use_SE, drop_connect_rate, repeats
        default_cnf = [[3, 32, 16, 1, 1, True, drop_connect_rate, 1],
                       [3, 16, 24, 6, 2, True, drop_connect_rate, 2],
                       [5, 24, 40, 6, 2, True, drop_connect_rate, 2],
                       [3, 40, 80, 6, 2, True, drop_connect_rate, 3],
                       [5, 80, 112, 6, 1, True, drop_connect_rate, 3],
                       [5, 112, 192, 6, 2, True, drop_connect_rate, 4],
                       [3, 192, 320, 6, 1, True, drop_connect_rate, 1]]

        def round_repeats(repeats):
            """Round number of repeats based on depth multiplier."""
            # Scale the stage depth by depth_coefficient, rounding up
            # (math.ceil), then convert back to int.
            return int(math.ceil(depth_coefficient * repeats))

        if block is None:
            block = InvertedResidual

        if norm_layer is None:
            # Default normalization layer with TF-style epsilon/momentum.
            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)

        # partial() pre-binds width_coefficient, turning
        # InvertedResidualConfig.adjust_channels into a one-argument
        # channel scaler used throughout this constructor.
        adjust_channels = partial(InvertedResidualConfig.adjust_channels,
                                  width_coefficient=width_coefficient)

        # build inverted_residual_setting
        # Same trick for the config class: width_coefficient is pre-bound,
        # so bneck_conf(*cnf, index) fills in the remaining fields.
        bneck_conf = partial(InvertedResidualConfig,
                             width_coefficient=width_coefficient)

        # b counts MBConv blocks created so far; it drives the linear ramp
        # of each block's drop-path rate (see cnf[-1] update below).
        b = 0
        # Total number of MBConv blocks in the whole (depth-scaled) model,
        # obtained by summing the scaled "repeats" column of every stage.
        num_blocks = float(sum(round_repeats(i[-1]) for i in default_cnf))
        # Full list of InvertedResidualConfig entries, one per MBConv block.
        inverted_residual_setting = []
        # Outer loop: stages of the B0 table; inner loop: repeats per stage.
        for stage, args in enumerate(default_cnf):
            # Copy so the per-repeat mutations below don't corrupt default_cnf.
            cnf = copy.copy(args)
            # cnf.pop(-1) removes the "repeats" column and round_repeats
            # scales it by the depth multiplier (ceil). The loop then emits
            # one config entry per repetition of this stage's MBConv.
            for i in range(round_repeats(cnf.pop(-1))):
                if i > 0:
                    # Every repeat after the first keeps the spatial size
                    # (stride 1) and consumes the stage's output channels,
                    # so input_channel is set equal to output_channel.
                    # strides equal 1 except first cnf
                    cnf[-3] = 1  # strides
                    cnf[1] = cnf[2]  # input_channel equal output_channel

                # Linearly increasing stochastic-depth rate for this block:
                # base_rate * (blocks created so far) / (total blocks).
                cnf[-1] = args[-2] * b / num_blocks  # update dropout ratio
                # Block name: stage number + letter for the repeat,
                # e.g. "1a" = stage 1, first repeat; "2b" = stage 2, second.
                index = str(stage + 1) + chr(i + 97)  # 1a, 2a, 2b, ...
                # Append the finished per-block config (width_coefficient is
                # already bound inside bneck_conf).
                inverted_residual_setting.append(bneck_conf(*cnf, index))
                # Advance the global block counter used by the drop-rate ramp.
                b += 1



        # create layers — configs are complete, now build the actual modules
        # into an OrderedDict so nn.Sequential preserves names and order.
        layers = OrderedDict()

        # first conv
        # Stem: an ordinary 3x3 conv with stride 2, channels width-scaled
        # from 32. Stored under the key "stem_conv".
        layers.update({"stem_conv": ConvBNActivation(in_planes=3,
                                                     out_planes=adjust_channels(32),
                                                     kernel_size=3,
                                                     stride=2,
                                                     norm_layer=norm_layer)})

        # building inverted residual blocks
        # One block per config entry, keyed by its index ("1a", "2a", ...).
        for cnf in inverted_residual_setting:
            layers.update({cnf.index: block(cnf, norm_layer)})

        # build top
        # Head: final 1x1 conv before pooling and the classifier.
        last_conv_input_c = inverted_residual_setting[-1].out_c
        last_conv_output_c = adjust_channels(1280)
        layers.update({"top": ConvBNActivation(in_planes=last_conv_input_c,
                                               out_planes=last_conv_output_c,
                                               kernel_size=1,
                                               norm_layer=norm_layer)})

        # Feature extractor = stem conv + all MBConv blocks + head conv.
        self.features = nn.Sequential(layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # Classifier: optional dropout followed by a single linear layer.
        classifier = []
        if dropout_rate > 0:
            classifier.append(nn.Dropout(p=dropout_rate, inplace=True))
        classifier.append(nn.Linear(last_conv_output_c, num_classes))
        self.classifier = nn.Sequential(*classifier)

        # initial weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Features -> global average pool -> flatten -> classifier."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)


def efficientnet_b0(num_classes=1000):
    """EfficientNet-B0 (expected input resolution: 224x224)."""
    scaling = dict(width_coefficient=1.0, depth_coefficient=1.0, dropout_rate=0.2)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b1(num_classes=1000):
    """EfficientNet-B1 (expected input resolution: 240x240)."""
    scaling = dict(width_coefficient=1.0, depth_coefficient=1.1, dropout_rate=0.2)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b2(num_classes=1000):
    """EfficientNet-B2 (expected input resolution: 260x260)."""
    scaling = dict(width_coefficient=1.1, depth_coefficient=1.2, dropout_rate=0.3)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b3(num_classes=1000):
    """EfficientNet-B3 (expected input resolution: 300x300)."""
    scaling = dict(width_coefficient=1.2, depth_coefficient=1.4, dropout_rate=0.3)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b4(num_classes=1000):
    """EfficientNet-B4 (expected input resolution: 380x380)."""
    scaling = dict(width_coefficient=1.4, depth_coefficient=1.8, dropout_rate=0.4)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b5(num_classes=1000):
    """EfficientNet-B5 (expected input resolution: 456x456)."""
    scaling = dict(width_coefficient=1.6, depth_coefficient=2.2, dropout_rate=0.4)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b6(num_classes=1000):
    """EfficientNet-B6 (expected input resolution: 528x528)."""
    scaling = dict(width_coefficient=1.8, depth_coefficient=2.6, dropout_rate=0.5)
    return EfficientNet(num_classes=num_classes, **scaling)


def efficientnet_b7(num_classes=1000):
    """EfficientNet-B7 (expected input resolution: 600x600)."""
    scaling = dict(width_coefficient=2.0, depth_coefficient=3.1, dropout_rate=0.5)
    return EfficientNet(num_classes=num_classes, **scaling)

train.py

用的EfficientNetB0进行的训练

python 复制代码
import os
import math
import argparse

import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
import torch.optim.lr_scheduler as lr_scheduler

from model import efficientnet_b0 as create_model
from my_dataset import MyDataSet
from utils import read_split_data, train_one_epoch, evaluate


def main(args):
    """Train EfficientNet-B0 on the dataset at ``args.data_path``.

    Loads optional pretrained weights, optionally freezes all layers except
    the last conv and the classifier, trains with SGD + cosine LR schedule,
    logs to TensorBoard, and saves a checkpoint after every epoch.
    """
    # Fall back to CPU when CUDA is unavailable.
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    # exist_ok=True replaces the "os.path.exists(...) is False" check.
    os.makedirs("./weights", exist_ok=True)

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    # Expected input resolution for each EfficientNet variant.
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model]),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(img_size[num_model]),
                                   transforms.CenterCrop(img_size[num_model]),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Training dataset.
    train_dataset = MyDataSet(images_path=train_images_path,
                              images_class=train_images_label,
                              transform=data_transform["train"])

    # Validation dataset.
    val_dataset = MyDataSet(images_path=val_images_path,
                            images_class=val_images_label,
                            transform=data_transform["val"])

    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_dataset.collate_fn)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_dataset.collate_fn)

    # Load pretrained weights when a path is provided.
    model = create_model(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            model_state = model.state_dict()
            # Keep only tensors that exist in the model with a matching size.
            # Checking "k in model_state" first avoids a KeyError when the
            # checkpoint contains keys the model doesn't have.
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if k in model_state and model_state[k].numel() == v.numel()}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze everything except the last conv ("features.top")
    # and the classifier head.
    if args.freeze_layers:
        for name, para in model.named_parameters():
            if ("features.top" not in name) and ("classifier" not in name):
                para.requires_grad_(False)
            else:
                print("training {}".format(name))

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)
    # Cosine LR schedule, see https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        # validate
        acc = evaluate(model=model,
                       data_loader=val_loader,
                       device=device)
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_classes', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lrf', type=float, default=0.01)

    # Dataset root directory.
    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    parser.add_argument('--data-path', type=str,
                        default="../../data_set/flower_data/flower_photos")

    # download model weights
    # link: https://pan.baidu.com/s/1ouX0UmjCsmSx3ZrqXbowjw  password: 090i
    parser.add_argument('--weights', type=str, default='./efficientnetb0-pre.pth',
                        help='initial weights path')
    # BUG FIX: argparse's type=bool treats ANY non-empty string (including
    # "False") as True, so "--freeze-layers False" silently froze layers.
    # Parse the string explicitly instead; bare booleans still work.
    parser.add_argument('--freeze-layers',
                        type=lambda s: str(s).lower() in ('true', '1', 'yes'),
                        default=False)
    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')

    opt = parser.parse_args()

    main(opt)

训练结果:

python 复制代码
Namespace(num_classes=5, epochs=10, batch_size=16, lr=0.01, lrf=0.01, data_path='../../data_set/flower_data/flower_photos', weights='./efficientnetb0-pre.pth', freeze_layers=False, device='cuda:0')
Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/
3670 images were found in the dataset.
2939 images for training.
731 images for validation.
Using 8 dataloader workers every process
_IncompatibleKeys(missing_keys=['classifier.1.weight', 'classifier.1.bias'], unexpected_keys=[])
[epoch 0] mean loss 0.62: 100%|██████████| 184/184 [00:42<00:00,  4.34it/s] 
100%|██████████| 46/46 [00:19<00:00,  2.35it/s]
[epoch 0] accuracy: 0.904
[epoch 1] mean loss 0.342: 100%|██████████| 184/184 [00:40<00:00,  4.57it/s]
100%|██████████| 46/46 [00:18<00:00,  2.42it/s]
[epoch 1] accuracy: 0.917
[epoch 2] mean loss 0.305: 100%|██████████| 184/184 [00:39<00:00,  4.64it/s]
100%|██████████| 46/46 [00:18<00:00,  2.42it/s]
[epoch 2] accuracy: 0.925
[epoch 3] mean loss 0.259: 100%|██████████| 184/184 [00:39<00:00,  4.64it/s]
100%|██████████| 46/46 [00:18<00:00,  2.42it/s]
[epoch 3] accuracy: 0.917
[epoch 4] mean loss 0.22: 100%|██████████| 184/184 [00:40<00:00,  4.59it/s] 
100%|██████████| 46/46 [00:19<00:00,  2.33it/s]
[epoch 4] accuracy: 0.941
[epoch 5] mean loss 0.187: 100%|██████████| 184/184 [00:40<00:00,  4.51it/s]
100%|██████████| 46/46 [00:19<00:00,  2.33it/s]
[epoch 5] accuracy: 0.948
[epoch 6] mean loss 0.165: 100%|██████████| 184/184 [00:40<00:00,  4.50it/s]
100%|██████████| 46/46 [00:19<00:00,  2.39it/s]
[epoch 6] accuracy: 0.952
[epoch 7] mean loss 0.16: 100%|██████████| 184/184 [00:40<00:00,  4.60it/s]
100%|██████████| 46/46 [00:19<00:00,  2.34it/s]
[epoch 7] accuracy: 0.953
[epoch 8] mean loss 0.151: 100%|██████████| 184/184 [00:40<00:00,  4.56it/s]
100%|██████████| 46/46 [00:19<00:00,  2.38it/s]
[epoch 8] accuracy: 0.948
[epoch 9] mean loss 0.134: 100%|██████████| 184/184 [00:40<00:00,  4.54it/s]
100%|██████████| 46/46 [00:19<00:00,  2.30it/s]
[epoch 9] accuracy: 0.953

用到的自定义数据集

my_dataset.py

python 复制代码
from PIL import Image
import torch
from torch.utils.data import Dataset


class MyDataSet(Dataset):
    """Custom dataset backed by parallel lists of image paths and class indices."""

    def __init__(self, images_path: list, images_class: list, transform=None):
        self.images_path = images_path
        self.images_class = images_class
        self.transform = transform

    def __len__(self):
        return len(self.images_path)

    def __getitem__(self, item):
        path = self.images_path[item]
        img = Image.open(path)
        # Only RGB images are accepted ('L' would be a grayscale image).
        if img.mode != 'RGB':
            raise ValueError("image: {} isn't RGB mode.".format(path))
        label = self.images_class[item]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    @staticmethod
    def collate_fn(batch):
        # See the official default_collate for reference:
        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
        images, labels = zip(*batch)
        return torch.stack(images, dim=0), torch.as_tensor(labels)

utils.py

python 复制代码
import os
import sys
import json
import pickle
import random

import torch
from tqdm import tqdm

import matplotlib.pyplot as plt


def read_split_data(root: str, val_rate: float = 0.2):
    """Scan ``root`` (one sub-folder per class) and split into train/val lists.

    Returns ``(train_paths, train_labels, val_paths, val_labels)``; labels are
    integer indices assigned by sorted class-folder name. As a side effect,
    writes the index->class mapping to ./class_indices.json.
    """
    random.seed(0)  # reproducible split
    assert os.path.exists(root), "dataset root: {} does not exist.".format(root)

    # Each sub-directory of root is one class.
    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
    # Sort so the class->index mapping is identical across platforms.
    flower_class.sort()
    # Class name -> numeric index; the inverse mapping is dumped to JSON.
    class_indices = dict((k, v) for v, k in enumerate(flower_class))
    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    train_images_path = []   # all training image paths
    train_images_label = []  # matching class indices
    val_images_path = []     # all validation image paths
    val_images_label = []    # matching class indices
    every_class_num = []     # per-class sample counts
    supported = [".jpg", ".JPG", ".png", ".PNG"]  # accepted file extensions
    for cla in flower_class:
        cla_path = os.path.join(root, cla)
        # Collect all supported image files of this class.
        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
                  if os.path.splitext(i)[-1] in supported]
        # Sort for a stable order across platforms.
        images.sort()
        image_class = class_indices[cla]
        every_class_num.append(len(images))
        # Randomly sample validation images at the requested rate.
        # Stored as a set: the membership test in the loop below would
        # otherwise be O(n) per image (O(n^2) per class overall).
        val_path = set(random.sample(images, k=int(len(images) * val_rate)))

        for img_path in images:
            if img_path in val_path:  # sampled for validation
                val_images_path.append(img_path)
                val_images_label.append(image_class)
            else:  # everything else goes to training
                train_images_path.append(img_path)
                train_images_label.append(image_class)

    print("{} images were found in the dataset.".format(sum(every_class_num)))
    print("{} images for training.".format(len(train_images_path)))
    print("{} images for validation.".format(len(val_images_path)))
    assert len(train_images_path) > 0, "number of training images must greater than 0."
    assert len(val_images_path) > 0, "number of validation images must greater than 0."

    plot_image = False
    if plot_image:
        # Bar chart of per-class sample counts (debugging aid, off by default).
        plt.bar(range(len(flower_class)), every_class_num, align='center')
        plt.xticks(range(len(flower_class)), flower_class)
        # Numeric label above each bar.
        for i, v in enumerate(every_class_num):
            plt.text(x=i, y=v + 5, s=str(v), ha='center')
        plt.xlabel('image class')
        plt.ylabel('number of images')
        plt.title('flower class distribution')
        plt.show()

    return train_images_path, train_images_label, val_images_path, val_images_label


def plot_data_loader_image(data_loader):
    """Show up to 4 images per batch with their class names (debugging helper)."""
    batch_size = data_loader.batch_size
    plot_num = min(batch_size, 4)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), json_path + " does not exist."
    # Context manager guarantees the handle is closed
    # (the original left the file open).
    with open(json_path, 'r') as json_file:
        class_indices = json.load(json_file)

    for data in data_loader:
        images, labels = data
        for i in range(plot_num):
            # [C, H, W] -> [H, W, C]
            img = images[i].numpy().transpose(1, 2, 0)
            # Undo the ImageNet normalization, back to 0-255 pixel values.
            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255
            label = labels[i].item()
            plt.subplot(1, plot_num, i+1)
            plt.xlabel(class_indices[str(label)])
            plt.xticks([])  # hide x-axis ticks
            plt.yticks([])  # hide y-axis ticks
            plt.imshow(img.astype('uint8'))
        plt.show()


def write_pickle(list_info: list, file_name: str):
    """Serialize ``list_info`` to ``file_name`` using pickle."""
    with open(file_name, "wb") as fh:
        pickle.dump(list_info, fh)


def read_pickle(file_name: str) -> list:
    """Deserialize and return the pickled object stored in ``file_name``."""
    with open(file_name, "rb") as fh:
        return pickle.load(fh)


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one training epoch and return the running mean loss as a float."""
    model.train()
    loss_function = torch.nn.CrossEntropyLoss()
    mean_loss = torch.zeros(1).to(device)
    optimizer.zero_grad()

    data_loader = tqdm(data_loader, file=sys.stdout)

    for step, (images, labels) in enumerate(data_loader):
        pred = model(images.to(device))

        loss = loss_function(pred, labels.to(device))
        loss.backward()
        # Incremental running mean over the steps seen so far.
        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)

        data_loader.desc = "[epoch {}] mean loss {}".format(epoch, round(mean_loss.item(), 3))

        # Abort the whole run on NaN/Inf loss.
        if not torch.isfinite(loss):
            print('WARNING: non-finite loss, ending training ', loss)
            sys.exit(1)

        optimizer.step()
        optimizer.zero_grad()

    return mean_loss.item()


@torch.no_grad()
def evaluate(model, data_loader, device):
    """Return top-1 accuracy of *model* over all samples in *data_loader*."""
    model.eval()

    # total number of validation samples
    total_num = len(data_loader.dataset)

    # accumulator for the number of correctly predicted samples
    correct = torch.zeros(1).to(device)

    for images, labels in tqdm(data_loader, file=sys.stdout):
        logits = model(images.to(device))
        predicted = logits.argmax(dim=1)
        correct += (predicted == labels.to(device)).sum()

    return correct.item() / total_num

predict.py

python 复制代码
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import efficientnet_b0 as create_model


def main():
    """Run single-image inference with EfficientNet-B0.

    Loads ``./test.jpg``, applies the variant-specific resize/crop and
    ImageNet normalization, loads trained weights, and prints/plots the
    predicted class probabilities.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Native input resolution for each EfficientNet variant.
    img_size = {"B0": 224,
                "B1": 240,
                "B2": 260,
                "B3": 300,
                "B4": 380,
                "B5": 456,
                "B6": 528,
                "B7": 600}
    num_model = "B0"

    data_transform = transforms.Compose(
        [transforms.Resize(img_size[num_model]),
         transforms.CenterCrop(img_size[num_model]),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "./test.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    # Force 3-channel RGB so grayscale/RGBA inputs don't break the
    # 3-channel Normalize transform.
    img = Image.open(img_path).convert('RGB')
    plt.imshow(img)
    img = data_transform(img)
    # expand batch dimension: [C, H, W] -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = create_model(num_classes=5).to(device)
    # load model weights
    model_weight_path = "./weights/model-9.pth"
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()

预测结果:

text 复制代码
class: daisy        prob: 1.32e-06
class: dandelion    prob: 6.14e-07
class: roses        prob: 2.31e-06
class: sunflowers   prob: 1.64e-06
class: tulips       prob: 1.0

trans_weights_to_pytorch.py

作者提供的模型预训练权重是tensorflow版本的,对此进行了权重格式修改

python 复制代码
import numpy as np
import torch
import tensorflow as tf

# Fail fast on old TF: the Keras EfficientNet application needs TF >= 2.4.0.
# NOTE: compare numeric version components, not strings — a lexicographic
# string comparison would wrongly reject e.g. "2.10.0" (< "2.4.0" as text).
assert tuple(int(p) for p in tf.version.VERSION.split(".")[:3] if p.isdigit()) >= (2, 4, 0), \
    "version of tf must be greater than or equal to 2.4.0"


def main():
    """Convert Keras EfficientNetB0 pretrained weights to a PyTorch state dict.

    Builds the reference tf.keras model (downloading its ImageNet weights),
    renames each variable to the PyTorch module layout used by model.py,
    transposes conv/linear kernels into PyTorch memory order, and saves the
    result with ``torch.save`` to ``save_path``.
    """
    # save pytorch weights path
    save_path = "./efficientnetb0.pth"

    # create keras model and download weights
    # EfficientNetB0, EfficientNetB1, EfficientNetB2, ...
    m = tf.keras.applications.EfficientNetB0()

    weights_dict = dict()
    # Skip the first 3 variables — presumably the input preprocessing
    # (rescaling/normalization) layer, which has no PyTorch counterpart;
    # TODO confirm against m.weights ordering.
    weights = m.weights[3:]  # delete norm weights
    for weight in weights:
        name = weight.name
        data = weight.numpy()

        # --- stem: conv kernel + BN parameters -------------------------
        if "stem_conv/kernel:0" == name:
            torch_name = "features.stem_conv.0.weight"
            # Keras conv kernels are (kh, kw, in, out); PyTorch wants
            # (out, in, kh, kw).
            weights_dict[torch_name] = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)
        elif "stem_bn/gamma:0" == name:
            torch_name = "features.stem_conv.1.weight"
            weights_dict[torch_name] = data
        elif "stem_bn/beta:0" == name:
            torch_name = "features.stem_conv.1.bias"
            weights_dict[torch_name] = data
        elif "stem_bn/moving_mean:0" == name:
            torch_name = "features.stem_conv.1.running_mean"
            weights_dict[torch_name] = data
        elif "stem_bn/moving_variance:0" == name:
            torch_name = "features.stem_conv.1.running_var"
            weights_dict[torch_name] = data
        # --- MBConv blocks: mapped via the lookup table below ----------
        elif "block" in name:
            name = name[5:]  # delete "block" word
            block_index = name[:2]  # 1a, 2a, ...
            name = name[3:]  # delete block_index and "_"
            torch_prefix = "features.{}.block.".format(block_index)

            # Keras variable suffix -> PyTorch parameter suffix within a block.
            trans_dict = {"expand_conv/kernel:0": "expand_conv.0.weight",
                          "expand_bn/gamma:0": "expand_conv.1.weight",
                          "expand_bn/beta:0": "expand_conv.1.bias",
                          "expand_bn/moving_mean:0": "expand_conv.1.running_mean",
                          "expand_bn/moving_variance:0": "expand_conv.1.running_var",
                          "dwconv/depthwise_kernel:0": "dwconv.0.weight",
                          "bn/gamma:0": "dwconv.1.weight",
                          "bn/beta:0": "dwconv.1.bias",
                          "bn/moving_mean:0": "dwconv.1.running_mean",
                          "bn/moving_variance:0": "dwconv.1.running_var",
                          "se_reduce/kernel:0": "se.fc1.weight",
                          "se_reduce/bias:0": "se.fc1.bias",
                          "se_expand/kernel:0": "se.fc2.weight",
                          "se_expand/bias:0": "se.fc2.bias",
                          "project_conv/kernel:0": "project_conv.0.weight",
                          "project_bn/gamma:0": "project_conv.1.weight",
                          "project_bn/beta:0": "project_conv.1.bias",
                          "project_bn/moving_mean:0": "project_conv.1.running_mean",
                          "project_bn/moving_variance:0": "project_conv.1.running_var"}

            assert name in trans_dict, "key '{}' not in trans_dict".format(name)
            torch_postfix = trans_dict[name]
            torch_name = torch_prefix + torch_postfix
            if torch_postfix in ["expand_conv.0.weight", "se.fc1.weight", "se.fc2.weight", "project_conv.0.weight"]:
                # Regular conv kernels: (kh, kw, in, out) -> (out, in, kh, kw).
                data = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)
            elif torch_postfix == "dwconv.0.weight":
                # Depthwise kernels use a different permutation —
                # (kh, kw, in, mult) -> (in, mult, kh, kw).
                data = np.transpose(data, (2, 3, 0, 1)).astype(np.float32)
            weights_dict[torch_name] = data
        # --- head: top conv + BN ---------------------------------------
        elif "top_conv/kernel:0" == name:
            torch_name = "features.top.0.weight"
            weights_dict[torch_name] = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)
        elif "top_bn/gamma:0" == name:
            torch_name = "features.top.1.weight"
            weights_dict[torch_name] = data
        elif "top_bn/beta:0" == name:
            torch_name = "features.top.1.bias"
            weights_dict[torch_name] = data
        elif "top_bn/moving_mean:0" == name:
            torch_name = "features.top.1.running_mean"
            weights_dict[torch_name] = data
        elif "top_bn/moving_variance:0" == name:
            torch_name = "features.top.1.running_var"
            weights_dict[torch_name] = data
        # --- classifier head: dense kernel is (in, out) -> (out, in) ---
        elif "predictions/kernel:0" == name:
            torch_name = "classifier.1.weight"
            weights_dict[torch_name] = np.transpose(data, (1, 0)).astype(np.float32)
        elif "predictions/bias:0" == name:
            torch_name = "classifier.1.bias"
            weights_dict[torch_name] = data
        else:
            # Any unmapped variable is a hard error so mismatches are caught
            # at conversion time rather than at load_state_dict time.
            raise KeyError("no match key '{}'".format(name))

    # Convert every numpy array to a torch tensor before saving.
    for k, v in weights_dict.items():
        weights_dict[k] = torch.as_tensor(v)

    torch.save(weights_dict, save_path)
    print("Conversion complete.")


if __name__ == '__main__':
    main()
相关推荐
AI视觉网奇18 分钟前
人脸生成3d模型 Era3D
人工智能·计算机视觉
call me by ur name21 分钟前
VLM--CLIP作分类任务的损失函数
人工智能·机器学习·分类
汪洪墩23 分钟前
【Mars3d】设置backgroundImage、map.scene.skyBox、backgroundImage来回切换
开发语言·javascript·python·ecmascript·webgl·cesium
吃个糖糖35 分钟前
34 Opencv 自定义角点检测
人工智能·opencv·计算机视觉
禁默36 分钟前
2024年图像处理、多媒体技术与机器学习
图像处理·人工智能·microsoft
KeepThinking!42 分钟前
YOLO-World:Real-Time Open-Vocabulary Object Detection
人工智能·yolo·目标检测·多模态
AIGCmagic社区1 小时前
AI多模态技术介绍:理解多模态大语言模型的原理
人工智能·语言模型·自然语言处理
图王大胜1 小时前
模型 双螺旋(通俗解读)
人工智能·管理·系统科学·认知科学·生命科学·战略规划·通识科学
机器之心1 小时前
AAAI 2025|时间序列演进也是种扩散过程?基于移动自回归的时序扩散预测模型
人工智能·后端
dwjf3211 小时前
机器学习(四)-回归模型评估指标
人工智能·机器学习·线性回归