InceptionV4 Pytorch 实现图片分类

一、目录结构

训练过程:

  1. 在训练集目录(dataset/dataset_train)和测试集目录(dataset/dataset_test)下,按类别建立子目录(如 f1、f2、f3),并放入对应类别的图片
  2. 运行模型训练代码,生成模型参数文件(保存在 Model_result/My_Inception_v4/ 目录下)
  3. 运行分类测试代码,设置待验证的图片路径,加载模型参数文件得出分类结果

二、模型构建代码

Python 代码(保存为 MyInceptionV4.py,供后面的训练和测试脚本导入):
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicConv2d(nn.Module):
    """Convolution followed by batch normalization and a ReLU activation.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (for example
            ``kernel_size``, ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``."""
        normalized = self.bn(self.conv(x))
        return F.relu(normalized)


class InceptionA(nn.Module):
    """Inception-A block: four parallel branches joined along the channel axis.

    Every branch ends in 96 channels, so the output has 4 * 96 = 384 channels.
    All layers use stride 1 with matching padding, so the spatial size of the
    input is preserved.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted for a uniform constructor signature; this block
            does not read it.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Branch 1: 3x3 average pool -> 1x1 conv (96)
        self.b1_1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
        self.b1_2 = BasicConv2d(in_channels, 96, kernel_size=1)

        # Branch 2: 1x1 conv (96)
        self.b2 = BasicConv2d(in_channels, 96, kernel_size=1)

        # Branch 3: 1x1 conv (64) -> 3x3 conv (96)
        self.b3_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.b3_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)

        # Branch 4: 1x1 conv (64) -> 3x3 conv (96) -> 3x3 conv (96)
        self.b4_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.b4_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
        self.b4_3 = BasicConv2d(96, 96, kernel_size=3, padding=1)

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them on dim 1."""
        pooled = self.b1_2(self.b1_1(x))
        pointwise = self.b2(x)
        single_3x3 = self.b3_2(self.b3_1(x))
        double_3x3 = self.b4_3(self.b4_2(self.b4_1(x)))

        return torch.cat((pooled, pointwise, single_3x3, double_3x3), dim=1)


class InceptionB(nn.Module):
    """Inception-B block: four parallel branches joined along the channel axis.

    The branches end in 128, 384, 256 and 256 channels, so the output has
    1024 channels. All layers use stride 1 with matching padding, so the
    spatial size of the input is preserved.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted for a uniform constructor signature; this block
            does not read it.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Branch 1: 3x3 average pool -> 1x1 conv (128)
        self.b1_1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
        self.b1_2 = BasicConv2d(in_channels, 128, kernel_size=1)

        # Branch 2: 1x1 conv (384)
        self.b2 = BasicConv2d(in_channels, 384, kernel_size=1)

        # Branch 3: 1x1 conv (192) -> 1x7 conv (224) -> 1x7 conv (256)
        # NOTE(review): both convs here use a (1, 7) kernel; some reference
        # implementations use (7, 1) for the second one -- confirm the intent.
        self.b3_1 = BasicConv2d(in_channels, 192, kernel_size=1)
        self.b3_2 = BasicConv2d(192, 224, kernel_size=(1, 7), padding=(0, 3))
        self.b3_3 = BasicConv2d(224, 256, kernel_size=(1, 7), padding=(0, 3))

        # Branch 4: 1x1 conv (192) -> 1x7 (192) -> 7x1 (224) -> 1x7 (224) -> 7x1 (256)
        self.b4_1 = BasicConv2d(in_channels, 192, kernel_size=1, stride=1)
        self.b4_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3))
        self.b4_3 = BasicConv2d(192, 224, kernel_size=(7, 1), padding=(3, 0))
        self.b4_4 = BasicConv2d(224, 224, kernel_size=(1, 7), padding=(0, 3))
        self.b4_5 = BasicConv2d(224, 256, kernel_size=(7, 1), padding=(3, 0))

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them on dim 1."""
        pooled = self.b1_2(self.b1_1(x))
        pointwise = self.b2(x)

        short_factorized = self.b3_1(x)
        short_factorized = self.b3_2(short_factorized)
        short_factorized = self.b3_3(short_factorized)

        long_factorized = self.b4_1(x)
        long_factorized = self.b4_2(long_factorized)
        long_factorized = self.b4_3(long_factorized)
        long_factorized = self.b4_4(long_factorized)
        long_factorized = self.b4_5(long_factorized)

        return torch.cat(
            (pooled, pointwise, short_factorized, long_factorized), dim=1
        )


class InceptionC(nn.Module):
    """Inception-C block: four branches, two of which fork into two outputs.

    Six 256-channel tensors are concatenated along the channel axis, so the
    output has 6 * 256 = 1536 channels. All layers use stride 1 with matching
    padding, so the spatial size of the input is preserved.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted for a uniform constructor signature; this block
            does not read it.
    """

    def __init__(self, in_channels, out_channels):
        super(InceptionC, self).__init__()
        # Branch 1: 3x3 average pool -> 1x1 conv (256)
        self.b1_1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=1)
        self.b1_2 = BasicConv2d(in_channels, 256, kernel_size=1)

        # Branch 2: 1x1 conv (256)
        self.b2 = BasicConv2d(in_channels, 256, kernel_size=1)

        # Branch 3: 1x1 conv (384) -> fork into 1x3 conv (256) & 3x1 conv (256)
        self.b3_1 = BasicConv2d(in_channels, 384, kernel_size=1)
        self.b3_2_1 = BasicConv2d(384, 256, kernel_size=(1, 3), padding=(0, 1))
        self.b3_2_2 = BasicConv2d(384, 256, kernel_size=(3, 1), padding=(1, 0))

        # Branch 4: 1x1 conv (384) -> 1x3 conv (448) -> 3x1 conv (512)
        #           -> fork into 3x1 conv (256) & 1x3 conv (256)
        self.b4_1 = BasicConv2d(in_channels, 384, kernel_size=1, stride=1)
        self.b4_2 = BasicConv2d(384, 448, kernel_size=(1, 3), padding=(0, 1))
        self.b4_3 = BasicConv2d(448, 512, kernel_size=(3, 1), padding=(1, 0))
        self.b4_4_1 = BasicConv2d(512, 256, kernel_size=(3, 1), padding=(1, 0))
        self.b4_4_2 = BasicConv2d(512, 256, kernel_size=(1, 3), padding=(0, 1))

    def forward(self, x):
        """Run all branches on ``x`` and concatenate the six outputs on dim 1."""
        y1 = self.b1_2(self.b1_1(x))
        y2 = self.b2(x)

        # The shared prefix of each forked branch is evaluated exactly once.
        # Evaluating it once per fork output would double the compute and
        # update the BatchNorm running statistics twice per training step.
        b3_shared = self.b3_1(x)
        y3_1 = self.b3_2_1(b3_shared)
        y3_2 = self.b3_2_2(b3_shared)

        b4_shared = self.b4_3(self.b4_2(self.b4_1(x)))
        y4_1 = self.b4_4_1(b4_shared)
        y4_2 = self.b4_4_2(b4_shared)

        outputsC = [y1, y2, y3_1, y3_2, y4_1, y4_2]
        return torch.cat(outputsC, 1)


class ReductionA(nn.Module):
    """Reduction-A block: three stride-2 branches joined along the channel axis.

    Each branch ends in an unpadded 3x3 operation with stride 2, which shrinks
    the spatial size. The output has ``in_channels + n + m`` channels.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted for a uniform constructor signature; this block
            does not read it.
        k: Output channels of the 1x1 conv in branch 3.
        l: Output channels of the padded 3x3 conv in branch 3.
        m: Output channels of the stride-2 3x3 conv in branch 3.
        n: Output channels of the stride-2 3x3 conv in branch 2.
    """

    def __init__(self, in_channels, out_channels, k, l, m, n):
        super().__init__()
        # Branch 1: 3x3 max pool (stride 2, no padding)
        self.b1 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Branch 2: 3x3 conv (n channels, stride 2, no padding)
        self.b2 = BasicConv2d(in_channels, n, kernel_size=3, stride=2)

        # Branch 3: 1x1 conv (k) -> 3x3 conv (l) -> 3x3 conv (m, stride 2, no padding)
        self.b3_1 = BasicConv2d(in_channels, k, kernel_size=1)
        self.b3_2 = BasicConv2d(k, l, kernel_size=3, padding=1)
        self.b3_3 = BasicConv2d(l, m, kernel_size=3, stride=2)

    def forward(self, x):
        """Run the three branches on ``x`` and concatenate them on dim 1."""
        pooled = self.b1(x)
        strided = self.b2(x)

        stacked = self.b3_1(x)
        stacked = self.b3_2(stacked)
        stacked = self.b3_3(stacked)

        return torch.cat((pooled, strided, stacked), dim=1)


class ReductionB(nn.Module):
    """Reduction-B block: three stride-2 branches joined along the channel axis.

    Each branch ends in an unpadded 3x3 operation with stride 2, which shrinks
    the spatial size. The output has ``in_channels + 192 + 320`` channels.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Accepted for a uniform constructor signature; this block
            does not read it.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Branch 1: 3x3 max pool (stride 2, no padding)
        self.b1 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Branch 2: 1x1 conv (192) -> 3x3 conv (192, stride 2, no padding)
        self.b2_1 = BasicConv2d(in_channels, 192, kernel_size=1)
        self.b2_2 = BasicConv2d(192, 192, kernel_size=3, stride=2)

        # Branch 3: 1x1 conv (256) -> 1x7 conv (256) -> 7x1 conv (320)
        #           -> 3x3 conv (320, stride 2, no padding)
        self.b3_1 = BasicConv2d(in_channels, 256, kernel_size=1)
        self.b3_2 = BasicConv2d(256, 256, kernel_size=(1, 7), padding=(0, 3))
        self.b3_3 = BasicConv2d(256, 320, kernel_size=(7, 1), padding=(3, 0))
        self.b3_4 = BasicConv2d(320, 320, kernel_size=3, stride=2)

    def forward(self, x):
        """Run the three branches on ``x`` and concatenate them on dim 1."""
        pooled = self.b1(x)

        strided = self.b2_1(x)
        strided = self.b2_2(strided)

        factorized = self.b3_1(x)
        factorized = self.b3_2(factorized)
        factorized = self.b3_3(factorized)
        factorized = self.b3_4(factorized)

        return torch.cat((pooled, strided, factorized), dim=1)


class Stem(nn.Module):
    """Stem of the network: three conv layers followed by three fork/concat stages.

    The final stage concatenates a 192-channel conv output with a 192-channel
    max-pool output, so the stem produces 384 channels.

    Args:
        in_channels: Number of channels of the input image tensor.
        out_channels: Accepted for a uniform constructor signature; this block
            does not read it.
    """

    def __init__(self, in_channels, out_channels):
        super(Stem, self).__init__()
        # 3x3 conv (32, stride 2, no padding)
        self.conv1 = BasicConv2d(in_channels, 32, kernel_size=3, stride=2)
        # 3x3 conv (32, no padding)
        self.conv2 = BasicConv2d(32, 32, kernel_size=3)
        # 3x3 conv (64, padded)
        self.conv3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
        # Fork 1: 3x3 max pool (stride 2) & 3x3 conv (96, stride 2), both unpadded
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv4 = BasicConv2d(64, 96, kernel_size=3, stride=2)

        # Fork 2, path a: 1x1 conv (64) -> 3x3 conv (96, no padding)
        self.conv5_1_1 = BasicConv2d(160, 64, kernel_size=1)
        self.conv5_1_2 = BasicConv2d(64, 96, kernel_size=3)
        # Fork 2, path b: 1x1 conv (64) -> 7x1 conv (64) -> 1x7 conv (64)
        #                 -> 3x3 conv (96, no padding)
        self.conv5_2_1 = BasicConv2d(160, 64, kernel_size=1)
        self.conv5_2_2 = BasicConv2d(64, 64, kernel_size=(7, 1), padding=(3, 0))
        self.conv5_2_3 = BasicConv2d(64, 64, kernel_size=(1, 7), padding=(0, 3))
        self.conv5_2_4 = BasicConv2d(64, 96, kernel_size=3)

        # Fork 3: 3x3 conv (192, stride 2, no padding)
        self.conv6 = BasicConv2d(192, 192, kernel_size=3, stride=2)
        # Fork 3: 3x3 max pool (stride 2, no padding)
        self.maxpool6 = nn.MaxPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        """Return the stem feature map for the image batch ``x``."""
        # The conv1 -> conv2 -> conv3 trunk feeds both paths of the first fork.
        # It is evaluated once; running it per path would double the compute
        # and update the BatchNorm running statistics twice per training step.
        trunk = self.conv3(self.conv2(self.conv1(x)))
        y1_1 = self.maxpool4(trunk)
        y1_2 = self.conv4(trunk)
        y1 = torch.cat([y1_1, y1_2], 1)

        y2_1 = self.conv5_1_2(self.conv5_1_1(y1))
        y2_2 = self.conv5_2_4(self.conv5_2_3(self.conv5_2_2(self.conv5_2_1(y1))))
        y2 = torch.cat([y2_1, y2_2], 1)

        y3_1 = self.conv6(y2)
        y3_2 = self.maxpool6(y2)
        y3 = torch.cat([y3_1, y3_2], 1)

        return y3


class MyInceptionV4(nn.Module):
    """Inception-v4 style image classifier.

    Pipeline: Stem -> InceptionA x4 -> ReductionA -> InceptionB x7 ->
    ReductionB -> InceptionC x3 -> global average pool -> dropout -> linear.
    The forward pass returns raw class scores (no softmax is applied).

    Args:
        num_classes: Number of output classes of the final linear layer.
    """

    def __init__(self, num_classes):
        super(MyInceptionV4, self).__init__()
        self.stem = Stem(3, 384)
        # NOTE(review): a single InceptionA / InceptionB / InceptionC instance
        # is applied repeatedly in forward(), so the repeated blocks share one
        # set of weights. Using independent blocks would change the
        # state_dict layout and invalidate existing checkpoints, so it is
        # left as is here -- confirm whether the sharing is intended.
        self.icpA = InceptionA(384, 384)
        self.redA = ReductionA(384, 1024, 192, 224, 256, 384)
        self.icpB = InceptionB(1024, 1024)
        self.redB = ReductionB(1024, 1536)
        self.icpC = InceptionC(1536, 1536)
        # Global average pooling to 1x1. For a 299x299 input the final map is
        # 8x8, so this equals AvgPool2d(kernel_size=8), but it also keeps the
        # flattened size at 1536 for other input resolutions.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # nn.Dropout's p is the probability of zeroing an element, so a keep
        # probability of 0.8 corresponds to p=0.2 (p=0.8 would drop 80%).
        self.dropout = nn.Dropout(p=0.2)
        self.linear = nn.Linear(1536, out_features=num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for image batch ``x``."""
        # Stem
        out = self.stem(x)
        # InceptionA applied 4 times (same module instance each time)
        out = self.icpA(self.icpA(self.icpA(self.icpA(out))))
        # ReductionA
        out = self.redA(out)
        # InceptionB applied 7 times (same module instance each time)
        out = self.icpB(self.icpB(self.icpB(self.icpB(self.icpB(self.icpB(self.icpB(out)))))))
        # ReductionB
        out = self.redB(out)
        # InceptionC applied 3 times (same module instance each time)
        out = self.icpC(self.icpC(self.icpC(out)))
        # Global average pooling, then flatten to (batch, 1536)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        # Dropout (active only in training mode)
        out = self.dropout(out)
        # Linear classifier producing raw scores
        out = self.linear(out)

        return out

# def test():
#     x = torch.randn(20, 3, 299, 299)
#     net = MyInceptionV4(num_classes=5)
#     y = net(x)
#     print(y.size())
# test()

三、模型训练代码

Python 代码(训练脚本):
import time
import torch
from torch import nn
import os
from MyInceptionV4 import MyInceptionV4 as Model
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm



os.environ["PYTORCH_CUDA_ALLOC_CONF"]="expandable_segments:True,max_split_size_mb:64"


def WriteData(fname, *args):
    """Append one tab-separated record to the text file ``fname``.

    Every value in ``args`` is converted with ``str`` and followed by a tab
    (including the last one); the record is terminated by a newline.

    Args:
        fname: Path of the file to append to; it is created if missing.
        *args: Values that make up the record.
    """
    record = "".join(str(value) + "\t" for value in args)
    with open(fname, 'a+') as log_file:
        log_file.write(record)
        log_file.write("\n")

def train(dataloader, model, loss_fn, optimizer, device):
    """Train ``model`` for one epoch and return the mean per-sample loss.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``dataloader.dataset`` must
            support ``len`` (used for the progress print-out).
        model: Module to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar
            tensor. The epoch average assumes it is a per-batch mean (the
            default reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Batch losses weighted by batch size and divided by the number
        of samples seen; ``0.0`` if the loader yields no batches.
    """
    model.train()
    size = len(dataloader.dataset)
    total_loss = 0.0
    seen = 0

    time_start = time.time()
    # ``batch`` is the index of the current batch, not the batch size.
    for batch, (X, y) in enumerate(dataloader):
        # Move the batch to the target device.
        X, y = X.to(device), y.to(device)
        out = model(X)
        loss = loss_fn(out, y)

        # Back-propagate and update the model parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a detached value so the autograd history of every batch
        # is not kept alive for the whole epoch. Weighting by the batch size
        # keeps the average exact when the last batch is smaller.
        total_loss = total_loss + loss.detach() * len(X)
        seen += len(X)

        # Report progress every 10 batches.
        if batch % 10 == 0:
            batch_loss, current = loss.item(), batch * len(X)
            print(f"Current Batch Training Loss:{batch_loss:>5f} [{current:>5d}/{size:>5d}]")

    # Average over the samples actually processed in this epoch.
    avg_loss = float(total_loss) / seen if seen else 0.0

    time_end = time.time()
    print(f"train time:{(time_end - time_start):>0.2f} Avg Loss ={avg_loss:>5f}")
    return avg_loss

def validate(dataloader, model, loss_fn, device):
    """Evaluate ``model`` on ``dataloader`` and return accuracy and mean loss.

    Args:
        dataloader: Iterable of ``(X, y)`` batches.
        model: Module to evaluate; it is switched to evaluation mode.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor. The average assumes it is a per-batch mean (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple: ``(correct, test_loss)`` where ``correct`` is the fraction of
        samples whose arg-max prediction equals the label and ``test_loss`` is
        the mean per-sample loss. Both are ``0.0`` if the loader is empty.
    """
    # Switch to evaluation mode.
    model.eval()
    # Running totals over all samples seen.
    test_loss, correct, seen = 0.0, 0, 0
    # No parameters are updated during evaluation, so gradients are disabled.
    with torch.no_grad():
        for X, y in tqdm(dataloader):
            # Move the batch to the target device.
            X, y = X.to(device), y.to(device)
            pred = model(X)
            # Weight the batch loss by the batch size so the final average is
            # per sample, even when the last batch is smaller.
            test_loss += loss_fn(pred, y).item() * len(X)
            # Count correct predictions (classification).
            correct += (pred.argmax(1) == y).sum().item()
            seen += len(X)

    # Guard against an empty loader instead of dividing by zero.
    test_loss = test_loss / seen if seen else 0.0
    correct = correct / seen if seen else 0.0
    print(f"correct = {correct}, Test Error: \n Accuracy: {(100 * correct):>0.5f}%, Avg loss:{test_loss:>0.5f} \n")
    return correct, test_loss

if __name__ == '__main__':
    # ---- Load the datasets (one sub-directory per class under each root) ----
    train_root = "dataset/dataset_train"
    test_root = "dataset/dataset_test"

    train_tf = transforms.Compose([
        transforms.Resize((299, 299)),
        # Random vertical flip as augmentation.
        # NOTE(review): the original comment described a horizontal flip;
        # confirm which of the two was intended.
        transforms.RandomVerticalFlip(),
        transforms.ToTensor()  # convert the image to a tensor
    ])

    test_tf = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor()  # convert the image to a tensor
    ])

    batch_size = 32
    train_data = ImageFolder(root=train_root, transform=train_tf)
    train_loader = DataLoader(dataset=train_data, batch_size=batch_size, pin_memory=True, num_workers=4, shuffle=True)

    test_data = ImageFolder(root=test_root, transform=test_tf)
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size, pin_memory=True, num_workers=4, shuffle=True)

    # Train on the GPU when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # device = "cpu"
    print(f"Using {device} device")

    if hasattr(torch.cuda, 'empty_cache'):
        torch.cuda.empty_cache()
    model = Model(num_classes=5)
    model = model.to(device)

    # Loss function: cross entropy between class scores and labels.
    loss_fn = nn.CrossEntropyLoss()

    # Optimizer used to update the model parameters (Adam).
    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    epochs = 40
    # Best validation loss so far. Start at infinity so the first epoch always
    # produces a "best" checkpoint, whatever the scale of the loss is.
    loss_ = float("inf")
    save_root = "Model_result/My_Inception_v4/"

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_root, exist_ok=True)

    for t in range(epochs):
        print(f"Epoth {t+1}\n--------------------------------")
        avg_loss = train(train_loader, model, loss_fn, optimizer, device)

        val_accuracy, val_loss = validate(test_loader, model, loss_fn, device)
        # Append this epoch's metrics to the log file.
        WriteData(save_root + "My_Inception_v4.txt",
                  "epoch", t,
                  "train_loss", avg_loss,
                  "val_loss", val_loss,
                  "val_accuracy", val_accuracy)
        # Periodic snapshot every 5 epochs.
        if t % 5 == 0:
            torch.save(model.state_dict(), save_root+"My_Inception_v4_epoch" + str(t) + "_loss_" + str(avg_loss) + ".pth")

        # Always keep the most recent weights.
        torch.save(model.state_dict(), save_root + "My_Inception_v4_last.pth")

        # Keep the weights with the lowest validation loss seen so far.
        if val_loss < loss_:
            loss_ = val_loss
            torch.save(model.state_dict(), save_root + "My_Inception_v4_best.pth")

四、分类测试代码

Python 代码(单图测试脚本):
'''
单图测试
'''

import torch
from MyInceptionV4 import MyInceptionV4
from PIL import Image
import torchvision.transforms as transforms
import os


if __name__ == '__main__':
    # Classify a single image with the best checkpoint from training.
    img_path = r"dataset/dataset_train/f3/image_00581.jpg"
    train_root = "dataset/dataset_train"

    test_tf = transforms.Compose([
        transforms.Resize(((299, 299))),
        transforms.ToTensor()
    ])

    # Run on the GPU when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")

    model = MyInceptionV4(num_classes=5)
    model = model.to(device)
    # map_location lets a checkpoint saved on a CUDA device be loaded on a
    # CPU-only machine (and the other way round).
    state_dict = torch.load(r"Model_result/My_Inception_v4/My_Inception_v4_best.pth",
                            map_location=device)

    model.load_state_dict(state_dict)
    model.eval()
    with torch.no_grad():
        img = Image.open(img_path)  # open the image
        img = img.convert('RGB')  # force 3-channel RGB
        img = test_tf(img)
        img_tensor = torch.unsqueeze(img, 0)  # (C, H, W) -> (N, C, H, W)
        img_tensor = img_tensor.to(device)
        result = model(img_tensor)

        class_idx = result.argmax(1).item()

        # Class names are the sub-directories of the training root, sorted by
        # name. os.walk returns directories in arbitrary order, so the list is
        # sorted explicitly to get a deterministic index -> name mapping.
        # NOTE(review): this assumes the training labels were assigned in
        # sorted directory-name order (the training script builds them with
        # ImageFolder) -- confirm against the dataset's class_to_idx.
        file_list = sorted(
            entry for entry in os.listdir(train_root)
            if os.path.isdir(os.path.join(train_root, entry))
        )
        if class_idx < len(file_list):
            print("InveptionV4 对输入的图片预测的结果为:", file_list[class_idx])
        else:
            # The model has more outputs than there are class directories.
            print("InveptionV4 对输入的图片预测的结果为:", class_idx)
相关推荐
AwesomeCPA9 分钟前
果蔬识别系统性能优化之路(四)
前端·人工智能·python·性能优化·tensorflow
nice-wyh9 分钟前
ORB-SLAM2关键点总结
人工智能·计算机视觉
CaiYongji13 分钟前
深度!程序员生涯的垃圾时间(上)
人工智能·gpt·chatgpt·openai
h1771134720514 分钟前
交友系统“陌陌”全方位解析
大数据·人工智能·微信小程序·小程序·回归
h1771134720523 分钟前
定制相亲交友系统如何提升用户体验
大数据·开发语言·人工智能·小程序·系统开发
逐梦苍穹30 分钟前
速通GPT:Improving Language Understanding by Generative Pre-Training全文解读
论文阅读·人工智能·gpt·语言模型·论文笔记
为为-180-3121-145534 分钟前
“AI大语言模型+”助力大气科学相关交叉领域实践技术应用
人工智能·语言模型·自然语言处理
文艺倾年40 分钟前
【大模型专栏—进阶篇】语言模型创新大总结——“三派纷争”
人工智能·pytorch·语言模型·自然语言处理·大模型
陈敬雷-充电了么-CEO兼CTO43 分钟前
自然语言处理系列六十八》搜索引擎项目实战》搜索引擎系统架构设计
人工智能·gpt·搜索引擎·ai·自然语言处理·chatgpt·aigc
⊙月44 分钟前
CMU 10423 Generative AI:lec5(Encoder-only Transformers + 阅读材料Bert, ViT)
人工智能·深度学习·aigc·bert