目录
数据集
Dogs vs. Cats
python
import time
import torch.nn as nn
import torch.optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import os
import torch.nn
import torchvision
from glob import glob
import numpy as np
from torch._C._cudnn import is_cuda
from torchvision import transforms
from torchvision.datasets import ImageFolder
# Root directory of the dataset.
path = "./dogs-vs-cats/"
# Collect every .jpg below the dataset root.  The search is recursive because
# the loop below moves images into per-class sub-folders; a fixed one-level
# pattern would stop matching them the next time the script runs.
files = glob(os.path.join(path, '**', '*.jpg'), recursive=True)
# Report and remember how many images were found.
print(f'Total no of images{len(files)}')
no_of_images = len(files)

# Sort the images of each listed split into one sub-folder per class, which
# is the directory layout the ImageFolder datasets built later expect.
for t in ['test1']:
    split_dir = os.path.join(path, t)
    for label in ['cat', 'dog']:
        class_dir = os.path.join(split_dir, label)
        # The target folder has to exist before files can be renamed into it.
        os.makedirs(class_dir, exist_ok=True)
        # Join with a relative pattern: a pattern starting with "/" would make
        # os.path.join drop split_dir and search the filesystem root instead.
        matches = glob(os.path.join(split_dir, label + '*.jpg'))
        print(matches)
        for sfile in matches:
            # basename/dirname work with whatever separator the OS uses.
            file_name = os.path.basename(sfile)
            print(sfile)
            print(file_name, os.path.dirname(sfile))
            # Moving a file is done by renaming it to its new path.
            os.rename(sfile, os.path.join(class_dir, file_name))

# Random permutation of the image indices, usable for carving out a
# validation subset.
shuffle = np.random.permutation(no_of_images)
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each channel with the given mean / std values.
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Training set: one sub-folder per class under dogs-vs-cats/train.
train = ImageFolder("dogs-vs-cats/train", simple_transform)
# Make the label mapping explicit.
# NOTE(review): ImageFolder indexes its samples when it is constructed, so
# these assignments only document the mapping; they agree with it as long as
# the class folders are exactly "cat" and "dog" -- confirm the folder names.
train.class_to_idx = {"cat": 0, "dog": 1}
train.classes = ['cat', 'dog']

# Evaluation set, laid out the same way under dogs-vs-cats/test1.
test = ImageFolder("dogs-vs-cats/test1", simple_transform)
test.class_to_idx = {"cat": 0, "dog": 1}
test.classes = ['cat', 'dog']

# Wrap both datasets in batch loaders.  The training loader shuffles:
# without it the batches follow the dataset's own sample order, which groups
# all images of one class together and makes SGD see one class at a time.
train_data_gen = torch.utils.data.DataLoader(
    train, batch_size=64, shuffle=True, num_workers=0)
# Evaluation order does not affect the metrics, so it is left unshuffled.
test_data_gen = torch.utils.data.DataLoader(
    test, batch_size=64, num_workers=0)
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                dataloaders=None):
    """Train ``model`` and return it holding its best evaluation weights.

    Every epoch runs a training pass over the ``'train'`` loader followed by
    an evaluation pass over the ``'test1'`` loader.  The weights from the
    epoch with the highest ``'test1'`` accuracy are restored before returning.

    Args:
        model: Network to optimise.  Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The running loss assumes it returns the mean over the batch.
        optimizer: Optimiser that updates the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass.
        num_epochs: Number of epochs to run.
        dataloaders: Optional mapping with ``'train'`` and ``'test1'`` keys,
            each an iterable of ``(inputs, labels)`` batches.  When omitted,
            the module-level ``train_data_gen`` and ``test_data_gen`` are used.

    Returns:
        The same ``model`` object, loaded with the best weights found.
    """
    if dataloaders is None:
        dataloaders = {'train': train_data_gen, 'test1': test_data_gen}

    # Follow the model's own placement instead of a global flag so inputs
    # always land on the device the model actually lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    def snapshot_weights():
        # state_dict() hands back references to the live tensors; clone them
        # so later optimiser steps cannot overwrite the saved copy.
        return {key: value.detach().clone()
                for key, value in model.state_dict().items()}

    # Remember the start time for the duration report at the end.
    since = time.time()
    best_model_wts = snapshot_weights()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch{}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch trains first, then evaluates.
        for phase in ['train', 'test1']:
            is_training = phase == 'train'
            model.train(is_training)

            running_loss = 0.0
            running_correct = 0
            samples_seen = 0

            # Iterate the loader that belongs to the current phase.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_training:
                    optimizer.zero_grad()

                # Only build the autograd graph when it will be used.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that dividing by
                # the sample count below yields a per-sample average.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_correct += torch.sum(preds == labels).item()
                samples_seen += batch_size

            # Advance the schedule once per epoch, after the optimiser has
            # stepped, rather than once per batch.
            if is_training:
                scheduler.step()

            # Guard against an empty loader.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_correct / denominator
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep a copy of the best-performing weights.
            if phase == 'test1' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = snapshot_weights()
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}'.format(time_elapsed // 60, time_elapsed % 60))
    print("Best val Acc:{:4f}".format(best_acc))
    # Restore the best weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model
# Build the network: a ResNet-18 whose final fully connected layer is
# replaced by a two-output layer, one output per class.
model_ft = torchvision.models.resnet18()
num_ftrs = model_ft.fc.in_features
model_ft.fc = torch.nn.Linear(num_ftrs, 2)

# Move the model to the GPU only when one is actually usable at runtime.
if torch.cuda.is_available():
    model_ft = model_ft.cuda()

learning_rate = 0.001
# Loss function.
criterion = nn.CrossEntropyLoss()
# Optimiser: SGD with momentum, driven by the learning rate defined above.
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=learning_rate, momentum=0.9)
# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# Start training and keep the model that train_model hands back.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler)