1. Data processing
1.1 Implementing a custom dataset class
The main goal is simply to end up with a subclass of Dataset; beyond the required interface (__len__ and __getitem__), what other operations it contains is up to you.
It holds all the information and instructions needed to load and process the images. In other words, it subclasses Dataset to act as a data manager, serving the model's training and validation.
```python
# Custom dataset class: subclasses torch.utils.data.Dataset so DataLoader
# can consume it during training and validation
import os
import torch
from PIL import Image
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None, is_test=False):
        super().__init__()
        self.data = csv_file          # a pandas DataFrame, not a file path
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test

    @property  # a property, so dataset.classes returns the list directly
    def classes(self):
        # Column 1 of the CSV data holds the string labels; unique() drops
        # duplicates and tolist() converts the result to a Python list
        return self.data.iloc[:, 1].unique().tolist()

    def __len__(self):
        return len(self.data)

    # Fetch a single sample
    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.data.iloc[idx, 0])
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        if self.is_test:
            return image
        label = self.data.iloc[idx, 2]  # column 2 is the integer-encoded label
        label = torch.tensor(label)
        return image, label
```
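A minimal sketch of how this class is driven, using hypothetical file names and class names (only __len__ and classes are exercised here, since no real image files are assumed):

```python
import pandas as pd

df = pd.DataFrame({
    "image": ["images/0.jpg", "images/1.jpg"],    # hypothetical paths
    "label": ["maclura_pomifera", "ulmus_rubra"]  # hypothetical class names
})
ds = CustomDataset(df, root_dir=".")
print(len(ds))     # 2
print(ds.classes)  # ['maclura_pomifera', 'ulmus_rubra']
```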
1.2 Defining a data-loading function
1. Read the CSV files from the dataset directory.
2. Apply some transforms to the data.
3. Randomly split the training data 9:1 into a training set and a test set.
4. Wrap the CustomDataset instances in DataLoaders with the given batch_size, then merge them into a single dict of loaders, dataloaders.
```python
# Load the datasets, with augmentation and preprocessing
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

def load_dataset(batch_size, img_size):
    train_transforms = transforms.Compose([
        transforms.Resize(img_size),        # resize every image to a common size
        transforms.RandomHorizontalFlip(),  # flip horizontally with 50% probability
        transforms.RandomVerticalFlip(),    # flip vertically with 50% probability
        transforms.ToTensor(),              # PIL image -> PyTorch tensor, pixel values scaled from 0-255 to 0-1
    ])
    test_transforms = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
    ])
    # Read the dataset
    root_folder = "/kaggle/input/classify-leaves"
    train_csv = pd.read_csv("/kaggle/input/classify-leaves/train.csv")
    test_csv = pd.read_csv("/kaggle/input/classify-leaves/test.csv")
    # Label encoding: str -> int
    leaves_labels = train_csv.iloc[:, 1].unique()              # all distinct class names
    n_classes = len(leaves_labels)
    class_to_num = dict(zip(leaves_labels, range(n_classes)))  # mapping from name to number
    train_csv['labelEncoded'] = train_csv.iloc[:, 1].map(class_to_num)  # apply the mapping as a new column
    full_dataset = CustomDataset(train_csv, root_folder, transform=train_transforms)
    predict_dataset = CustomDataset(test_csv, root_folder, transform=test_transforms, is_test=True)
    # Split the dataset (note: both splits share train_transforms here,
    # so the held-out split is also augmented)
    train_size = int(0.9 * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])  # random split
    # Wrap in DataLoaders
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    dataloaders = {'train': train_dataloader, 'test': test_dataloader}
    print(f"train_size:{train_size}, test_size:{test_size}")
    print(full_dataset.classes)
    return dataloaders, full_dataset.classes, predict_dataset
```
Optional: use matplotlib to plot a few images and check that the files were read correctly.
Write a small helper function so you can plot whenever you want to take a look.
```python
# Sanity check: display a few images with their labels
import matplotlib.pyplot as plt

def display_data(dataloaders, class_names):
    # Grab one batch of images and labels from the training loader
    images, labels = next(iter(dataloaders["train"]))
    # Show the first five images
    fig, axes = plt.subplots(1, 5, figsize=(15, 3))  # a grid of 1 row x 5 columns of subplots
    for i in range(5):
        axes[i].imshow(images[i].permute(1, 2, 0))   # reorder (C, H, W) -> (H, W, C) for imshow
        axes[i].set_title(class_names[labels[i]])
        axes[i].axis('off')                          # hide the axes
    plt.show()
```
2. Network module: ResNet-18
2.1 The ResNet residual block
Implemented from scratch by subclassing nn.Module; each residual block contains two convolutional layers.
```python
import torch
from torch import nn
from torch.nn import functional as F

# Residual block: two 3x3 conv layers plus an optional 1x1 conv on the shortcut
class Residual(nn.Module):
    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        if self.conv3:
            x = self.conv3(x)  # match shapes so the skip connection can be added
        y += x
        return F.relu(y)
```
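Before composing blocks into stages, a quick sanity check on Residual's shapes is useful: without the 1x1 conv the input shape is preserved; with use_1x1conv=True and strides=2 the channel count changes while the spatial dimensions halve.

```python
blk = Residual(3, 3)
x = torch.rand(4, 3, 6, 6)
print(blk(x).shape)  # torch.Size([4, 3, 6, 6]): shape preserved
blk = Residual(3, 6, use_1x1conv=True, strides=2)
print(blk(x).shape)  # torch.Size([4, 6, 3, 3]): channels doubled, spatial dims halved
```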
2.2 Composing residual blocks into stages
ResNet-18 has four stages, each made up of several repeated residual blocks.
```python
# One stage of the network: num_residuals Residual blocks chained together
def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    block = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            # The first block of a stage halves the spatial size and changes the channel count
            block.append(Residual(input_channels, num_channels, use_1x1conv=True, strides=2))
        else:
            block.append(Residual(num_channels, num_channels))
    return block
```
2.3 Implementing ResNet-18
Each small residual block contributes two convolutional layers: (2+2+2+2)×2 = 16 conv layers, plus the initial 7×7 conv and the final fully connected layer, for 18 layers in total.
```python
# ResNet-18
def resNet_18(n_classes):
    b1 = nn.Sequential(nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
                       nn.BatchNorm2d(64), nn.ReLU(),
                       nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
    b3 = nn.Sequential(*resnet_block(64, 128, 2))
    b4 = nn.Sequential(*resnet_block(128, 256, 2))
    b5 = nn.Sequential(*resnet_block(256, 512, 2))
    return nn.Sequential(b1, b2, b3, b4, b5,
                         nn.AdaptiveAvgPool2d((1, 1)),
                         nn.Flatten(), nn.Linear(512, n_classes))
```
You can print the output shape of each top-level layer to check the architecture:
```python
net = resNet_18(n_classes=100)
x = torch.rand(size=(1, 3, 256, 256))
for layer in net:
    x = layer(x)
    print(layer.__class__.__name__, 'output shape:\t', x.shape)
```
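For a 1×3×256×256 input, the printout should look like this (the five Sequential lines are b1 through b5):

```
Sequential output shape:	 torch.Size([1, 64, 64, 64])
Sequential output shape:	 torch.Size([1, 64, 64, 64])
Sequential output shape:	 torch.Size([1, 128, 32, 32])
Sequential output shape:	 torch.Size([1, 256, 16, 16])
Sequential output shape:	 torch.Size([1, 512, 8, 8])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 512, 1, 1])
Flatten output shape:	 torch.Size([1, 512])
Linear output shape:	 torch.Size([1, 100])
```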
3. Defining the metric
Only accuracy is computed here.
```python
# Compute accuracy over a data loader
def calculate_accuracy(model, data_loader, device):
    was_training = model.training
    model.eval()  # use running BatchNorm statistics during evaluation
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # torch.max returns (values, indices); only the indices are needed,
            # i.e. the predicted class per row, so the values are discarded with _
            _, predicted = torch.max(output.data, 1)  # max along dimension 1 (per row)
            total += target.size(0)                   # number of samples in this batch
            correct += (predicted == target).sum().item()
    if was_training:
        model.train()  # restore training mode for the caller
    return correct / total
```
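Used on its own (with model, dataloaders, and device as defined elsewhere in this script), it looks like:

```python
test_acc = calculate_accuracy(model, dataloaders['test'], device)
print(f"test accuracy: {100 * test_acc:.2f}%")
```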
4. Wrapping the training loop in a train function
1. For each epoch, feed the training data through the model to obtain predictions.
2. Call the loss function to compute the loss.
3. Backpropagate to compute the gradients of the loss.
4. Let the optimizer update the model parameters according to those gradients.
5. Finally, call the accuracy function written above to track accuracy.
```python
# Training function (relies on the module-level `device` defined below)
import torch.optim as optim

def train(model, dataloaders, epochs, criterion, optimizer):
    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(dataloaders['train']):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)  # compare the predictions against the true labels
            loss.backward()                   # compute gradients of the loss w.r.t. every parameter
            optimizer.step()                  # update all parameters using those gradients
            if batch_idx % 100 == 0:
                train_accuracy = calculate_accuracy(model, dataloaders['train'], device)
                test_accuracy = calculate_accuracy(model, dataloaders['test'], device)
                # Progress within the current epoch (e.g. "3200/18346 images processed")
                print(f"epoch:{epoch}",
                      f"[{batch_idx * len(data)}/{len(dataloaders['train'].dataset)} ({100. * batch_idx / len(dataloaders['train']):.0f}%)]\t"
                      f"loss:{loss.item():.6f}\t"
                      f"train accuracy:{100. * train_accuracy:.2f}%\t"
                      f"test accuracy:{100. * test_accuracy:.2f}%")
```
Optional: GPU training
```python
# Train on GPU if one is available
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('Training on CPU')
else:
    print('Training on GPU')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
```
5. Running the whole project
1. Assign the hyperparameters, then call the data-loading function to build dataloaders.
2. Call the model-building function to create the ResNet-18, and optionally display some data.
3. Define the loss function and the optimizer.
4. Finally, call the train function written above to start training.
```python
# ResNet-18 training run
batch_size = 128
img_size = 256
epochs = 50
dataloaders, class_names, predict_dataset = load_dataset(batch_size=batch_size, img_size=img_size)
model = resNet_18(len(class_names))
model.to(device)
display_data(dataloaders, class_names)
criterion = nn.CrossEntropyLoss()  # CrossEntropyLoss applies softmax internally
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer
train(model, dataloaders, epochs, criterion, optimizer)
```
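Note that predict_dataset is returned by load_dataset but never consumed above. A minimal inference sketch could look like the following; num_to_class is built here as the inverse of the class_to_num mapping, which works because class_names preserves the same encoding order (both come from the same unique() call):

```python
# Sketch: predict class names for the unlabeled test images
predict_loader = DataLoader(predict_dataset, batch_size=batch_size, shuffle=False)
num_to_class = {i: name for i, name in enumerate(class_names)}  # inverse label mapping
model.eval()
predictions = []
with torch.no_grad():
    for images in predict_loader:
        images = images.to(device)
        predictions.extend(model(images).argmax(dim=1).cpu().tolist())
predicted_labels = [num_to_class[p] for p in predictions]
```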