PyTorch训练关键点

1.背景

在网上找了一些关键点训练的资料,大多是人脸或车牌关键点训练,或者是与检测任务联合训练;单独基于轻量级网络训练关键点模型的工程很少。本文简单介绍一种简单的方法并给出相应代码。

2.代码模块

(1)网络结构

文件:model.py

import torch.nn as nn

import torch

import torch.nn.functional as F

import torch.nn.init as init

class Fire(nn.Module):
    """SqueezeNet-style Fire block with linear (un-activated) expand branches.

    A 1x1 "squeeze" convolution followed by ReLU feeds two parallel "expand"
    convolutions (1x1, and 3x3 with padding 1). Their outputs are concatenated
    along the channel axis, so the block emits
    ``expand1x1_planes + expand3x3_planes`` channels at the input's spatial
    size.

    Args:
        inplanes: Number of input channels.
        squeeze_planes: Output channels of the squeeze convolution.
        expand1x1_planes: Output channels of the 1x1 expand branch.
        expand3x3_planes: Output channels of the 3x3 expand branch.
    """

    def __init__(self, inplanes, squeeze_planes,
                 expand1x1_planes, expand3x3_planes):
        super(Fire, self).__init__()
        self.inplanes = inplanes
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)
        # The expand branches deliberately carry no activation of their own;
        # any non-linearity is applied by the caller after the block.
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
                                   kernel_size=1)
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
                                   kernel_size=3, padding=1)

    def forward(self, x):
        """Return the channel-wise concatenation of both expand branches."""
        x = self.squeeze_activation(self.squeeze(x))
        return torch.cat([
            self.expand1x1(x),
            self.expand3x3(x),
        ], 1)

class RegressNet(nn.Module):
    """Lightweight Fire-based network that regresses 8 values (4 keypoints).

    The convolutional trunk (``self.features``) is followed by global average
    pooling and a single linear layer producing 8 outputs
    (x1, y1, ..., x4, y4).

    Args:
        version: ``1.0`` selects the small 128-channel trunk, ``1.1`` the
            larger 512-channel SqueezeNet-1.1-like trunk.
        export: Stored on the instance as ``self.export``; not otherwise used
            by this class.

    Raises:
        ValueError: If ``version`` is neither 1.0 nor 1.1.
    """

    def __init__(self, version=1.0, export=False):
        super(RegressNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported RegressNet version {version}:"
                             "1.0 or 1.1 expected".format(version=version))
        self.export = export
        print(version)
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 16, kernel_size=3, padding=(1, 1), stride=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
                Fire(16, 16, 32, 32),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
                Fire(64, 32, 32, 32),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
                Fire(64, 32, 64, 64),
                nn.ReLU(inplace=True),
                Fire(128, 32, 64, 64),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                nn.Conv2d(128, 128, kernel_size=3, padding=(0, 0), stride=2),
            )
            feature_channels = 128
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
            feature_channels = 512

        # Regression head sized to the selected trunk: the 1.1 trunk ends
        # with 512 channels, the 1.0 trunk with 128.
        self.fc = nn.Linear(feature_channels, 8)
        self.loss = torch.nn.L1Loss()

        # Kaiming-uniform weights and zero biases for every convolution.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        """Map an image batch (N, 3, H, W) to keypoint predictions (N, 8)."""
        x = self.features(x)
        # Global average pooling keeps the batch axis intact for any feature
        # map size. For the 1.0 trunk with 64x64 inputs the map is already
        # 1x1, so this is numerically the same as a plain reshape to (N, 128).
        x = F.adaptive_avg_pool2d(x, 1)
        x = torch.flatten(x, 1)
        return self.fc(x)

(2)训练工程

文件:train.py 以训练四个关键点为例

import numpy as np

from math import radians, cos, sin

import torchvision.transforms as transforms

import torchvision.transforms.functional as TF

#import imutils

import torch

from PIL import Image

import random

import cv2

import xml.etree.ElementTree as ET

from torch.utils.data import Dataset

import os

import torch.nn as nn

import torchvision.models as models

import torch.nn.functional as F

import torch.nn.init as init

import torch.optim as optim

import time

from tqdm import tqdm

from model import RegressNet

class Transforms():
    """Preprocessing and augmentation applied jointly to an image and its landmarks.

    Calling an instance resizes the image to 64x64, applies colour jitter,
    converts it to a tensor and normalizes it. Landmarks pass through
    unchanged, so they are expected to be expressed as fractions of the image
    size already. ``rotate`` and ``crop_face`` are available as extra steps
    but are not part of the default pipeline.
    """

    def __init__(self):
        pass

    @staticmethod
    def _rotate_landmarks(landmarks, angle):
        """Rotate normalized landmarks by ``angle`` degrees about (0.5, 0.5)."""
        theta = radians(angle)
        # Row-vector rotation matrix: with the image y axis pointing down,
        # this matches a counter-clockwise rotation of the picture.
        rotation = torch.tensor([
            [+cos(theta), -sin(theta)],
            [+sin(theta), +cos(theta)],
        ])
        # Centre the coordinates so the rotation is about the image centre,
        # then shift back into the [0, 1] range.
        centered = torch.as_tensor(landmarks, dtype=torch.float32) - 0.5
        return torch.matmul(centered, rotation) + 0.5

    def rotate(self, image, landmarks, angle):
        """Rotate image and landmarks by a random angle in [-angle, +angle].

        Args:
            image: PIL image (array-likes are converted to a PIL image first).
            landmarks: Landmarks normalized to the image size, shape (K, 2).
            angle: Maximum absolute rotation in degrees.

        Returns:
            Tuple of the rotated PIL image and the rotated landmarks as a
            float32 tensor.
        """
        # Draw the actual rotation angle uniformly from the allowed range.
        angle = random.uniform(-angle, +angle)
        if not isinstance(image, Image.Image):
            image = Image.fromarray(np.asarray(image))
        # torchvision rotates counter-clockwise about the image centre
        # without expanding the canvas, which is what the landmark rotation
        # assumes.
        image = TF.rotate(image, angle)
        return image, self._rotate_landmarks(landmarks, angle)

    def resize(self, image, landmarks, img_size):
        """Resize the image; normalized landmarks are unaffected."""
        image = TF.resize(image, img_size)
        return image, landmarks

    def color_jitter(self, image, landmarks):
        """Randomly perturb brightness, contrast, saturation and hue."""
        color_jitter = transforms.ColorJitter(brightness=0.3,
                                              contrast=0.3,
                                              saturation=0.3,
                                              hue=0.1)
        image = color_jitter(image)
        return image, landmarks

    def crop_face(self, image, landmarks, crops):
        """Crop the image to a box and express landmarks relative to the crop.

        Args:
            image: PIL image.
            landmarks: Landmarks in pixel coordinates of the uncropped image.
            crops: Mapping with ``'left'``, ``'top'``, ``'width'`` and
                ``'height'`` entries describing the crop box.

        Returns:
            Tuple of the cropped image and landmarks normalized by the
            cropped image's width and height.
        """
        left = int(crops['left'])
        top = int(crops['top'])
        width = int(crops['width'])
        height = int(crops['height'])

        image = TF.crop(image, top, left, height, width)
        img_shape = np.array(image).shape

        # Shift into the crop's coordinate frame, then normalize by (w, h).
        landmarks = torch.as_tensor(landmarks, dtype=torch.float32) - torch.tensor([[left, top]])
        landmarks = landmarks / torch.tensor([img_shape[1], img_shape[0]])
        return image, landmarks

    def __call__(self, image, landmarks):
        """Run the default pipeline on an image array and its landmarks.

        Args:
            image: Image as an array accepted by ``PIL.Image.fromarray``.
            landmarks: Landmarks normalized to the image size.

        Returns:
            Tuple of the normalized image tensor and the unchanged landmarks.
        """
        image = Image.fromarray(image)
        image, landmarks = self.resize(image, landmarks, (64, 64))
        image, landmarks = self.color_jitter(image, landmarks)
        # Rotation augmentation (self.rotate) is intentionally not applied.
        image = TF.to_tensor(image)
        # Map pixel values from [0, 1] to [-1, 1].
        image = TF.normalize(image, [0.5], [0.5])
        return image, landmarks

(3)dataset 定义:每个样本的标签长度为 8,依次为 x1,y1,x2,y2,x3,y3,x4,y4(均为按图像宽、高归一化后的坐标)

#标签排列规则

XXX.jpg x1/width y1/height x2/width y2/height x3/width y3/height x4/width y4/height

class FaceLandmarksDataset(Dataset):
    """Dataset of images with four normalized keypoints each.

    Every non-empty line of the label file has the form::

        XXX.jpg x1/width y1/height x2/width y2/height x3/width y3/height x4/width y4/height

    that is, an image path relative to ``root_dir`` followed by eight
    coordinates expressed as fractions of the image size.

    Args:
        transform: Optional callable ``(image, landmarks) -> (image, landmarks)``.
        root_dir: Directory the image paths in the label file are relative to.
        label_file: Path of the whitespace-separated label file.

    Raises:
        ValueError: If a label line has fewer than nine fields or a
            coordinate is not a number.
    """

    def __init__(self, transform=None,
                 root_dir=r'C:\DL_Work\test_pics/',
                 label_file=r"C:\DL_Work\test_pics\path.txt"):
        self.image_filenames = []
        self.landmarks = []
        self.crops = []
        self.transform = transform
        self.root_dir = root_dir

        with open(label_file, 'r', encoding="utf-8") as r:
            for line in r:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines, e.g. a trailing newline.
                    continue
                if len(fields) < 9:
                    raise ValueError(
                        "Expected an image path and 8 coordinates, got: {!r}".format(line))
                self.image_filenames.append(os.path.join(self.root_dir, fields[0]))
                # Coordinates are fractions of the image size, so they must be
                # parsed as floats.
                coords = [float(value) for value in fields[1:9]]
                self.landmarks.append(
                    [[coords[i], coords[i + 1]] for i in range(0, 8, 2)])

        # Shape (num_samples, 4, 2), also when the label file is empty.
        self.landmarks = np.array(self.landmarks, dtype='float32').reshape(-1, 4, 2)

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, index):
        """Return ``(image, landmarks)`` with landmarks centred around zero.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        # Read in colour: the network consumes 3-channel input. OpenCV returns
        # the channels in BGR order and no conversion is done here, so
        # inference code must feed images in the same order.
        image = cv2.imread(self.image_filenames[index])
        if image is None:
            raise FileNotFoundError(
                "Cannot read image: {}".format(self.image_filenames[index]))
        landmarks = self.landmarks[index]

        if self.transform:
            image, landmarks = self.transform(image, landmarks)

        # Centre the normalized coordinates: [0, 1] -> [-0.5, 0.5].
        landmarks = landmarks - 0.5
        return image, landmarks

# Build the dataset with the preprocessing pipeline attached.
dataset = FaceLandmarksDataset(Transforms())

# Hold out 10% of the samples for validation; the rest is used for training.
len_valid_set = int(0.1 * len(dataset))
len_train_set = len(dataset) - len_valid_set

train_dataset, valid_dataset = torch.utils.data.random_split(dataset, [len_train_set, len_valid_set])

# Shuffle and batch the datasets.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=1)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=True, num_workers=1)

(4)train

def train():
    """Train ``RegressNet`` on ``train_loader`` and validate on ``valid_loader``.

    Runs 10 epochs of Adam (lr=1e-4) with an MSE loss. After each epoch the
    mean training and validation losses are printed, and the weights are
    written to ``plate_landmark.pth`` whenever the validation loss improves.

    Returns:
        Tuple ``(train_losses, valid_losses)`` holding the per-epoch mean
        losses.
    """
    # Per-epoch loss history.
    train_losses = []
    valid_losses = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Debugging aid: reports the operation that produced NaN/inf gradients,
    # at the cost of slower training.
    torch.autograd.set_detect_anomaly(True)

    network = RegressNet().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(network.parameters(), lr=0.0001)

    loss_min = np.inf
    num_epochs = 10
    start_time = time.time()

    for epoch in range(1, num_epochs + 1):
        loss_train = 0
        loss_valid = 0

        network.train()
        # Iterate the loader itself so each epoch visits every batch once;
        # building a fresh iterator per step would only ever yield its first
        # batch and restart the worker processes each time.
        for images, landmarks in tqdm(train_loader):
            images = images.to(device)
            # Flatten (N, 4, 2) targets to (N, 8) to match the network output.
            landmarks = landmarks.view(landmarks.size(0), -1).to(device)

            optimizer.zero_grad()
            predictions = network(images)
            loss_train_step = criterion(predictions, landmarks)
            loss_train_step.backward()
            optimizer.step()

            loss_train += loss_train_step.item()

        network.eval()
        with torch.no_grad():
            for images, landmarks in valid_loader:
                images = images.to(device)
                landmarks = landmarks.view(landmarks.size(0), -1).to(device)

                predictions = network(images)
                loss_valid_step = criterion(predictions, landmarks)
                loss_valid += loss_valid_step.item()

        # max(..., 1) avoids a division by zero when a split is empty
        # (e.g. fewer than 10 samples leaves no validation data).
        loss_train /= max(len(train_loader), 1)
        loss_valid /= max(len(valid_loader), 1)

        train_losses.append(loss_train)
        valid_losses.append(loss_valid)

        print('\n--------------------------------------------------')
        print('Epoch: {} Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_valid))
        print('--------------------------------------------------')

        # Keep only the best checkpoint seen so far.
        if loss_valid < loss_min:
            loss_min = loss_valid
            torch.save(network.state_dict(), 'plate_landmark.pth')
            print("\nMinimum Validation Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
            print('Model Saved\n')

    print('Training Complete')
    print("Total Elapsed Time: {} s".format(time.time() - start_time))
    return train_losses, valid_losses

# Entry-point guard: start training only when run as a script, so the module
# can be re-imported (e.g. by DataLoader worker processes) without retraining.
if __name__ == '__main__':
    train()

3.导出onnx

#export.py

import torch

import torch.nn

import onnx

from onnxsim import simplify

from model import RegressNet

#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Export on CPU so the script also runs on machines without a GPU.
device = torch.device('cpu')

model = RegressNet()
model_statedict = torch.load(r'./plate_landmark.pth', map_location=device)
model.load_state_dict(model_statedict)
# Trace the network in inference mode.
model.eval()

input_names = ['input0']
output_names = ['output0']

# Dummy input fixing the traced shape; only the batch axis is exported as dynamic.
x = torch.randn(1, 3, 64, 64, device=device)

torch.onnx.export(model, x, 'plate_landmark.onnx', opset_version=11, verbose=True,
                  input_names=input_names, output_names=output_names,
                  dynamic_axes={'input0': {0: 'batch'},
                                'output0': {0: 'batch'}})

# Simplify the exported graph and save the result only if it was validated.
onnx_model = onnx.load("plate_landmark.onnx")
simplified_model, check = simplify(onnx_model)
if not check:
    raise RuntimeError("Simplified ONNX model could not be validated")
onnx.save_model(simplified_model, "plate_landmark_sim.onnx")

相关推荐
成富8 分钟前
文本转SQL(Text-to-SQL),场景介绍与 Spring AI 实现
数据库·人工智能·sql·spring·oracle
凤枭香21 分钟前
Python OpenCV 傅里叶变换
开发语言·图像处理·python·opencv
CSDN云计算22 分钟前
如何以开源加速AI企业落地,红帽带来新解法
人工智能·开源·openshift·红帽·instructlab
测试杂货铺28 分钟前
外包干了2年,快要废了。。
自动化测试·软件测试·python·功能测试·测试工具·面试·职场和发展
艾派森32 分钟前
大数据分析案例-基于随机森林算法的智能手机价格预测模型
人工智能·python·随机森林·机器学习·数据挖掘
hairenjing112334 分钟前
在 Android 手机上从SD 卡恢复数据的 6 个有效应用程序
android·人工智能·windows·macos·智能手机
小蜗子39 分钟前
Multi‐modal knowledge graph inference via media convergenceand logic rule
人工智能·知识图谱
SpikeKing1 小时前
LLM - 使用 LLaMA-Factory 微调大模型 环境配置与训练推理 教程 (1)
人工智能·llm·大语言模型·llama·环境配置·llamafactory·训练框架
小码的头发丝、1 小时前
Django中ListView 和 DetailView类的区别
数据库·python·django
黄焖鸡能干四碗1 小时前
信息化运维方案,实施方案,开发方案,信息中心安全运维资料(软件资料word)
大数据·人工智能·软件需求·设计规范·规格说明书