一、早停策略
在训练模型时同时监控模型在验证集上的指标,若指标在验证集上不再变好,则终止训练。
python
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import time
import matplotlib.pyplot as plt
from tqdm import tqdm # progress-bar display for the training loop
import warnings
warnings.filterwarnings("ignore") # silence library warnings to keep output clean
# Select GPU if available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")
# Load the Iris dataset.
iris = load_iris()
X = iris.data # feature matrix
y = iris.target # class labels
# Hold out 20% of the samples as a test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Scale features to [0, 1]; the scaler is fit on the training split only.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Convert to PyTorch tensors and move them to the chosen device.
X_train = torch.FloatTensor(X_train).to(device)
y_train = torch.LongTensor(y_train).to(device)
X_test = torch.FloatTensor(X_test).to(device)
y_test = torch.LongTensor(y_test).to(device)
class MLP(nn.Module):
    """Two-layer fully connected classifier: 4 input features -> 10 hidden -> 3 logits."""

    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(4, 10)  # input layer -> hidden layer
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(10, 3)  # hidden layer -> output logits

    def forward(self, x):
        # Return raw logits; CrossEntropyLoss applies softmax internally.
        return self.fc2(self.relu(self.fc1(x)))
# Instantiate the model and move it to the device.
model = MLP().to(device)
# Cross-entropy loss for multi-class classification.
criterion = nn.CrossEntropyLoss()
# Plain stochastic gradient descent.
optimizer = optim.SGD(model.parameters(), lr=0.01)
# Training configuration.
num_epochs = 20000 # maximum number of training epochs
# Histories sampled every 200 epochs, for plotting.
train_losses = [] # training-set losses
test_losses = [] # test-set losses
epochs = []
# ===== Early-stopping state =====
best_test_loss = float('inf') # best (lowest) test loss seen so far
best_epoch = 0 # epoch at which the best test loss occurred
patience = 50 # NOTE(review): the counter advances once per 200-epoch evaluation, so this allows 50 evaluations (10000 epochs) without improvement, not 50 epochs — confirm intended
counter = 0 # consecutive evaluations without improvement
early_stopped = False # set True when early stopping fires
# ==========================
start_time = time.time() # wall-clock start for the timing report
# Training loop with a tqdm progress bar and early stopping on the test loss.
with tqdm(total=num_epochs, desc="训练进度", unit="epoch") as pbar:
    for epoch in range(num_epochs):
        # Full-batch forward pass on the training set.
        train_loss = criterion(model(X_train), y_train)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        # Every 200 epochs: evaluate on the test set, record losses, check early stop.
        if (epoch + 1) % 200 == 0:
            model.eval()
            with torch.no_grad():
                test_loss = criterion(model(X_test), y_test)
            model.train()
            current_test_loss = test_loss.item()
            train_losses.append(train_loss.item())
            test_losses.append(current_test_loss)
            epochs.append(epoch + 1)
            # Show the latest losses next to the progress bar.
            pbar.set_postfix({'Train Loss': f'{train_loss.item():.4f}', 'Test Loss': f'{current_test_loss:.4f}'})
            if current_test_loss < best_test_loss:
                # New best test loss: remember it and checkpoint the weights.
                best_test_loss = current_test_loss
                best_epoch = epoch + 1
                counter = 0
                torch.save(model.state_dict(), 'best_model.pth')
            else:
                counter += 1
                if counter >= patience:
                    print(f"早停触发!在第{epoch+1}轮,测试集损失已有{patience}轮未改善。")
                    print(f"最佳测试集损失出现在第{best_epoch}轮,损失值为{best_test_loss:.4f}")
                    early_stopped = True
                    break
        # Advance the bar in 1000-epoch steps to limit redraw overhead.
        if (epoch + 1) % 1000 == 0:
            pbar.update(1000)
    # Fill the bar to 100% (covers early-stopped runs and the final partial step).
    if pbar.n < num_epochs:
        pbar.update(num_epochs - pbar.n)
# Report total training time.
time_all = time.time() - start_time
print(f'Training time: {time_all:.2f} seconds')
# If training stopped early, restore the best checkpoint for final evaluation.
if early_stopped:
    print(f"加载第{best_epoch}轮的最佳模型进行最终评估...")
    model.load_state_dict(torch.load('best_model.pth'))
# Plot the training and test loss curves (sampled every 200 epochs).
plt.figure(figsize=(10, 6))
plt.plot(epochs, train_losses, label='Train Loss')
plt.plot(epochs, test_losses, label='Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Test Loss over Epochs')
plt.legend()
plt.grid(True)
plt.show()
# Final accuracy on the held-out test set.
model.eval()
with torch.no_grad():
    predicted = torch.max(model(X_test), 1)[1]  # index of the highest logit = predicted class
    correct = (predicted == y_test).sum().item()
    accuracy = correct / y_test.size(0)
    print(f'测试集准确率: {accuracy * 100:.2f}%')
二、权重的保存

作业:对信贷数据集进行训练后保存权重,加载权重后,继续训练50轮,并采取早停策略
python
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import time
import matplotlib.pyplot as plt
from tqdm import tqdm # progress-bar display
import warnings
warnings.filterwarnings("ignore") # silence library warnings
# Select GPU if available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")
import pandas as pd
# Load the credit dataset from a local CSV file.
data = pd.read_csv(r"E:\PythonStudy\python60-days-challenge-master\data.csv")
import numpy as np # numerical helpers (efficient array operations)
discrete_features = data.select_dtypes(include=['object']).columns.tolist()
# Home Ownership 标签编码
home_ownership_mapping = {
'Own Home': 1,
'Rent': 2,
'Have Mortgage': 3,
'Home Mortgage': 4
}
data['Home Ownership'] = data['Home Ownership'].map(home_ownership_mapping)
# Years in current job 标签编码
years_in_job_mapping = {
'< 1 year': 1,
'1 year': 2,
'2 years': 3,
'3 years': 4,
'4 years': 5,
'5 years': 6,
'6 years': 7,
'7 years': 8,
'8 years': 9,
'9 years': 10,
'10+ years': 11
}
data['Years in current job'] = data['Years in current job'].map(years_in_job_mapping)
# Purpose 独热编码,记得需要将bool类型转换为数值
data = pd.get_dummies(data, columns=['Purpose'])
data2 = pd.read_csv(r"E:\PythonStudy\python60-days-challenge-master\data.csv") # 重新读取数据,用来做列名对比
list_final = [] # 新建一个空列表,用于存放独热编码后新增的特征名
for i in data.columns:
if i not in data2.columns:
list_final.append(i) # 这里打印出来的就是独热编码后的特征名
for i in list_final:
data[i] = data[i].astype(int) # 这里的i就是独热编码后的特征名
# Term 0 - 1 映射
term_mapping = {
'Short Term': 0,
'Long Term': 1
}
data['Term'] = data['Term'].map(term_mapping)
data.rename(columns={'Term': 'Long Term'}, inplace=True) # 重命名列
continuous_features = data.select_dtypes(include=['int64', 'float64']).columns.tolist() #把筛选出来的列名转换成列表
# 连续特征用中位数补全
for feature in continuous_features:
mode_value = data[feature].mode()[0] #获取该列的众数。
data[feature].fillna(mode_value, inplace=True) #用众数填充该列的缺失值,inplace=True表示直接在原数据上修改。
# 最开始也说了 很多调参函数自带交叉验证,甚至是必选的参数,你如果想要不交叉反而实现起来会麻烦很多
# 所以这里我们还是只划分一次数据集
from sklearn.model_selection import train_test_split
X = data.drop(['Credit Default'], axis=1) # 特征,axis=1表示按列删除
y = data['Credit Default'] # 标签
# 按照7:3划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
#将测试集1:1划分验证集和测试集
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42) # 50%验证集,50%测试集
#归一化处理
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1).to(device)
y_val = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1).to(device)
y_test = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1).to(device)
# ---------------------- Model definition ----------------------
import torch.nn as nn
class CreditModel(nn.Module):
    """Three-layer fully connected network for binary credit-default prediction.

    Architecture: input_dim -> 128 -> 64 -> 1, ReLU activations and a final
    sigmoid so the single output is a probability in (0, 1).
    """

    def __init__(self, input_dim):
        super(CreditModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)  # input -> first hidden layer
        self.fc2 = nn.Linear(128, 64)  # first hidden -> second hidden
        self.fc3 = nn.Linear(64, 1)  # second hidden -> single output unit
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))
# Instantiate the model with one input per feature column.
model = CreditModel(X_train.shape[1]).to(device)
# Binary cross-entropy loss: matches the model's sigmoid output.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # Adam optimizer
# Mini-batch data loaders for the three splits.
from torch.utils.data import TensorDataset, DataLoader
batch_size = 32
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # reshuffle each epoch
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Sanity-check the model's input dimension.
input_dim = X_train.shape[1]
print(f"模型输入维度:{input_dim}")

python
# ---------------------- Initial training (20 epochs) ----------------------
def train_one_epoch(model, train_loader, criterion, optimizer):
    """Train the model for one epoch and return the average per-sample loss.

    The running total is weighted by batch size so the average is exact even
    when the final batch is smaller than the others.
    """
    model.train()  # switch to training mode
    running_loss = 0.0
    for features, targets in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()  # back-propagate
        optimizer.step()  # update the weights
        running_loss += batch_loss.item() * features.size(0)
    return running_loss / len(train_loader.dataset)
# Run the initial 20 training epochs.
initial_epochs = 20
for epoch in range(initial_epochs):
    train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
    if (epoch + 1) % 5 == 0:  # print progress every 5 epochs
        print(f"首次训练第{epoch+1}轮,训练损失:{train_loss:.4f}")
# ---------------------- Save weights for later resumption ----------------------
import os
save_dir = os.path.expanduser("E:/PythonStudy/pystudy check-in/credit_train")
# exist_ok=True replaces the check-then-create pattern (os.path.exists +
# makedirs), which is race-prone and more verbose.
os.makedirs(save_dir, exist_ok=True)
# Save the state_dict only: parameters without the full object, small and portable.
weight_path = os.path.join(save_dir, "credit_model_initial.pth")
torch.save(model.state_dict(), weight_path)
print(f"首次训练权重已保存到:{weight_path}")

python
# ---------------------- Early-stopping configuration ----------------------
# Parameters: patience, minimum-improvement threshold, best validation loss.
patience = 10 # stop after 10 consecutive epochs without improvement
delta = 0.0001 # a drop smaller than this does not count as improvement
best_val_loss = float('inf') # lower is better, so start at infinity
counter = 0 # consecutive epochs without improvement
stop_training = False # flag checked at the top of the training loop
# Validation pass: average per-sample loss over the validation loader.
def validate(model, val_loader, criterion):
    """Evaluate the model on val_loader and return the average per-sample loss."""
    model.eval()  # switch to evaluation mode
    running_loss = 0.0
    with torch.no_grad():  # no gradients needed during evaluation
        for features, targets in val_loader:
            batch_loss = criterion(model(features), targets)
            running_loss += batch_loss.item() * features.size(0)
    return running_loss / len(val_loader.dataset)
# ---------------------- Continue training (up to 50 epochs, with early stopping) ----------------------
max_continue_epochs = 50  # planned number of continuation epochs
start_epoch = 0  # starting epoch index for the continuation run
trained_epochs = 0  # actual number of epochs completed (for the final report)
for epoch in range(start_epoch, max_continue_epochs):
    if stop_training:
        print("早停触发,提前结束训练!")
        break  # early stopping fired in the previous epoch
    # One training epoch followed by one validation pass.
    train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)
    trained_epochs = epoch + 1
    print(f"续训第{epoch+1}轮 | 训练损失:{train_loss:.4f} | 验证损失:{val_loss:.4f}")
    # Early-stopping check: only a drop of more than `delta` counts as progress.
    if val_loss < best_val_loss - delta:
        best_val_loss = val_loss
        counter = 0
        # Checkpoint the best model so far.
        best_weight_path = os.path.join(save_dir, "credit_model_best.pth")
        torch.save(model.state_dict(), best_weight_path)
        print(f"✨ 发现更好的模型,已保存到:{best_weight_path}")
    else:
        counter += 1
        print(f"⚠️ 连续{counter}轮验证损失无有效提升,耐心值:{patience}")
        if counter >= patience:
            stop_training = True  # picked up at the top of the next iteration
# BUGFIX: the original printed `counter` (the patience counter) as the number
# of trained epochs when early-stopped; report the true count instead.
print(f"续训完成!实际训练轮数:{trained_epochs}")

