# Homework
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# 1. Load data
# Reads the raw dataset from 'data.csv' in the current working directory.
data = pd.read_csv('data.csv')
# 2. Data preprocessing
# NOTE(review): the original script used X and y without ever defining them.
# The lines below ASSUME the label is the last column of data.csv and every
# other column is a numeric feature — TODO confirm the real target column.
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

# Split into training and validation sets (80/20, fixed seed for repeatability)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature standardization: fit on the training split only, then apply the same
# transform to the validation split so no validation statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
# Convert the splits to PyTorch float tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train)
X_val_tensor = torch.FloatTensor(X_val)
y_val_tensor = torch.FloatTensor(y_val)
# Build datasets and data loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
# Only the training loader shuffles; validation order is left as-is.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
# 3. Define the model
class CreditModel(nn.Module):
    """Three-layer fully connected network with a sigmoid output.

    Layer sizes are ``input_size -> 64 -> 32 -> 1``; ReLU follows the first
    two linear layers and a sigmoid follows the last one, so every output
    value lies in [0, 1].
    """

    def __init__(self, input_size: int) -> None:
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
        """
        # Must be the real dunder constructor: nn.Module.__init__ has to run
        # before submodules can be registered as attributes.
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run a forward pass.

        Args:
            x: Input whose last dimension equals ``input_size``.

        Returns:
            Tensor with last dimension 1 holding sigmoid activations.
        """
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x
# 4. Initialize the model, loss function and optimizer
input_size = X_train.shape[1]  # feature dimension
model = CreditModel(input_size)
# Binary classification; for a regression task use MSELoss instead.
# BCELoss expects probabilities, which the model's final sigmoid provides.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 5. Load pretrained weights (if present)
# NOTE(review): this reads 'credit_model_weights.pth', while the script saves to
# 'best_credit_model_weights.pth' and 'credit_model_weights_final.pth' — confirm
# which checkpoint is meant to be resumed.
try:
    state_dict = torch.load('credit_model_weights.pth')
except FileNotFoundError:
    # Only a missing file means "train from scratch". Other failures (corrupt
    # file, Ctrl-C) are real errors and are no longer silently swallowed by a
    # bare except.
    print("未找到预训练权重,从头开始训练")
else:
    # Kept outside the try so a state-dict/architecture mismatch surfaces
    # instead of being misreported as "weights not found".
    model.load_state_dict(state_dict)
    print("成功加载预训练权重")
# 6. Training configuration
patience = 10  # early-stopping patience: epochs allowed without improvement
best_val_loss = float('inf')  # lowest validation loss seen so far
counter = 0  # consecutive epochs without validation improvement
additional_epochs = 50  # number of epochs to (continue to) train
# 7. Training loop
for epoch in range(additional_epochs):
    # Training phase
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # unsqueeze(1) adds a trailing dimension to the labels so they line up
        # with the model's single-unit output layer.
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()
        # Weight each batch loss by its batch size; the sum is divided by the
        # dataset size below, so a smaller final batch is not over-weighted.
        train_loss += loss.item() * inputs.size(0)

    # Validation phase (no gradient tracking, no parameter updates)
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels.unsqueeze(1))
            val_loss += loss.item() * inputs.size(0)

    # Compute the average loss per sample
    train_loss = train_loss / len(train_loader.dataset)
    val_loss = val_loss / len(val_loader.dataset)
    print(f'Epoch [{epoch+1}/{additional_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        # Save the best model weights seen so far
        torch.save(model.state_dict(), 'best_credit_model_weights.pth')
    else:
        counter += 1
        if counter >= patience:
            print(f'验证损失连续{patience}轮未改善,提前停止训练!')
            break
# 8. Save the final model weights
# These are the weights from the last epoch that ran; the lowest-validation-loss
# weights are written separately to 'best_credit_model_weights.pth' during training.
torch.save(model.state_dict(), 'credit_model_weights_final.pth')
print("训练完成,模型权重已保存")