python打卡day53

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
python 复制代码
# 设置中文字体支持
plt.rcParams["font.family"] = ["SimHei"]
plt.rcParams['axes.unicode_minus'] = False

# 设备配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")

使用设备: cuda

python 复制代码
# 参数配置
LATENT_DIM = 10
EPOCHS = 10000
BATCH_SIZE = 32
LR = 0.0002
BETA1 = 0.5
python 复制代码
# 1. 加载并预处理数据
data = pd.read_csv("e:/python打卡/python60-days-challenge/heart.csv")
X = data.drop('target', axis=1).values
y = data['target'].values

# 只选择有心脏病的样本(target=1)
X_patient = X[y == 1]
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X_patient)

# 转换为PyTorch Tensor并创建DataLoader
real_data_tensor = torch.from_numpy(X_scaled).float()
dataset = TensorDataset(real_data_tensor)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
python 复制代码
# 2. 构建模型
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(LATENT_DIM, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 13),  # 13个特征
            nn.Tanh()
        )

    def forward(self, x):
        return self.model(x)

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(13, 32),
            nn.LeakyReLU(0.2),
            nn.Linear(32, 16),
            nn.LeakyReLU(0.2),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# 实例化模型
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# 损失函数和优化器
criterion = nn.BCELoss()
g_optimizer = optim.Adam(generator.parameters(), lr=LR, betas=(BETA1, 0.999))
d_optimizer = optim.Adam(discriminator.parameters(), lr=LR, betas=(BETA1, 0.999))
python 复制代码
# 3. 训练循环
print("\n--- 开始训练 ---")
for epoch in range(EPOCHS):
    for i, (real_data,) in enumerate(dataloader):
        real_data = real_data.to(device)
        current_batch_size = real_data.size(0)
        
        # 训练判别器
        d_optimizer.zero_grad()
        
        # 真实数据
        real_labels = torch.ones(current_batch_size, 1).to(device)
        real_output = discriminator(real_data)
        d_loss_real = criterion(real_output, real_labels)
        
        # 生成数据
        noise = torch.randn(current_batch_size, LATENT_DIM).to(device)
        fake_data = generator(noise).detach()
        fake_labels = torch.zeros(current_batch_size, 1).to(device)
        fake_output = discriminator(fake_data)
        d_loss_fake = criterion(fake_output, fake_labels)
        
        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        d_optimizer.step()
        
        # 训练生成器
        g_optimizer.zero_grad()
        noise = torch.randn(current_batch_size, LATENT_DIM).to(device)
        fake_data = generator(noise)
        fake_output = discriminator(fake_data)
        g_loss = criterion(fake_output, real_labels)
        g_loss.backward()
        g_optimizer.step()
    
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch [{epoch+1}/{EPOCHS}], Discriminator Loss: {d_loss.item():.4f}, Generator Loss: {g_loss.item():.4f}")

--- 开始训练 ---

Epoch 1000/10000, Discriminator Loss: 1.9460, Generator Loss: 0.4685

Epoch 2000/10000, Discriminator Loss: 1.1531, Generator Loss: 0.8496

Epoch 3000/10000, Discriminator Loss: 1.2105, Generator Loss: 0.9245

Epoch 4000/10000, Discriminator Loss: 1.3388, Generator Loss: 0.8858

Epoch 5000/10000, Discriminator Loss: 0.8793, Generator Loss: 1.0353

Epoch 6000/10000, Discriminator Loss: 0.8470, Generator Loss: 0.6334

Epoch 7000/10000, Discriminator Loss: 1.0139, Generator Loss: 1.3785

Epoch 8000/10000, Discriminator Loss: 1.2486, Generator Loss: 1.8814

Epoch 9000/10000, Discriminator Loss: 1.0721, Generator Loss: 1.3251

Epoch 10000/10000, Discriminator Loss: 0.7876, Generator Loss: 1.5542

python 复制代码
# 4. 生成新数据并评估
# 生成样本
generator.eval()
with torch.no_grad():
    num_new_samples = len(X_patient)  # 生成与原始样本相同数量的数据
    noise = torch.randn(num_new_samples, LATENT_DIM).to(device)
    generated_data_scaled = generator(noise)

# 转换回原始尺度
generated_data = scaler.inverse_transform(generated_data_scaled.cpu().numpy())
python 复制代码
# 这里使用一个简单的分类器作为示例
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
original_f1 = f1_score(y_test, clf.predict(X_test))

# 使用GAN生成数据后的评估
# 将生成的数据标记为1(心脏病)
generated_y = np.ones(len(generated_data))
# 合并原始数据和生成数据
X_augmented = np.vstack([X_train, generated_data])
y_augmented = np.hstack([y_train, generated_y])
python 复制代码
# 重新训练
clf.fit(X_augmented, y_augmented)
augmented_f1 = f1_score(y_test, clf.predict(X_test))

print(f"\n原始F1分数: {original_f1:.4f}")
print(f"使用GAN数据增强后F1分数: {augmented_f1:.4f}")

原始F1分数: 0.8400

使用GAN数据增强后F1分数: 0.8163

python 复制代码
# 6. 可视化部分特征对比
plt.figure(figsize=(12, 6))
for i in range(4):  # 只可视化前4个特征
    plt.subplot(2, 2, i+1)
    plt.hist(X_patient[:, i], bins=20, alpha=0.5, label='真实数据')
    plt.hist(generated_data[:, i], bins=20, alpha=0.5, label='生成数据')
    plt.title(f'特征 {i+1} 分布对比')
    plt.legend()
plt.tight_layout()
plt.show()

@浙大疏锦行

相关推荐
程序员龙叔5 小时前
编写高质量 Skill 系列 -- 如何设计需求分析与用例生成的 SKILL
自动化测试·软件测试·python·软件测试工程师·接口测试·性能测试·skill·ai测试
用户8356290780517 小时前
使用 Python 操作 Word 内容控件
后端·python
通信小呆呆8 小时前
当算法有了“五感”:多模态数据融合如何向人体感官协同学习?
人工智能·学习·算法·机器学习·机器人
xiao5kou4chang6kai48 小时前
MATLAB机器学习、深度学习--从数据预处理到模型训练
深度学习·机器学习·matlab·数据预处理
renhongxia19 小时前
世界模型作为AGI落地底层底座的作用
人工智能·深度学习·生成对抗网络·自然语言处理·知识图谱·agi
计算机科研狗@OUC9 小时前
(cvpr26) AIMDepth: Asymmetric Image-Event Mamba for Monocular Depth Estimation
人工智能·深度学习·计算机视觉
code_pgf9 小时前
端到端自动驾驶 BEV stack
人工智能·机器学习·自动驾驶
码云骑士9 小时前
32-慢查询排查全流程(下)-索引优化实战与最左前缀原则
python
闵孚龙10 小时前
《PyTorch 深度修炼》Dataset 和 DataLoader:数据如何喂给模型
人工智能·pytorch·python