python打卡day53

python 复制代码
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
python 复制代码
# Matplotlib setup: select the SimHei font so the Chinese plot labels used
# later can be rendered, and disable the Unicode minus sign (presumably
# because SimHei lacks that glyph, so the ASCII hyphen is used instead).
plt.rcParams.update({
    "font.family": ["SimHei"],
    "axes.unicode_minus": False,
})

# Compute device: CUDA when PyTorch reports it as available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")

使用设备: cuda

python 复制代码
# Hyperparameters
LATENT_DIM = 10    # length of the noise vector fed to the generator
EPOCHS = 10_000    # number of passes over the dataloader
BATCH_SIZE = 32    # mini-batch size handed to the DataLoader
LR = 2e-4          # learning rate for both Adam optimizers
BETA1 = 0.5        # first Adam beta for both optimizers
python 复制代码
# 1. Load and preprocess the data
data = pd.read_csv("e:/python打卡/python60-days-challenge/heart.csv")
y = data['target'].to_numpy()
X = data.drop(columns='target').to_numpy()

# Keep only the heart-disease rows (target == 1); the GAN is trained on
# these rows alone.
X_patient = X[y == 1]

# Rescale every feature to [-1, 1], which matches the range of the Tanh
# that ends the generator.
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X_patient)

# Wrap the scaled array as a float32 tensor and serve it in shuffled
# mini-batches.
real_data_tensor = torch.from_numpy(X_scaled).to(torch.float32)
dataset = TensorDataset(real_data_tensor)
dataloader = DataLoader(dataset, shuffle=True, batch_size=BATCH_SIZE)
python 复制代码
# 2. Build the models
class Generator(nn.Module):
    """MLP generator that maps latent noise vectors to synthetic feature rows.

    Architecture: Linear(latent_dim, 16) -> ReLU -> Linear(16, 32) -> ReLU
    -> Linear(32, n_features) -> Tanh, so every output value lies in [-1, 1].

    Args:
        latent_dim: Size of the input noise vector. ``None`` (the default)
            falls back to the module-level ``LATENT_DIM`` constant, which is
            what the layer size was previously hard-wired to.
        n_features: Number of values in each generated row. Defaults to 13,
            the value that was previously hard-coded.
    """

    def __init__(self, latent_dim=None, n_features=13):
        super().__init__()
        if latent_dim is None:
            # Looked up at call time (not as a default argument) so that
            # ``Generator()`` keeps following the module constant.
            latent_dim = LATENT_DIM
        # Layer order is unchanged, so state_dict keys stay the same.
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, n_features),
            nn.Tanh(),
        )

    def forward(self, x):
        """Run noise ``x`` (last dimension ``latent_dim``) through the network.

        Returns:
            A tensor whose last dimension is ``n_features``, with values
            in [-1, 1].
        """
        return self.model(x)

class Discriminator(nn.Module):
    """MLP discriminator that scores feature rows as real or generated.

    Architecture: Linear(n_features, 32) -> LeakyReLU(0.2) -> Linear(32, 16)
    -> LeakyReLU(0.2) -> Linear(16, 1) -> Sigmoid, so the output is a single
    value in [0, 1] per row.

    Args:
        n_features: Number of values in each input row. Defaults to 13, the
            value that was previously hard-coded.
    """

    def __init__(self, n_features=13):
        super().__init__()
        # Layer order is unchanged, so state_dict keys stay the same.
        self.model = nn.Sequential(
            nn.Linear(n_features, 32),
            nn.LeakyReLU(0.2),
            nn.Linear(32, 16),
            nn.LeakyReLU(0.2),
            nn.Linear(16, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Score rows ``x`` (last dimension ``n_features``).

        Returns:
            A tensor whose last dimension is 1, with values in [0, 1].
        """
        return self.model(x)

# Instantiate both networks (generator first, then discriminator) and move
# them to the selected device.
generator, discriminator = Generator().to(device), Discriminator().to(device)

# Binary cross-entropy loss, plus one Adam optimizer per network sharing the
# same learning rate and betas.
criterion = nn.BCELoss()
adam_kwargs = {"lr": LR, "betas": (BETA1, 0.999)}
g_optimizer = optim.Adam(generator.parameters(), **adam_kwargs)
d_optimizer = optim.Adam(discriminator.parameters(), **adam_kwargs)
python 复制代码
# 3. Training loop
print("\n--- 开始训练 ---")
for epoch in range(EPOCHS):
    for (real_batch,) in dataloader:
        real_batch = real_batch.to(device)
        n_rows = real_batch.size(0)

        # Targets for this batch: 1 = real, 0 = generated. They are sized
        # from the batch itself rather than BATCH_SIZE.
        target_real = torch.ones(n_rows, 1).to(device)
        target_fake = torch.zeros(n_rows, 1).to(device)

        # ---- Discriminator step ----
        d_optimizer.zero_grad()
        d_loss_real = criterion(discriminator(real_batch), target_real)

        z = torch.randn(n_rows, LATENT_DIM).to(device)
        # detach() keeps this step from back-propagating into the generator.
        synthetic = generator(z).detach()
        d_loss_fake = criterion(discriminator(synthetic), target_fake)

        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        d_optimizer.step()

        # ---- Generator step ----
        # Fresh noise; the generator is rewarded when the discriminator
        # labels its output as real.
        g_optimizer.zero_grad()
        z = torch.randn(n_rows, LATENT_DIM).to(device)
        g_loss = criterion(discriminator(generator(z)), target_real)
        g_loss.backward()
        g_optimizer.step()

    # Log every 1000 epochs; the values shown are those of the last batch
    # of the epoch, not an epoch average.
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch [{epoch+1}/{EPOCHS}], Discriminator Loss: {d_loss.item():.4f}, Generator Loss: {g_loss.item():.4f}")

--- 开始训练 ---

Epoch [1000/10000], Discriminator Loss: 1.9460, Generator Loss: 0.4685

Epoch [2000/10000], Discriminator Loss: 1.1531, Generator Loss: 0.8496

Epoch [3000/10000], Discriminator Loss: 1.2105, Generator Loss: 0.9245

Epoch [4000/10000], Discriminator Loss: 1.3388, Generator Loss: 0.8858

Epoch [5000/10000], Discriminator Loss: 0.8793, Generator Loss: 1.0353

Epoch [6000/10000], Discriminator Loss: 0.8470, Generator Loss: 0.6334

Epoch [7000/10000], Discriminator Loss: 1.0139, Generator Loss: 1.3785

Epoch [8000/10000], Discriminator Loss: 1.2486, Generator Loss: 1.8814

Epoch [9000/10000], Discriminator Loss: 1.0721, Generator Loss: 1.3251

Epoch [10000/10000], Discriminator Loss: 0.7876, Generator Loss: 1.5542

python 复制代码
# 4. Generate new data
# Draw as many synthetic rows as there are real patient rows.
num_new_samples = len(X_patient)

generator.eval()
with torch.no_grad():
    noise = torch.randn(num_new_samples, LATENT_DIM).to(device)
    generated_data_scaled = generator(noise)

# Map the [-1, 1] generator output back to the original feature scale using
# the scaler fitted on the real patient rows.
generated_scaled_np = generated_data_scaled.cpu().numpy()
generated_data = scaler.inverse_transform(generated_scaled_np)
python 复制代码
# 5. Evaluate with a simple classifier, before and after augmentation
from sklearn.ensemble import RandomForestClassifier

# Hold out a test set. X_train / X_test / y_train / y_test were used below
# without ever being defined, which raised NameError.
# NOTE(review): test_size, random_state and stratify are reviewer-chosen
# values; adjust them if the original experiment used a different split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# NOTE(review): the GAN and its scaler were fitted on every target == 1 row,
# including rows that end up in X_test here, so the augmented score can be
# optimistic. Consider splitting before training the GAN.

# Baseline: random forest trained on the real training rows only.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
original_f1 = f1_score(y_test, clf.predict(X_test))

# Augmentation: label every generated row as 1 (heart disease), using the
# same dtype as the real labels so the stacked label vector is not silently
# promoted to float.
generated_y = np.ones(len(generated_data), dtype=y_train.dtype)
# Stack the real training rows with the generated rows.
X_augmented = np.vstack([X_train, generated_data])
y_augmented = np.hstack([y_train, generated_y])
python 复制代码
# Refit the same classifier on the augmented training set, then score it on
# the same held-out test set used for the baseline.
clf.fit(X_augmented, y_augmented)
augmented_pred = clf.predict(X_test)
augmented_f1 = f1_score(y_test, augmented_pred)

print(f"\n原始F1分数: {original_f1:.4f}")
print(f"使用GAN数据增强后F1分数: {augmented_f1:.4f}")

原始F1分数: 0.8400

使用GAN数据增强后F1分数: 0.8163

python 复制代码
# 6. Compare real and generated distributions for a few features
# A 2x2 grid gives four axes, so only the first four feature columns are
# plotted.
fig, axes = plt.subplots(2, 2, figsize=(12, 6))
for i, ax in enumerate(axes.flat):
    # Overlay both histograms with transparency so the overlap is visible.
    ax.hist(X_patient[:, i], bins=20, alpha=0.5, label='真实数据')
    ax.hist(generated_data[:, i], bins=20, alpha=0.5, label='生成数据')
    ax.set_title(f'特征 {i+1} 分布对比')
    ax.legend()
plt.tight_layout()
plt.show()

@浙大疏锦行

相关推荐
Python×CATIA工业智造2 小时前
Frida RPC高级应用:动态模拟执行Android so文件实战指南
开发语言·python·pycharm
千宇宙航2 小时前
闲庭信步使用SV搭建图像测试平台:第三十一课——基于神经网络的手写数字识别
图像处理·人工智能·深度学习·神经网络·计算机视觉·fpga开发
IT古董2 小时前
【第二章:机器学习与神经网络概述】04.回归算法理论与实践 -(4)模型评价与调整(Model Evaluation & Tuning)
神经网络·机器学习·回归
onceco2 小时前
领域LLM九讲——第5讲 为什么选择OpenManus而不是QwenAgent(附LLM免费api邀请码)
人工智能·python·深度学习·语言模型·自然语言处理·自动化
天水幼麟3 小时前
动手学深度学习-学习笔记(总)
笔记·深度学习·学习
狐凄3 小时前
Python实例题:基于 Python 的简单聊天机器人
开发语言·python
悦悦子a啊4 小时前
Python之--基本知识
开发语言·前端·python
天水幼麟5 小时前
动手学深度学习-学习笔记【二】(基础知识)
笔记·深度学习·学习
笑稀了的野生俊6 小时前
在服务器中下载 HuggingFace 模型:终极指南
linux·服务器·python·bash·gpu算力
Naiva6 小时前
【小技巧】Python+PyCharm IDE 配置解释器出错,环境配置不完整或不兼容。(小智AI、MCP、聚合数据、实时新闻查询、NBA赛事查询)
ide·python·pycharm