用于糖尿病视网膜病变图像生成的GAN

本项目利用 TensorFlow 构建了一个生成对抗网络 (GAN)，用于生成逼真的糖尿病视网膜病变 (DR) 图像。该 GAN 由基于 ResNet 的生成器和鉴别器组成。

资料来源

1. 数据集

结构：

/diabetic-retinopathy/

0/ -> 健康

1/ -> 轻度糖尿病视网膜病变

2/ -> 中度糖尿病视网膜病变

3/ -> 重度糖尿病视网膜病变

4/ -> 增生性糖尿病视网膜病变

4/ -> proliferative DR

分割：60% 用于训练，20% 用于验证，20% 用于测试

变量：train_paths、train_labels、val_paths、val_labels、test_paths、test_labels

图像尺寸调整为 128×128×3，并归一化为 [-1, 1]

预处理

python 复制代码

def load_image(image_path):
img = tf.io.read_file(image_path)
img = tf.image.decode_jpeg(img, channels=3)
img = tf.image.resize(img, IMG_SIZE)
img = (tf.cast(img, tf.float32) - 127.5) / 127.5
return img

train_dataset = tf.data.Dataset.from_tensor_slices(train_paths)
train_dataset = train_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

3. GAN 架构

生成器（ResNet 风格）

输入随机噪声向量 (NOISE_DIM = 100)

通过 Conv2DTranspose + BatchNorm + LeakyReLU 进行上采样

使用 tanh 激活函数输出 128×128×3 的图像

生成器 = make_generator_model()
鉴别器（ResNet 风格）

输入 128×128×3 的图像

通过残差块 + AveragePooling2D 进行下采样

使用 S 型函数输出单个值（真实/虚假概率）

鉴别器 = build_discriminator(img_shape=(128,128,3))

4. 损失与优化器

python 复制代码

cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
generator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)

生成器损失：它欺骗鉴别器的程度

鉴别器损失：它区分真假的能力

5. 训练步骤

python 复制代码

@tf.function
def train_step(images):
    noise = tf.random.normal([BATCH_SIZE, NOISE_DIM])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)
        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)
        real_loss = cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
        disc_loss = (real_loss + fake_loss) / 2

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

    return gen_loss, disc_loss

6. 训练循环

python 复制代码

seed = tf.random.normal([NUM_EXAMPLES, NOISE_DIM])

for epoch in range(1, EPOCHS + 1):
    for image_batch in train_dataset:
        gen_loss, disc_loss = train_step(image_batch)
    print(f"Epoch {epoch}, Generator Loss: {gen_loss:.4f}, Discriminator Loss: {disc_loss:.4f}")
    generate_and_save_images(generator, epoch, seed)

7. 可视化

生成的图像：每个 epoch 后

鉴别器置信度：绿色 = 真实，红色 = 虚假

python 复制代码

preds = discriminator(fake_images, training=False).numpy()
pred_value = preds[i][0]
color = 'green' if pred_value > 0.5 else 'red'

8. 测试/评估

GAN 不像分类器那样具有"准确性"。

检查生成图像的视觉质量。

生成图像的鉴别器置信度 (0-1)。

可选计算 FID（Fréchet 初始距离）进行定量评估。

注意：您可以根据 GPU 性能调整 IMG_SIZE、BATCH_SIZE 和 NOISE_DIM。

所需库

python 复制代码

import os ,glob
import random
import warnings


import numpy as np # linear algebra
import pandas as pd # data processing
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import Image

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from sklearn.utils import shuffle
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image


print("Library Called ")

拆分数据并创建类

python 复制代码

# Path of Data File
path='/input/diabetic-retinopathy'
if not os.path.exists(path):
    print("Dataset folder not found at:", path)
    print("Download and extract the dataset to that path.")
    raise SystemExit(1)

#discover classes and image paths
class_dirs = sorted([d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))])
if not class_dirs:
    print("No class subfolders found. Expect: data/diabetic_retinopathy/<class>/*.jpg")
    raise SystemExit(1)

print("Found classes:", class_dirs)

# List after split (images , Labels)
image_paths = [] 
labels = []
for idx, cls in enumerate(class_dirs):
    files = glob.glob(os.path.join(path, cls, "*"))
    files = [f for f in files if os.path.splitext(f)[1].lower() in (".jpg", ".jpeg", ".png")]
    for f in files:
        image_paths.append(f)
        labels.append(idx)

print("Total images:", len(image_paths))
if len(image_paths) == 0:
    print("No images found - check paths.")
    raise SystemExit(1)

#Combination and Random distribution
combined=list(zip(image_paths,labels))
random.seed(42)
random.shuffle(combined)
image_paths,labels = zip(*combined)
image_paths=list(image_paths)
labels=list(labels)
n =len(image_paths)

# ٍSplitting Fractions
train_frac = 0.60
val_frac = 0.20 
n_train = int(n* train_frac)
n_val = int(n*val_frac)
n_test = n - n_train - n_val

train_paths = image_paths[:n_train]
train_labels =labels[:n_train]
val_paths = image_paths[n_train:n_train+n_val]
val_labels = labels[n_train:n_train+n_val]
test_paths = image_paths[n_train+n_val:]
test_labels = labels[n_train+n_val:]
print("Split sizes -> train:", len(train_paths), "val:", len(val_paths), "test:", len(test_paths))

dist = pd.DataFrame({
    "train": pd.Series(train_labels).value_counts().sort_index(),
    "val": pd.Series(val_labels).value_counts().sort_index(),
    "test": pd.Series(test_labels).value_counts().sort_index()
}).fillna(0).astype(int)
print("\nClass distribution (by index):\n", dist)

python 复制代码

Found classes: ['Healthy', 'Mild DR', 'Moderate DR', 'Proliferate DR', 'Severe DR']
Total images: 2750
Split sizes -> train: 1650 val: 550 test: 550

Class distribution (by index):
    train  val  test
0    594  200   206
1    212   77    81
2    547  183   170
3    181   55    54
4    116   35    39

GAN模型参数

python 复制代码

# Parameters
BUFFER_SIZE = 60000
BATCH_SIZE = 128
NOISE_DIM = 100
# Hyperparameters
LATENT_DIM = 100
EPOCHS = 80
NUM_EXAMPLES = 16
IMG_SIZE = (128, 128)

将它们转换为 TensorFlow 数据集

python 复制代码

def load_image(image_path):
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE)
    img = (tf.cast(img, tf.float32) - 127.5) / 127.5  # Normalize [-1,1]
    return img

建立管道

python 复制代码

# Build tf.data pipeline correctly
train_dataset = tf.data.Dataset.from_tensor_slices(train_paths)
train_dataset = train_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = tf.data.Dataset.from_tensor_slices(val_paths)
val_dataset = val_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

test_dataset = tf.data.Dataset.from_tensor_slices(test_paths)
test_dataset = test_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE)

python 复制代码

for images in train_dataset.take(1):  # take one batch
    plt.figure(figsize=(10, 10))
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow((images[i] * 127.5 + 127.5).numpy().astype("uint8"))
        plt.axis("off")

构建生成器

我们有两种选择来构建生成器（基于 ResNet 或从头开始制作生成器）

概念	DCGAN 生成器（您的代码）	基于 ResNet 的生成器（我的）
架构类型	顺序堆叠的转置卷积层	带有跳跃连接的残差块
数据流	简单的向上卷积堆栈	每个块学习残差并加回到输入
跳跃连接	❌ 无跳跃连接（普通结构）	✅ 使用 `x + F(x)` 来保持梯度流
梯度流	训练期间可能梯度消失或爆炸（对复杂数据稳定性较差）	稳定得多（梯度可以通过跳跃连接"捷径"传播）
图像质量	在简单数据集上效果良好	对于高分辨率或复杂图像效果更好
训练稳定性	中等；可能需要调整学习率	更高；通常收敛更平滑
模型灵活性	更易于实现；参数较少	更强大但稍显复杂
适用场景	用于小型、低分辨率数据集训练时	用于医学、真实感或高分辨率数据集训练时

首先构建块

python 复制代码

def residual_block(x, filters):
    shortcut = x
    x = layers.Conv2D(filters, kernel_size=3, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(filters, kernel_size=3, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.add([shortcut, x])
    x = layers.ReLU()(x)
    return x

构建生成器（接受噪声参数）

python 复制代码

def build_generator(noise_dim):
    inputs = layers.Input(shape=(noise_dim,))
    x = layers.Dense(8 * 8 * 256, use_bias=False)(inputs)
    x = layers.Reshape((8, 8, 256))(x)
    x = layers.ReLU()(x)
    
    # Residual blocks
    for _ in range(3):
        x = residual_block(x, 256)
    
    # Upsampling layers
    x = layers.Conv2DTranspose(128, 4, strides=2, padding='same')(x)
    x = layers.ReLU()(x)
    x = layers.Conv2DTranspose(64, 4, strides=2, padding='same')(x)
    x = layers.ReLU()(x)
    x = layers.Conv2DTranspose(32, 4, strides=2, padding='same')(x)
    x = layers.ReLU()(x)
    
    # Output
    outputs = layers.Conv2D(3, (7,7), padding='same', activation='tanh')(x)
    
    return Model(inputs, outputs, name="generator")

python 复制代码

generator = build_generator(NOISE_DIM)
generator.summary()

🧱构建判别器

python 复制代码

def residual_block(x, filters, downsample=False):
    shortcut = x

    # First convolution
    x = layers.Conv2D(filters, (3, 3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU(alpha=0.2)(x)

    # Second convolution
    x = layers.Conv2D(filters, (3, 3), padding='same')(x)
    x = layers.BatchNormalization()(x)

    # Optionally downsample
    if downsample:
        x = layers.AveragePooling2D(pool_size=(2, 2))(x)
        shortcut = layers.Conv2D(filters, (1, 1), padding='same')(shortcut)
        shortcut = layers.AveragePooling2D(pool_size=(2, 2))(shortcut)

    # Add skip connection
    x = layers.add([x, shortcut])
    x = layers.LeakyReLU(alpha=0.2)(x)
    return x

def build_discriminator(img_shape=(128, 128, 3)):
    inp = layers.Input(shape=img_shape)

    x = layers.Conv2D(64, (7,7), strides=2, padding='same')(inp)
    x = layers.LeakyReLU(alpha=0.2)(x)

    # Residual blocks with downsampling
    for filters in [64, 128, 256]:
        x = residual_block(x, filters, downsample=True)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inp, x, name="Discriminator")
    return model

discriminator = build_discriminator()
discriminator.summary()

定义损失函数和优化器

python 复制代码

cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

generator_optimizer = tf.keras.optimizers.Adam(1e-4) # => for generator
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4) # => for Discriminator

显示训练数据的随机样本

python 复制代码

train_dataset = tf.data.Dataset.from_tensor_slices(train_paths)
train_dataset = train_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
# Take one batch from dataset
for image_batch in train_dataset.take(1):
    sample_images = image_batch[:16]  # first 16 images
    break

# Denormalize from [-1,1] → [0,1] for display
sample_images = (sample_images + 1) / 2.0  

# Plot them
plt.figure(figsize=(8, 8))
for i in range(16):
    plt.subplot(4, 4, i + 1)
    plt.imshow(sample_images[i])
    plt.axis("off")
plt.suptitle("Random Samples from Training Data", fontsize=14)
plt.show()

生成器可视化

解决问题

python 复制代码

noise = tf.random.normal([1, 100])
generated_image = generator(noise, training=False)
print(generated_image.shape)

(1, 64, 64, 3)

python 复制代码

layers.Conv2DTranspose(3, (5,5), strides=(2,2), padding='same', activation='tanh')

这个版本输出 (128, 128, 3) --- 完美适合你的判别器

python 复制代码

def make_generator_model():
    model = tf.keras.Sequential([
        layers.Dense(8*8*512, use_bias=False, input_shape=(100,)),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((8, 8, 512)),

        layers.Conv2DTranspose(256, (5,5), strides=(2,2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),  # 16x16

        layers.Conv2DTranspose(128, (5,5), strides=(2,2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),  # 32x32

        layers.Conv2DTranspose(64, (5,5), strides=(2,2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),  # 64x64

        # 👇 ADD ONE MORE UPSAMPLING TO REACH 128x128
        layers.Conv2DTranspose(32, (5,5), strides=(2,2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),  # 128x128

        layers.Conv2DTranspose(3, (5,5), strides=(1,1), padding='same', use_bias=False, activation='tanh')
    ])
    return model

python 复制代码

# check again the output 
generator = make_generator_model()
noise = tf.random.normal([1, 100])
print(generator(noise).shape)

构建 Gans 模型

python 复制代码

# Loss and optimizers
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

generator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)

🧠 定义训练步骤

python 复制代码

@tf.function
def train_step(images):
    noise = tf.random.normal([BATCH_SIZE, NOISE_DIM])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        # Losses
        gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)
        real_loss = cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
        disc_loss = (real_loss + fake_loss) / 2

    # Compute gradients
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    # Apply updates
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

    return gen_loss, disc_loss

定义函数以可视化结果

python 复制代码

def generate_and_save_images(model, epoch, test_input):
    predictions = model(test_input, training=False)
    predictions = (predictions + 1) / 2.0  # back to [0,1]

    fig = plt.figure(figsize=(4,4))
    for i in range(predictions.shape[0]):
        plt.subplot(4, 4, i+1)
        plt.imshow(predictions[i])
        plt.axis('off')
    plt.suptitle(f"Epoch {epoch}")
    plt.show()

🧪 训练循环

python 复制代码

seed = tf.random.normal([NUM_EXAMPLES, NOISE_DIM])

from tqdm import tqdm # => to display epochs when training

for epoch in range(1, EPOCHS + 1):
    for image_batch in tqdm(train_dataset, desc=f"Epoch {epoch}/{EPOCHS}"):
        gen_loss, disc_loss = train_step(image_batch)

    print(f"Epoch {epoch}, Generator Loss: {gen_loss:.4f}, Discriminator Loss: {disc_loss:.4f}")
    generate_and_save_images(generator, epoch, seed)

🎯 为什么GAN中没有"准确率"

在GAN中，你有两个模型：

- 生成器（G）：尝试创建看起来真实的假图像。
- 判别器（D）：尝试区分真实与虚假。

它们是对抗训练的：

- 当生成器成功欺骗判别器时，它会得到提升。
- 当判别器正确识别假图像时，它会得到提升。

因此，你追踪的是损失，而不是准确率：

- gen_loss → 生成器欺骗判别器的能力
- disc_loss → 判别器区分真伪的能力如果两者随时间稳定（而不是发散），说明训练在正常进行。

python 复制代码

# Generate noise
noise = tf.random.normal([16, NOISE_DIM])

# Generate fake images
generated_images = generator(noise, training=False)

# Plot them
plt.figure(figsize=(4, 4))
for i in range(generated_images.shape[0]):
    plt.subplot(4, 4, i + 1)
    plt.imshow((generated_images[i] * 127.5 + 127.5).numpy().astype("uint8"))
    plt.axis("off")
plt.show()

python 复制代码

# Generate fake images
noise = tf.random.normal([100, NOISE_DIM])
fake_images = generator(noise, training=False)

# Discriminator output for generated images
predictions = discriminator(fake_images, training=False)

# Check how "real" the discriminator thinks they are
mean_pred = tf.reduce_mean(predictions).numpy()
print(f"Average discriminator confidence (real=1, fake=0): {mean_pred:.3f}")

保存模型

python 复制代码

generator.save('generator_model.h5')
discriminator.save('discriminator_model.h5')

调用 CNN 模型

python 复制代码

from tensorflow.keras.models import load_model

classifier = load_model('/input/cnn/keras/default/1/improved_dr_cnn.h5')

下一步分析与解读 🔍

python 复制代码

N_SAMPLES = 500  # increase if you want
noise = tf.random.normal([N_SAMPLES, NOISE_DIM])
generated_images = generator(noise, training=False)

# recove the image to the real size 
generated_images = (generated_images + 1) / 2.0

python 复制代码

preds = classifier.predict(generated_images)
pred_labels = np.argmax(preds, axis=1)

python 复制代码

class_names = ['Healthy', 'Mild DR', 'Moderate DR','Proliferate DR', 'Severe DR']

plt.figure(figsize=(12, 12))
for i in range(16):
    plt.subplot(4, 4, i + 1)
    plt.imshow(generated_images[i])
    plt.title(class_names[pred_labels[i]])
    plt.axis("off")

plt.suptitle("Predicted Classes for GAN Generated Images", fontsize=16)
plt.show()

上传 GAN 模型，训练轮次 = 100

python 复制代码

generator = load_model("/input/gans-model/keras/epoch-100/1/generator_model.h5", compile=False)
discriminator = load_model("/input/gans-model/keras/epoch-100/1/discriminator_model.h5", compile=False)

python 复制代码

NOISE_DIM = 100
NUM_IMAGES = 500

noise = tf.random.normal([NUM_IMAGES, NOISE_DIM])
generated_images = generator(noise, training=False)
generated_images = (generated_images + 1) / 2.0  # normalize to [0, 1]

print(f"✅ Generated {NUM_IMAGES} fake images from GAN")

python 复制代码

# Resize if classifier expects a different size (224x224)
IMG_SIZE = classifier.input_shape[1]
generated_images_resized = tf.image.resize(generated_images, (IMG_SIZE, IMG_SIZE))

# Predict classes
predictions = classifier.predict(generated_images_resized)
pred_classes = np.argmax(predictions, axis=1)

python 复制代码

# Class labels (edit if your CNN uses other labels)
class_names = ["Healthy", "Mild DR", "Moderate DR", "Proliferate DR","Severe DR"]

python 复制代码

 #Show sample images per class
n = 5  # show 5 samples per class
plt.figure(figsize=(15, 8))
for i, class_id in enumerate(unique):
    idxs = np.where(pred_classes == class_id)[0][:n]
    for j, idx in enumerate(idxs):
        plt.subplot(len(unique), n, i * n + j + 1)
        plt.imshow(generated_images[idx])
        plt.axis("off")
        if j == 0:
            plt.ylabel(class_names[class_id], fontsize=12)
plt.suptitle("Example Generated Images by Predicted Class", fontsize=16)
plt.show()

python 复制代码

IMG_SIZE = classifier.input_shape[1]  

def preprocess_path(p):
    img = tf.io.read_file(p)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0   
    return img

# Build val dataset if you have val_paths & val_labels
val_imgs = np.stack([preprocess_path(p).numpy() for p in val_paths])
preds_val = classifier.predict(val_imgs, batch_size=32)
pred_labels_val = np.argmax(preds_val, axis=1)

# report per-class accuracy
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(val_labels, pred_labels_val, target_names=class_names))
print("Confusion matrix:\n", confusion_matrix(val_labels, pred_labels_val))

用于糖尿病视网膜病变图像生成的GAN