Table of Contents
- Project Goals
- Data Preparation: From Raw Data to Trainable Format
- Neural Network Implementation: From Scratch
- Trainer Implementation
- Training the Model
- Model Evaluation
- Hyperparameter Tuning
- Saving and Loading the Model
- Practical Extensions
  - Learning-Rate Scheduling
  - Early Stopping
- Project Recap
- Complete Code Example
- Summary
In the previous articles, we covered the fundamentals of neural networks: activation functions, loss functions, optimizers, and the mathematical derivation of forward and backward propagation. Now let's bring all of that together, implement a complete neural network from scratch, and apply it to a real classification task.
Project Goals
We will implement a neural network to recognize handwritten digits (a simplified version of the MNIST dataset). This project will help us:
- Understand the complete neural network training pipeline
- Master the full process from data preprocessing to model deployment
- Learn to debug and optimize neural networks
- Build a foundation for learning more complex deep learning frameworks later
*(Figure: the training workflow — Project goals → Data preparation → Network design → Forward pass → Loss computation → Backward pass → Parameter update → Model evaluation → "Converged?": yes → Done; no → back to the forward pass.)*
Data Preparation: From Raw Data to Trainable Format
Before implementing the network, we need to prepare the data. To keep the demo simple, we will use the digits dataset that ships with sklearn.
Loading the Data
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the digits dataset
digits = load_digits()
print("Dataset info:")
print(f"  Number of samples: {digits.data.shape[0]}")
print(f"  Number of features: {digits.data.shape[1]}")  # 8x8 = 64 pixels
print(f"  Number of classes: {len(digits.target_names)}")

# Visualize a few samples
fig, axes = plt.subplots(2, 5, figsize=(12, 5))
for i, ax in enumerate(axes.flat):
    ax.imshow(digits.images[i], cmap='gray')
    ax.set_title(f'Digit: {digits.target[i]}')
    ax.axis('off')
plt.tight_layout()
plt.show()
```
Data Preprocessing
```python
def prepare_data(test_size=0.2, random_state=42):
    """Prepare training and test data."""
    # Load the data
    digits = load_digits()
    X = digits.data
    y = digits.target

    # Normalize pixel values to [0, 1] (raw values range from 0 to 16)
    X = X / 16.0

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # One-hot encode the labels
    encoder = OneHotEncoder(sparse_output=False)
    y_train_onehot = encoder.fit_transform(y_train.reshape(-1, 1)).T
    y_test_onehot = encoder.transform(y_test.reshape(-1, 1)).T

    # Transpose the features to shape (n_features, n_samples)
    X_train = X_train.T
    X_test = X_test.T

    print(f"Training set: X_train={X_train.shape}, y_train={y_train_onehot.shape}")
    print(f"Test set: X_test={X_test.shape}, y_test={y_test_onehot.shape}")
    return X_train, X_test, y_train_onehot, y_test_onehot, y_train, y_test

# Prepare the data
X_train, X_test, y_train, y_test, y_train_labels, y_test_labels = prepare_data()
```
Neural Network Implementation: From Scratch
Defining the Neural Network Class
```python
class NeuralNetwork:
    """A neural network implemented by hand."""

    def __init__(self, layer_sizes, activation='relu'):
        """
        Initialize the network.

        layer_sizes: list, e.g. [64, 128, 64, 10] means
            - input layer with 64 neurons
            - hidden layer with 128 neurons
            - hidden layer with 64 neurons
            - output layer with 10 neurons
        activation: activation function ('relu', 'sigmoid', 'tanh')
        """
        self.layer_sizes = layer_sizes
        self.num_layers = len(layer_sizes)
        self.activation = activation

        # Initialize weights and biases
        self.weights = []
        self.biases = []
        for i in range(1, self.num_layers):
            n_inputs = layer_sizes[i-1]
            n_outputs = layer_sizes[i]
            # He initialization (suited to ReLU)
            if activation == 'relu':
                scale = np.sqrt(2.0 / n_inputs)
            else:
                scale = np.sqrt(1.0 / n_inputs)  # Xavier initialization
            W = np.random.randn(n_outputs, n_inputs) * scale
            b = np.zeros((n_outputs, 1))
            self.weights.append(W)
            self.biases.append(b)

    def _activation(self, z):
        """Activation function."""
        if self.activation == 'relu':
            return np.maximum(0, z)
        elif self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-z))
        elif self.activation == 'tanh':
            return np.tanh(z)
        else:
            raise ValueError(f"Unknown activation function: {self.activation}")

    def _activation_derivative(self, z):
        """Derivative of the activation function."""
        if self.activation == 'relu':
            return (z > 0).astype(float)
        elif self.activation == 'sigmoid':
            s = 1 / (1 + np.exp(-z))
            return s * (1 - s)
        elif self.activation == 'tanh':
            return 1 - np.tanh(z) ** 2
        else:
            raise ValueError(f"Unknown activation function: {self.activation}")

    def _softmax(self, z):
        """Softmax activation for the output layer.

        Subtracting the column-wise max keeps exp() numerically stable."""
        exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    def forward(self, X):
        """
        Forward propagation.

        X: input, shape (n_features, n_samples)
        Returns: output and a cache of intermediate values
        """
        a = X
        cache = {'a0': a}
        # Hidden layers
        for i in range(self.num_layers - 2):
            z = np.dot(self.weights[i], a) + self.biases[i]
            a = self._activation(z)
            cache[f'z{i+1}'] = z
            cache[f'a{i+1}'] = a
        # Output layer
        z = np.dot(self.weights[-1], a) + self.biases[-1]
        a = self._softmax(z)
        cache[f'z{self.num_layers-1}'] = z
        cache[f'a{self.num_layers-1}'] = a
        return a, cache

    def backward(self, X, y, cache):
        """
        Backward propagation.

        X: input
        y: true labels (one-hot encoded)
        cache: cache from the forward pass
        Returns: gradients
        """
        m = X.shape[1]  # number of samples
        gradients = {}

        # Output-layer gradient: for softmax combined with cross-entropy,
        # the gradient w.r.t. the pre-activation simplifies to (a - y)
        a_output = cache[f'a{self.num_layers-1}']
        dz = a_output - y
        gradients[f'dW{self.num_layers-1}'] = (1/m) * np.dot(dz, cache[f'a{self.num_layers-2}'].T)
        gradients[f'db{self.num_layers-1}'] = (1/m) * np.sum(dz, axis=1, keepdims=True)

        # Backpropagate through the hidden layers
        for l in range(self.num_layers - 2, 0, -1):
            da = np.dot(self.weights[l].T, dz)
            z = cache[f'z{l}']
            dz = da * self._activation_derivative(z)
            gradients[f'dW{l}'] = (1/m) * np.dot(dz, cache[f'a{l-1}'].T)
            gradients[f'db{l}'] = (1/m) * np.sum(dz, axis=1, keepdims=True)
        return gradients

    def compute_loss(self, y_true, y_pred):
        """
        Compute the cross-entropy loss.

        y_true: true labels (one-hot encoded)
        y_pred: predicted probabilities
        """
        epsilon = 1e-15  # clip to avoid log(0)
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(np.sum(y_true * np.log(y_pred), axis=0))

    def predict(self, X):
        """Predict class labels."""
        output, _ = self.forward(X)
        return np.argmax(output, axis=0)

    def predict_proba(self, X):
        """Predict class probabilities."""
        output, _ = self.forward(X)
        return output
```
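A note on the `dz = a_output - y` step in `backward`: this is the standard simplification for softmax combined with cross-entropy. A quick sketch of the derivation (our notation, not from the original code): for a single sample, the softmax output is $a_k = e^{z_k} / \sum_j e^{z_j}$ and the cross-entropy loss is $L = -\sum_k y_k \log a_k$. Using $\partial a_i / \partial z_k = a_i(\delta_{ik} - a_k)$,

$$
\frac{\partial L}{\partial z_k} = \sum_i \left(-\frac{y_i}{a_i}\right) a_i (\delta_{ik} - a_k) = -y_k + a_k \sum_i y_i = a_k - y_k,
$$

since the one-hot labels sum to 1. Stacked across classes and samples, this is exactly `a_output - y`.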
Testing the Network Initialization
```python
# Create the network
layer_sizes = [64, 128, 64, 10]  # input 64, hidden 128, hidden 64, output 10
nn = NeuralNetwork(layer_sizes, activation='relu')
print("Network architecture:")
for i, (W, b) in enumerate(zip(nn.weights, nn.biases)):
    print(f"  Layer {i+1}: W{W.shape}, b{b.shape}")

# Test the forward pass on the first 5 samples
output, cache = nn.forward(X_train[:, :5])
print("\nForward pass test:")
print(f"  Input shape: {X_train[:, :5].shape}")
print(f"  Output shape: {output.shape}")
print(f"  Column sums of output (should be 1.0): {np.sum(output, axis=0)}")
print(f"  Predicted classes: {nn.predict(X_train[:, :5])}")
print(f"  True classes: {y_train_labels[:5]}")
```
Trainer Implementation
To make training convenient, we implement a trainer class.
```python
class Trainer:
    """Neural network trainer."""

    def __init__(self, model, learning_rate=0.01, batch_size=32):
        """
        Initialize the trainer.

        model: the neural network model
        learning_rate: learning rate
        batch_size: mini-batch size
        """
        self.model = model
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'train_acc': [],
            'val_acc': []
        }

    def _update_parameters(self, gradients):
        """Apply one gradient-descent update to the parameters."""
        for i in range(len(self.model.weights)):
            self.model.weights[i] -= self.learning_rate * gradients[f'dW{i+1}']
            self.model.biases[i] -= self.learning_rate * gradients[f'db{i+1}']

    def train_epoch(self, X_train, y_train):
        """Train for one epoch."""
        n_samples = X_train.shape[1]
        n_batches = n_samples // self.batch_size
        epoch_loss = 0
        correct = 0

        # Shuffle the data
        indices = np.random.permutation(n_samples)
        X_train = X_train[:, indices]
        y_train = y_train[:, indices]

        for batch in range(n_batches):
            start = batch * self.batch_size
            end = start + self.batch_size
            X_batch = X_train[:, start:end]
            y_batch = y_train[:, start:end]

            # Forward pass
            output, cache = self.model.forward(X_batch)

            # Compute the loss
            batch_loss = self.model.compute_loss(y_batch, output)
            epoch_loss += batch_loss

            # Compute the accuracy
            pred = np.argmax(output, axis=0)
            true = np.argmax(y_batch, axis=0)
            correct += np.sum(pred == true)

            # Backward pass
            gradients = self.model.backward(X_batch, y_batch, cache)

            # Update the parameters
            self._update_parameters(gradients)

        # Average loss and accuracy over the epoch
        avg_loss = epoch_loss / n_batches
        accuracy = correct / n_samples
        return avg_loss, accuracy

    def evaluate(self, X, y):
        """Evaluate the model."""
        output, _ = self.model.forward(X)
        loss = self.model.compute_loss(y, output)
        pred = np.argmax(output, axis=0)
        true = np.argmax(y, axis=0)
        accuracy = np.mean(pred == true)
        return loss, accuracy

    def train(self, X_train, y_train, X_val, y_val, n_epochs,
              verbose=True, plot_results=True):
        """
        Train the model.

        n_epochs: number of training epochs
        verbose: whether to print progress
        plot_results: whether to plot the training curves
        """
        print("Starting training...")
        print(f"  Training samples: {X_train.shape[1]}")
        print(f"  Validation samples: {X_val.shape[1]}")
        print(f"  Batch size: {self.batch_size}")
        print(f"  Batches per epoch: {X_train.shape[1] // self.batch_size}")
        print()

        for epoch in range(n_epochs):
            # Train for one epoch
            train_loss, train_acc = self.train_epoch(X_train, y_train)

            # Validate
            val_loss, val_acc = self.evaluate(X_val, y_val)

            # Record history
            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['train_acc'].append(train_acc)
            self.history['val_acc'].append(val_acc)

            # Print progress
            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{n_epochs}")
                print(f"  Train - loss: {train_loss:.4f}, accuracy: {train_acc:.4f}")
                print(f"  Val   - loss: {val_loss:.4f}, accuracy: {val_acc:.4f}")
                print()

        print("Training finished!")

        # Plot the training curves
        if plot_results:
            self.plot_training_history()
        return self.history

    def plot_training_history(self):
        """Plot the training history."""
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

        # Loss curves
        ax1.plot(self.history['train_loss'], 'b-', linewidth=2, label='Train loss')
        ax1.plot(self.history['val_loss'], 'r-', linewidth=2, label='Validation loss')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.set_title('Training and Validation Loss')
        ax1.legend()
        ax1.grid(True)

        # Accuracy curves
        ax2.plot(self.history['train_acc'], 'b-', linewidth=2, label='Train accuracy')
        ax2.plot(self.history['val_acc'], 'r-', linewidth=2, label='Validation accuracy')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('Accuracy')
        ax2.set_title('Training and Validation Accuracy')
        ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        plt.show()
```
Training the Model
Now let's train our neural network.
```python
# Create the network and the trainer
layer_sizes = [64, 128, 64, 10]
nn = NeuralNetwork(layer_sizes, activation='relu')
trainer = Trainer(nn, learning_rate=0.01, batch_size=32)

# Train the model (here the test set doubles as the validation set)
history = trainer.train(
    X_train, y_train, X_test, y_test,
    n_epochs=100,
    verbose=True
)
```
Model Evaluation
After training, we need to evaluate the model's performance in detail.
Accuracy Evaluation
```python
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

def evaluate_model(model, X_test, y_test, y_test_labels):
    """Evaluate the model in detail."""
    # Predict
    y_pred = model.predict(X_test)

    # Accuracy
    accuracy = np.mean(y_pred == y_test_labels)
    print(f"Test set accuracy: {accuracy:.4f}")

    # Confusion matrix
    cm = confusion_matrix(y_test_labels, y_pred)

    # Plot the confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=range(10), yticklabels=range(10))
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.title('Confusion Matrix')
    plt.show()

    # Classification report
    print("\nClassification report:")
    print(classification_report(y_test_labels, y_pred))
    return accuracy, cm

# Evaluate the model
accuracy, cm = evaluate_model(nn, X_test, y_test, y_test_labels)
```
Visualizing Predictions
```python
def visualize_predictions(model, X_test, y_test_labels, n_samples=10):
    """Visualize prediction results."""
    # Pick random samples
    indices = np.random.choice(X_test.shape[1], n_samples, replace=False)
    X_samples = X_test[:, indices]
    y_true = y_test_labels[indices]
    y_pred = model.predict(X_samples)
    y_proba = model.predict_proba(X_samples)

    # Plot
    fig, axes = plt.subplots(2, 5, figsize=(15, 6))
    axes = axes.flatten()
    for i, idx in enumerate(indices):
        ax = axes[i]
        # Show the image
        img = X_test[:, idx].reshape(8, 8)
        ax.imshow(img, cmap='gray')
        # Show prediction info
        pred = y_pred[i]
        true = y_true[i]
        proba = y_proba[pred, i]
        correct = pred == true
        title = f'True: {true}\nPred: {pred} ({proba:.2f})'
        color = 'green' if correct else 'red'
        ax.set_title(title, color=color, fontweight='bold')
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# Visualize predictions
visualize_predictions(nn, X_test, y_test_labels, n_samples=10)
```
Error Analysis
```python
def analyze_errors(model, X_test, y_test, y_test_labels):
    """Analyze misclassified samples."""
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)

    # Find the misclassified samples
    errors = y_pred != y_test_labels
    error_indices = np.where(errors)[0]
    print(f"Total errors: {len(error_indices)}")
    print(f"Error rate: {len(error_indices) / len(y_test_labels):.4f}")
    if len(error_indices) == 0:
        return

    # Per-digit error breakdown
    print("\nErrors per digit:")
    for digit in range(10):
        digit_indices = np.where(y_test_labels == digit)[0]
        digit_errors = np.sum(y_pred[digit_indices] != digit)
        if len(digit_indices) > 0:
            error_rate = digit_errors / len(digit_indices)
            print(f"  Digit {digit}: {digit_errors}/{len(digit_indices)} ({error_rate:.4f})")

    # Visualize some misclassified samples
    print("\nMisclassified samples:")
    n_show = min(10, len(error_indices))
    fig, axes = plt.subplots(2, 5, figsize=(15, 6))
    axes = axes.flatten()
    for i in range(n_show):
        idx = error_indices[i]
        ax = axes[i]
        img = X_test[:, idx].reshape(8, 8)
        ax.imshow(img, cmap='gray')
        true_label = y_test_labels[idx]
        pred_label = y_pred[idx]
        # List every class given more than 10% probability
        proba_str = ", ".join([f"{d}:{y_pred_proba[d, idx]:.2f}"
                               for d in range(10) if y_pred_proba[d, idx] > 0.1])
        ax.set_title(f'True:{true_label} Pred:{pred_label}\n{proba_str}',
                     fontsize=8)
        ax.axis('off')
    # Hide any unused subplots
    for j in range(n_show, len(axes)):
        axes[j].axis('off')
    plt.tight_layout()
    plt.show()

# Analyze the errors
analyze_errors(nn, X_test, y_test, y_test_labels)
```
Hyperparameter Tuning
Let's try different hyperparameter combinations to find the best configuration. (To keep the demo short, we reuse the test set as the validation set here; strictly speaking, you should carve a separate validation split out of the training data so the test set stays untouched during tuning.)
```python
def hyperparameter_tuning(X_train, y_train, X_val, y_val,
                          y_train_labels, y_val_labels):
    """Hyperparameter tuning."""
    # Define the configurations to try
    configs = [
        {'layers': [64, 64, 10], 'lr': 0.01, 'activation': 'relu'},
        {'layers': [64, 128, 64, 10], 'lr': 0.01, 'activation': 'relu'},
        {'layers': [64, 128, 128, 10], 'lr': 0.01, 'activation': 'relu'},
        {'layers': [64, 128, 64, 10], 'lr': 0.001, 'activation': 'relu'},
        {'layers': [64, 128, 64, 10], 'lr': 0.01, 'activation': 'sigmoid'},
        {'layers': [64, 128, 64, 10], 'lr': 0.01, 'activation': 'tanh'},
    ]

    results = []
    for i, config in enumerate(configs):
        print(f"\nConfiguration {i+1}/{len(configs)}")
        print(f"  Architecture: {config['layers']}")
        print(f"  Learning rate: {config['lr']}")
        print(f"  Activation: {config['activation']}")

        # Create the network
        nn = NeuralNetwork(config['layers'], activation=config['activation'])
        trainer = Trainer(nn, learning_rate=config['lr'], batch_size=32)

        # Train
        history = trainer.train(X_train, y_train, X_val, y_val,
                                n_epochs=50, verbose=False, plot_results=False)

        # Evaluate
        y_pred = nn.predict(X_val)
        accuracy = np.mean(y_pred == y_val_labels)
        results.append({
            'config': config,
            'accuracy': accuracy,
            'train_acc': history['train_acc'][-1],
            'val_acc': history['val_acc'][-1]
        })
        print(f"  Final validation accuracy: {accuracy:.4f}")

    # Sort and display the results
    results = sorted(results, key=lambda x: x['accuracy'], reverse=True)
    print("\n" + "="*60)
    print("Hyperparameter tuning results (sorted by validation accuracy):")
    print("="*60)
    for i, result in enumerate(results):
        config = result['config']
        print(f"\nRank {i+1}:")
        print(f"  Architecture: {config['layers']}")
        print(f"  Learning rate: {config['lr']}")
        print(f"  Activation: {config['activation']}")
        print(f"  Validation accuracy: {result['accuracy']:.4f}")
    return results

# Run the hyperparameter tuning (this may take a while)
print("Starting hyperparameter tuning...")
results = hyperparameter_tuning(X_train, y_train, X_test, y_test,
                                y_train_labels, y_test_labels)
```
Saving and Loading the Model
Once the model is trained, we want to save it for later use.
```python
import pickle

def save_model(model, filename):
    """Save the model to disk."""
    model_data = {
        'layer_sizes': model.layer_sizes,
        'activation': model.activation,
        'weights': model.weights,
        'biases': model.biases
    }
    with open(filename, 'wb') as f:
        pickle.dump(model_data, f)
    print(f"Model saved to {filename}")

def load_model(filename):
    """Load a model from disk."""
    with open(filename, 'rb') as f:
        model_data = pickle.load(f)
    # Rebuild the model
    model = NeuralNetwork(model_data['layer_sizes'],
                          activation=model_data['activation'])
    # Restore the weights and biases
    model.weights = model_data['weights']
    model.biases = model_data['biases']
    print(f"Model loaded from {filename}")
    return model

# Save the model
save_model(nn, 'digits_classifier.pkl')

# Test loading it back
loaded_nn = load_model('digits_classifier.pkl')

# Verify the loaded model produces identical outputs
output1, _ = nn.forward(X_test[:, :5])
output2, _ = loaded_nn.forward(X_test[:, :5])
print(f"\nPredictions match: {np.allclose(output1, output2)}")
```
Practical Extensions
Learning-Rate Scheduling
```python
class TrainerWithLRScheduler(Trainer):
    """Trainer with step-decay learning-rate scheduling."""

    def __init__(self, model, learning_rate=0.01, batch_size=32,
                 lr_decay_rate=0.95, lr_decay_step=10):
        super().__init__(model, learning_rate, batch_size)
        self.initial_lr = learning_rate
        self.lr_decay_rate = lr_decay_rate
        self.lr_decay_step = lr_decay_step

    def train(self, X_train, y_train, X_val, y_val, n_epochs,
              verbose=True, plot_results=True):
        """Train the model with learning-rate decay."""
        print("Starting training (with LR scheduling)...")
        print()
        for epoch in range(n_epochs):
            # Decay the learning rate every lr_decay_step epochs
            if epoch > 0 and epoch % self.lr_decay_step == 0:
                self.learning_rate *= self.lr_decay_rate
                if verbose:
                    print(f"Learning rate adjusted to: {self.learning_rate:.6f}")

            # Train for one epoch
            train_loss, train_acc = self.train_epoch(X_train, y_train)
            val_loss, val_acc = self.evaluate(X_val, y_val)

            # Record history
            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['train_acc'].append(train_acc)
            self.history['val_acc'].append(val_acc)

            # Print progress
            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{n_epochs} (LR={self.learning_rate:.6f})")
                print(f"  Train - loss: {train_loss:.4f}, accuracy: {train_acc:.4f}")
                print(f"  Val   - loss: {val_loss:.4f}, accuracy: {val_acc:.4f}")
                print()

        print("Training finished!")
        if plot_results:
            self.plot_training_history()
        return self.history

# Use the trainer with LR scheduling
nn_lr = NeuralNetwork([64, 128, 64, 10], activation='relu')
trainer_lr = TrainerWithLRScheduler(nn_lr, learning_rate=0.01,
                                    batch_size=32, lr_decay_rate=0.9, lr_decay_step=15)
history_lr = trainer_lr.train(X_train, y_train, X_test, y_test,
                              n_epochs=100, verbose=True)
```
Early Stopping
```python
class TrainerWithEarlyStopping(Trainer):
    """Trainer with early stopping."""

    def __init__(self, model, learning_rate=0.01, batch_size=32,
                 patience=10, min_delta=0.001):
        super().__init__(model, learning_rate, batch_size)
        self.patience = patience      # epochs to wait without improvement
        self.min_delta = min_delta    # minimum improvement that counts
        self.best_val_loss = float('inf')
        self.wait = 0
        self.best_weights = None
        self.best_biases = None
        self.early_stopped = False

    def train(self, X_train, y_train, X_val, y_val, n_epochs,
              verbose=True, plot_results=True):
        """Train the model with early stopping."""
        print(f"Starting training (early stopping, patience={self.patience})...")
        print()
        for epoch in range(n_epochs):
            # Train for one epoch
            train_loss, train_acc = self.train_epoch(X_train, y_train)
            val_loss, val_acc = self.evaluate(X_val, y_val)

            # Record history
            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['train_acc'].append(train_acc)
            self.history['val_acc'].append(val_acc)

            # Check whether this is the best model so far
            if val_loss < self.best_val_loss - self.min_delta:
                self.best_val_loss = val_loss
                self.wait = 0
                # Keep a copy of the best parameters
                self.best_weights = [w.copy() for w in self.model.weights]
                self.best_biases = [b.copy() for b in self.model.biases]
            else:
                self.wait += 1

            # Print progress
            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{n_epochs}")
                print(f"  Train - loss: {train_loss:.4f}, accuracy: {train_acc:.4f}")
                print(f"  Val   - loss: {val_loss:.4f}, accuracy: {val_acc:.4f}")
                print(f"  Early-stopping counter: {self.wait}/{self.patience}")
                print()

            # Check whether to stop early
            if self.wait >= self.patience:
                print(f"Early stopping triggered! Stopping at epoch {epoch+1}")
                print(f"Best validation loss: {self.best_val_loss:.4f}")
                # Restore the best parameters
                self.model.weights = self.best_weights
                self.model.biases = self.best_biases
                self.early_stopped = True
                break

        if not self.early_stopped:
            print("Training finished! Early stopping was not triggered")
        if plot_results:
            self.plot_training_history()
        return self.history

# Use the trainer with early stopping
nn_es = NeuralNetwork([64, 128, 64, 10], activation='relu')
trainer_es = TrainerWithEarlyStopping(nn_es, learning_rate=0.01,
                                      batch_size=32, patience=15, min_delta=0.001)
history_es = trainer_es.train(X_train, y_train, X_test, y_test,
                              n_epochs=200, verbose=True)
```
Project Recap
What We Built
1. A complete neural network class
   - Supports architectures with any number of layers
   - Configurable activation functions
   - Forward and backward propagation
   - Loss computation and prediction
2. A trainer class
   - Mini-batch training
   - Learning-rate scheduling
   - Early stopping
   - Training-history logging and visualization
3. Utilities
   - Data preprocessing
   - Model evaluation
   - Visualization tools
   - Model saving and loading
*(Figure: project overview — core components: neural network class, trainer class, utility functions; features: flexible network architecture, multiple training strategies, a complete evaluation pipeline, visualization tools.)*
Performance Optimization Suggestions
1. Architecture
   - Try different network depths and widths
   - Use residual connections
   - Add batch normalization
2. Training tricks
   - Use more sophisticated learning-rate schedules
   - Implement dropout to reduce overfitting (see the sketch after this list)
   - Use data augmentation
3. Code optimization
   - Speed up the computation with Numba or Cython
   - Add GPU acceleration
   - Parallelize batch processing
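As a starting point for the dropout suggestion above, here is a minimal sketch of inverted dropout. The `dropout_forward` helper is our addition and is not wired into `NeuralNetwork`: to use it, you would apply it after each hidden activation in `forward`, cache the mask, multiply `da` by the same mask in `backward`, and disable it at inference time.

```python
def dropout_forward(a, keep_prob=0.8, training=True):
    """Inverted dropout: zero activations at random and rescale the rest
    by 1/keep_prob so the expected activation is unchanged."""
    if not training:
        return a, None  # no dropout at inference time
    mask = (np.random.rand(*a.shape) < keep_prob) / keep_prob
    return a * mask, mask
```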
Where to Go Next
Having finished this project, you can:
1. Learn a deep learning framework
   - PyTorch: flexible and easy to use, popular in research
   - TensorFlow: widely used in industry, very full-featured
2. Explore more complex architectures
   - Convolutional neural networks (CNNs): suited to images
   - Recurrent neural networks (RNNs): suited to sequence data
   - Transformers: the foundation of modern NLP
3. Study advanced techniques
   - Transfer learning
   - Fine-tuning
   - Model distillation
Complete Code Example
For convenience, here is a complete training script:
```python
def main():
    """Main function: the complete training pipeline."""
    # 1. Prepare the data
    print("Step 1: Prepare the data")
    X_train, X_test, y_train, y_test, y_train_labels, y_test_labels = prepare_data()

    # 2. Create the network
    print("\nStep 2: Create the neural network")
    layer_sizes = [64, 128, 64, 10]
    nn = NeuralNetwork(layer_sizes, activation='relu')
    print(f"Architecture: {layer_sizes}")

    # 3. Create the trainer
    print("\nStep 3: Create the trainer")
    trainer = TrainerWithEarlyStopping(
        nn,
        learning_rate=0.01,
        batch_size=32,
        patience=15,
        min_delta=0.001
    )

    # 4. Train the model
    print("\nStep 4: Train the model")
    history = trainer.train(
        X_train, y_train, X_test, y_test,
        n_epochs=200,
        verbose=True
    )

    # 5. Evaluate the model
    print("\nStep 5: Evaluate the model")
    evaluate_model(nn, X_test, y_test, y_test_labels)

    # 6. Visualize predictions
    print("\nStep 6: Visualize the predictions")
    visualize_predictions(nn, X_test, y_test_labels, n_samples=10)

    # 7. Analyze errors
    print("\nStep 7: Analyze the misclassifications")
    analyze_errors(nn, X_test, y_test, y_test_labels)

    # 8. Save the model
    print("\nStep 8: Save the model")
    save_model(nn, 'digits_classifier_final.pkl')

    print("\n" + "="*60)
    print("Project complete!")
    print("="*60)

# Run the main function
if __name__ == '__main__':
    main()
```
Summary
In this project we implemented a complete neural network from scratch and applied it successfully to handwritten digit recognition. Along the way we covered:
- Data preparation and preprocessing
- Implementing a neural network
- Forward and backward propagation
- The training loop and optimization
- Model evaluation and visualization
- Hyperparameter tuning
- Practical extensions
This project not only helped us understand how neural networks work, it also walked us through the basic workflow of deep learning. From theory to practice, from concepts to code, we went through the full life cycle of a machine learning project.
Writing a neural network by hand is more work than using a framework, but it forces us to understand every detail. When you move on to frameworks such as PyTorch or TensorFlow, you will find that the features they provide are exactly these concepts, packaged and optimized.
As you go deeper into deep learning you will encounter many more interesting and powerful techniques. Remember, though, that all of those advanced techniques build on the foundations covered here. With these fundamentals understood, you have taken an important step toward becoming a deep learning expert.