本案例以房屋价格预测为场景，展示PyTorch如何通过自动微分，根据多个影响因素（特征）计算权重和偏置的梯度。

这个案例的关键要点：

1. 自动微分核心机制

requires_grad=True：告诉PyTorch需要跟踪对这些张量的运算，以便之后计算它们的梯度
loss.backward()：自动计算loss对计算图中所有requires_grad=True的叶子张量的梯度
梯度存储在各张量的.grad属性中；多次调用backward()时梯度会累加，因此每次反向传播前需要先清零（例如optimizer.zero_grad()）

2. 多特征影响分析

经度/纬度：地理位置影响
楼层：建筑高度影响
面积：房屋大小影响
房龄：新旧程度影响（负相关）

3. 梯度计算原理

对于线性回归模型：

```text
prediction = w1*x1 + w2*x2 + w3*x3 + w4*x4 + w5*x5 + b
loss = 1/N * Σ(prediction - true_value)²
```

梯度计算：

```text
∂loss/∂w_i = 2/N * Σ(prediction - true_value) * x_i
∂loss/∂b = 2/N * Σ(prediction - true_value)
```

代码：

import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# Fix PyTorch's global RNG seed so the random tensors created below
# (initial parameters and synthetic data) are reproducible across runs.
torch.manual_seed(42)


class HousePricePredictor:
    """Bare-bones linear regressor built directly on autograd tensors.

    The parameters are plain leaf tensors (not ``nn.Parameter``) created
    with gradient tracking enabled, so ``loss.backward()`` fills in their
    ``.grad`` attributes.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, num_features):
        """Randomly initialise the parameters from a standard normal.

        Args:
            num_features: Number of input features per sample.
        """
        # Weights are drawn before the bias; keeping this order keeps the
        # values identical for a given RNG seed.
        weight_shape = (num_features, 1)
        # Column vector of per-feature weights, shape (num_features, 1).
        self.weights = torch.randn(weight_shape, dtype=torch.float32).requires_grad_(True)
        # Scalar offset stored as a 1-element tensor, shape (1,).
        self.bias = torch.randn((1,), dtype=torch.float32).requires_grad_(True)

    def forward(self, x):
        """Return the predicted price ``x @ weights + bias``.

        Args:
            x: Tensor whose last dimension has ``num_features`` entries.

        Returns:
            Tensor of predictions; the bias is broadcast over all rows.
        """
        linear_term = torch.matmul(x, self.weights)
        return linear_term + self.bias


def generate_synthetic_data(num_samples=100, noise_std=20.0):
    """Generate a synthetic house-price regression dataset.

    Each sample has five features, in column order: longitude, latitude,
    floor, area (square metres) and building age (years). Prices are a
    fixed linear combination of the features plus a bias and Gaussian noise.

    Args:
        num_samples: Number of samples (rows) to generate.
        noise_std: Standard deviation of the Gaussian noise added to the
            prices. Defaults to 20.0; pass 0.0 for noise-free prices.

    Returns:
        A tuple ``(features, prices, true_weights, true_bias)`` where
        ``features`` has shape (num_samples, 5), ``prices`` has shape
        (num_samples, 1), ``true_weights`` has shape (5, 1) and
        ``true_bias`` has shape (1,).
    """
    # Ground-truth coefficients used to synthesise the prices.
    # Column order: longitude, latitude, floor, area, age (age lowers price).
    true_weights = torch.tensor([[0.5], [0.3], [0.2], [0.8], [-0.4]], dtype=torch.float32)
    true_bias = torch.tensor([10.0], dtype=torch.float32)

    # Feature columns, each of shape (num_samples, 1).
    longitude = torch.rand(num_samples, 1) * 2 + 116  # 116-118, roughly Beijing
    latitude = torch.rand(num_samples, 1) * 0.5 + 39.5  # 39.5-40.0, roughly Beijing
    floors = torch.randint(1, 31, (num_samples, 1)).float()  # integers 1..30
    area = torch.rand(num_samples, 1) * 200 + 50  # 50-250 square metres
    # torch.randint excludes the upper bound, so 51 is needed for ages 0..50
    # (the original bound of 50 silently produced only 0..49).
    age = torch.randint(0, 51, (num_samples, 1)).float()

    features = torch.cat([longitude, latitude, floors, area, age], dim=1)

    # Noisy target prices (unit: 10k CNY).
    noise = torch.randn(num_samples, 1) * noise_std
    prices = features @ true_weights + true_bias + noise

    return features, prices, true_weights, true_bias


def main():
    """Run the end-to-end autograd demo on synthetic house-price data.

    Steps:
      1. Generate the synthetic dataset and print the ground-truth parameters.
      2. Standardize the features, build the linear model and inspect the
         gradients produced by a single ``backward()`` call.
      3. Train with SGD, logging loss and gradient magnitudes periodically.
      4. Report the learned parameters in original feature units and plot
         the loss curve and predicted-vs-true prices.
      5. Verify the autograd gradient against the closed-form MSE gradient.

    The function takes no arguments, returns nothing, prints to stdout and
    opens a matplotlib window.
    """
    print("=== PyTorch自动微分案例：房屋价格预测 ===\n")

    # Generate data.
    features, true_prices, true_weights, true_bias = generate_synthetic_data(100)
    num_samples, num_features = features.shape

    print(f"数据形状: 特征 {features.shape}, 价格 {true_prices.shape}")
    print(f"真实权重: {true_weights.flatten().tolist()}")
    print(f"真实偏置: {true_bias.item()}\n")

    # Standardize each feature column (zero mean, unit variance). The raw
    # columns live on very different scales (roughly 40 up to 250), which
    # makes plain SGD numerically unstable: at the original step size the
    # loss blows up to inf/NaN. The population std is computed by hand and
    # clamped so a constant column cannot cause a division by zero.
    feature_mean = features.mean(dim=0, keepdim=True)
    feature_std = ((features - feature_mean) ** 2).mean(dim=0, keepdim=True).sqrt().clamp_min(1e-8)
    inputs = (features - feature_mean) / feature_std

    # Build the model.
    model = HousePricePredictor(num_features)

    print("初始模型参数:")
    print(f"初始权重: {model.weights.flatten().detach().numpy()}")
    print(f"初始偏置: {model.bias.item()}\n")

    # Loss function and optimizer. With standardized inputs a step size of
    # 0.01 is stable and converges well within the 1000 epochs below.
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD([model.weights, model.bias], lr=0.01)

    # Gradient inspection before training.
    print("=== 训练前梯度分析 ===")

    predictions = model.forward(inputs)
    loss = criterion(predictions, true_prices)

    print(f"初始损失: {loss.item():.2f}")

    # Before backward() the .grad attributes are still None, so both lines
    # print False. (The original printed `grad is None`, i.e. the inverse
    # of what the label says.)
    print(f"权重梯度是否存在: {model.weights.grad is not None}")
    print(f"偏置梯度是否存在: {model.bias.grad is not None}")

    # Backward pass populates .grad on every leaf with requires_grad=True.
    loss.backward()

    print("\n反向传播后的梯度:")
    print(f"权重梯度: {model.weights.grad.flatten().numpy()}")
    print(f"偏置梯度: {model.bias.grad.numpy()}")

    # Snapshot the autograd gradient and compute the closed-form MSE
    # gradient at the SAME parameters, for the verification section below:
    #   dL/dw = 2/N * X^T (prediction - target)
    autograd_weight_grad = model.weights.grad.detach().clone()
    with torch.no_grad():
        residual = predictions - true_prices
        manual_weight_grad = (2.0 / num_samples) * (inputs.t() @ residual)

    # Per-feature gradient (w.r.t. the weight of each standardized feature).
    print("\n=== 各特征梯度贡献分析 ===")
    feature_names = ['经度', '纬度', '楼层', '面积', '房龄']
    for i, name in enumerate(feature_names):
        grad = model.weights.grad[i].item()
        print(f"{name}: 梯度 = {grad:.6f}")

    # Training loop.
    print("\n=== 开始训练 ===")
    losses = []

    for epoch in range(1000):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        predictions = model.forward(inputs)
        loss = criterion(predictions, true_prices)
        loss.backward()

        # Log gradient magnitudes at selected epochs.
        if epoch % 200 == 0:
            print(f"轮次 {epoch}: 损失 = {loss.item():.4f}")
            print(f"  权重梯度范数: {torch.norm(model.weights.grad).item():.6f}")
            print(f"  偏置梯度: {model.bias.grad.item():.6f}")

        optimizer.step()
        losses.append(loss.item())

    # Evaluate with the final parameters (the loop's last `loss` was computed
    # before the final optimizer step) and map the parameters back to the
    # original feature units so they are comparable with the true ones:
    #   ((x - mean) / std) @ w + b == x @ (w / std) + (b - (mean / std) @ w)
    with torch.no_grad():
        final_predictions = model.forward(inputs)
        final_loss = criterion(final_predictions, true_prices).item()
        learned_weights = model.weights / feature_std.t()
        learned_bias = model.bias - (feature_mean / feature_std) @ model.weights

    print("\n=== 训练结果 ===")
    print(f"最终损失: {final_loss:.4f}")
    print(f"学习到的权重: {learned_weights.flatten().numpy()}")
    print(f"学习到的偏置: {learned_bias.item():.4f}")

    # Visualise the optimisation.
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.plot(losses)
    plt.title('训练损失下降曲线')
    plt.xlabel('轮次')
    plt.ylabel('MSE损失')
    plt.grid(True)

    plt.subplot(1, 2, 2)
    # Final predictions vs. true values; the dashed red line is y = x.
    price_min = true_prices.min().item()
    price_max = true_prices.max().item()
    plt.scatter(true_prices.numpy(), final_predictions.numpy(), alpha=0.6)
    plt.plot([price_min, price_max], [price_min, price_max], 'r--')
    plt.title('预测价格 vs 真实价格')
    plt.xlabel('真实价格 (万元)')
    plt.ylabel('预测价格 (万元)')
    plt.grid(True)

    plt.tight_layout()
    plt.show()

    # Recap of the gradient formulas.
    print("\n=== 梯度计算原理演示 ===")
    print("对于线性模型: price = w1*经度 + w2*纬度 + w3*楼层 + w4*面积 + w5*房龄 + b")
    print("损失函数: L = 1/N * Σ(predicted_price - true_price)²")
    print("权重梯度: ∂L/∂w_i = 2/N * Σ(predicted_price - true_price) * feature_i")
    print("偏置梯度: ∂L/∂b = 2/N * Σ(predicted_price - true_price)")

    # Manual verification of the area-weight gradient. Both numbers are the
    # full-batch gradient at the initial parameters (captured right after the
    # first backward()). The original compared a single sample's term against
    # a gradient taken at different parameters, which can never match.
    print("\n=== 手动验证梯度计算 ===")
    area_idx = feature_names.index('面积')
    manual_grad_area = manual_weight_grad[area_idx].item()
    autograd_grad_area = autograd_weight_grad[area_idx].item()
    gradients_match = torch.allclose(manual_weight_grad, autograd_weight_grad, rtol=1e-4, atol=1e-4)

    print(f"手动计算的面积梯度: {manual_grad_area:.6f}")
    print(f"PyTorch计算的面积梯度: {autograd_grad_area:.6f}")
    print(f"两者是否一致: {gradients_match}")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()