Shallow Neural Network Example

The output layer uses a sigmoid activation; the hidden layer uses tanh.
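Written out, these are the equations that `forward_propagation` and `compute_cost` implement below. For a batch of $m$ samples:

$$
\begin{aligned}
Z^{[1]} &= W^{[1]} X + b^{[1]}, \qquad A^{[1]} = \tanh\!\left(Z^{[1]}\right),\\
Z^{[2]} &= W^{[2]} A^{[1]} + b^{[2]}, \qquad A^{[2]} = \sigma\!\left(Z^{[2]}\right) = \frac{1}{1 + e^{-Z^{[2]}}},
\end{aligned}
$$

and training minimizes the cross-entropy cost

$$
J = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log a^{[2](i)} + \left(1 - y^{(i)}\right) \log\!\left(1 - a^{[2](i)}\right) \right].
$$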

```python
import numpy as np

from project_02.code.planar_utils import load_planar_dataset, plot_decision_boundary


def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s


def init_parameters(n_x, n_h, n_y):
    """
     待训练参数初始化
     :param n_x: 输入层神经元个数(特征数)
     :param n_h: 隐藏层神经元个数
     :param n_y: 输出层神经元个数
     """
    np.random.seed(2)
    # Small random weights break the symmetry between hidden units;
    # zero biases are fine because the weights already differ.
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros(shape=(n_h, 1))

    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros(shape=(n_y, 1))
    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    return parameters


def forward_propagation(X, parameters):
    """前向传播
    :param X: 输入特征,维度是 (横纵坐标, 样本数)
    :param parameters: 参数w、b
    :return:
    A2 -- The sigmoid output of the second activation
    cache -- a dictionary containing "Z1","A1","Z2" and "A2"
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    cache = {
        "Z1": Z1,
        "A1": A1,
        "Z2": Z2,
        "A2": A2
    }
    return A2, cache


def compute_cost(A2, Y):
    """
    计算成本
    :param A2: -- 输出层的输出结果
    :param Y: -- 标签
    :param parameters: w,b
    """
    m = Y.shape[1]
    # Element-wise cross-entropy of the predictions against the labels.
    logprobs = Y * np.log(A2) + (1 - Y) * np.log(1 - A2)
    cost = -np.sum(logprobs) / m
    return cost


def backward_propagation(X, Y, parameters, cache):
    """
      反向传播
      :param parameters: w,b
      :param cache: Z1, A1, Z2, Z2
      :param X: 输入特征
      :param Y: 标签
    """
    m = X.shape[1]

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = cache["Z1"]
    A1 = cache["A1"]
    Z2 = cache["Z2"]
    A2 = cache["A2"]

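    # For the cross-entropy cost with a sigmoid output, dJ/dZ2 simplifies
    # to A2 - Y. For the tanh hidden layer, g'(Z1) = 1 - tanh(Z1)**2 = 1 - A1**2,
    # so dZ1 = (W2.T @ dZ2) * (1 - A1**2). Averaging over the m samples
    # gives the parameter gradients.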
    dZ2 = A2 - Y                                   # shape (n_y, m)
    dW2 = np.dot(dZ2, A1.T) / m                    # shape (n_y, n_h)
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m   # shape (n_y, 1)
    dZ1 = np.dot(W2.T, dZ2) * (1 - A1 ** 2)        # dA1 * g'(Z1), shape (n_h, m)
    dW1 = np.dot(dZ1, X.T) / m                     # shape (n_h, n_x)
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m   # shape (n_h, 1)

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2
    }
    return grads


def update_parameters(parameters, grads, learn_rate=1.2):
    """
    梯度下降
    :param parameters: w,b
    :param grads: 梯度dw,db
    :param learning_rate: 学习率
    :return:
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    dW1 = grads['dW1']
    db1 = grads['db1']
    dW2 = grads['dW2']
    db2 = grads['db2']

    W1 = W1 - learn_rate * dW1
    b1 = b1 - learn_rate * db1
    W2 = W2 - learn_rate * dW2
    b2 = b2 - learn_rate * db2
    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    return parameters


def nn_model(X, Y, n_h, num_iterations=2000, print_cost=False):
    """Train the two-layer network with batch gradient descent.
    :param X: input features, shape (n_x, number of samples)
    :param Y: labels, shape (1, number of samples)
    :param n_h: number of hidden-layer units
    """
    np.random.seed(3)
    n_x = X.shape[0]
    n_y = Y.shape[0]
    parameters = init_parameters(n_x, n_h, n_y)
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y)
        grads = backward_propagation(X, Y, parameters, cache)
        # Return the updated parameters; the original snapshot taken before
        # update_parameters would lag the training by one step.
        parameters = update_parameters(parameters, grads, learn_rate=1.2)
        if print_cost and i % 1000 == 0:
            print(f"Cost after iteration {i}: {cost}")
    return parameters


def predict(parameters, X):
    """Predict class labels (0/1) by thresholding A2 at 0.5.
    :param parameters: the trained weights and biases
    :param X: input features
    """
    A2, _ = forward_propagation(X, parameters)
    predictions = np.round(A2)
    return predictions


if __name__ == "__main__":
    X, Y = load_planar_dataset()
    parameters = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)
    predictions = predict(parameters, X)
    print("预测准确率是: %d" % float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100) + "%")