The output layer uses a sigmoid activation, and the hidden layer uses tanh.
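Concretely, the forward pass that the code below implements is

$$Z^{[1]} = W^{[1]} X + b^{[1]}, \qquad A^{[1]} = \tanh(Z^{[1]}),$$
$$Z^{[2]} = W^{[2]} A^{[1]} + b^{[2]}, \qquad A^{[2]} = \sigma(Z^{[2]}) = \frac{1}{1 + e^{-Z^{[2]}}},$$

and the cross-entropy cost over the $m$ training samples is

$$J = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log a^{[2](i)} + (1 - y^{(i)})\log\left(1 - a^{[2](i)}\right)\right].$$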
```python
import numpy as np
from project_02.code.planar_utils import load_planar_dataset, plot_decision_boundary
def sigmoid(z):
s = 1 / (1 + np.exp(-z))
return s
def init_parameters(n_x, n_h, n_y):
"""
待训练参数初始化
:param n_x: 输入层神经元个数(特征数)
:param n_h: 隐藏层神经元个数
:param n_y: 输出层神经元个数
"""
np.random.seed(2)
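    # Small random weights break the symmetry between hidden units (all-zero
    # weights would make every unit compute the same function), and the 0.01
    # factor keeps initial activations small so tanh does not saturate.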
W1 = np.random.randn(n_h, n_x) * 0.01
b1 = np.zeros(shape=(n_h, 1))
W2 = np.random.randn(n_y, n_h) * 0.01
b2 = np.zeros(shape=(n_y, 1))
parameters = {
"W1": W1,
"b1": b1,
"W2": W2,
"b2": b2
}
return parameters
def forward_propagation(X, parameters):
"""前向传播
:param X: 输入特征,维度是 (横纵坐标, 样本数)
:param parameters: 参数w、b
:return:
A2 -- The sigmoid output of the second activation
cache -- a dictionary containing "Z1","A1","Z2" and "A2"
"""
W1 = parameters["W1"]
b1 = parameters["b1"]
W2 = parameters["W2"]
b2 = parameters["b2"]
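    # Layer 1: affine transform + tanh; layer 2: affine transform + sigmoid,
    # so A2 can be read as P(y = 1 | x) for each sample (column) of X.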
Z1 = np.dot(W1, X) + b1
A1 = np.tanh(Z1)
Z2 = np.dot(W2, A1) + b2
A2 = sigmoid(Z2)
cache = {
"Z1": Z1,
"A1": A1,
"Z2": Z2,
"A2": A2
}
return A2, cache
def compute_cost(A2, Y):
"""
计算成本
:param A2: -- 输出层的输出结果
:param Y: -- 标签
:param parameters: w,b
"""
m = Y.shape[1]
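    # Cross-entropy cost: J = -(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2))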
logprobs = Y * np.log(A2) + (1 - Y) * np.log(1 - A2)
cost = -np.sum(logprobs) / m
return cost
def backward_propagation(X, Y, parameters, cache):
"""
反向传播
:param parameters: w,b
:param cache: Z1, A1, Z2, Z2
:param X: 输入特征
:param Y: 标签
"""
m = X.shape[1]
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
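    # With a sigmoid output and cross-entropy cost, dJ/dZ2 simplifies to A2 - Y.
    # For the tanh hidden layer, g'(Z1) = 1 - tanh(Z1)**2 = 1 - A1**2.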
    dZ2 = A2 - Y                                  # shape (n_y, m)
    dW2 = np.dot(dZ2, A1.T) / m                   # shape (n_y, n_h)
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m  # shape (n_y, 1)
dZ1 = np.dot(W2.T, dZ2) * (1 - A1 ** 2) # dA1 * g'(Z1)
dW1 = np.dot(dZ1, X.T) / m
db1 = np.sum(dZ1, axis=1, keepdims=True) / m
grads = {
"dW1": dW1,
"db1": db1,
"dW2": dW2,
"db2": db2
}
return grads
def update_parameters(parameters, grads, learn_rate=1.2):
"""
梯度下降
:param parameters: w,b
:param grads: 梯度dw,db
:param learning_rate: 学习率
:return:
"""
W1 = parameters["W1"]
b1 = parameters["b1"]
W2 = parameters["W2"]
b2 = parameters["b2"]
dW1 = grads['dW1']
db1 = grads['db1']
dW2 = grads['dW2']
db2 = grads['db2']
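    # Vanilla gradient descent: parameter <- parameter - learn_rate * gradient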
W1 = W1 - learn_rate * dW1
b1 = b1 - learn_rate * db1
W2 = W2 - learn_rate * dW2
b2 = b2 - learn_rate * db2
parameters = {
"W1": W1,
"b1": b1,
"W2": W2,
"b2": b2
}
return parameters
def nn_model(X, Y, n_h, num_iterations=2000, print_cost=False):
    """
    Train the 2-layer neural network.
    :param X: input features, shape (2, number of samples)
    :param Y: true labels, shape (1, number of samples)
    :param n_h: size of the hidden layer
    :param num_iterations: number of gradient-descent iterations
    :param print_cost: if True, print the cost every 1000 iterations
    :return: the trained parameters
    """
    np.random.seed(3)
    n_x = X.shape[0]  # number of input features
    n_y = Y.shape[0]  # number of output units
    parameters = init_parameters(n_x, n_h, n_y)
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y)
        grads = backward_propagation(X, Y, parameters, cache)
        parameters = update_parameters(parameters, grads, learn_rate=1.2)
        if print_cost and i % 1000 == 0:
            print(f"Cost after iteration {i}: {cost}")
    return parameters
def predict(parameters, X):
    """Predict binary labels for X.
    :param parameters: trained parameters W1, b1, W2, b2
    :param X: input features, shape (2, number of samples)
    """
    A2, _ = forward_propagation(X, parameters)
    predictions = np.round(A2)  # threshold the sigmoid output at 0.5
    return predictions
if __name__ == "__main__":
    X, Y = load_planar_dataset()
    parameters = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)
    predictions = predict(parameters, X)
    accuracy = float(np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / Y.size * 100
    print("Accuracy: %d%%" % accuracy)
```