从0开始机器学习--Day17--神经网络反向传播作业

题目:识别数字0-9,做梯度检验(gradient checking)来验证反向传播计算出的梯度是否正确,并可视化隐藏层

代码:

import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from scipy.optimize import minimize

def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise to z."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def sigmoid_derivation(z):  # derivative of the sigmoid function
    """Return sigmoid(z) * (1 - sigmoid(z)), element-wise."""
    activation = sigmoid(z)
    return activation * (1 - activation)

def one_hot(raw_y, num_classes=10):
    """Encode 1-based integer labels as one-hot rows.

    Args:
        raw_y: array-like of integer labels in ``1..num_classes``. Any shape
            is accepted and flattened, so an (m, 1) column works as well as
            an (m,) vector.
        num_classes: width of every encoded row (defaults to 10).

    Returns:
        Float array of shape (m, num_classes); row k holds a 1 in column
        ``label_k - 1`` and zeros elsewhere. Empty input yields shape
        (0, num_classes).

    Raises:
        ValueError: if any label lies outside ``1..num_classes``.
    """
    # Cast to a signed type first: with unsigned labels, `label - 1` would
    # wrap around for a label of 0 instead of going negative.
    labels = np.asarray(raw_y).reshape(-1).astype(np.int64)

    # Reject out-of-range labels explicitly; a label of 0 would otherwise
    # index column -1 and silently mark the last class.
    if labels.size and (labels.min() < 1 or labels.max() > num_classes):
        raise ValueError(
            "labels must lie in 1..%d, got range %d..%d"
            % (num_classes, labels.min(), labels.max())
        )

    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels - 1] = 1
    return encoded

def sequence(theta1, theta2):  # serialize
    """Flatten both weight matrices and join them into a single 1-D vector.

    theta1's entries come first, followed by theta2's. The flat form is what
    gets passed to scipy's minimize as the initial parameter vector x0.
    """
    flat_parts = (theta1.flatten(), theta2.flatten())
    return np.concatenate(flat_parts)


def return_sequence(theta_sequence, input_size=400, hidden_size=25, num_labels=10):  # deserialize
    """Split a flat parameter vector back into the two weight matrices.

    Args:
        theta_sequence: 1-D array holding theta1's entries followed by
            theta2's, as produced by flattening and concatenating them.
        input_size: number of input features, excluding the bias unit.
        hidden_size: number of hidden units, excluding the bias unit.
        num_labels: number of output units.

    Returns:
        Tuple ``(theta1, theta2)`` with shapes
        ``(hidden_size, input_size + 1)`` and ``(num_labels, hidden_size + 1)``.
        With the defaults these are (25, 401) and (10, 26).

    Raises:
        ValueError: if the vector length does not match the layer sizes.
    """
    theta_sequence = np.asarray(theta_sequence)
    theta1_shape = (hidden_size, input_size + 1)
    theta2_shape = (num_labels, hidden_size + 1)
    split = theta1_shape[0] * theta1_shape[1]
    expected = split + theta2_shape[0] * theta2_shape[1]

    # Fail with a readable message instead of an opaque reshape error.
    if theta_sequence.size != expected:
        raise ValueError(
            "expected %d parameters for shapes %s and %s, got %d"
            % (expected, theta1_shape, theta2_shape, theta_sequence.size)
        )

    # Restoring the 2-D shapes keeps later matrix products dimensionally consistent.
    theta1 = theta_sequence[:split].reshape(theta1_shape)
    theta2 = theta_sequence[split:].reshape(theta2_shape)
    return theta1, theta2

def forward_propagation(theta_sequence, X):
    """Run one forward pass through the input -> hidden -> output layers.

    Args:
        theta_sequence: flat parameter vector, unpacked with return_sequence.
        X: input matrix, used unchanged as the first-layer activations (no
            bias column is added here).

    Returns:
        Tuple ``(a1, z2, a2, z3, h)``: the input activations, hidden
        pre-activations, hidden activations with a leading column of ones,
        output pre-activations, and the sigmoid outputs.
    """
    weights_hidden, weights_output = return_sequence(theta_sequence)

    a1 = X
    z2 = a1 @ weights_hidden.T
    hidden = sigmoid(z2)

    # Prepend the bias unit (a column of ones) to the hidden activations.
    bias = np.ones((hidden.shape[0], 1), dtype=hidden.dtype)
    a2 = np.hstack((bias, hidden))

    z3 = a2 @ weights_output.T
    return a1, z2, a2, z3, sigmoid(z3)

def cost_function(theta_sequence, X, y):
    """Return the unregularized cross-entropy cost, averaged over the rows of X.

    Args:
        theta_sequence: flat parameter vector fed to forward_propagation.
        X: input matrix.
        y: target matrix with the same shape as the network output.
    """
    h = forward_propagation(theta_sequence, X)[-1]  # only the final output is needed
    log_likelihood = y * np.log(h) + (1 - y) * np.log(1 - h)
    return -np.sum(log_likelihood) / len(X)

def reg_cost_function(theta_sequence, X, y, l=1):
    """Return the cross-entropy cost plus an L2 penalty on the non-bias weights.

    Args:
        theta_sequence: flat parameter vector being evaluated.
        X: input matrix.
        y: target matrix with the same shape as the network output.
        l: regularization strength (lambda).

    Returns:
        ``cost_function(theta_sequence, X, y)`` plus
        ``l / (2 * len(X))`` times the sum of squared weights, with the first
        (bias) column of each matrix excluded.
    """
    # Unpack the weights from the vector under evaluation. Reading theta1 and
    # theta2 from module scope instead would make the penalty a constant that
    # ignores theta_sequence and disagrees with reg_gradient.
    theta1, theta2 = return_sequence(theta_sequence)

    first = np.sum(np.power(theta1[:, 1:], 2))
    second = np.sum(np.power(theta2[:, 1:], 2))
    reg = (first + second) * l / (2 * len(X))
    return reg + cost_function(theta_sequence, X, y)


def gradient(theta_sequence, X, y):  # backpropagation: compute the error terms (delta)
    """Return the unregularized gradient of the cost as a flat vector.

    Args:
        theta_sequence: flat parameter vector.
        X: input matrix.
        y: target matrix with the same shape as the network output.

    Returns:
        The gradients for theta1 and theta2, serialized with sequence().
    """
    _, theta2 = return_sequence(theta_sequence)
    a1, z2, a2, _, h = forward_propagation(theta_sequence, X)
    m = len(X)

    delta_out = h - y
    # Push the output error back through theta2, skipping its bias column.
    delta_hidden = (delta_out @ theta2[:, 1:]) * sigmoid_derivation(z2)

    grad_theta2 = delta_out.T @ a2 / m
    grad_theta1 = delta_hidden.T @ a1 / m
    return sequence(grad_theta1, grad_theta2)

def reg_gradient(theta_sequence, X, y, l=1):  # regularization
    """Return the gradient including the L2 term on the non-bias weights.

    Args:
        theta_sequence: flat parameter vector.
        X: input matrix.
        y: target matrix with the same shape as the network output.
        l: regularization strength (lambda).

    Returns:
        Flat gradient vector; every column except the first (bias) column of
        each matrix has ``theta * l / len(X)`` added to it.
    """
    grad1, grad2 = return_sequence(gradient(theta_sequence, X, y))
    weights1, weights2 = return_sequence(theta_sequence)

    # The bias column (index 0) is left unpenalized.
    grad1[:, 1:] += weights1[:, 1:] * l / len(X)
    grad2[:, 1:] += weights2[:, 1:] * l / len(X)
    return sequence(grad1, grad2)

def neutral_network(X, y, l):
    """Fit the network weights by minimizing the regularized cost with TNC.

    Args:
        X: input matrix passed through to the cost and gradient functions.
        y: target matrix passed through to the cost and gradient functions.
        l: regularization strength (lambda).

    Returns:
        The result object returned by scipy.optimize.minimize.
    """
    n_params = 25 * 401 + 10 * 26  # 10285 weights in total

    # Start from random values rather than all zeros, so the hidden units do
    # not all end up learning the same feature.
    init_theta = np.random.uniform(-0.5, 0.5, n_params)

    # NOTE(review): newer SciPy releases document `maxfun` rather than
    # `maxiter` for TNC - confirm the 300 cap is honoured by the installed version.
    solver_settings = {
        'fun': reg_cost_function,
        'x0': init_theta,
        'args': (X, y, l),
        'method': 'TNC',
        'jac': reg_gradient,
        'options': {'maxiter': 300},  # cap the iteration count at 300
    }
    return minimize(**solver_settings)




# Driver script: load the data, evaluate the cost with the supplied weights,
# train the network, and report the training-set accuracy.
data = sio.loadmat('ex4data1.mat')
raw_x = data['X']
raw_y = data['y']
print(raw_y)
X = np.insert(raw_x, 0, values=1, axis=1) # prepend a column of ones as the bias unit
print(X.shape)

y = one_hot(raw_y)
print(y)
print(y.shape)

# Load the weight matrices stored in the exercise's weights file.
theta = sio.loadmat('ex4weights.mat')
theta1 = theta['Theta1']
theta2 = theta['Theta2']
print(theta1.shape)
print(theta2.shape)

theta_sequence = sequence(theta1, theta2)

# Regularized cost of the loaded weights with lambda = 1.
print(reg_cost_function(theta_sequence, X, y, l=1))

# Train from a random start with lambda = 10.
l = 10
res = neutral_network(X, y, l)
raw_y = data['y'].reshape(5000) # flatten the labels to 1-D so they compare element-wise with y_pred
a1, z2, a2, z3, h = forward_propagation(res.x, X)
y_pred = np.argmax(h, axis=1)+1 # index of the largest output per row, shifted to 1-based labels
accrancy = np.mean(y_pred == raw_y) # fraction of predictions that match the labels
print(accrancy)

def hidden_layer(theta):
    """Show each hidden unit's input weights as a 20x20 image in a 5x5 grid.

    Args:
        theta: flat parameter vector, unpacked with return_sequence.
    """
    theta1, _ = return_sequence(theta)
    unit_weights = theta1[:, 1:]  # drop the bias column

    fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(8, 8), sharex=True, sharey=True)

    # axes.flat walks the grid row by row, so position idx maps to hidden unit idx.
    for idx, axis in enumerate(axes.flat):
        # NOTE(review): the transpose presumably undoes a column-major pixel
        # layout in the data file - confirm.
        axis.imshow(unit_weights[idx].reshape(20, 20).T, cmap='gray_r')

    plt.xticks([])
    plt.yticks([])
    plt.show()

# Plot the hidden-layer weights from the optimizer's result vector.
hidden_layer(res.x)

输出:

[[10]
 [10]
 [10]
 ...
 [ 9]
 [ 9]
 [ 9]]
(5000, 401)
[[0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 1. 0.]]
(5000, 10)
(25, 401)
(10, 26)
0.38376985909092365
0.9394

进程已结束,退出代码0

可视化隐藏层

总结:与之前相比,这次代码中的数学运算多了很多,尤其是求偏导的部分;写代码前要先把数学推导过程理清楚,避免出现差错。有所改进的是:之前是在调用 minimize 时对参数做 flatten,这次直接添加了一个函数对参数进行序列化,方便调用 scipy 库。

作业订正参考:【作业讲解】编程作业4:神经网络(2)(上)_哔哩哔哩_bilibili

相关推荐
AI浩15 分钟前
ShuffleNet:一种为移动设备设计的极致高效的卷积神经网络
人工智能·神经网络·cnn
爱吃土豆的程序员27 分钟前
深入理解 prompt提示词 原理及使用技巧
人工智能·深度学习·机器学习·prompt
宋一诺331 小时前
机器学习—迁移学习:使用其他任务中的数据
人工智能·机器学习·迁移学习
运维笑谈1 小时前
【python系列】Python数据类型转换详解
python
江-小北2 小时前
Java基础面试题05:简述快速失败(fail-fast)和安全失败(fail-safe)的区别 ?
java·开发语言·python
种花家的n次方2 小时前
pymysql模块
数据库·python
劳工小朋友2 小时前
为什么神经网络的可解释性差?
人工智能·深度学习·神经网络
炸膛坦客2 小时前
神经网络入门实战:(一)神经网络解决的两种问题,以及AI、机器学习、深度学习三者之间的逻辑关系
人工智能·深度学习·机器学习
CoderIsArt2 小时前
基于 RBF 神经网络整定的 PID 控制
人工智能·深度学习·神经网络
友艺3 小时前
CatBoost 模型实践:回归与分类的全流程解析
机器学习