import numpy as np
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied element-wise."""
    return 1 / (1 + np.exp(-x))


def relu(x):
    """Return x where x is positive and 0 elsewhere, applied element-wise.

    x must be a NumPy array (the comparison result is cast with ``astype``).
    """
    return (x > 0).astype(float) * x


# A single 2x2 weight matrix is reused for every one of the 10 layers.
weights = np.array([[1, 4], [4, 1]])
activation = sigmoid(np.array([1, 0.01]))

# --- Sigmoid network: forward pass -----------------------------------------
print("Activations")
activations = []
for _ in range(10):
    activation = sigmoid(activation.dot(weights))
    activations.append(activation)
    print(activation)

# --- Sigmoid network: backward pass ----------------------------------------
# Walk the layers from last to first. Each step multiplies by the sigmoid
# derivative, written in terms of the layer output as out * (1 - out), and
# then by the transposed weights. The printed values shrink at every layer.
print("\nGradients")
gradient = np.ones_like(activation)
for layer_output in reversed(activations):
    gradient = layer_output * (1 - layer_output) * gradient
    gradient = gradient.dot(weights.transpose())
    print(gradient)

# The ReLU run is seeded with the output of the first sigmoid layer. The
# original script got this value implicitly, through a loop variable left
# over from the backward pass; it is what yields the recorded sample output.
activation = activations[0]

# --- ReLU network: forward pass --------------------------------------------
print("Relu Activations")
activations = []
for _ in range(10):
    activation = relu(activation.dot(weights))
    activations.append(activation)
    print(activation)

# --- ReLU network: backward pass -------------------------------------------
# The ReLU derivative is 1 where the output is positive and 0 elsewhere, so
# only the transposed weights scale the gradient; here it grows 5x per layer.
print("\nRelu Gradients")
gradient = np.ones_like(activation)
for layer_output in reversed(activations):
    gradient = ((layer_output > 0) * gradient).dot(weights.transpose())
    print(gradient)
'''
Activations
[0.93940638 0.96852968]
[0.9919462 0.99121735]
[0.99301385 0.99302901]
[0.9930713 0.99307098]
[0.99307285 0.99307285]
[0.99307291 0.99307291]
[0.99307291 0.99307291]
[0.99307291 0.99307291]
[0.99307291 0.99307291]
[0.99307291 0.99307291]
Gradients
[0.03439552 0.03439552]
[0.00118305 0.00118305]
[4.06916726e-05 4.06916726e-05]
[1.39961115e-06 1.39961115e-06]
[4.81403643e-08 4.81403637e-08]
[1.65582672e-09 1.65582765e-09]
[5.69682675e-11 5.69667160e-11]
[1.97259346e-12 1.97517920e-12]
[8.45387597e-14 8.02306381e-14]
[1.45938177e-14 2.16938983e-14]
Relu Activations
[4.8135251 4.72615519]
[23.71814585 23.98025559]
[119.63916823 118.852839 ]
[595.05052421 597.40951192]
[2984.68857188 2977.61160877]
[14895.13500696 14916.36589628]
[74560.59859209 74496.90592414]
[372548.22228863 372739.30029248]
[1863505.42345854 1862932.18944699]
[9315234.18124649 9316953.88328115]
Relu Gradients
[5. 5.]
[25. 25.]
[125. 125.]
[625. 625.]
[3125. 3125.]
[15625. 15625.]
[78125. 78125.]
[390625. 390625.]
[1953125. 1953125.]
[9765625. 9765625.]
'''
54、深度学习-自学之路-自己搭建深度学习框架-15、解释梯度消失和梯度爆炸的问题。
小宇爱 2025-03-03 2:02
相关推荐
开源技术2 分钟前
Claude Opus 4.6 发布,100万上下文窗口,越贵越好用聆风吟º13 分钟前
CANN hccl 深度解析:异构计算集群通信库的跨节点通信与资源管控实现逻辑狸奴算君20 分钟前
告别机械回复:三步微调AI模型,打造会“读心”的智能客服七夜zippoe21 分钟前
脉向AI|当豆包手机遭遇“全网封杀“:GUI Agent是通向AGI的必经之路吗?木非哲22 分钟前
机器学习--随机森林--从一棵树的直觉到一片林的哲学神的泪水25 分钟前
CANN 系列底层篇:基于 shmem 实现 NPU 设备内存的高效共享皮卡丘不断更27 分钟前
手搓本地 RAG:我用 Python 和 Spring Boot 给 AI 装上了“实时代码监控”浪子小院38 分钟前
ModelEngine 智能体全流程开发实战:从 0 到 1 搭建多协作办公助手程序员打怪兽41 分钟前
详解YOLOv8网络结构Yuer202541 分钟前
全国首例“AI 幻觉”侵权案判了:这不是 AI 准不准的问题,而是谁该为 AI 负责