Overview
Mainly a set of problems from Deep-ML built around Transformer scenarios.
SelfAttention
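For reference, the quantity being implemented below is the standard scaled dot-product attention:

$$\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^\top}{\sqrt{d_k}}\right)V$$

where $d_k$ is the dimensionality of the key vectors.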
Based on numpy
```py
import numpy as np

def self_attention(Q, K, V):
    # Raw attention scores: pairwise dot products between queries and keys
    scores = Q @ K.T
    # Scale by sqrt(d_k) to keep the softmax logits in a reasonable range
    d_k = Q.shape[-1]
    scaled_scores = scores / np.sqrt(d_k)
    # Numerically stable softmax over the key axis
    # (subtracting the row max avoids overflow in exp)
    exp_scores = np.exp(scaled_scores - np.max(scaled_scores, axis=-1, keepdims=True))
    atten_weights = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
    # Weighted sum of the value vectors
    atten_out = atten_weights @ V
    return atten_out

def compute_qkv(X, W_q, W_k, W_v):
    # Project the input X into query, key, and value spaces
    Q = X @ W_q
    K = X @ W_k
    V = X @ W_v
    return Q, K, V
```
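A minimal sketch of how the two helpers chain together, assuming a toy input of 4 tokens with model dimension 8 (the shapes and random weights here are illustrative, not from the original problems):

```py
# Illustrative usage (assumed shapes: 4 tokens, d_model = 8)
rng = np.random.default_rng(0)
X = rng.standard_normal((4, 8))
W_q = rng.standard_normal((8, 8))
W_k = rng.standard_normal((8, 8))
W_v = rng.standard_normal((8, 8))

Q, K, V = compute_qkv(X, W_q, W_k, W_v)
out = self_attention(Q, K, V)
print(out.shape)  # (4, 8): one attended vector per token
```

Note that `K.T` assumes 2D (unbatched, single-head) inputs; for a batched version you would transpose only the last two axes, e.g. `np.swapaxes(K, -1, -2)`.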