import numpy as np
class Tensor(object):
def __init__(self, data,
autograd=False,
creators=None,
creation_op=None,
id=None):
self.data = np.array(data)
self.autograd = autograd
self.grad = None
if (id is None):
self.id = np.random.randint(0, 1000000000)
else:
self.id = id
        self.creators = creators        # tensors that produced this one
        self.creation_op = creation_op  # the op that produced it (drives backward)
        self.children = {}              # child id -> number of grads still expected
if (creators is not None):
for c in creators:
if (self.id not in c.children):
c.children[self.id] = 1
else:
c.children[self.id] += 1
def all_children_grads_accounted_for(self):
for id, cnt in self.children.items():
if (cnt != 0):
return False
return True
def backward(self, grad=None, grad_origin=None):
if (self.autograd):
if (grad is None):
grad = Tensor(np.ones_like(self.data))
if (grad_origin is not None):
if (self.children[grad_origin.id] == 0):
return
                        raise Exception("cannot backprop more than once (id=" + str(self.id) +
                                        ", op=" + str(self.creation_op) + ")")
else:
self.children[grad_origin.id] -= 1
if (self.grad is None):
self.grad = grad
else:
self.grad += grad
# grads must not have grads of their own
assert grad.autograd == False
            # Only continue backpropagating if there is something to
            # backprop into and all gradients from children are accounted
            # for. Waiting for children is overridden when backward() was
            # called on this variable directly (grad_origin is None).
if (self.creators is not None and
(self.all_children_grads_accounted_for() or
grad_origin is None)):
if (self.creation_op == "add"):
self.creators[0].backward(self.grad, self)
self.creators[1].backward(self.grad, self)
if (self.creation_op == "sub"):
self.creators[0].backward(Tensor(self.grad.data), self)
self.creators[1].backward(Tensor(self.grad.__neg__().data), self)
if (self.creation_op == "mul"):
new = self.grad * self.creators[1]
self.creators[0].backward(new, self)
new = self.grad * self.creators[0]
self.creators[1].backward(new, self)
if (self.creation_op == "mm"):
c0 = self.creators[0]
c1 = self.creators[1]
new = self.grad.mm(c1.transpose())
c0.backward(new)
new = self.grad.transpose().mm(c0).transpose()
c1.backward(new)
if (self.creation_op == "transpose"):
self.creators[0].backward(self.grad.transpose())
if ("sum" in self.creation_op):
dim = int(self.creation_op.split("_")[1])
self.creators[0].backward(self.grad.expand(dim,
self.creators[0].data.shape[dim]))
if ("expand" in self.creation_op):
dim = int(self.creation_op.split("_")[1])
self.creators[0].backward(self.grad.sum(dim))
if (self.creation_op == "neg"):
self.creators[0].backward(self.grad.__neg__())
if (self.creation_op == "sigmoid"):
ones = Tensor(np.ones_like(self.grad.data))
self.creators[0].backward(self.grad * (self * (ones - self)))
if (self.creation_op == "tanh"):
ones = Tensor(np.ones_like(self.grad.data))
self.creators[0].backward(self.grad * (ones - (self * self)))
if (self.creation_op == "index_select"):
new_grad = np.zeros_like(self.creators[0].data)
indices_ = self.index_select_indices.data.flatten()
grad_ = grad.data.reshape(len(indices_), -1)
for i in range(len(indices_)):
new_grad[indices_[i]] += grad_[i]
self.creators[0].backward(Tensor(new_grad))
if (self.creation_op == "cross_entropy"):
dx = self.softmax_output - self.target_dist
self.creators[0].backward(Tensor(dx))
def __add__(self, other):
if (self.autograd and other.autograd):
return Tensor(self.data + other.data,
autograd=True,
creators=[self, other],
creation_op="add")
return Tensor(self.data + other.data)
def __neg__(self):
if (self.autograd):
return Tensor(self.data * -1,
autograd=True,
creators=[self],
creation_op="neg")
return Tensor(self.data * -1)
def __sub__(self, other):
if (self.autograd and other.autograd):
return Tensor(self.data - other.data,
autograd=True,
creators=[self, other],
creation_op="sub")
return Tensor(self.data - other.data)
def __mul__(self, other):
if (self.autograd and other.autograd):
return Tensor(self.data * other.data,
autograd=True,
creators=[self, other],
creation_op="mul")
return Tensor(self.data * other.data)
def sum(self, dim):
if (self.autograd):
return Tensor(self.data.sum(dim),
autograd=True,
creators=[self],
creation_op="sum_" + str(dim))
return Tensor(self.data.sum(dim))
    def expand(self, dim, copies):
        # replicate the data `copies` times along a new axis at position `dim`
        # (the counterpart of sum: used to route sum's gradient back)
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dim, len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(
            list(self.data.shape) + [copies]).transpose(trans_cmd)
if (self.autograd):
return Tensor(new_data,
autograd=True,
creators=[self],
creation_op="expand_" + str(dim))
return Tensor(new_data)
def transpose(self):
if (self.autograd):
return Tensor(self.data.transpose(),
autograd=True,
creators=[self],
creation_op="transpose")
return Tensor(self.data.transpose())
def mm(self, x):
if (self.autograd):
return Tensor(self.data.dot(x.data),
autograd=True,
creators=[self, x],
creation_op="mm")
return Tensor(self.data.dot(x.data))
def sigmoid(self):
if (self.autograd):
return Tensor(1 / (1 + np.exp(-self.data)),
autograd=True,
creators=[self],
creation_op="sigmoid")
return Tensor(1 / (1 + np.exp(-self.data)))
def tanh(self):
if (self.autograd):
return Tensor(np.tanh(self.data),
autograd=True,
creators=[self],
creation_op="tanh")
return Tensor(np.tanh(self.data))
def index_select(self, indices):
if (self.autograd):
new = Tensor(self.data[indices.data],
autograd=True,
creators=[self],
creation_op="index_select")
new.index_select_indices = indices
return new
return Tensor(self.data[indices.data])
    def softmax(self):
        # note: returns a raw NumPy array, not a Tensor, so it is
        # outside the autograd graph
        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis=len(self.data.shape) - 1,
                                       keepdims=True)
        return softmax_output
    def cross_entropy(self, target_indices):
        # softmax over the last axis, then the mean negative log-likelihood
        # of the target classes
        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis=len(self.data.shape) - 1,
                                       keepdims=True)
        t = target_indices.data.flatten()
        p = softmax_output.reshape(len(t), -1)
        target_dist = np.eye(p.shape[1])[t]  # one-hot encoding of the targets
        loss = -(np.log(p) * target_dist).sum(1).mean()
if (self.autograd):
out = Tensor(loss,
autograd=True,
creators=[self],
creation_op="cross_entropy")
out.softmax_output = softmax_output
out.target_dist = target_dist
return out
return Tensor(loss)
    def __repr__(self):
        return self.data.__repr__()
    def __str__(self):
        return self.data.__str__()
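# A minimal sanity check for the autograd engine above (my addition, not part
# of the original listing; uncomment to run). b feeds into f along two paths
# (through d and through e), so its gradient should count both:
# a = Tensor([1, 2, 3, 4, 5], autograd=True)
# b = Tensor([2, 2, 2, 2, 2], autograd=True)
# c = Tensor([5, 4, 3, 2, 1], autograd=True)
# d = a + b
# e = b + c
# f = d + e
# f.backward(Tensor(np.array([1, 1, 1, 1, 1])))
# print(b.grad.data)  # expected: [2 2 2 2 2]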
class Layer(object):
def __init__(self):
self.parameters = list()
def get_parameters(self):
return self.parameters
class SGD(object):
def __init__(self, parameters, alpha=0.1):
self.parameters = parameters
self.alpha = alpha
def zero(self):
for p in self.parameters:
p.grad.data *= 0
def step(self, zero=True):
for p in self.parameters:
p.data -= p.grad.data * self.alpha
if (zero):
p.grad.data *= 0
class Linear(Layer):
def __init__(self, n_inputs, n_outputs, bias=True):
super().__init__()
self.use_bias = bias
        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / n_inputs)  # He-style init
self.weight = Tensor(W, autograd=True)
if (self.use_bias):
self.bias = Tensor(np.zeros(n_outputs), autograd=True)
self.parameters.append(self.weight)
if (self.use_bias):
self.parameters.append(self.bias)
def forward(self, input):
if (self.use_bias):
return input.mm(self.weight) + self.bias.expand(0, len(input.data))
return input.mm(self.weight)
class Sequential(Layer):
    def __init__(self, layers=None):
        super().__init__()
        # avoid a mutable default argument, which would be shared by
        # every Sequential created without an explicit layer list
        self.layers = layers if layers is not None else list()
def add(self, layer):
self.layers.append(layer)
def forward(self, input):
for layer in self.layers:
input = layer.forward(input)
return input
def get_parameters(self):
params = list()
for l in self.layers:
params += l.get_parameters()
return params
class Embedding(Layer):
def __init__(self, vocab_size, dim):
super().__init__()
self.vocab_size = vocab_size
self.dim = dim
        # this random initialization style is just a convention from word2vec
self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)
self.parameters.append(self.weight)
def forward(self, input):
return self.weight.index_select(input)
class Tanh(Layer):
def __init__(self):
super().__init__()
def forward(self, input):
return input.tanh()
class Sigmoid(Layer):
def __init__(self):
super().__init__()
def forward(self, input):
return input.sigmoid()
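# A minimal end-to-end check of Sequential, Linear, and SGD (my addition, not
# the original author's code; uncomment to run): a few mean-squared-error
# steps on a toy dataset.
# toy_data = Tensor(np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), autograd=True)
# toy_target = Tensor(np.array([[0.0], [1.0], [0.0], [1.0]]), autograd=True)
# toy_model = Sequential([Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()])
# toy_optim = SGD(parameters=toy_model.get_parameters(), alpha=0.1)
# for _ in range(10):
#     pred = toy_model.forward(toy_data)
#     err = pred - toy_target
#     loss = (err * err).sum(0)  # squared error summed over the batch
#     loss.backward(Tensor(np.ones_like(loss.data)))
#     toy_optim.step()           # updates weights and zeroes grads
#     print(loss)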
class CrossEntropyLoss(object):
def __init__(self):
super().__init__()
def forward(self, input, target):
return input.cross_entropy(target)
class RNNCell(Layer):
def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
super().__init__()
self.n_inputs = n_inputs
self.n_hidden = n_hidden
self.n_output = n_output
if (activation == 'sigmoid'):
self.activation = Sigmoid()
elif (activation == 'tanh'):
            self.activation = Tanh()
else:
raise Exception("Non-linearity not found")
self.w_ih = Linear(n_inputs, n_hidden)
self.w_hh = Linear(n_hidden, n_hidden)
self.w_ho = Linear(n_hidden, n_output)
self.parameters += self.w_ih.get_parameters()
self.parameters += self.w_hh.get_parameters()
self.parameters += self.w_ho.get_parameters()
def forward(self, input, hidden):
from_prev_hidden = self.w_hh.forward(hidden)
combined = self.w_ih.forward(input) + from_prev_hidden
new_hidden = self.activation.forward(combined)
output = self.w_ho.forward(new_hidden)
return output, new_hidden
def init_hidden(self, batch_size=1):
return Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
class LSTMCell(Layer):
def __init__(self, n_inputs, n_hidden, n_output):
super().__init__()
self.n_inputs = n_inputs
self.n_hidden = n_hidden
self.n_output = n_output
self.xf = Linear(n_inputs, n_hidden)
self.xi = Linear(n_inputs, n_hidden)
self.xo = Linear(n_inputs, n_hidden)
self.xc = Linear(n_inputs, n_hidden)
self.hf = Linear(n_hidden, n_hidden, bias=False)
self.hi = Linear(n_hidden, n_hidden, bias=False)
self.ho = Linear(n_hidden, n_hidden, bias=False)
self.hc = Linear(n_hidden, n_hidden, bias=False)
self.w_ho = Linear(n_hidden, n_output, bias=False)
self.parameters += self.xf.get_parameters()
self.parameters += self.xi.get_parameters()
self.parameters += self.xo.get_parameters()
self.parameters += self.xc.get_parameters()
self.parameters += self.hf.get_parameters()
self.parameters += self.hi.get_parameters()
self.parameters += self.ho.get_parameters()
self.parameters += self.hc.get_parameters()
self.parameters += self.w_ho.get_parameters()
def forward(self, input, hidden):
        prev_hidden = hidden[0]
        prev_cell = hidden[1]
        # forget, input, and output gates, plus the candidate cell update
        f = (self.xf.forward(input) + self.hf.forward(prev_hidden)).sigmoid()
        i = (self.xi.forward(input) + self.hi.forward(prev_hidden)).sigmoid()
        o = (self.xo.forward(input) + self.ho.forward(prev_hidden)).sigmoid()
        g = (self.xc.forward(input) + self.hc.forward(prev_hidden)).tanh()
        # new cell state: keep part of the old cell (f) and add part of the
        # candidate (i); this additive path is what tames vanishing gradients
        c = (f * prev_cell) + (i * g)
        h = o * c.tanh()
output = self.w_ho.forward(h)
return output, (h, c)
def init_hidden(self, batch_size=1):
init_hidden = Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
init_cell = Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
init_hidden.data[:, 0] += 1
init_cell.data[:, 0] += 1
return (init_hidden, init_cell)
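# Quick shape check for LSTMCell (my addition; the sizes are hypothetical,
# uncomment to run):
# cell = LSTMCell(n_inputs=4, n_hidden=8, n_output=5)
# h = cell.init_hidden(batch_size=2)
# x = Tensor(np.random.randn(2, 4), autograd=True)
# out, h = cell.forward(x, h)
# print(out.data.shape, h[0].data.shape, h[1].data.shape)  # (2, 5) (2, 8) (2, 8)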
import sys, random, math
from collections import Counter
np.random.seed(0)
# dataset from http://karpathy.github.io/2015/05/21/rnn-effectiveness/
f = open('shakespear.txt', 'r')
raw = f.read()
f.close()
vocab = list(set(raw))  # character-level vocabulary (set order varies between runs)
word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
indices = np.array(list(map(lambda x: word2index[x], raw)))
embed = Embedding(vocab_size=len(vocab),dim=512)
model = LSTMCell(n_inputs=512, n_hidden=512, n_output=len(vocab))
model.w_ho.weight.data *= 0  # start the output layer at zero so early predictions are near-uniform
criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)
def generate_sample(n=30, init_char=' '):
s = ""
hidden = model.init_hidden(batch_size=1)
input = Tensor(np.array([word2index[init_char]]))
for i in range(n):
rnn_input = embed.forward(input)
output, hidden = model.forward(input=rnn_input, hidden=hidden)
# output.data *= 25
# temp_dist = output.softmax()
# temp_dist /= temp_dist.sum()
# m = (temp_dist > np.random.rand()).argmax()
m = output.data.argmax()
c = vocab[m]
input = Tensor(np.array([m]))
s += c
return s
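# Greedy argmax decoding (above) tends to lock into repetitive loops, as the
# sample at the end of this post shows. A sketch of temperature sampling as an
# alternative (my addition, not the original author's code; uncomment to try):
# def generate_sample_temp(n=30, init_char=' ', temperature=0.5):
#     s = ""
#     hidden = model.init_hidden(batch_size=1)
#     input = Tensor(np.array([word2index[init_char]]))
#     for i in range(n):
#         rnn_input = embed.forward(input)
#         output, hidden = model.forward(input=rnn_input, hidden=hidden)
#         logits = output.data.flatten() / temperature
#         p = np.exp(logits - logits.max())  # numerically stable softmax
#         p /= p.sum()
#         m = np.random.choice(len(vocab), p=p)
#         s += vocab[m]
#         input = Tensor(np.array([m]))
#     return s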
batch_size = 16
bptt = 25
n_batches = int(indices.shape[0] / batch_size)
trimmed_indices = indices[:n_batches * batch_size]
# shape (n_batches, batch_size): each column holds one contiguous slice of text
batched_indices = trimmed_indices.reshape(batch_size, n_batches).transpose()
input_batched_indices = batched_indices[0:-1]
target_batched_indices = batched_indices[1:]
n_bptt = int(((n_batches-1) / bptt))
input_batches = input_batched_indices[:n_bptt*bptt].reshape(n_bptt,bptt,batch_size)
target_batches = target_batched_indices[:n_bptt*bptt].reshape(n_bptt, bptt, batch_size)
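# Shape walkthrough (my addition): with batch_size=16 and bptt=25,
#   batched_indices       -> (n_batches, 16), columns are contiguous text
#   input/target batches  -> offset by one character (predict the next char)
#   input_batches         -> (n_bptt, 25, 16), one truncated-BPTT window each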
min_loss = 1000
def train(iterations=40): #iterations=400
    global min_loss  # the running minimum persists across calls
    for iter in range(iterations):
total_loss = 0
n_loss = 0
hidden = model.init_hidden(batch_size=batch_size)
batches_to_train = len(input_batches)
# batches_to_train = 32
for batch_i in range(batches_to_train):
            # detach the hidden state from the previous batch's graph
            # (truncated backpropagation through time)
            hidden = (Tensor(hidden[0].data, autograd=True), Tensor(hidden[1].data, autograd=True))
losses = list()
for t in range(bptt):
input = Tensor(input_batches[batch_i][t], autograd=True)
rnn_input = embed.forward(input=input)
output, hidden = model.forward(input=rnn_input, hidden=hidden)
target = Tensor(target_batches[batch_i][t], autograd=True)
batch_loss = criterion.forward(output, target)
                if (t == 0):
                    losses.append(batch_loss)
                else:
                    # running sum: losses[-1] is the total loss over the window
                    losses.append(batch_loss + losses[-1])
loss = losses[-1]
loss.backward()
optim.step()
            total_loss += loss.data / bptt
            # exp of the mean per-character loss, i.e. a running perplexity
            epoch_loss = np.exp(total_loss / (batch_i + 1))
if(epoch_loss < min_loss):
min_loss = epoch_loss
print()
log = "\r Iter:" + str(iter)
log += " - Alpha:" + str(optim.alpha)[0:5]
log += " - Batch "+str(batch_i+1)+"/"+str(len(input_batches))
log += " - Min Loss:" + str(min_loss)[0:5]
log += " - Loss:" + str(epoch_loss)
            if (batch_i == 0):
                log += " - " + generate_sample(n=70, init_char='T').replace("\n", " ")
            sys.stdout.write(log)  # the leading \r overwrites the previous line
        optim.alpha *= 0.99  # decay the learning rate after each epoch
train(5) #train(10)
print(generate_sample(n=500, init_char='\n'))
'''
Iter:0 - Alpha:0.05 - Batch 2/2788 - Min Loss:64.99 - Loss:65.00011100353602
Iter:0 - Alpha:0.05 - Batch 3/2788 - Min Loss:64.97 - Loss:64.97125482131345
Iter:0 - Alpha:0.05 - Batch 4/2788 - Min Loss:64.92 - Loss:64.9255137563867
Iter:0 - Alpha:0.05 - Batch 5/2788 - Min Loss:64.81 - Loss:64.81780538122808
Iter:0 - Alpha:0.05 - Batch 6/2788 - Min Loss:64.59 - Loss:64.59206297117493
Iter:0 - Alpha:0.05 - Batch 7/2788 - Min Loss:64.21 - Loss:64.21087681480775
Iter:0 - Alpha:0.05 - Batch 8/2788 - Min Loss:63.38 - Loss:63.3836649986421
Iter:0 - Alpha:0.05 - Batch 9/2788 - Min Loss:61.87 - Loss:61.87748115624768
Iter:0 - Alpha:0.05 - Batch 10/2788 - Min Loss:59.30 - Loss:59.30033165407521
Iter:0 - Alpha:0.05 - Batch 11/2788 - Min Loss:56.62 - Loss:56.62692002039265
Iter:0 - Alpha:0.05 - Batch 12/2788 - Min Loss:53.64 - Loss:53.64444557804619
Iter:0 - Alpha:0.05 - Batch 13/2788 - Min Loss:51.78 - Loss:51.780488768462384
Iter:0 - Alpha:0.05 - Batch 14/2788 - Min Loss:51.76 - Loss:51.76064561154201
Iter:0 - Alpha:0.05 - Batch 15/2788 - Min Loss:50.75 - Loss:50.75290221773698
Iter:0 - Alpha:0.05 - Batch 16/2788 - Min Loss:49.03 - Loss:49.03450085728163
Iter:0 - Alpha:0.05 - Batch 17/2788 - Min Loss:47.77 - Loss:47.77659027936774
Iter:0 - Alpha:0.05 - Batch 18/2788 - Min Loss:47.76 - Loss:47.769863520076136
Iter:0 - Alpha:0.05 - Batch 19/2788 - Min Loss:46.88 - Loss:46.8847505554376
Iter:0 - Alpha:0.05 - Batch 20/2788 - Min Loss:45.48 - Loss:45.48606082600269
Iter:0 - Alpha:0.05 - Batch 22/2788 - Min Loss:44.50 - Loss:44.60101243058189
Iter:0 - Alpha:0.05 - Batch 23/2788 - Min Loss:43.69 - Loss:43.696149757572336
Iter:0 - Alpha:0.05 - Batch 24/2788 - Min Loss:42.92 - Loss:42.929997147269525
Iter:0 - Alpha:0.05 - Batch 25/2788 - Min Loss:42.70 - Loss:42.701717195302756
Iter:0 - Alpha:0.05 - Batch 26/2788 - Min Loss:42.21 - Loss:42.21611881004273
Iter:0 - Alpha:0.05 - Batch 27/2788 - Min Loss:41.38 - Loss:41.38657873810713
Iter:0 - Alpha:0.05 - Batch 28/2788 - Min Loss:41.24 - Loss:41.24750360990076
Iter:0 - Alpha:0.05 - Batch 29/2788 - Min Loss:40.68 - Loss:40.68153692034208
Iter:0 - Alpha:0.05 - Batch 30/2788 - Min Loss:40.03 - Loss:40.03476921197549
Iter:0 - Alpha:0.05 - Batch 31/2788 - Min Loss:39.54 - Loss:39.54761249019825
Iter:0 - Alpha:0.05 - Batch 32/2788 - Min Loss:39.36 - Loss:39.362854145263974
Iter:0 - Alpha:0.05 - Batch 33/2788 - Min Loss:38.98 - Loss:38.983527840348266
Iter:0 - Alpha:0.05 - Batch 34/2788 - Min Loss:38.86 - Loss:38.86414351625717
Iter:0 - Alpha:0.05 - Batch 35/2788 - Min Loss:38.36 - Loss:38.36348682993047
Iter:0 - Alpha:0.05 - Batch 36/2788 - Min Loss:38.00 - Loss:38.00899719388536
Iter:0 - Alpha:0.05 - Batch 37/2788 - Min Loss:37.66 - Loss:37.66470420694178
Iter:0 - Alpha:0.05 - Batch 38/2788 - Min Loss:37.58 - Loss:37.58118032354363
Iter:0 - Alpha:0.05 - Batch 39/2788 - Min Loss:37.24 - Loss:37.24981887831019
Iter:0 - Alpha:0.05 - Batch 40/2788 - Min Loss:36.97 - Loss:36.97107029041341
Iter:0 - Alpha:0.05 - Batch 41/2788 - Min Loss:36.96 - Loss:36.96131096412938
Iter:0 - Alpha:0.05 - Batch 42/2788 - Min Loss:36.76 - Loss:36.76969259672902
Iter:0 - Alpha:0.05 - Batch 43/2788 - Min Loss:36.52 - Loss:36.52076641120323
Iter:0 - Alpha:0.05 - Batch 44/2788 - Min Loss:36.42 - Loss:36.42610153555301
Iter:0 - Alpha:0.05 - Batch 45/2788 - Min Loss:36.23 - Loss:36.23380831357556
Iter:0 - Alpha:0.05 - Batch 46/2788 - Min Loss:36.04 - Loss:36.044056917178
Iter:0 - Alpha:0.05 - Batch 47/2788 - Min Loss:35.77 - Loss:35.77064092517939
Iter:0 - Alpha:0.05 - Batch 48/2788 - Min Loss:35.56 - Loss:35.56084237253776
Iter:0 - Alpha:0.05 - Batch 49/2788 - Min Loss:35.37 - Loss:35.37191870559778
Iter:0 - Alpha:0.05 - Batch 50/2788 - Min Loss:35.29 - Loss:35.291635003489695
Iter:0 - Alpha:0.05 - Batch 51/2788 - Min Loss:35.16 - Loss:35.16507128305489
Iter:0 - Alpha:0.05 - Batch 52/2788 - Min Loss:34.93 - Loss:34.932042536211426
Iter:0 - Alpha:0.05 - Batch 53/2788 - Min Loss:34.66 - Loss:34.66460440344218
Iter:0 - Alpha:0.05 - Batch 54/2788 - Min Loss:34.43 - Loss:34.43088580889503
Iter:0 - Alpha:0.05 - Batch 55/2788 - Min Loss:34.17 - Loss:34.17781863800815
Iter:0 - Alpha:0.05 - Batch 56/2788 - Min Loss:34.00 - Loss:34.00397143855258
Iter:0 - Alpha:0.05 - Batch 57/2788 - Min Loss:33.78 - Loss:33.78768619711431
Iter:0 - Alpha:0.05 - Batch 58/2788 - Min Loss:33.54 - Loss:33.540391102459814
Iter:0 - Alpha:0.05 - Batch 59/2788 - Min Loss:33.41 - Loss:33.41414268739367
Iter:0 - Alpha:0.05 - Batch 60/2788 - Min Loss:33.28 - Loss:33.287004981875725
Iter:0 - Alpha:0.05 - Batch 61/2788 - Min Loss:33.11 - Loss:33.119700025458904
Iter:0 - Alpha:0.05 - Batch 62/2788 - Min Loss:32.93 - Loss:32.93274327439954
Iter:0 - Alpha:0.05 - Batch 63/2788 - Min Loss:32.75 - Loss:32.7517268790668
Iter:0 - Alpha:0.05 - Batch 64/2788 - Min Loss:32.52 - Loss:32.52533806696676
Iter:0 - Alpha:0.05 - Batch 65/2788 - Min Loss:32.32 - Loss:32.32143594083988
Iter:0 - Alpha:0.05 - Batch 66/2788 - Min Loss:32.25 - Loss:32.25157933217092
Iter:0 - Alpha:0.05 - Batch 67/2788 - Min Loss:32.09 - Loss:32.09264863723831
Iter:0 - Alpha:0.05 - Batch 68/2788 - Min Loss:31.97 - Loss:31.97688730252501
Iter:0 - Alpha:0.05 - Batch 69/2788 - Min Loss:31.82 - Loss:31.82220380310978
Iter:0 - Alpha:0.05 - Batch 70/2788 - Min Loss:31.69 - Loss:31.690526212828654
Iter:0 - Alpha:0.05 - Batch 71/2788 - Min Loss:31.63 - Loss:31.632962228439784
Iter:0 - Alpha:0.05 - Batch 72/2788 - Min Loss:31.48 - Loss:31.486156544089567
Iter:0 - Alpha:0.05 - Batch 73/2788 - Min Loss:31.24 - Loss:31.24954104552075
Iter:0 - Alpha:0.05 - Batch 74/2788 - Min Loss:31.08 - Loss:31.084010394152997
Iter:0 - Alpha:0.05 - Batch 75/2788 - Min Loss:30.93 - Loss:30.930785630062335
Iter:0 - Alpha:0.05 - Batch 76/2788 - Min Loss:30.77 - Loss:30.770844418083385
Iter:0 - Alpha:0.05 - Batch 77/2788 - Min Loss:30.55 - Loss:30.553141536884446
Iter:0 - Alpha:0.05 - Batch 79/2788 - Min Loss:30.44 - Loss:30.446381601158148
Iter:0 - Alpha:0.05 - Batch 80/2788 - Min Loss:30.31 - Loss:30.313430659093218
Iter:0 - Alpha:0.05 - Batch 81/2788 - Min Loss:30.17 - Loss:30.17685423526397
Iter:0 - Alpha:0.05 - Batch 82/2788 - Min Loss:30.03 - Loss:30.034068418194238
Iter:0 - Alpha:0.05 - Batch 83/2788 - Min Loss:29.86 - Loss:29.869799763537227
Iter:0 - Alpha:0.05 - Batch 84/2788 - Min Loss:29.71 - Loss:29.71315410265161
Iter:0 - Alpha:0.05 - Batch 85/2788 - Min Loss:29.62 - Loss:29.626194150081712
Iter:0 - Alpha:0.05 - Batch 86/2788 - Min Loss:29.51 - Loss:29.51259555618696
Iter:0 - Alpha:0.05 - Batch 88/2788 - Min Loss:29.42 - Loss:29.42060495535658
Iter:0 - Alpha:0.05 - Batch 89/2788 - Min Loss:29.32 - Loss:29.32396757332214
Iter:0 - Alpha:0.05 - Batch 90/2788 - Min Loss:29.12 - Loss:29.127034538647223
Iter:0 - Alpha:0.05 - Batch 91/2788 - Min Loss:28.99 - Loss:28.99104613092588
Iter:0 - Alpha:0.05 - Batch 92/2788 - Min Loss:28.93 - Loss:28.93157053340792
Iter:0 - Alpha:0.05 - Batch 93/2788 - Min Loss:28.85 - Loss:28.850017228708708
Iter:0 - Alpha:0.05 - Batch 94/2788 - Min Loss:28.72 - Loss:28.72971036113448
Iter:0 - Alpha:0.05 - Batch 95/2788 - Min Loss:28.57 - Loss:28.572422584455435
Iter:0 - Alpha:0.05 - Batch 96/2788 - Min Loss:28.44 - Loss:28.444473832731003
Iter:0 - Alpha:0.05 - Batch 97/2788 - Min Loss:28.32 - Loss:28.329838082768866
Iter:0 - Alpha:0.05 - Batch 98/2788 - Min Loss:28.22 - Loss:28.227999934535212
Iter:0 - Alpha:0.05 - Batch 99/2788 - Min Loss:28.15 - Loss:28.156615920158814
Iter:0 - Alpha:0.05 - Batch 100/2788 - Min Loss:28.08 - Loss:28.084682372851002
Iter:1 - Alpha:0.049 - Batch 2788/2788 - Min Loss:7.869 - Loss:7.869545419657005
Iter:2 - Alpha:0.049 - Batch 2788/2788 - Min Loss:7.125 - Loss:7.133998443189006
Iter:3 - Alpha:0.048 - Batch 2788/2788 - Min Loss:6.584 - Loss:6.771658681459316
Iter:4 - Alpha:0.048 - Batch 2788/2788 - Min Loss:6.482 - Loss:6.5734656052792495
The forther with my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my for the make my
Process finished with exit code 0
'''
55. Deep Learning Self-Study: Building My Own Deep Learning Framework, Part 16 - Using an LSTM to Overcome RNN Vanishing and Exploding Gradients and Write Shakespeare-Style Text
小宇爱 2025-02-28 14:31