A Hands-On Introduction to Machine Learning
We implement a simple CNN model and use it for an image classification task.
Basics
Loss functions
A loss function measures how much the model's prediction f(x) deviates from the ground truth Y. It is a non-negative real-valued function, usually written L(Y, f(x)); the smaller the loss, the better the model's predictions match the ground truth.
Once we have built a neural network, feeding input data through it yields an output, but how do we define the gap between that output and the ground truth? The loss function gives us exactly that measure. The most commonly used loss functions are:
- Mean squared error: $E = \frac{1}{2} \sum_{i=1}^{k} (y_i - t_i)^2$
- Cross-entropy error: $E = -\sum_{i=1}^{k} t_i \log y_i$
where $y_i$ is the model output and $t_i$ is the ground-truth value.
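As a quick illustration (the numbers here are made up for the example): for a four-class problem with one-hot target $t = (0, 1, 0, 0)$ and softmax output $y = (0.1, 0.7, 0.1, 0.1)$, the cross-entropy error is

$$E = -(0 \cdot \log 0.1 + 1 \cdot \log 0.7 + 0 \cdot \log 0.1 + 0 \cdot \log 0.1) = -\log 0.7 \approx 0.357,$$

so only the probability the model assigns to the correct class contributes to the loss.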
Computing the gradient
The gradient generalizes the derivative to functions of several variables. The derivative of an ordinary single-variable function is a scalar-valued function, whereas the gradient of a multivariate function is a vector-valued function: the gradient of a differentiable multivariate function f at a point P is the vector whose components are the partial derivatives of f at P.
With the loss function described above, we know how far the output deviates from the ground truth. But how should we adjust the parameters of each layer so that the loss decreases? For a particular parameter, should we increase it or decrease it?
This is where we take the derivative of the loss in every direction, i.e., its gradient, and then adjust each parameter step by step according to it.
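Concretely, the plain gradient-descent update (which the optimizers used later in this post build on) moves each parameter a small step against its gradient:

$$\theta \leftarrow \theta - \eta \, \frac{\partial L}{\partial \theta}$$

where $\eta$ is the learning rate (the `lr` hyperparameter passed to the optimizers below).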
Forward propagation
Forward propagation is the process of passing the input through the layers one at a time, computing each layer's output and activation, until we obtain the final output Y; once Y is known, the loss can be computed from its difference with the ground truth.
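In code, forward propagation is just a chain of forward calls. A minimal sketch, mirroring the predict method of the SimpleConvNet implemented later in this post (`layers` is assumed to be an ordered collection of layer objects):

```python
def forward_pass(layers, x):
    # Forward propagation: each layer consumes the previous layer's output.
    for layer in layers:      # e.g. Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine
        x = layer.forward(x)
    return x                  # final scores, later fed into the loss
```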
Backpropagation
Backpropagation starts from the loss L(Y, T) and works backwards through the network, computing the partial derivatives (gradients) of the loss with respect to each layer's outputs, activations, weights, and biases, and then uses those gradients to update the parameters.
Computing gradients by plain numerical differentiation and then updating the parameters is simple, but it is computationally expensive and makes training slow. By recording each layer's intermediate results during the forward pass and reusing them in the backward pass, backpropagation greatly reduces the amount of computation and speeds up training.
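A common sanity check is to compare the two approaches: the gradients from backpropagation should agree closely with numerically estimated ones. A minimal sketch, assuming a `network` object with the `gradient` and `numerical_gradient` methods of the SimpleConvNet defined later in this post, and a small batch `x_batch`, `t_batch` taken from the dataset:

```python
import numpy as np

# Gradient check: backprop vs. numerical differentiation on a small batch.
grad_backprop = network.gradient(x_batch, t_batch)
grad_numerical = network.numerical_gradient(x_batch, t_batch)

for key in grad_backprop:
    # Mean absolute difference per parameter; should be tiny (e.g. < 1e-6).
    diff = np.mean(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key, diff)
```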
The machine learning workflow
The machine learning workflow is as follows (a minimal loop sketch follows the list):
- Prepare the training set and the test set.
- Build the network.
- Feed the training data in mini-batches, run forward propagation, and compute the loss.
- Run backpropagation and update the parameters of every layer.
- Measure the network's accuracy on the test set.
- Repeat steps 3-5.
- When training is finished, save the model (its parameters).
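Put together, the loop looks roughly like this. A minimal sketch, assuming a `network` exposing `gradient`, `accuracy`, and `save_params`, and an `optimizer` exposing `update`, as implemented later in this post:

```python
import numpy as np

# Minimal training loop (the Trainer class later in this post does the same
# thing with more bookkeeping). x_train, t_train, x_test, t_test, network
# and optimizer are assumed to be prepared already.
batch_size = 5
for epoch in range(20):
    for _ in range(max(x_train.shape[0] // batch_size, 1)):
        mask = np.random.choice(x_train.shape[0], batch_size)   # sample a mini-batch
        grads = network.gradient(x_train[mask], t_train[mask])  # forward + backward
        optimizer.update(network.params, grads)                  # update the parameters
    print("epoch", epoch + 1, "test acc:", network.accuracy(x_test, t_test))
network.save_params("params.pkl")                                # save the trained model
```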
Implementing the CNN model
Dataset download: www.kaggle.com/datasets/mb...
The code is as follows.
load_data.py: utility functions for loading the dataset
python
import os
from PIL import Image
import numpy as np
img_file_path_train = "/home/cyx/document/archive/train_zip"
img_file_path_test = "/home/cyx/document/archive/test_zip"
image_shape = (100, 100)
# Load the dataset
def load_data(normalize=False, one_hot_label=False):
test_data, test_label = _load_data(img_file_path_test)
train_data, train_label = _load_data(img_file_path_train)
print("test_data.shape:", test_data.shape)
print("test_label.shape:", test_label.shape)
print("train_data.shape:", train_data.shape)
print("train_label.shape:", train_label.shape)
if one_hot_label:
test_label = _change_one_hot_label(test_label)
train_label = _change_one_hot_label(train_label)
if normalize:
test_data = test_data.astype("float32") / 255.0
train_data = train_data.astype("float32") / 255.0
return (train_data, train_label), (test_data, test_label)
# Load the dataset from a directory
def _load_data(file_path):
img_path = []
for file in _walk_dir(file_path):
if _is_image(file):
img_path.append(file)
data_list = []
label_list = []
for path in img_path:
data = _read_img(path)
# if not _valid_image(data):
# continue
data_list.append(data)
label_list.append(_read_label(path))
data_list = np.stack(data_list, axis=0)
label_list = np.stack(label_list, axis=0)
return data_list, label_list
def _read_img(file_name):
    # Read the image
image = Image.open(file_name)
# image = image.resize(image_shape, Image.Resampling.BILINEAR)
image = image.resize(image_shape)
image = image.convert("RGB")
    # Convert the image to a numpy array
image_array = np.array(image)
image_array = image_array.transpose(2, 0, 1)
# print(image_array.shape)
    # Flatten the 3-D array into a 1-D array
# flat_image_array = image_array.flatten()
return image_array
# Walk all files under a directory
def _walk_dir(file_path):
    for root, dirs, files in os.walk(file_path):
for file in files:
yield root + "/" + file
# Check whether the file is an image
def _is_image(file_name):
return "png" in file_name or "jpg" in file_name
def _read_label(file_name):
if "apple" in file_name:
return 0
if "banana" in file_name:
return 1
if "orange" in file_name:
return 2
if "mixed" in file_name:
return 3
def _change_one_hot_label(X):
T = np.zeros((X.size, 4))
for idx, row in enumerate(T):
row[X[idx]] = 1
return T
def _valid_image(data):
return data.shape[0] / image_shape[0] / image_shape[1] == 3.0
def read_img(file_name):
return _read_img(file_name)
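A quick sanity check of the loader (assuming the Kaggle archive has been unpacked to the train/test paths configured at the top of load_data.py):

```python
from load_data import load_data

# Load the images, scale pixels to [0, 1], and one-hot encode the labels.
(train_x, train_t), (test_x, test_t) = load_data(normalize=True, one_hot_label=True)
print(train_x.shape, train_t.shape)   # expected: (N, 3, 100, 100) and (N, 4)
print(test_x.shape, test_t.shape)
```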
layers.py: CNN and fully connected layer implementations
python
# coding: utf-8
import numpy as np
class Relu:
def __init__(self):
self.mask = None
def forward(self, x):
self.mask = (x <= 0)
out = x.copy()
out[self.mask] = 0
return out
def backward(self, dout):
dout[self.mask] = 0
dx = dout
return dx
class Sigmoid:
def __init__(self):
self.out = None
def forward(self, x):
out = sigmoid(x)
self.out = out
return out
def backward(self, dout):
dx = dout * (1.0 - self.out) * self.out
return dx
class Affine:
def __init__(self, W, b):
self.W =W
self.b = b
self.x = None
self.original_x_shape = None
        # Gradients of the weights and bias
self.dW = None
self.db = None
def forward(self, x):
        # Handle tensor inputs
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
out = np.dot(self.x, self.W) + self.b
return out
def backward(self, dout):
dx = np.dot(dout, self.W.T)
self.dW = np.dot(self.x.T, dout)
self.db = np.sum(dout, axis=0)
        dx = dx.reshape(*self.original_x_shape)  # restore the input's original shape (for tensor inputs)
return dx
class SoftmaxWithLoss:
def __init__(self):
self.loss = None
        self.y = None  # output of softmax
        self.t = None  # ground-truth labels
def forward(self, x, t):
self.t = t
self.y = softmax(x)
self.loss = cross_entropy_error(self.y, self.t)
return self.loss
def backward(self, dout=1):
batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # labels are one-hot vectors
dx = (self.y - self.t) / batch_size
else:
dx = self.y.copy()
dx[np.arange(batch_size), self.t] -= 1
dx = dx / batch_size
return dx
class Convolution:
def __init__(self, W, b, stride=1, pad=0):
self.W = W
self.b = b
self.stride = stride
self.pad = pad
        # Intermediate data (used in backward)
self.x = None
self.col = None
self.col_W = None
        # Gradients of the weights and bias
self.dW = None
self.db = None
def forward(self, x):
FN, C, FH, FW = self.W.shape
N, C, H, W = x.shape
out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
col = im2col(x, FH, FW, self.stride, self.pad)
col_W = self.W.reshape(FN, -1).T
out = np.dot(col, col_W) + self.b
out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
self.x = x
self.col = col
self.col_W = col_W
return out
def backward(self, dout):
FN, C, FH, FW = self.W.shape
dout = dout.transpose(0,2,3,1).reshape(-1, FN)
self.db = np.sum(dout, axis=0)
self.dW = np.dot(self.col.T, dout)
self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
dcol = np.dot(dout, self.col_W.T)
dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
return dx
class Pooling:
def __init__(self, pool_h, pool_w, stride=1, pad=0):
self.pool_h = pool_h
self.pool_w = pool_w
self.stride = stride
self.pad = pad
self.x = None
self.arg_max = None
def forward(self, x):
N, C, H, W = x.shape
out_h = int(1 + (H - self.pool_h) / self.stride)
out_w = int(1 + (W - self.pool_w) / self.stride)
col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
col = col.reshape(-1, self.pool_h*self.pool_w)
arg_max = np.argmax(col, axis=1)
out = np.max(col, axis=1)
out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
self.x = x
self.arg_max = arg_max
return out
def backward(self, dout):
dout = dout.transpose(0, 2, 3, 1)
pool_size = self.pool_h * self.pool_w
dmax = np.zeros((dout.size, pool_size))
dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
dmax = dmax.reshape(dout.shape + (pool_size,))
dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
return dx
def identity_function(x):
return x
def step_function(x):
    return np.array(x > 0, dtype=int)
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def sigmoid_grad(x):
return (1.0 - sigmoid(x)) * sigmoid(x)
def relu(x):
return np.maximum(0, x)
def relu_grad(x):
    grad = np.zeros_like(x)
grad[x>=0] = 1
return grad
def softmax(x):
if x.ndim == 2:
x = x.T
x = x - np.max(x, axis=0)
y = np.exp(x) / np.sum(np.exp(x), axis=0)
return y.T
    x = x - np.max(x)  # guard against overflow
return np.exp(x) / np.sum(np.exp(x))
def mean_squared_error(y, t):
return 0.5 * np.sum((y-t)**2)
def cross_entropy_error(y, t):
if y.ndim == 1:
t = t.reshape(1, t.size)
y = y.reshape(1, y.size)
    # If the labels are one-hot vectors, convert them to class indices
if t.size == y.size:
t = t.argmax(axis=1)
batch_size = y.shape[0]
return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
def softmax_loss(X, t):
y = softmax(X)
return cross_entropy_error(y, t)
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
"""
Parameters
----------
    input_data : input data, a 4-D array of shape (batch size, channels, height, width)
    filter_h : filter height
    filter_w : filter width
    stride : stride
    pad : padding
    Returns
    -------
    col : 2-D array
"""
N, C, H, W = input_data.shape
out_h = (H + 2*pad - filter_h)//stride + 1
out_w = (W + 2*pad - filter_w)//stride + 1
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))
for y in range(filter_h):
y_max = y + stride*out_h
for x in range(filter_w):
x_max = x + stride*out_w
col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
return col
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
"""
Parameters
----------
col :
    input_shape : shape of the input data (e.g. (10, 1, 28, 28))
filter_h :
filter_w
stride
pad
Returns
-------
"""
N, C, H, W = input_shape
out_h = (H + 2*pad - filter_h)//stride + 1
out_w = (W + 2*pad - filter_w)//stride + 1
col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)
img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
for y in range(filter_h):
y_max = y + stride*out_h
for x in range(filter_w):
x_max = x + stride*out_w
img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]
return img[:, :, pad:H + pad, pad:W + pad]
def _numerical_gradient_1d(f, x):
h = 1e-4 # 0.0001
grad = np.zeros_like(x)
for idx in range(x.size):
tmp_val = x[idx]
x[idx] = float(tmp_val) + h
fxh1 = f(x) # f(x+h)
x[idx] = tmp_val - h
fxh2 = f(x) # f(x-h)
grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value
return grad
def numerical_gradient_2d(f, X):
if X.ndim == 1:
return _numerical_gradient_1d(f, X)
else:
grad = np.zeros_like(X)
for idx, x in enumerate(X):
grad[idx] = _numerical_gradient_1d(f, x)
return grad
def numerical_gradient(f, x):
h = 1e-4 # 0.0001
grad = np.zeros_like(x)
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
idx = it.multi_index
tmp_val = x[idx]
x[idx] = float(tmp_val) + h
fxh1 = f(x) # f(x+h)
x[idx] = tmp_val - h
fxh2 = f(x) # f(x-h)
grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value
it.iternext()
return grad
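As a quick check of the layer implementations above, the output shapes can be verified on random data. A minimal sketch (the filter and pooling sizes here are chosen just for the example):

```python
import numpy as np
from layers import Convolution, Pooling

# Two random RGB images of 100x100 pixels.
x = np.random.randn(2, 3, 100, 100)
W = 0.01 * np.random.randn(30, 3, 5, 5)   # 30 filters of size 5x5
b = np.zeros(30)

conv = Convolution(W, b, stride=1, pad=0)
out = conv.forward(x)
print(out.shape)                  # (2, 30, 96, 96): (100 - 5) / 1 + 1 = 96

pool = Pooling(pool_h=2, pool_w=2, stride=2)
print(pool.forward(out).shape)    # (2, 30, 48, 48)
```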
trainer.py: the training code
python
# coding: utf-8
import numpy as np
class Trainer:
"""进行神经网络的训练的类
"""
def __init__(self, network, x_train, t_train, x_test, t_test,
epochs=20, mini_batch_size=100,
optimizer='SGD', optimizer_param={'lr':0.01},
evaluate_sample_num_per_epoch=None, verbose=True):
self.network = network
self.verbose = verbose
self.x_train = x_train
self.t_train = t_train
self.x_test = x_test
self.t_test = t_test
self.epochs = epochs
self.batch_size = mini_batch_size
self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch
        # optimizer
        optimizer_class_dict = {'sgd': SGD, 'momentum': Momentum, 'nesterov': Nesterov,
                                'adagrad': AdaGrad, 'rmsprop': RMSprop, 'adam': Adam}
self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
self.train_size = x_train.shape[0]
self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
self.max_iter = int(epochs * self.iter_per_epoch)
self.current_iter = 0
self.current_epoch = 0
self.train_loss_list = []
self.train_acc_list = []
self.test_acc_list = []
def train_step(self):
batch_mask = np.random.choice(self.train_size, self.batch_size)
x_batch = self.x_train[batch_mask]
t_batch = self.t_train[batch_mask]
grads = self.network.gradient(x_batch, t_batch)
self.optimizer.update(self.network.params, grads)
loss = self.network.loss(x_batch, t_batch)
self.train_loss_list.append(loss)
if self.verbose: print("train loss:" + str(loss))
if self.current_iter % self.iter_per_epoch == 0:
self.current_epoch += 1
x_train_sample, t_train_sample = self.x_train, self.t_train
x_test_sample, t_test_sample = self.x_test, self.t_test
if not self.evaluate_sample_num_per_epoch is None:
t = self.evaluate_sample_num_per_epoch
x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
train_acc = self.network.accuracy(x_train_sample, t_train_sample)
test_acc = self.network.accuracy(x_test_sample, t_test_sample)
self.train_acc_list.append(train_acc)
self.test_acc_list.append(test_acc)
if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")
self.current_iter += 1
def train(self):
for i in range(self.max_iter):
self.train_step()
test_acc = self.network.accuracy(self.x_test, self.t_test)
if self.verbose:
print("=============== Final Test Accuracy ===============")
print("test acc:" + str(test_acc))
my_network.py: the network
python
# Fruit classification
from load_data import load_data, read_img
import os, sys
import numpy as np
from layers import *
from collections import OrderedDict
from trainer import Trainer
import pickle
image_shape = (100, 100)
input_size = image_shape[0] * image_shape[1] * 3
class SimpleConvNet:
"""简单的ConvNet
conv - relu - pool - affine - relu - affine - softmax
Parameters
----------
input_size : 输入大小(MNIST的情况下为784)
hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100])
output_size : 输出大小(MNIST的情况下为10)
activation : 'relu' or 'sigmoid'
weight_init_std : 指定权重的标准差(e.g. 0.01)
指定'relu'或'he'的情况下设定"He的初始值"
指定'sigmoid'或'xavier'的情况下设定"Xavier的初始值"
"""
def __init__(
self,
input_dim=(1, 28, 28),
conv_param={"filter_num": 30, "filter_size": 5, "pad": 0, "stride": 1},
hidden_size=100,
output_size=10,
weight_init_std=0.01,
):
filter_num = conv_param["filter_num"]
filter_size = conv_param["filter_size"]
filter_pad = conv_param["pad"]
filter_stride = conv_param["stride"]
input_size = input_dim[1]
conv_output_size = (
input_size - filter_size + 2 * filter_pad
) / filter_stride + 1
pool_output_size = int(
filter_num * (conv_output_size / 2) * (conv_output_size / 2)
)
        # Initialize the weights
self.params = {}
self.params["W1"] = weight_init_std * np.random.randn(
filter_num, input_dim[0], filter_size, filter_size
)
self.params["b1"] = np.zeros(filter_num)
self.params["W2"] = weight_init_std * np.random.randn(
pool_output_size, hidden_size
)
self.params["b2"] = np.zeros(hidden_size)
self.params["W3"] = weight_init_std * np.random.randn(hidden_size, output_size)
self.params["b3"] = np.zeros(output_size)
        # Build the layers
self.layers = OrderedDict()
self.layers["Conv1"] = Convolution(
self.params["W1"],
self.params["b1"],
conv_param["stride"],
conv_param["pad"],
)
self.layers["Relu1"] = Relu()
self.layers["Pool1"] = Pooling(pool_h=2, pool_w=2, stride=2)
self.layers["Affine1"] = Affine(self.params["W2"], self.params["b2"])
self.layers["Relu2"] = Relu()
self.layers["Affine2"] = Affine(self.params["W3"], self.params["b3"])
self.last_layer = SoftmaxWithLoss()
def predict(self, x):
for layer in self.layers.values():
x = layer.forward(x)
return x
def loss(self, x, t):
"""求损失函数
参数x是输入数据、t是教师标签
"""
y = self.predict(x)
return self.last_layer.forward(y, t)
def accuracy(self, x, t, batch_size=10):
if t.ndim != 1:
t = np.argmax(t, axis=1)
acc = 0.0
for i in range(int(x.shape[0] / batch_size)):
tx = x[i * batch_size : (i + 1) * batch_size]
tt = t[i * batch_size : (i + 1) * batch_size]
y = self.predict(tx)
y = np.argmax(y, axis=1)
acc += np.sum(y == tt)
return acc / x.shape[0]
def numerical_gradient(self, x, t):
"""求梯度(数值微分)
Parameters
----------
x : 输入数据
t : 教师标签
Returns
-------
具有各层的梯度的字典变量
grads['W1']、grads['W2']、...是各层的权重
grads['b1']、grads['b2']、...是各层的偏置
"""
loss_w = lambda w: self.loss(x, t)
grads = {}
for idx in (1, 2, 3):
grads["W" + str(idx)] = numerical_gradient(
loss_w, self.params["W" + str(idx)]
)
grads["b" + str(idx)] = numerical_gradient(
loss_w, self.params["b" + str(idx)]
)
return grads
def gradient(self, x, t):
"""求梯度(误差反向传播法)
Parameters
----------
x : 输入数据
t : 教师标签
Returns
-------
具有各层的梯度的字典变量
grads['W1']、grads['W2']、...是各层的权重
grads['b1']、grads['b2']、...是各层的偏置
"""
# forward
self.loss(x, t)
# backward
dout = 1
dout = self.last_layer.backward(dout)
layers = list(self.layers.values())
layers.reverse()
for layer in layers:
dout = layer.backward(dout)
        # Collect the gradients from each layer
grads = {}
grads["W1"], grads["b1"] = self.layers["Conv1"].dW, self.layers["Conv1"].db
grads["W2"], grads["b2"] = self.layers["Affine1"].dW, self.layers["Affine1"].db
grads["W3"], grads["b3"] = self.layers["Affine2"].dW, self.layers["Affine2"].db
return grads
def save_params(self, file_name="params.pkl"):
params = {}
for key, val in self.params.items():
params[key] = val
with open(file_name, "wb") as f:
pickle.dump(params, f)
def load_params(self, file_name="params.pkl"):
with open(file_name, "rb") as f:
params = pickle.load(f)
for key, val in params.items():
self.params[key] = val
for i, key in enumerate(["Conv1", "Affine1", "Affine2"]):
self.layers[key].W = self.params["W" + str(i + 1)]
self.layers[key].b = self.params["b" + str(i + 1)]
def train():
(x_train, t_train), (x_test, t_test) = load_data(one_hot_label=True, normalize=True)
input_dim = x_train[0].shape
conv_param = {
"filter_num": 30,
"filter_size": 15,
"pad": 0,
"stride": 1,
}
hidden_size = 100
output_size = 4
network = SimpleConvNet(
input_dim=input_dim,
conv_param=conv_param,
hidden_size=hidden_size,
output_size=output_size,
)
max_epochs = 20
trainer = Trainer(
network,
x_train,
t_train,
x_test,
t_test,
epochs=max_epochs,
mini_batch_size=5,
optimizer="Adam",
optimizer_param={"lr": 0.001},
evaluate_sample_num_per_epoch=20,
)
trainer.train()
network.save_params("params.pkl")
print("Save model.")
def result_to_label(result):
idx = np.argmax(result)
if idx == 0:
return "apple"
if idx == 1:
return "banana"
if idx == 2:
return "orange"
if idx == 3:
return "mixed"
def predict():
    # Pick an image
image_path = "./mixed.jpeg"
    # Initialize the network
network = SimpleConvNet()
network.load_params("params.pkl")
    # Prepare the data (normalize the same way as during training)
    x = read_img(image_path)
    x = x.astype("float32") / 255.0
    x = x.reshape(1, 3, 100, 100)
y = network.predict(x)
print(result_to_label(y))
if __name__ == "__main__":
# train()
predict()
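For the hyperparameters used in train() (100x100 RGB input, 30 filters of size 15, stride 1, no padding, followed by 2x2 max pooling with stride 2), the sizes in __init__ work out to conv_output_size = (100 - 15 + 2·0)/1 + 1 = 86, then 86/2 = 43 after pooling, so pool_output_size = 30 · 43 · 43 = 55470. W2 therefore has shape (55470, 100) and W3 has shape (100, 4).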
Implementing the model with PyTorch
We now implement the same network structure as above, but with PyTorch.
The code is as follows:
pytorch_network.py
python
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch import nn
from load_data import load_data, read_img
device = (
"cuda"
if torch.cuda.is_available()
else "cpu"
)
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
        self.conv = nn.Sequential(  # input: (3, 100, 100)
nn.Conv2d(
in_channels=3,
out_channels=30,
kernel_size=5,
padding=0,
stride=1,
            ),  # after the conv layer: (30, 96, 96)
nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # after pooling: (30, 48, 48)
)
self.out = nn.Linear(30*48*48, 4)
def forward(self, x):
x = self.conv(x)
x = x.view(x.size(0), -1)
logits = self.out(x)
return logits
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
        loss.backward()  # backpropagation computes the gradient of every layer
optimizer.step()
optimizer.zero_grad()
# if batch % 100 == 0:
loss, current = loss.item(), (batch + 1) * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
def optimize():
(x_train, t_train), (x_test, t_test) = load_data(one_hot_label=False, normalize=True)
x_train_tensor, t_train_tensor, x_test_tensor, t_test_tensor = torch.tensor(x_train), torch.tensor(t_train), torch.tensor(x_test), torch.tensor(t_test)
train_data = TensorDataset(x_train_tensor, t_train_tensor)
test_data = TensorDataset(x_test_tensor, t_test_tensor)
# create dataloader
batch_size= 5
train_dataloader = DataLoader(train_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
# create model
model = NeuralNetwork().to(device)
print(model)
    # optimize
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
epochs = 50
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer)
test(test_dataloader, model, loss_fn)
print("Done!")
# save model
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
def predict():
classes = [
"apple",
"banana",
"orange",
"mixed",
]
    # Pick an image
image_path = "./orange.jpeg"
    # Initialize the network
    model = NeuralNetwork().to(device=device)
    model.load_state_dict(torch.load("model.pth", map_location=device))
    # Prepare the data
x = read_img(image_path)
x = x.astype("float32") / 255.0
x = x.reshape(1, 3, 100, 100)
x = torch.Tensor(x)
    # Inference
model.eval()
with torch.no_grad():
x = x.to(device)
pred = model(x)
predicted = classes[pred[0].argmax(0)]
print(f"predicted: {predicted}")
if __name__ == "__main__":
# optimize()
predict()
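For reference, the in_features of the final nn.Linear follows from the same output-size formula as before: the convolution gives (100 - 5)/1 + 1 = 96, the 2x2 max pooling halves it to 48, so the flattened feature vector has 30 · 48 · 48 = 69120 elements, which is exactly what nn.Linear(30*48*48, 4) expects.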
Recommended reading
Deep Learning from Scratch (《深度学习入门:基于Python的理论与实现》)