张量的可变和不可变

python 复制代码

import numpy as np
import tensorflow as tf

if __name__ == '__main__':
    # Demo: contrast a constant tf tensor, a NumPy array and a tf.Variable
    # with respect to in-place modification.
    x = tf.ones(shape=(2, 1))
    x1 = np.ones(shape=(2, 1))
    # An all-zeros tensor.
    y = tf.zeros(shape=(2, 1))
    # x and x1 are created with essentially the same call.
    print(x)
    print(x1)
    # The one difference: a tf tensor cannot be assigned to, a NumPy array can.
    # Uncommenting the next line raises
    # TypeError: 'tensorflow.python.framework.ops.EagerTensor' object does not support item assignment
    # x[0, 0] = 2
    # The NumPy array x1, by contrast, accepts item assignment.
    x1[0, 0] = 2
    print(x1)
    print("-------------------------------")
    # A model has to update its state, so how do we change values at all?
    # Answer: hold the state in a tf.Variable.
    v = tf.Variable(initial_value=tf.random.normal(shape=(3, 1)))
    print(v)
    # Overwrite the whole variable with assign().
    v.assign(tf.ones(shape=(3, 1)))
    print(v)
    # A single element can be overwritten as well.
    v[0, 0].assign(3.)
    print(v)

实现一个线性分类器

可以看这个b站视频了解基本概念

对于我们这个例子来说，其实就是随机生成2个点云。

我们可以先准备一组数据

python 复制代码

import numpy as np
from matplotlib import pyplot as plt
import numpy as np

if __name__ == '__main__':
    # Build a toy two-class data set: two 2-D point clouds and their labels.
    num_cnt = 1000
    # Draw num_cnt random points for the first class.
    # cov is the covariance matrix of the distribution.
    # The result has num_cnt rows and 2 columns; each row holds the two
    # coordinates of one point.
    negative_samples = np.random.multivariate_normal(
        mean=[0, 3],
        cov=[[1, 0.5], [0.5, 1]],
        size=num_cnt
    )
    print("negative_samples:", negative_samples)
    # Second class: same covariance, only the mean differs.
    positive_samples = np.random.multivariate_normal(
        mean=[3, 0],
        cov=[[1, 0.5], [0.5, 1]],
        size=num_cnt
    )
    print("positive_samples:", positive_samples)
    # Stack both classes into one float32 array of shape (2 * num_cnt, 2).
    inputs = np.vstack((negative_samples, positive_samples)).astype(np.float32)
    print("inputs:", inputs)
    # Matching labels of shape (2 * num_cnt, 1), each entry 0 or 1:
    # targets[i, 0] is 0 when inputs[i] came from negative_samples,
    # targets[i, 0] is 1 when inputs[i] came from positive_samples.
    targets = np.vstack((np.zeros((num_cnt, 1), dtype=np.float32), np.ones((num_cnt, 1), dtype=np.float32)))
    print("targets:", targets)


    # Scatter plot of all points, coloured by their label.
    plt.scatter(inputs[:, 0], inputs[:, 1], c=targets[:, 0])
    plt.show()

剩下的就是实现分类器了

python 复制代码

import tensorflow as tf


# 均方误差损失函数
def square_loss(targets, predictions):
    """Return the mean squared error between targets and predictions.

    Args:
        targets: Expected values.
        predictions: Model outputs to compare against ``targets``.

    Returns:
        ``tf.reduce_mean`` of the element-wise squared difference, i.e. a
        single value averaged over every element.
    """
    residual = targets - predictions
    return tf.reduce_mean(tf.square(residual))


class TrainClass:
    """Minimal linear classifier ``inputs @ W + b`` trained by gradient descent."""

    def __init__(self, input_dim=2, output_dim=1):
        """Create the trainable parameters.

        Args:
            input_dim: Number of features per sample. Defaults to 2, the
                value that used to be hard-coded.
            output_dim: Number of output scores per sample. Defaults to 1,
                the value that used to be hard-coded.
        """
        # Weight matrix of shape (input_dim, output_dim), random uniform init.
        self.W = tf.Variable(initial_value=tf.random.uniform(shape=[input_dim, output_dim]))
        # Bias vector of shape (output_dim,), initialised to zeros.
        self.b = tf.Variable(initial_value=tf.zeros(shape=[output_dim]))

    def model(self, inputs):
        """Forward pass: return ``tf.matmul(inputs, W) + b``."""
        return tf.matmul(inputs, self.W) + self.b

    def training_step(self, inputs, targets, learning_rate=0.1):
        """Run one full-batch gradient-descent step and return the loss.

        Args:
            inputs: Batch of samples fed to ``model``.
            targets: Expected outputs for ``inputs``.
            learning_rate: Step size applied to the gradients. Defaults to
                0.1, the value that used to be hard-coded.

        Returns:
            The loss computed before the parameters were updated.
        """
        # The forward pass and the loss must run inside the tape so that
        # the gradients w.r.t. W and b can be derived afterwards.
        with tf.GradientTape() as tape:
            predictions = self.model(inputs)
            loss = square_loss(targets, predictions)
        grad_loss_w, grad_loss_b = tape.gradient(loss, [self.W, self.b])
        # Move each parameter against its gradient, in place.
        self.W.assign_sub(grad_loss_w * learning_rate)
        self.b.assign_sub(grad_loss_b * learning_rate)
        return loss

然后验证一下效果：

python 复制代码

# Train the linear classifier for 40 full-batch steps, printing the loss each time.
# NOTE(review): relies on `inputs`, `targets` and `plt` from the data-generation snippet.
trc = TrainClass()
for step in range(40):
    loss = trc.training_step(inputs, targets)
    print(f"Loss at step {step} : {loss:.4f}")
# Once the parameters are fitted the model can be used for prediction directly.
pred = trc.model(inputs)
# The targets are 0 and 1, so 0.5 is used as the threshold between the two classes.
plt.scatter(inputs[:, 0], inputs[:, 1], c=pred[:, 0] > 0.5)
plt.show()

可以看到模型的损失值会慢慢趋于稳定

可以看下我们预测出来的点图和实际的点图是不是差别很小。还是很有意思的

利用 TensorFlow 来实现 MNIST 的图像预测任务

python 复制代码

import math

import numpy as np
import tensorflow as tf
from keras.src.datasets import mnist


class Dense:
    """Fully connected layer computing ``activation(x @ w + b)``.

    ``w`` and ``b`` are the trainable parameters of the layer.
    """

    def __init__(self, input_size, output_size, activation):
        """Create the layer parameters.

        Args:
            input_size: Number of input features.
            output_size: Number of output units.
            activation: Callable applied to the affine output.
        """
        self.activation = activation

        # Kernel of shape (input_size, output_size), drawn uniformly with
        # minval=0 and maxval=0.1.
        kernel_init = tf.random.uniform([input_size, output_size], minval=0, maxval=1e-1)
        self.w = tf.Variable(kernel_init)

        # Bias of shape (output_size,), all zeros.
        bias_init = tf.zeros((output_size,))
        self.b = tf.Variable(bias_init)

    def __call__(self, x):
        """Forward pass: affine transform followed by the activation."""
        affine = tf.matmul(x, self.w) + self.b
        return self.activation(affine)

    @property
    def weights(self):
        """The layer parameters as the list ``[w, b]``."""
        return [self.w, self.b]


# 这个类 就是层列表
class NaiveSequential:
    """An ordered list of layers that are applied one after another."""

    def __init__(self, layers):
        """Store ``layers``, the callables to chain in order."""
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        activations = inputs
        for apply_layer in self.layers:
            activations = apply_layer(activations)
        return activations

    @property
    def weights(self):
        """A new flat list with the weights of every layer, in layer order."""
        return [param for layer in self.layers for param in layer.weights]


# 批量生成器
class BatchGenerator:
    """Hands out consecutive mini-batches of images and their labels."""

    def __init__(self, images, labels, batch_size=128):
        """Remember the data and work out how many batches cover it.

        Args:
            images: Sliceable collection of samples; must support ``len``.
            labels: Sliceable collection of labels, aligned with ``images``.
            batch_size: Maximum number of samples per batch.
        """
        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        # Round up so that a trailing partial batch is counted too.
        self.num_batches = math.ceil(len(images) / self.batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = (self.images[window], self.labels[window])
        self.index += self.batch_size
        return batch


# 更新权重
def update_weights(grads, weights):
    """Apply one SGD update to ``weights`` using ``grads``.

    Args:
        grads: Gradients, one per entry of ``weights`` and in the same order.
        weights: Variables to update in place.
    """
    # 1e-3 (0.001) is the learning rate. A fresh legacy SGD optimizer is
    # created on every call; the author also tried
    # `optimizers.SGD(learning_rate=1e-3)` and left it disabled.
    sgd = tf.keras.optimizers.legacy.SGD(learning_rate=1e-3)
    grads_and_vars = zip(grads, weights)
    sgd.apply_gradients(grads_and_vars)


# 完成一次训练步骤 最关键的其实是这一步
def one_training_step(model, images_batch, labels_batch):
    """Run one training step on a single batch and return its mean loss.

    Args:
        model: Callable model exposing a ``weights`` list.
        images_batch: Batch of inputs passed to ``model``.
        labels_batch: Integer class labels for ``images_batch``.

    Returns:
        The batch-averaged sparse categorical cross-entropy, computed
        before the weights were updated.
    """
    with tf.GradientTape() as tape:
        # The forward pass runs inside the tape so it can be differentiated.
        outputs = model(images_batch)
        # Loss of every sample against its true label, then the batch mean.
        mean_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(labels_batch, outputs)
        )
    # One gradient per entry of model.weights, in the same order.
    gradients = tape.gradient(mean_loss, model.weights)
    update_weights(gradients, model.weights)
    return mean_loss


# 完整的训练循环
def fit_model(model, images, labels, epochs, batch_size=128):
    """Train ``model`` for ``epochs`` passes over the full data set.

    Args:
        model: Model passed to ``one_training_step``.
        images: Complete set of training inputs.
        labels: Labels aligned with ``images``.
        epochs: Number of passes over the data.
        batch_size: Number of samples per mini-batch.
    """
    for epoch in range(epochs):
        # A fresh generator per epoch restarts at the first sample.
        batch_generator = BatchGenerator(images, labels, batch_size)
        for batch_counter in range(batch_generator.num_batches):
            # Use separate names for the batch. Rebinding `images`/`labels`
            # here would make every later epoch train only on the last
            # mini-batch of the previous epoch.
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter} : {loss:.2f}")


if __name__ == '__main__':
    # Load MNIST, flatten each 28x28 image into a 784-vector, cast to
    # float32 and divide by 255.
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
    train_images = train_images.reshape((60000, 28 * 28))
    train_images = train_images.astype('float32') / 255
    test_images = test_images.reshape((10000, 28 * 28))
    test_images = test_images.astype('float32') / 255
    # This builds a model that mimics a Keras Sequential model:
    # a 512-unit ReLU layer followed by a 10-way softmax layer.
    model = NaiveSequential([
        Dense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
        Dense(input_size=512, output_size=10, activation=tf.nn.softmax)
    ])
    fit_model(model, train_images, train_labels, epochs=10)

    # Evaluate: the predicted class is the index of the largest output;
    # accuracy is the fraction of test labels matched.
    pred = model(test_images)
    pred = pred.numpy()
    pred_labels = np.argmax(pred, axis=1)
    matches = pred_labels == test_labels
    print(f"accuracy:{matches.mean():.2f}")

Keras中的基类

python 复制代码

# 实现Layer的子类
class SimpleDense(keras.layers.Layer):
    """Dense layer written as a ``keras.layers.Layer`` subclass."""

    def __init__(self, units, activation=None):
        """Store the layer configuration.

        Args:
            units: Number of output units (neurons).
            activation: Optional callable applied to the affine output;
                ``None`` means no activation is applied.
        """
        super().__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        """Create the weights; the input width is the last entry of ``input_shape``."""
        fan_in = input_shape[-1]
        # add_weight is a shortcut for creating weights; tf.Variable would
        # work as well.
        self.W = self.add_weight(shape=(fan_in, self.units), initializer="random_normal")
        self.b = self.add_weight(shape=(self.units,), initializer="zeros")

    def call(self, inputs):
        """Forward pass; a custom layer must implement it in ``call``."""
        affine = tf.matmul(inputs, self.W) + self.b
        if self.activation is None:
            return affine
        return self.activation(affine)

总结

Keras的核心类是Layer，负责封装权重和一些计算，layer会组成模型
model.compile 最重要的是负责选择优化器，损失函数和指标
fit方法负责运行小批量梯度下降
训练好模型以后可以用predict方法对新的输入进行预测

《python深度学习》读书笔记(2) - Keras和TensorFlow 入门

张量的可变和不可变

实现一个 线性分类器

利用 TensorFlow 来实现 MNIST 的图像预测任务

Keras中的基类

总结

实现一个线性分类器