深度学习tensorflow的基本内容

Tensor类型:

.int(整型) float(浮点型) double(双浮点型)

.bool(布尔型)

.string(字符串型)

创建Tensor的几种方法：

1、将numpy, list类型进行转化成tensor用convert_to_tensor

2、zeros, ones, zeros_like, ones_like

3、fill ------> 填充

4、random -->normal创建一个正态分布的数据,truncated_normal截断一部分数据, uniform创建一个均匀分布的数据, shuffle随机打散

5、constant--> 创建一个常量

Tensor常量的属性与方法:

numpy(): 查看常量的数据， dtype: 查看常量的类型， shape: 查看常量的形状， ndim, rank: 查看常量的维度

device: 当前tensor所在的设备的名字

判断是否是Tensor类型的方法: isinstance, is_tensor

把其他类型转化为tensor类型: convert_to_tensor

数据类型的转化: cast

将Tensor常量转化为能求导的Tensor : Variable --> trainable查看是否可求导， name

python 复制代码

import numpy as np
import tensorflow as tf
# Pin tensor creation to a device with the tf.device context manager.
with tf.device("cpu"):
    a = tf.constant([1])  # created while the CPU device scope is active
a.device  # name of the device that holds a

with tf.device("gpu"):
    b = tf.constant([1])  # created while the GPU device scope is active
b.device  # name of the device that holds b

# Copy each tensor to the other device. The original `a.gpu` / `b.cpu` only
# referenced the attribute without calling it, so no copy was ever made;
# tf.identity under a device scope performs the copy explicitly.
with tf.device("gpu"):
    aa = tf.identity(a)
with tf.device("cpu"):
    bb = tf.identity(b)

python 复制代码

# From NumPy arrays
tf.convert_to_tensor(np.ones(shape=[3, 3]))
tf.convert_to_tensor(np.eye(3))
tf.convert_to_tensor(np.linspace(1, 10, 10))
tf.convert_to_tensor(np.random.randint(1, 10, size=(2, 4)))
# From a Python list
tf.convert_to_tensor([1, 2, 3, 4, 5, 6, 7])


# tf.ones(shape, dtype)
tf.ones(shape=(3, 4), dtype=tf.float32)
a = tf.constant([1, 2, 3, 4, 5, 6, 6])
tf.ones_like(a)  # all-ones tensor built from the shape of a

# tf.zeros(shape, dtype)
tf.zeros(shape=(3, 4), dtype=tf.float32)
a = tf.constant([1, 2, 3, 4, 5, 6, 6])
tf.zeros_like(a)  # all-zeros tensor built from the shape of a

# tf.fill(dims, value)
tf.fill(dims=(3, 4), value=3)

# tf.random.normal(shape, mean, stddev, dtype)
tf.random.normal(shape=(3, 4), mean=0, stddev=0.4, dtype=tf.float32)
# tf.random.uniform(shape, minval, maxval, dtype)
tf.random.uniform(shape=(3, 4), minval=0, maxval=10, dtype=tf.float32)
# tf.random.shuffle(value): shuffle along the first dimension
a = np.arange(10)
tf.random.shuffle(a)
# tf.constant
tf.constant(1.)  # <tf.Tensor: shape=(), dtype=float32, numpy=1.0>
tf.constant([True, False])  # <tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True, False])>
tf.constant(2)  # <tf.Tensor: shape=(), dtype=int32, numpy=2>
tf.constant("hello world")  # <tf.Tensor: shape=(), dtype=string, numpy=b'hello world'>

python 复制代码

# Draw a 4x5 float32 tensor from a normal distribution (mean 0, stddev 0.3).
a = tf.random.normal(shape=(4, 5), mean=0, stddev=0.3, dtype=tf.float32)

# Data, element type, shape and number of dimensions of the tensor.
a.numpy()
a.dtype
a.shape
a.ndim
tf.rank(a)

# Two ways to check whether an object is a tensor.
isinstance(a, tf.Tensor)
tf.is_tensor(a)

# Element-type conversion; a itself is not reassigned here.
tf.cast(a, dtype=tf.float16)

# Wrap the tensor in a tf.Variable and read its trainable flag and name.
b = tf.Variable(a, name="input_data")
b.trainable
b.name

# dtype objects can be compared with ==.
tf.float32 == a.dtype

python 复制代码

# Loss example: mean squared error between one-hot labels and softmax output.
logits = tf.random.uniform([4, 10])
# softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis, keepdims=True)
out = tf.nn.softmax(logits)

# Labels 0..3 encoded as one-hot vectors of length 10.
y = tf.one_hot(tf.range(4), depth=10)

# MSE returns mean(square(y_true - y_pred), axis=-1), i.e. one value per row;
# reduce_mean then averages those into a single scalar.
loss = tf.reduce_mean(tf.keras.losses.MSE(y, out))
loss

python 复制代码

"""
.Bias
    .[out_dim]
"""
net = tf.keras.layers.Dense(10)   # 全连接神经网络
net.build(input_shape = (4, 10))  # 建立网络输入数据
net.kernel  # 网络中参数w
net.bias    # 网络中偏置b

python 复制代码

"""
matrix(矩阵)
    .input x: [b, vec_dim] b： 多少张照片, vec_dim: 照片的长宽
    .weight:[input_dim, out_dim]   # w权值的
"""
x = tf.random.normal([4, 784])
net = tf.layers.Dense(10)
net.build(input_shape = [4, 784])

net(x).shape
net.kernel.shape, net.bias.shape

python 复制代码

"""
Dim = 3 Tensor
x: [b, seq_dim, word_dim]  # b:多少句子, seq_dim: 单词数量, word_dim: 编码长度
"""
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words = 10000)
# num_words 最多考虑的单词的数量 设置为10000表示只关注最常用的前10000个词汇，其他较少使用的忽略
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen = 80)   #maxlen每一个句子最长有多少单词，然后其他单词截断
emb = tf.keras.layers.Embedding(input_dim=10000, output_dim=100, input_length=80)
"""
input_dim=10000  表示输入数据中词汇表的大小，即总共有 10000 个不同的元素（例如 10000 个不同的单词）。
output_dim=100  表示每个输入元素被映射到的嵌入向量的维度大小，即生成的嵌入向量长度为 100。
input_length=80  表示输入序列的长度，即每个输入样本中包含的元素个数为 80。
"""
out = tf.keras.layers.SimpleRNNCell(units, dropout)  #  units 表示输出空间的维度，即隐藏层神经元的数量；

python 复制代码

"""
Dim = 4 Tensor
    .image [b, h, w, 3]
    .feature maps(特征图): [b, h, w, c]
"""
x = tf.random.normal([4, 32, 32, 3])
net = tf.keras.layers.Conv2D(filters, kernel_size, padding, activation)  #filters: 卷积核, kernel_size: 滤波矩阵， padding: 填充， activation 激活函数
net(x)  #调用这个网络

"""
Dim = 5 Tensor
.single task [b, h, w, 3]
.[task_b, b, h, w, 3]
"""

python 复制代码

# Indexing
a = tf.random.truncated_normal(shape=[4, 32, 32, 3], mean=0, stddev=0.8)

# Chained single-axis indexing.
a[0][0].shape        # [32, 3]
a[0][0][0].shape     # [3]
a[0][0][0][2]        # scalar

# Multi-axis indexing in one bracket.
a[1, 2, 3, 2]        # scalar
a[1, 2].shape        # [32, 3]
a[1, 3, 4].shape     # [3]
a[1, ..., 2].shape   # [32, 32]; ... stands for the axes in between

# Slicing with start:end:step
a[-1:].shape                          # [1, 32, 32, 3]
a[1:3, 0:14, 1:28:2, 0:2].shape       # [2, 14, 14, 2]
a[0, 1, :, :].shape                   # [32, 3]
a[0:2, :, :, 2].shape                 # [2, 32, 32]
a[:, ::2, ::2, :].shape               # [4, 16, 16, 3]
a[::-1].shape                         # [4, 32, 32, 3], first axis reversed

python 复制代码

"""
tf.gather(params, indices, axis)   根据提供的indices(索引)从axis(维度)收集元素
tf.gather_nd(params, indices)      利用indices 收集元素 
tf.boolean_mask(tensor, mask, axis=None)  利用mask(布尔值)从axis(维度)收集元素
"""
tf.gather(a, axis = 0, indices = [2, 3]).shape
tf.gather(a, axis = 0, indices = [2, 1, 3, 0]).shape
tf.gather(a, axis = 1, indices = [2, 3, 4, 5, 7, 9, 12]).shape

tf.gather_nd(a, indices = [0, 1]).shape
tf.gather_nd(a, indices = [0, 1, 2])
tf.gather_nd(a, indices = [[0, 0], [1, 1]]).shape
tf.gather_nd(a, indices = [[0, 0], [1, 1], [2, 2]]).shape
tf.gather_nd(a, indices = [[0, 0, 0], [1, 1, 1], [2, 2, 2]]).shape

tf.boolean_mask(a, mask = [True, True, False, False], axis = 0).shape
tf.boolean_mask(a, mask = [True, False, False], axis = 3).shape

python 复制代码

"""
维度变换:
.shape  形状
.ndim   维度
.reshape(tensor, shape)  改变形状
.expand_dims(input, axis)/squeeze(input, axis) 扩展/压缩维度
.transpose(a, perm)

Broadcasting(广播机制)
.broadcast_to 
.expand
.expand_dims
.without copying data
.tf.tile
"""
# shape, ndim
a1 = tf.random.truncated_normal(shape = (4, 28, 28, 3), mean = 0, stddev = 1)
a1.shape, a1.ndim

# reshape
tf.reshape(a1, shape = (4, 28 * 28, 3)).shape

#expand_dims/squeeze
tf.expand_dims(a1, axis=0).shape, 
a2 = tf.expand_dims(a1, axis = 4)
tf.squeeze(a2).shape

#transpose
tf.transpose(a1, perm = [0, 2, 3, 1])

#Broadcast
b = tf.ones(shape = (3, 3), dtype = tf.float32)
b = tf.expand_dims(b, axis = 2)
b = tf.tile(b, [1, 1, 3])
b.shape

tf.broadcast_to(tf.random.normal([4, 1, 1, 1]), [4, 32, 32, 3]).shape

x = tf.random.uniform([4, 32, 32, 3])
(x + tf.random.uniform([3])).shape
(x + tf.random.uniform([32, 32, 1])).shape
(x + tf.random.uniform([4, 1, 1, 1])).shape

python 复制代码

"""
数学运算
. +, -, *, / --> element_wise
. **, pow, square
.sqrt
.//, %
. exp, math.log 
. @, matmul
.layers-->Dense, Conv2D, MaxPooling2D, Embedding, SimpleRNNCell, LSTM
.losses--> MSE, categorical_crassentropy
dim_wise
.reduce_mean/max/min/sum/variance
"""
a = tf.random.normal(shape = (4, 3))
b = tf.random.normal(shape = (4, 3))
b - a, b + a, b * a, b / a, b ** a, tf.pow(a, 3), a ** 3, tf.square(a), tf.sqrt(a)
tf.exp(a), tf.math.log(a) # 以e为底
tf.math.log(8.) / tf.math.log(2.)  #loge^8/loge^2
tf.math.log(100.) / tf.math.log(10.) # loge^100/loge^10
tf.transpose(a) @ b

python 复制代码

"""
数学运算
. +, -, *, / --> element_wise
. **, pow, square
.sqrt
.//, %
. exp, math.log 
. @, matmul
.layers-->Dense, Conv2D, MaxPooling2D, Embedding, SimpleRNNCell, LSTM
.losses--> MSE, categorical_crassentropy
dim_wise
.reduce_mean/max/min/sum/variance
"""
a = tf.random.normal(shape = (4, 3))
b = tf.random.normal(shape = (4, 3))
b - a, b + a, b * a, b / a, b ** a, tf.pow(a, 3), a ** 3, tf.square(a), tf.sqrt(a)
tf.exp(a), tf.math.log(a) # 以e为底
tf.math.log(8.) / tf.math.log(2.)  #loge^8/loge^2
tf.math.log(100.) / tf.math.log(10.) # loge^100/loge^10
tf.transpose(a) @ b

python 复制代码

"""
.pad(tensor, paddings,  constant_values) paddings对左右上下进行填充[[上行, 下行], [左列, 右列]]   constant_values填充的数字
.expand_dims, tile
.broadcast_to
.where(condition, x, y)
.scatter_nd(indices, updates, shape)
.meshgrid
张量限幅：
tf.clip_by_value(t, clip_value_min, clip_value_max)
tf.nn.relu  max(0, a)
clip_by_norm(t, clip_norm)      (clip_norm * t) / ||t||
maximum
minimum
Gradient clipping : clip_by_global_norm(t_list, clip_norm)               t_list[i] * clip_norm / max(global_norm, clip_norm)
tf.one_hot
"""
a = tf.reshape(tf.range(9), shape = (3, 3))
a = tf.cast(tf.convert_to_tensor(a), dtype = tf.float32)
tf.pad(a, [[1, 0], [1, 1]], constant_values = 1)

aa = tf.expand_dims(a, axis = 0)
tf.tile(aa, [4, 1, 1])

tf.broadcast_to(a, [4, 3, 3])

tf.where(a > 4, 0, 1)

tf.maximum(a, 4)   # 小于四的全部用四代替  
tf.minimum(a, 4)   # 大于四的全部用四代替
tf.clip_by_value(a, 2, 6)   # 小于2用2代替大于6用6代替

tf.nn.relu(a)   # max(0, a)

tf.clip_by_norm(t = a, clip_norm = 5).numpy()

clipped_tensors, global_norm = tf.clip_by_global_norm(t_list = [tf.cast(tf.convert_to_tensor(tf.range(9)), dtype=tf.float32)], clip_norm=4)
clipped_tensors, global_norm

updates = tf.constant([9, 10, 11, 12])
indices = tf.constant([[4], [2], [1], [6]])
shape = tf.constant([8])
tf.scatter_nd(indices, updates, shape)

indices = tf.constant([[0], [2]])
updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]]])
shape = tf.constant([4, 4, 4])
tf.scatter_nd(indices, updates, shape)

x = tf.range(5)
y = tf.range(6)
points_x, points_y = tf.meshgrid(x, y)
points = tf.stack([points_x, points_y], axis = 2)
points

python 复制代码

"""
数据加载
outline:
    .keras.datasets
        .boston_housing.load_data()  波斯顿房价
        .mnist/fashion_mnist.load_data()   手写体/流行照片
        .cifar10/100.load_data()    模糊图像
        .imdb.load_data()   情感分类
.tf.data.Dataset.from_tensor_slices  将输入的张量按照指定的维度进行切片，并将每个切片作为数据集中的一个元素。
    .shuffle
    .map
    .batch
    .repeat
    1.  shuffle(10000) : 对数据集进行随机打乱，缓冲区大小为 10000。这有助于在训练过程中引入随机性，减少数据的顺序相关性。
    2.  map(proprecession) : 对数据集中的每个元素应用自定义的预处理函数  proprecession  ，以进行数据的预处理操作，例如数据增强、归一化等。
    3.  batch(128) : 将数据集分成大小为 128 的批次。这意味着模型在训练时每次处理 128 个样本。
    4.  repeat(2) : 重复整个数据集 2 次。这在训练时可以让数据集被多次遍历。
"""
# 数据预处理的方法
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
def proprecession(x, y):
    x = tf.cast((x - tf.reduce_min(x)) / (tf.reduce_max(x) - tf.reduce_min(x)), dtype = tf.float32)
    y = tf.cast(y, dtype = tf.int32)
    return x, y
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_db = train_db.shuffle(10000).map(proprecession).batch(128).repeat(2)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.shuffle(10000).map(proprecession).batch(128)

反向传播算法

python 复制代码

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets


# Train a 784-256-128-10 fully connected network on MNIST with hand-written
# gradient-descent updates, evaluating on the test set after every epoch.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
tf.reduce_max(x_train), tf.reduce_min(x_train), x_train.shape, x_test.shape, tf.reduce_max(x_test), tf.reduce_min(x_test)

# Preprocessing
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
# Min-max normalisation. The test set must go through the same transform as
# the training set (the original standardised it to zero mean instead), so
# the statistics of the training set are reused for both.
x_min = tf.reduce_min(x_train)
x_max = tf.reduce_max(x_train)
x_train = (x_train - x_min) / (x_max - x_min)
x_test = (x_test - x_min) / (x_max - x_min)
# y_test = tf.one_hot(y_test, depth = 10)
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(128)

test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(10000).batch(128)
sample = next(iter(train_db))
print("batch:", sample[0].shape, sample[1].shape)

lr = 0.003
w1 = tf.Variable(tf.random.truncated_normal([784, 256], mean=0, stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], mean=0, stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], mean=0, stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

for epoch in range(10):
    # Training pass
    for step, (x, y) in enumerate(train_db):
        x = tf.reshape(x, [-1, 28 * 28])
        with tf.GradientTape() as tape:
            # The bias broadcasts over the batch axis: [batch, 256] + [256].
            h1 = tf.nn.relu(x @ w1 + b1)
            h2 = tf.nn.relu(h1 @ w2 + b2)
            out = h2 @ w3 + b3
            y_onehot = tf.one_hot(y, depth=10)
            # Mean of the squared error over every batch x class entry.
            loss = tf.reduce_mean(tf.square(y_onehot - out))
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # In-place gradient-descent step: var <- var - lr * grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
        if step % 100 == 0:
            print("epoch = ", epoch, "step = ", step, "loss:", float(loss))

    # Evaluation pass. The counters are reset every epoch so the printed
    # accuracy describes the current weights only; previously they were
    # initialised once and accumulated across all epochs.
    total_correct = 0
    total_num = 0
    for step, (x, y) in enumerate(test_db):
        x = tf.reshape(x, [-1, 28 * 28])
        h1 = tf.nn.relu(x @ w1 + b1)
        h2 = tf.nn.relu(h1 @ w2 + b2)
        out = h2 @ w3 + b3
        prob = tf.nn.softmax(out, axis=1)
        pred = tf.cast(tf.argmax(prob, axis=1), dtype=tf.int32)
        correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
        total_correct += int(correct)
        total_num += x.shape[0]
    acc = total_correct / total_num
    print("test acc:", acc)