Tensor类型:
.int(整型) float(浮点型) double(双精度浮点型)
.bool(布尔型)
.string(字符串型)
创建Tensor的几种方法:
1、将numpy, list类型进行转化成tensor用convert_to_tensor
2、zeros, ones, zeros_like, ones_like
3、fill ------> 填充
4、random -->normal创建一个正态分布的数据,truncated_normal截断一部分数据, uniform创建一个均匀分布的数据, shuffle随机打散
5、constant--> 创建一个常量
Tensor常量的方法:
numpy(): 查看常量的数据, dtype: 查看常量的类型, shape: 查看常量的形状, ndim, rank: 查看常量的维度
device: 当前tensor所在的设备的名字
判断是否是Tensor类型的方法: isinstance, is_tensor
把其他类型转化为tensor类型: convert_to_tensor
数据类型的转化: cast
将Tensor常量转化为能求导的Tensor : Variable --> trainable查看是否可求导, name
python
import numpy as np
import tensorflow as tf
# Create a tensor while the CPU device scope is active.
with tf.device("cpu"):
    a = tf.constant([1])
a.device  # name of the device that holds `a`

# Create a tensor while the GPU device scope is active.
# NOTE(review): like the original cell, this presumably needs a visible GPU
# (or soft device placement) -- confirm on the target machine.
with tf.device("gpu"):
    b = tf.constant([1])
b.device

# The original `a.gpu` / `b.cpu` only referenced the Tensor methods without
# calling them, so nothing was copied; those methods are also deprecated in
# favour of tf.identity.  Copy explicitly under the target device scope.
with tf.device("gpu"):
    aa = tf.identity(a)  # copy of `a` placed on the GPU
with tf.device("cpu"):
    bb = tf.identity(b)  # copy of `b` placed on the CPU
python
# --- From NumPy arrays ---
tf.convert_to_tensor(np.ones(shape=[3, 3]))
tf.convert_to_tensor(np.eye(3))
tf.convert_to_tensor(np.linspace(1, 10, 10))
tf.convert_to_tensor(np.random.randint(1, 10, size=(2, 4)))

# --- From a Python list ---
tf.convert_to_tensor([1, 2, 3, 4, 5, 6, 7])

# --- tf.ones(shape, dtype) / tf.ones_like ---
tf.ones(shape=(3, 4), dtype=tf.float32)
a = tf.constant([1, 2, 3, 4, 5, 6, 6])
tf.ones_like(a)  # tensor of ones with the same shape as `a`

# --- tf.zeros(shape, dtype) / tf.zeros_like ---
tf.zeros(shape=(3, 4), dtype=tf.float32)
a = tf.constant([1, 2, 3, 4, 5, 6, 6])
# The original called tf.ones_like here although the note describes zeros.
tf.zeros_like(a)  # tensor of zeros with the same shape as `a`

# --- tf.fill(dims, value) ---
tf.fill(dims=(3, 4), value=3)

# --- tf.random.normal(shape, mean, stddev, dtype) ---
tf.random.normal(shape=(3, 4), mean=0, stddev=0.4, dtype=tf.float32)

# --- tf.random.uniform(shape, minval, maxval, dtype) ---
tf.random.uniform(shape=(3, 4), minval=0, maxval=10, dtype=tf.float32)

# --- tf.random.shuffle(value): shuffles along the first dimension ---
a = np.arange(10)
tf.random.shuffle(a)

# --- tf.constant ---
tf.constant(1.)  # <tf.Tensor: shape=(), dtype=float32, numpy=1.0>
tf.constant([True, False])  # <tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True, False])>
tf.constant(2)  # <tf.Tensor: shape=(), dtype=int32, numpy=2>
tf.constant("hello world")  # <tf.Tensor: shape=(), dtype=string, numpy=b'hello world'>
python
# Inspect a float32 tensor sampled with mean 0 and stddev 0.3.
a = tf.random.normal(shape=(4, 5), mean=0, stddev=0.3, dtype=tf.float32)

a.numpy()    # values as a NumPy array
a.dtype      # element type
a.shape      # shape
a.ndim       # number of dimensions
tf.rank(a)   # number of dimensions, returned as a tensor

# Two ways to check whether an object is a tensor.
isinstance(a, tf.Tensor)
tf.is_tensor(a)

# Convert the element type.
tf.cast(a, dtype=tf.float16)

# Wrap the tensor in a Variable so gradients can be taken with respect to it.
b = tf.Variable(a, name="input_data")
b.trainable
b.name

# dtypes can be compared directly.
a.dtype == tf.float32
python
# Loss example: MSE between softmax outputs and one-hot labels.
logits = tf.random.uniform([4, 10])
# softmax = exp(logits) / reduce_sum(exp(logits), axis, keepdims=True)
out = tf.nn.softmax(logits)

# Labels 0..3, one-hot encoded over 10 classes.
y = tf.one_hot(tf.range(4), depth=10)

# MSE returns mean(square(y_true - y_pred), axis=-1), i.e. one value per row;
# reduce_mean then collapses those to a scalar.
per_sample = tf.keras.losses.MSE(y, out)
loss = tf.reduce_mean(per_sample)
loss
python
"""
.Bias
.[out_dim]
"""
# Fully connected layer with 10 output units.
net = tf.keras.layers.Dense(units=10)
# build() creates the layer's variables for the given input shape.
net.build(input_shape=(4, 10))
net.kernel  # weight matrix w
net.bias    # bias vector b
python
"""
matrix(矩阵)
.input x: [b, vec_dim] b: 多少张照片, vec_dim: 每张照片展平后的向量长度(例如 28*28 = 784)
.weight:[input_dim, out_dim] # w权值的
"""
# Batch of 4 flattened inputs, 784 features each.
x = tf.random.normal([4, 784])
# `tf.layers` is a TF1 namespace that does not exist in TF2; the Keras layer
# lives at tf.keras.layers.Dense (as used elsewhere in these notes).
net = tf.keras.layers.Dense(10)
net.build(input_shape=[4, 784])
net(x).shape                      # [4, 10]
net.kernel.shape, net.bias.shape  # [784, 10], [10]
python
"""
Dim = 3 Tensor
x: [b, seq_dim, word_dim] # b:多少句子, seq_dim: 单词数量, word_dim: 编码长度
"""
# num_words=10000 keeps only the 10000 most frequent words; rarer words are
# ignored.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000)

# maxlen=80: every review is padded or truncated to 80 tokens.
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=80)

# input_dim=10000  -> vocabulary size (number of distinct token ids).
# output_dim=100   -> length of the embedding vector for each token.
# input_length=80  -> number of tokens in each input sequence.
# NOTE(review): `input_length` may be deprecated in newer Keras releases --
# confirm against the installed version.
emb = tf.keras.layers.Embedding(input_dim=10000, output_dim=100, input_length=80)

# The original referenced undefined names `units` and `dropout`, and passed
# dropout positionally, where SimpleRNNCell expects `activation`.
units = 64     # dimensionality of the output space (hidden state size)
dropout = 0.2  # fraction of input units to drop
out = tf.keras.layers.SimpleRNNCell(units, dropout=dropout)
python
"""
Dim = 4 Tensor
.image [b, h, w, 3]
.feature maps(特征图): [b, h, w, c]
"""
# Batch of 4 RGB images, 32x32 pixels.
x = tf.random.normal([4, 32, 32, 3])

# The original passed undefined names positionally; the third positional
# parameter of Conv2D is `strides`, not `padding`, so keywords are required.
#   filters:     number of convolution kernels (output channels)
#   kernel_size: height/width of each kernel
#   padding:     "same" pads so that, at stride 1, spatial size is preserved
#   activation:  activation function applied to the output
net = tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="relu")
net(x)  # call the layer; output shape [4, 32, 32, 16]
"""
Dim = 5 Tensor
.single task [b, h, w, 3]
.[task_b, b, h, w, 3]
"""
python
# --- Indexing ---
a = tf.random.truncated_normal(shape=[4, 32, 32, 3], mean=0, stddev=0.8)

# Chained [] indexing.
a[0][0].shape        # [32, 3]
a[0][0][0].shape     # [3]
a[0][0][0][2]        # scalar

# Comma-separated (NumPy-style) indexing.
a[1, 2, 3, 2]        # scalar
a[1, 2].shape        # [32, 3]
a[1, 3, 4].shape     # [3]
a[1, ..., 2].shape   # [32, 32]; `...` stands for the dimensions in between

# --- Slicing: start:end:step ---
a[-1:].shape                          # [1, 32, 32, 3]
a[1:3, 0:14, 1:28:2, 0:2].shape       # [2, 14, 14, 2]
a[0, 1, :, :].shape                   # [32, 3]
a[0:2, :, :, 2].shape                 # [2, 32, 32]
a[:, ::2, ::2, :].shape               # [4, 16, 16, 3]
a[::-1].shape                         # [4, 32, 32, 3]; first axis reversed
python
"""
tf.gather(params, indices, axis) 根据提供的indices(索引)从axis(维度)收集元素
tf.gather_nd(params, indices) 利用indices 收集元素
tf.boolean_mask(tensor, mask, axis=None) 利用mask(布尔值)从axis(维度)收集元素
"""
# `a` is the [4, 32, 32, 3] tensor created in the indexing cell.

# tf.gather: pick whole slices along one axis.
tf.gather(a, [2, 3], axis=0).shape                   # [2, 32, 32, 3]
tf.gather(a, [2, 1, 3, 0], axis=0).shape             # [4, 32, 32, 3]
tf.gather(a, [2, 3, 4, 5, 7, 9, 12], axis=1).shape   # [4, 7, 32, 3]

# tf.gather_nd: each innermost index list addresses leading dimensions.
tf.gather_nd(a, [0, 1]).shape                               # a[0, 1]    -> [32, 3]
tf.gather_nd(a, [0, 1, 2])                                  # a[0, 1, 2] -> [3]
tf.gather_nd(a, [[0, 0], [1, 1]]).shape                     # [2, 32, 3]
tf.gather_nd(a, [[0, 0], [1, 1], [2, 2]]).shape             # [3, 32, 3]
tf.gather_nd(a, [[0, 0, 0], [1, 1, 1], [2, 2, 2]]).shape    # [3, 3]

# tf.boolean_mask: keep the entries along `axis` whose mask value is True.
tf.boolean_mask(a, [True, True, False, False], axis=0).shape   # [2, 32, 32, 3]
tf.boolean_mask(a, [True, False, False], axis=3).shape         # [4, 32, 32, 1]
python
"""
维度变换:
.shape 形状
.ndim 维度
.reshape(tensor, shape) 改变形状
.expand_dims(input, axis)/squeeze(input, axis) 扩展/压缩维度
.transpose(a, perm)
Broadcasting(广播机制)
.broadcast_to
.expand
.expand_dims
.without copying data
.tf.tile
"""
# --- shape / ndim ---
a1 = tf.random.truncated_normal(shape=(4, 28, 28, 3), mean=0, stddev=1)
a1.shape
a1.ndim

# --- reshape: merge the two spatial axes ---
tf.reshape(a1, shape=(4, 28 * 28, 3)).shape   # [4, 784, 3]

# --- expand_dims / squeeze ---
tf.expand_dims(a1, axis=0).shape              # [1, 4, 28, 28, 3]
a2 = tf.expand_dims(a1, axis=4)               # [4, 28, 28, 3, 1]
tf.squeeze(a2).shape                          # size-1 axes removed -> [4, 28, 28, 3]

# --- transpose: reorder axes according to perm ---
tf.transpose(a1, perm=[0, 2, 3, 1])           # [4, 28, 3, 28]

# --- Broadcasting ---
# Explicit route: add an axis, then tile it three times.
b = tf.ones(shape=(3, 3), dtype=tf.float32)
b = tf.expand_dims(b, axis=2)                 # [3, 3, 1]
b = tf.tile(b, [1, 1, 3])                     # [3, 3, 3]
b.shape

# broadcast_to expands the size-1 axes to the requested shape.
tf.broadcast_to(tf.random.normal([4, 1, 1, 1]), [4, 32, 32, 3]).shape

# Implicit broadcasting in arithmetic: shapes are aligned from the right.
x = tf.random.uniform([4, 32, 32, 3])
(x + tf.random.uniform([3])).shape            # [4, 32, 32, 3]
(x + tf.random.uniform([32, 32, 1])).shape    # [4, 32, 32, 3]
(x + tf.random.uniform([4, 1, 1, 1])).shape   # [4, 32, 32, 3]
python
"""
数学运算
. +, -, *, / --> element_wise
. **, pow, square
.sqrt
.//, %
. exp, math.log
. @, matmul
.layers-->Dense, Conv2D, MaxPooling2D, Embedding, SimpleRNNCell, LSTM
.losses--> MSE, categorical_crossentropy
dim_wise
.reduce_mean/max/min/sum/variance
"""
a = tf.random.normal(shape=(4, 3))
b = tf.random.normal(shape=(4, 3))
# sqrt, log and non-integer powers give NaN for negative inputs, which the
# normally distributed `a` / `b` contain.  Use a strictly positive operand
# for those operations so the demo shows real values.
p = tf.random.uniform(shape=(4, 3), minval=0.1, maxval=2.0)

# Element-wise arithmetic.
b - a, b + a, b * a, b / a
# Powers with an integer exponent are safe for any sign.
tf.pow(a, 3), a ** 3, tf.square(a)
# Tensor exponent and square root on the positive operand.
p ** a, tf.sqrt(p)
# exp / natural logarithm (base e).
tf.exp(a), tf.math.log(p)
# Other bases via change of base: log_e(x) / log_e(base).
tf.math.log(8.) / tf.math.log(2.)      # log2(8) = 3
tf.math.log(100.) / tf.math.log(10.)   # log10(100) = 2
# Matrix multiplication: [3, 4] @ [4, 3] -> [3, 3].
tf.transpose(a) @ b
python
"""
数学运算
. +, -, *, / --> element_wise
. **, pow, square
.sqrt
.//, %
. exp, math.log
. @, matmul
.layers-->Dense, Conv2D, MaxPooling2D, Embedding, SimpleRNNCell, LSTM
.losses--> MSE, categorical_crossentropy
dim_wise
.reduce_mean/max/min/sum/variance
"""
a = tf.random.normal(shape=(4, 3))
b = tf.random.normal(shape=(4, 3))
# sqrt, log and non-integer powers give NaN for negative inputs, which the
# normally distributed `a` / `b` contain.  Use a strictly positive operand
# for those operations so the demo shows real values.
p = tf.random.uniform(shape=(4, 3), minval=0.1, maxval=2.0)

# Element-wise arithmetic.
b - a, b + a, b * a, b / a
# Powers with an integer exponent are safe for any sign.
tf.pow(a, 3), a ** 3, tf.square(a)
# Tensor exponent and square root on the positive operand.
p ** a, tf.sqrt(p)
# exp / natural logarithm (base e).
tf.exp(a), tf.math.log(p)
# Other bases via change of base: log_e(x) / log_e(base).
tf.math.log(8.) / tf.math.log(2.)      # log2(8) = 3
tf.math.log(100.) / tf.math.log(10.)   # log10(100) = 2
# Matrix multiplication: [3, 4] @ [4, 3] -> [3, 3].
tf.transpose(a) @ b
python
"""
.pad(tensor, paddings, constant_values) paddings对左右上下进行填充[[上行, 下行], [左列, 右列]] constant_values填充的数字
.expand_dims, tile
.broadcast_to
.where(condition, x, y)
.scatter_nd(indices, updates, shape)
.meshgrid
张量限幅:
tf.clip_by_value(t, clip_value_min, clip_value_max)
tf.nn.relu max(0, a)
clip_by_norm(t, clip_norm) 当 ||t|| > clip_norm 时返回 (clip_norm * t) / ||t||, 否则保持 t 不变
maximum
minimum
Gradient clipping : clip_by_global_norm(t_list, clip_norm) t_list[i] * clip_norm / max(global_norm, clip_norm)
tf.one_hot
"""
# 3x3 float32 matrix holding 0..8.
a = tf.cast(tf.reshape(tf.range(9), shape=(3, 3)), dtype=tf.float32)

# Pad with 1s: one row on top, none below, one column on each side -> [4, 5].
tf.pad(a, [[1, 0], [1, 1]], constant_values=1)

# Two ways to replicate `a` four times along a new leading axis.
aa = tf.expand_dims(a, axis=0)   # [1, 3, 3]
tf.tile(aa, [4, 1, 1])           # [4, 3, 3]
tf.broadcast_to(a, [4, 3, 3])    # [4, 3, 3]

# 0 where the condition holds, 1 elsewhere.
tf.where(a > 4, 0, 1)

# --- Clipping ---
tf.maximum(a, 4)            # values below 4 are replaced by 4
tf.minimum(a, 4)            # values above 4 are replaced by 4
tf.clip_by_value(a, 2, 6)   # below 2 -> 2, above 6 -> 6
tf.nn.relu(a)               # max(0, a)
tf.clip_by_norm(t=a, clip_norm=5).numpy()

# Gradient-clipping style call: returns the clipped list and the global norm.
flat = tf.cast(tf.range(9), dtype=tf.float32)
clipped_tensors, global_norm = tf.clip_by_global_norm(t_list=[flat], clip_norm=4)
clipped_tensors, global_norm

# --- scatter_nd: write `updates` into a zero tensor at `indices` ---
updates = tf.constant([9, 10, 11, 12])
indices = tf.constant([[4], [2], [1], [6]])
shape = tf.constant([8])
tf.scatter_nd(indices, updates, shape)   # [0, 11, 10, 0, 9, 0, 12, 0]

# Same idea with whole 4x4 slices written to positions 0 and 2 of a
# [4, 4, 4] tensor.
slice_4x4 = [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]]
indices = tf.constant([[0], [2]])
updates = tf.constant([slice_4x4, slice_4x4])
shape = tf.constant([4, 4, 4])
tf.scatter_nd(indices, updates, shape)

# --- meshgrid: coordinate grid from two 1-D ranges ---
x = tf.range(5)
y = tf.range(6)
points_x, points_y = tf.meshgrid(x, y)             # each [6, 5]
points = tf.stack([points_x, points_y], axis=2)    # [6, 5, 2] (x, y) pairs
points
python
"""
数据加载
outline:
.keras.datasets
.boston_housing.load_data() 波士顿房价
.mnist/fashion_mnist.load_data() 手写数字/时尚服饰图片
.cifar10/100.load_data() 小尺寸彩色图像(10/100 类)
.imdb.load_data() 情感分类
.tf.data.Dataset.from_tensor_slices 将输入的张量按照指定的维度进行切片,并将每个切片作为数据集中的一个元素。
.shuffle
.map
.batch
.repeat
1. shuffle(10000) : 对数据集进行随机打乱,缓冲区大小为 10000。这有助于在训练过程中引入随机性,减少数据的顺序相关性。
2. map(proprecession) : 对数据集中的每个元素应用自定义的预处理函数 proprecession ,以进行数据的预处理操作,例如数据增强、归一化等。
3. batch(128) : 将数据集分成大小为 128 的批次。这意味着模型在训练时每次处理 128 个样本。
4. repeat(2) : 重复整个数据集 2 次。这在训练时可以让数据集被多次遍历。
"""
# 数据预处理的方法
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()


def proprecession(x, y):
    """Min-max scale `x` by its own min/max to float32 and cast `y` to int32.

    Used with Dataset.map before batching, so it receives one dataset
    element (image, label) at a time.
    """
    lo = tf.reduce_min(x)
    hi = tf.reduce_max(x)
    x = tf.cast((x - lo) / (hi - lo), dtype=tf.float32)
    y = tf.cast(y, dtype=tf.int32)
    return x, y


# Training pipeline: shuffle (buffer 10000) -> preprocess -> batches of 128
# -> iterate over the data twice.
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_db = train_db.shuffle(10000)
train_db = train_db.map(proprecession)
train_db = train_db.batch(128)
train_db = train_db.repeat(2)

# Test pipeline: same steps without the repeat.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.shuffle(10000)
test_db = test_db.map(proprecession)
test_db = test_db.batch(128)
反向传播算法
python
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
# Quick look at the raw value range and shapes.
tf.reduce_max(x_train), tf.reduce_min(x_train), x_train.shape, x_test.shape, tf.reduce_max(x_test), tf.reduce_min(x_test)

# --- Preprocessing ---
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)

# Min-max normalisation.  The original z-scored the test set while min-max
# scaling the training set, so the model was evaluated on inputs with a
# different scale than it was trained on.  Both splits now use the same
# transform, with statistics taken from the training data.
x_min = tf.reduce_min(x_train)
x_max = tf.reduce_max(x_train)
x_train = (x_train - x_min) / (x_max - x_min)
x_test = (x_test - x_min) / (x_max - x_min)

train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(128)
# Evaluation does not depend on sample order, so the test set is not shuffled.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)

sample = next(iter(train_db))
print("batch:", sample[0].shape, sample[1].shape)

# --- Parameters of a 784 -> 256 -> 128 -> 10 fully connected network ---
lr = 0.003
w1 = tf.Variable(tf.random.truncated_normal([784, 256], mean=0, stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], mean=0, stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], mean=0, stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
variables = [w1, b1, w2, b2, w3, b3]


def _forward(x):
    """Return the logits of the three-layer network for a batch of images."""
    x = tf.reshape(x, [-1, 28 * 28])
    # Biases broadcast over the batch axis; no explicit broadcast_to needed.
    h1 = tf.nn.relu(x @ w1 + b1)
    h2 = tf.nn.relu(h1 @ w2 + b2)
    return h2 @ w3 + b3


for epoch in range(10):
    # --- Training ---
    for step, (x, y) in enumerate(train_db):
        with tf.GradientTape() as tape:
            out = _forward(x)
            y_onehot = tf.one_hot(y, depth=10)
            # Mean squared error over every element of the batch.
            loss = tf.reduce_mean(tf.square(y_onehot - out))
        grads = tape.gradient(loss, variables)
        # Plain gradient descent: var <- var - lr * grad.
        for var, grad in zip(variables, grads):
            var.assign_sub(lr * grad)
        if step % 100 == 0:
            print("epoch = ", epoch, "step = ", step, "loss:", float(loss))

    # --- Evaluation ---
    # Reset the counters for every evaluation pass; the original initialised
    # them once before the epoch loop, which mixes results across epochs
    # whenever evaluation runs once per epoch.
    total_correct = 0
    total_num = 0
    for x, y in test_db:
        out = _forward(x)
        # argmax of the logits equals argmax of softmax(logits).
        pred = tf.cast(tf.argmax(out, axis=1), dtype=tf.int32)
        correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
        total_correct += int(correct)
        total_num += x.shape[0]
    acc = total_correct / total_num
    print("test acc:", acc)