import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist
import time
# MNIST dataset parameters
num_classes = 10  # digits 0 to 9, 10 classes
num_features = 784  # each image is 28*28 pixels
# Training parameters
learning_rate = 0.01
training_steps = 1000
batch_size = 256
display_step = 50
# Prepare the dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert the images to float32
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Flatten each 28*28 image into a vector of 784 features
x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features])
# Rescale pixel values from [0, 255] to [0, 1]
x_train, x_test = x_train / 255, x_test / 255
# tf.data.Dataset.from_tensor_slices builds a dataset from x_train and y_train
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Shuffle the dataset and group it into batches of size batch_size
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)
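# Optional sanity check (not part of the original example; sample_x/sample_y are
# illustrative names): pull a single batch from the pipeline above to confirm that
# shuffle/batch/prefetch yields tensors of shape (batch_size, num_features) and (batch_size,).
for sample_x, sample_y in train_data.take(1):
    print("batch images:", sample_x.shape, "batch labels:", sample_y.shape)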
# Weights of shape [784, 10]: 784 pixels per 28*28 image, 10 classes
W = tf.Variable(tf.ones([num_features, num_classes]), name="weight")
# Bias of shape [10], one entry per class
b = tf.Variable(tf.zeros([num_classes]), name="bias")
# Logistic regression model
def logistic_regression(x):
    # Softmax turns the linear scores into a probability distribution over the 10 classes
    return tf.nn.softmax(tf.matmul(x, W) + b)
# Cross-entropy loss
def cross_entropy(y_pred, y_true):
    # tf.one_hot converts an integer label into a one-hot probability vector
    y_true = tf.one_hot(y_true, depth=num_classes)
    # tf.clip_by_value keeps y_pred between 1e-9 and 1.0 to avoid log(0)
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.0)
    # Sum over the classes for each sample, then average over the batch
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred), axis=1))
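# A tiny worked example of the two helpers above (optional, with made-up values):
# tf.one_hot maps the label 2 to [0, 0, 1, 0, ...], and tf.clip_by_value lifts a
# probability of 0.0 to 1e-9 so that log() stays finite.
print(tf.one_hot([2], depth=num_classes).numpy())
print(tf.clip_by_value(tf.constant([[0.0, 1.0]]), 1e-9, 1.0).numpy())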
# Accuracy metric
def accuracy(y_pred, y_true):
    # tf.cast converts between dtypes so the labels can be compared with the argmax output
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
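# Small illustration of the accuracy metric (optional, toy values): argmax picks the
# predicted class per row, tf.equal compares it with the label, and tf.cast turns the
# booleans into 0.0/1.0 so their mean is the fraction of correct predictions.
toy_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])
toy_true = tf.constant([1, 1])
print(accuracy(toy_pred, toy_true).numpy())  # 0.5: one of the two samples is classified correctly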
# Stochastic gradient descent optimizer
optimizer = tf.optimizers.SGD(learning_rate)
# One gradient-descent step
def run_optimization(x, y):
    with tf.GradientTape() as g:
        pred = logistic_regression(x)
        loss = cross_entropy(pred, y)
    # Compute the gradients of the loss with respect to W and b
    gradients = g.gradient(loss, [W, b])
    # Update W and b along the gradients
    optimizer.apply_gradients(zip(gradients, [W, b]))
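# How tf.GradientTape behaves in isolation (optional sketch with a made-up variable):
# operations executed inside the "with" block are recorded, and gradient() then
# differentiates the recorded result with respect to the variable.
demo_w = tf.Variable(3.0)
with tf.GradientTape() as demo_tape:
    demo_y = demo_w * demo_w  # y = w^2
print(demo_tape.gradient(demo_y, demo_w).numpy())  # dy/dw = 2w = 6.0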
# Start training
start = time.perf_counter()
for epoch in range(5):
    for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
        run_optimization(batch_x, batch_y)
        if step % display_step == 0:
            pred = logistic_regression(batch_x)
            loss = cross_entropy(pred, batch_y)
            acc = accuracy(pred, batch_y)
            print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))
# Evaluate the trained model on the test set
pred = logistic_regression(x_test)
print("Test Accuracy: %f" % accuracy(pred, y_test))
elapsed = time.perf_counter() - start
print("Time used:", elapsed)
# Example 3
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
print(tf.__version__)
%matplotlib inline
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Hold out 20% of the training images as a validation set
total_num = len(train_images)
valid_split = 0.2
train_num = int(total_num * (1 - valid_split))
train_x = train_images[:train_num]
train_y = train_labels[:train_num]
valid_x = train_images[train_num:]
valid_y = train_labels[train_num:]
test_x = test_images
test_y = test_labels
# Flatten each image into a 784-dimensional vector
train_x = train_x.reshape(-1, 784)
valid_x = valid_x.reshape(-1, 784)
test_x = test_x.reshape(-1, 784)
# Rescale pixel values to [0, 1] and convert to float32
train_x = tf.cast(train_x / 255.0, tf.float32)
valid_x = tf.cast(valid_x / 255.0, tf.float32)
test_x = tf.cast(test_x / 255.0, tf.float32)
# One-hot encode the labels (depth 10, one column per digit)
train_y = tf.one_hot(train_y, depth=10)
valid_y = tf.one_hot(valid_y, depth=10)
test_y = tf.one_hot(test_y, depth=10)
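# Optional shape check for the preprocessing above: with a 20% validation split of the
# 60000 MNIST training images, the flattened/one-hot tensors should look like this.
print(train_x.shape, train_y.shape)  # (48000, 784) (48000, 10)
print(valid_x.shape, valid_y.shape)  # (12000, 784) (12000, 10)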
# Define the model
def model(x, w, b):
    pred = tf.matmul(x, w) + b
    return tf.nn.softmax(pred)
# Initialize the weights and bias randomly, with a fixed seed for reproducibility
np.random.seed(612)
W = tf.Variable(np.random.randn(784, 10), dtype=tf.float32)
B = tf.Variable(np.random.randn(10), dtype=tf.float32)
def loss(x, y, w, b):
    pred = model(x, w, b)
    # Categorical cross-entropy expects one-hot labels and predicted probabilities
    loss_ = tf.keras.losses.categorical_crossentropy(y_true=y, y_pred=pred)
    return tf.reduce_mean(loss_)
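# Optional note: for one-hot labels, categorical_crossentropy computes the same
# per-sample quantity -sum(y_true * log(y_pred)) that Example 2 wrote by hand; the
# toy values below are illustrative only.
print(tf.keras.losses.categorical_crossentropy(
    tf.constant([[0.0, 1.0]]), tf.constant([[0.2, 0.8]])).numpy())  # approx -log(0.8) = 0.223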
# Set the number of epochs, the batch size and the learning rate
training_epochs = 100
batch_size = 50
learning_rate = 0.001
def grad(x, y, w, b):
    with tf.GradientTape() as tape:
        loss_ = loss(x, y, w, b)
    return tape.gradient(loss_, [w, b])
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
def accuracy(x, y, w, b):
    pred = model(x, w, b)
    # A sample counts as correct when the predicted class matches the one-hot label
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Number of batches per epoch, plus lists that record the training history
total_step = int(train_num / batch_size)
loss_list_train = []
loss_list_valid = []
acc_list_train = []
acc_list_valid = []
# Start training: loop over epochs, updating the parameters batch by batch with the Adam optimizer
for epoch in range(training_epochs):
    for step in range(total_step):
        xs = train_x[step * batch_size:(step + 1) * batch_size]
        ys = train_y[step * batch_size:(step + 1) * batch_size]
        # Compute the gradients for this batch and apply them to W and B
        grads = grad(xs, ys, W, B)
        optimizer.apply_gradients(zip(grads, [W, B]))
    # After each epoch, record the loss and accuracy on the training and validation sets
    loss_train = loss(train_x, train_y, W, B).numpy()
    loss_valid = loss(valid_x, valid_y, W, B).numpy()
    acc_train = accuracy(train_x, train_y, W, B).numpy()
    acc_valid = accuracy(valid_x, valid_y, W, B).numpy()
    loss_list_train.append(loss_train)
    loss_list_valid.append(loss_valid)
    acc_list_train.append(acc_train)
    acc_list_valid.append(acc_valid)
    print("epoch={:3d}, train_loss={:.4f}, train_acc={:.4f}, val_loss={:.4f}, val_acc={:.4f}".format(
        epoch + 1, loss_train, acc_train, loss_valid, acc_valid))
# Plot the training and validation loss curves
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.plot(loss_list_train, 'blue', label="Train Loss")
plt.plot(loss_list_valid, 'red', label="Valid Loss")
plt.legend()

# Plot the accuracy curves in a separate figure
plt.figure()
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.plot(acc_list_train, 'blue', label="Train Acc")
plt.plot(acc_list_valid, 'red', label="Valid Acc")
plt.legend()

acc_test = accuracy(test_x, test_y, W, B).numpy()
print("Test accuracy:", acc_test)
def predict(x, w, b):
    pred = model(x, w, b)
    # The predicted class is the index with the largest probability
    result = tf.argmax(pred, 1).numpy()
    return result
pred_test = predict(test_x, W, B)
pred_test
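# Optional visualization sketch (an addition; the figure size and the number of samples
# shown are arbitrary choices): display a few test images with their predicted labels.
fig = plt.figure(figsize=(10, 4))
for i in range(10):
    ax = fig.add_subplot(2, 5, i + 1)
    ax.imshow(test_images[i], cmap='binary')
    ax.set_title("pred: {}".format(pred_test[i]))
    ax.axis('off')
plt.show()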