深度学习Note.5(机器学习2)

多项式回归

1.与线性回归联系:

与线性回归大体相似,代码部分复用性高,不同点:公式中为x的次方,可能要规范化处理。

2.paddle的API

paddle.sin(x, name=None)

功能:计算输入的正弦值。

输入:输入Tensor

输出:Tensor,输入的sin值
paddle.ones(shape, dtype=None)

功能:创建建指定形状且值全为1的Tensor

输入:输出结果形状

输出:值全为1的Tensor
paddle.multiply(x, y, name=None)

功能:逐元素相乘

输入:2个Tensor

输出:Tensor,相乘结果
paddle.concat(x, axis=0, name=None)

功能:对输入沿axis轴进行联结

输入:待联结的Tensorlist域者Tensortuple和运算轴

输出:联结后的Tensor

3.模型--sin(2*pi*x)

3. 1数据集构建

python 复制代码
import math

# sin函数: sin(2 * pi * x)
def sin(x):
    y = paddle.sin(2 * math.pi * x)
    return y

用前面定义的create_toy_data函数来构建训练和测试数据\

python 复制代码
import math
import paddle
import numpy as np
from matplotlib import pyplot as plt

def sin(x):
    y = paddle.sin(2 * math.pi * x)
    return y

def create_toy_data(func, interval, sample_num, noise=0.0, add_outlier=False, outlier_ratio=0.01):
    X = paddle.rand(shape=[sample_num]) * (interval[1] - interval[0]) + interval[0]
    y = func(X)
    epsilon = paddle.normal(0, noise, shape=[y.shape[0]])
    y += epsilon
    y_np = y.numpy()  # 转换为 NumPy 数组
    
    if add_outlier:
        outlier_num = max(1, int(len(y_np) * outlier_ratio))
        outlier_idx = paddle.randint(len(y_np), shape=[outlier_num]).numpy()
        y_np[outlier_idx] *= 5  # 直接操作 NumPy 数组
    
    return X.numpy(), y_np

# 生成数据
func = sin
interval = (0, 1)
train_num = 15
test_num = 10
noise = 0.5

X_train, y_train = create_toy_data(func=func, interval=interval, sample_num=train_num, noise=noise)
X_test, y_test = create_toy_data(func=func, interval=interval, sample_num=test_num, noise=noise)

# 生成用于绘图的基准数据(转换为 NumPy)
X_underlying = paddle.linspace(interval[0], interval[1], num=100).numpy()
y_underlying = sin(paddle.to_tensor(X_underlying)).numpy()

# 绘制图像
plt.rcParams['figure.figsize'] = (8.0, 6.0)
plt.scatter(X_train, y_train, facecolor="none", edgecolor='#e4007f', s=50, label="train data")
plt.plot(X_underlying, y_underlying, c='#000000', label=r"$\sin(2\pi x)$")
plt.legend(fontsize='x-large')
plt.savefig('ml-vis2.pdf')
plt.show()

结果:

3.2模型构建

python 复制代码
def polynomial_basis_function(x, degree = 2):
    """
    输入:
       - x: tensor, 输入的数据,shape=[N,1]
       - degree: int, 多项式的阶数
       example Input: [[2], [3], [4]], degree=2
       example Output: [[2^1, 2^2], [3^1, 3^2], [4^1, 4^2]]
       注意:本案例中,在degree>=1时不生成全为1的一列数据;degree为0时生成形状与输入相同,全1的Tensor
    输出:
       - x_result: tensor
    """
    
    if degree==0:
        return paddle.ones(shape = x.shape,dtype='float32') 

    x_tmp = x
    x_result = x_tmp

    for i in range(2, degree+1):
        x_tmp = paddle.multiply(x_tmp,x) # 逐元素相乘
        x_result = paddle.concat((x_result,x_tmp),axis=-1)

    return x_result

# 简单测试
data = [[2], [3], [4]]
X = paddle.to_tensor(data = data,dtype='float32')
degree = 3
transformed_X = polynomial_basis_function(X,degree=degree)
print("转换前:",X)
print("阶数为",degree,"转换后:",transformed_X)
python 复制代码
转换前: Tensor(shape=[3, 1], dtype=float32, place=CPUPlace, stop_gradient=True,
       [[2.],
        [3.],
        [4.]])
阶数为 3 转换后: Tensor(shape=[3, 3], dtype=float32, place=CPUPlace, stop_gradient=True,
       [[2. , 4. , 8. ],
        [3. , 9. , 27.],
        [4. , 16., 64.]])

3.3模型训练

python 复制代码
plt.rcParams['figure.figsize'] = (12.0, 8.0)

for i, degree in enumerate([0, 1, 3, 8]): # []中为多项式的阶数
    model = Linear(degree)
    X_train_transformed = polynomial_basis_function(X_train.reshape([-1,1]), degree)
    X_underlying_transformed = polynomial_basis_function(X_underlying.reshape([-1,1]), degree)
    
    model = optimizer_lsm(model,X_train_transformed,y_train.reshape([-1,1])) #拟合得到参数

    y_underlying_pred = model(X_underlying_transformed).squeeze()

    print(model.params)
    
    # 绘制图像
    plt.subplot(2, 2, i + 1)
    plt.scatter(X_train, y_train, facecolor="none", edgecolor='#e4007f', s=50, label="train data")
    plt.plot(X_underlying, y_underlying, c='#000000', label=r"$\sin(2\pi x)$")
    plt.plot(X_underlying, y_underlying_pred, c='#f19ec2', label="predicted function")
    plt.ylim(-2, 1.5)
    plt.annotate("M={}".format(degree), xy=(0.95, -1.4))

#plt.legend(bbox_to_anchor=(1.05, 0.64), loc=2, borderaxespad=0.)
plt.legend(loc='lower left', fontsize='x-large')
plt.savefig('ml-vis3.pdf')
plt.show()

分析:当阶数太小,拟合曲线简单,欠拟合。

当阶数太大,拟合曲线复杂,过拟合。

3.4模型评估

python 复制代码
# 训练误差和测试误差
training_errors = []
test_errors = []
distribution_errors = []

# 遍历多项式阶数
for i in range(9):
    model = Linear(i)

    X_train_transformed = polynomial_basis_function(X_train.reshape([-1,1]), i) 
    X_test_transformed = polynomial_basis_function(X_test.reshape([-1,1]), i) 
    X_underlying_transformed = polynomial_basis_function(X_underlying.reshape([-1,1]), i)
    
    optimizer_lsm(model,X_train_transformed,y_train.reshape([-1,1]))
    
    y_train_pred = model(X_train_transformed).squeeze()
    y_test_pred = model(X_test_transformed).squeeze()
    y_underlying_pred = model(X_underlying_transformed).squeeze()

    train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred).item()
    training_errors.append(train_mse)

    test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred).item()
    test_errors.append(test_mse)

    #distribution_mse = mean_squared_error(y_true=y_underlying, y_pred=y_underlying_pred).item()
    #distribution_errors.append(distribution_mse)

print ("train errors: \n",training_errors)
print ("test errors: \n",test_errors)
#print ("distribution errors: \n", distribution_errors)

# 绘制图片
plt.rcParams['figure.figsize'] = (8.0, 6.0)
plt.plot(training_errors, '-.', mfc="none", mec='#e4007f', ms=10, c='#e4007f', label="Training")
plt.plot(test_errors, '--', mfc="none", mec='#f19ec2', ms=10, c='#f19ec2', label="Test")
#plt.plot(distribution_errors, '-', mfc="none", mec="#3D3D3F", ms=10, c="#3D3D3F", label="Distribution")
plt.legend(fontsize='x-large')
plt.xlabel("degree")
plt.ylabel("MSE")
plt.savefig('ml-mse-error.pdf')
plt.show()

当阶数较低的时候,模型的表示能力有限,训练误差和测试误差都很高,代表模型欠拟合;

当阶数较高的时候,模型表示能力强,但将训练数据中的噪声也作为特征进行学习,一般情况下训练误差继续降低而测试误差显著升高,代表模型过拟合。

如何解决?

引入正则化方法,通过向误差函数中添加一个惩罚项来避免系数倾向于较大的取值

python 复制代码
degree = 8 # 多项式阶数
reg_lambda = 0.0001 # 正则化系数

X_train_transformed = polynomial_basis_function(X_train.reshape([-1,1]), degree)
X_test_transformed = polynomial_basis_function(X_test.reshape([-1,1]), degree)
X_underlying_transformed = polynomial_basis_function(X_underlying.reshape([-1,1]), degree)

model = Linear(degree) 

optimizer_lsm(model,X_train_transformed,y_train.reshape([-1,1]))

y_test_pred=model(X_test_transformed).squeeze()
y_underlying_pred=model(X_underlying_transformed).squeeze()

model_reg = Linear(degree) 

optimizer_lsm(model_reg,X_train_transformed,y_train.reshape([-1,1]),reg_lambda=reg_lambda)

y_test_pred_reg=model_reg(X_test_transformed).squeeze()
y_underlying_pred_reg=model_reg(X_underlying_transformed).squeeze()

mse = mean_squared_error(y_true = y_test, y_pred = y_test_pred).item()
print("mse:",mse)
mes_reg = mean_squared_error(y_true = y_test, y_pred = y_test_pred_reg).item()
print("mse_with_l2_reg:",mes_reg)

# 绘制图像
plt.scatter(X_train, y_train, facecolor="none", edgecolor="#e4007f", s=50, label="train data")
plt.plot(X_underlying, y_underlying, c='#000000', label=r"$\sin(2\pi x)$")
plt.plot(X_underlying, y_underlying_pred, c='#e4007f', linestyle="--", label="$deg. = 8$")
plt.plot(X_underlying, y_underlying_pred_reg, c='#f19ec2', linestyle="-.", label="$deg. = 8, \ell_2 reg$")
plt.ylim(-1.5, 1.5)
plt.annotate("lambda={}".format(reg_lambda), xy=(0.82, -1.4))
plt.legend(fontsize='large')
plt.savefig('ml-vis4.pdf')
plt.show()
相关推荐
@小匠3 小时前
Read Frog:一款开源的 AI 驱动浏览器语言学习扩展
人工智能·学习
山间小僧5 小时前
「AI学习笔记」RNN
机器学习·aigc·ai编程
网教盟人才服务平台6 小时前
“方班预备班盾立方人才培养计划”正式启动!
大数据·人工智能
芯智工坊7 小时前
第15章 Mosquitto生产环境部署实践
人工智能·mqtt·开源
菜菜艾7 小时前
基于llama.cpp部署私有大模型
linux·运维·服务器·人工智能·ai·云计算·ai编程
TDengine (老段)7 小时前
TDengine IDMP 可视化 —— 分享
大数据·数据库·人工智能·时序数据库·tdengine·涛思数据·时序数据
小真zzz7 小时前
搜极星:第三方多平台中立GEO洞察专家全面解析
人工智能·搜索引擎·seo·geo·中立·第三方平台
GreenTea8 小时前
从 Claw-Code 看 AI 驱动的大型项目开发:2 人 + 10 个自治 Agent 如何产出 48K 行 Rust 代码
前端·人工智能·后端
火山引擎开发者社区8 小时前
秒级创建实例,火山引擎 Milvus Serverless 让 AI Agent 开发更快更省
人工智能