人工智能常用的优化算法(梯度下降、牛顿法、拟牛顿法、共轭梯度法、)

一、梯度下降:梯度下降是一种优化算法,用于找到函数的局部最小值。它通过沿着梯度的反方向迭代更新参数,以降低目标函数的值。

代码:

python 复制代码
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# 加载鸢尾花数据集
iris = load_iris()
X = iris.data
y = iris.target

# 将数据集分割为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 定义网格搜索的超参数范围
param_grid = {
    'max_depth': [3, 5, 7, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'criterion': ['gini', 'entropy']
}

# 初始化GridSearchCV
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=param_grid, cv=5)

# 将网格搜索适配到数据集上
grid_search.fit(X_train, y_train)

# 获取网格搜索结果
results = grid_search.cv_results_
param_names = list(param_grid.keys())
params_combinations = results['params']
mean_test_scores = results['mean_test_score']

# 打印出最佳超参数
print("最佳超参数:", grid_search.best_params_)

# 获取最佳模型
best_clf = grid_search.best_estimator_

# 评估最佳模型
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("准确率:", accuracy)

# 计算混淆矩阵
conf_matrix = confusion_matrix(y_test, y_pred)

# 生成折线图
plt.figure(figsize=(12, 8))

for i, param in enumerate(param_names):
    param_values = [x[param] for x in params_combinations]  # 获取对应超参数的取值
    plt.plot(param_values, mean_test_scores, marker='o', label=f'{param}')

plt.title('Hyperparameter Tuning Results')
plt.xlabel('Hyperparameter Values')
plt.ylabel('Mean Test Score (Accuracy)')
plt.legend()
plt.grid(True)
plt.show()

# 生成真实值和预测值对比的图表
plt.figure(figsize=(10, 6))

plt.scatter(np.arange(len(y_test)), y_test, color='blue', label='True Values', alpha=0.5)
plt.scatter(np.arange(len(y_test)), y_pred, color='red', label='Predicted Values', alpha=1,marker='*')
plt.title('True vs Predicted Values')
plt.xlabel('Sample Index')
plt.ylabel('Class')
plt.legend()
plt.show()

# 输出混淆矩阵
print("混淆矩阵:")
print(conf_matrix)

二、牛顿法:牛顿法是一种迭代优化算法,通过使用函数的二阶导数信息来更新参数,以更快地收敛到函数的极小值点。

代码:

python 复制代码
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize

# 加载鸢尾花数据集
iris = load_iris()
X = iris.data
y = iris.target

# 将数据集分割为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# 定义交叉熵损失函数和其梯度
def cross_entropy_loss(theta, X, y):
    # 提取决策树的参数
    max_depth, min_samples_split, min_samples_leaf = theta

    # 将min_samples_leaf转换为整数
    min_samples_leaf = int(min_samples_leaf)

    # 训练决策树
    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf)
    clf.fit(X, y)

    # 计算交叉熵损失
    y_pred_proba = clf.predict_proba(X)
    loss = -np.sum(np.log(y_pred_proba[np.arange(len(y)), y])) / len(y)

    return loss


def cross_entropy_gradient(theta, X, y):
    # 提取决策树的参数
    max_depth, min_samples_split, min_samples_leaf = theta

    # 将min_samples_leaf转换为整数
    min_samples_leaf = int(min_samples_leaf)

    # 训练决策树
    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf)
    clf.fit(X, y)

    # 计算预测概率
    y_pred_proba = clf.predict_proba(X)

    # 计算交叉熵损失的梯度
    grad = np.zeros(3)
    for i in range(len(X)):
        for k in range(3):
            if k == y[i]:
                grad[0] += -(1 - y_pred_proba[i, k]) * X[i, 0]
                grad[1] += -(1 - y_pred_proba[i, k]) * X[i, 1]
                grad[2] += -(1 - y_pred_proba[i, k]) * X[i, 2]
            else:
                grad[0] += y_pred_proba[i, k] * X[i, 0]
                grad[1] += y_pred_proba[i, k] * X[i, 1]
                grad[2] += y_pred_proba[i, k] * X[i, 2]

    grad /= len(y)

    return grad


# 初始化参数
theta_initial = [3, 2, 1]

# 使用牛顿法最小化交叉熵损失函数
result = minimize(cross_entropy_loss, theta_initial, args=(X_train, y_train), jac=cross_entropy_gradient,
                  method='Newton-CG')

# 获取最优参数
max_depth_opt, min_samples_split_opt, min_samples_leaf_opt = result.x

# 将最优min_samples_leaf转换为整数
min_samples_leaf_opt = int(min_samples_leaf_opt)

# 构建最优模型
best_clf = DecisionTreeClassifier(max_depth=max_depth_opt, min_samples_split=min_samples_split_opt,
                                  min_samples_leaf=min_samples_leaf_opt)
best_clf.fit(X_train, y_train)

# 评估最佳模型
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("准确率:", accuracy)

# 计算混淆矩阵
conf_matrix = confusion_matrix(y_test, y_pred)

# 输出混淆矩阵
print("混淆矩阵:")
print(conf_matrix)

三、拟牛顿法:拟牛顿法是一类使用函数的一阶导数信息和历史迭代信息来近似二阶导数的优化算法。它通过更新拟牛顿矩阵来逼近牛顿法的迭代过程,从而减少计算二阶导数的开销。

代码:

python 复制代码
import numpy as np
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define cross-entropy loss function
def cross_entropy_loss(theta, X, y):
    clf = DecisionTreeClassifier(min_samples_split=int(theta[0]))
    clf.fit(X, y)
    y_pred = clf.predict_proba(X)
    loss = -np.mean(np.log(y_pred[np.arange(len(y)), y]))
    return loss

# Define gradient of cross-entropy loss function
def cross_entropy_gradient(theta, X, y):
    clf = DecisionTreeClassifier(min_samples_split=int(theta[0]))
    clf.fit(X, y)
    y_pred = clf.predict_proba(X)
    grad = np.zeros_like(theta)
    for i in range(len(grad)):
        clf.min_samples_split = int(theta[0] + 1e-6)
        y_pred_plus = clf.predict_proba(X)
        clf.min_samples_split = int(theta[0] - 1e-6)
        y_pred_minus = clf.predict_proba(X)
        grad[i] = -np.mean((y_pred_plus - y_pred_minus) * (y == i)[:, np.newaxis])
    return grad

# Initial guess for theta
theta_initial = np.array([2.0])  # Initial value for min_samples_split

# Perform optimization using BFGS
result = minimize(cross_entropy_loss, theta_initial, args=(X_train, y_train), jac=cross_entropy_gradient,
                  method='BFGS')

# Get the optimal value of theta
optimal_theta = result.x

# Train the classifier with the optimal min_samples_split value
optimal_clf = DecisionTreeClassifier(min_samples_split=int(optimal_theta))
optimal_clf.fit(X_train, y_train)

# Evaluate the classifier on the testing data
accuracy = optimal_clf.score(X_test, y_test)
print("Optimal min_samples_split:", int(optimal_theta))
print("Accuracy:", accuracy)

四、共轭梯度法:共轭梯度法是一种用于解决大规模线性方程组或最小化二次型函数的优化算法。它利用了不同方向上的梯度共轭性质,在每一步迭代中找到一个共轭方向,并沿着该方向进行参数更新,以加快收敛速度。

代码:

python 复制代码
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# 加载鸢尾花数据集
iris = load_iris()
X = iris.data
y = iris.target

# 将数据集分割为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 定义网格搜索的超参数范围
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  # 正则化参数
    'penalty': ['l2'],  # 正则化类型,这里选择L2正则化
    'solver': ['lbfgs'],  # 选择共轭梯度法优化器
}

# 初始化GridSearchCV
grid_search = GridSearchCV(estimator=LogisticRegression(), param_grid=param_grid, cv=5)

# 将网格搜索适配到数据集上
grid_search.fit(X_train, y_train)

# 获取网格搜索结果
results = grid_search.cv_results_
param_names = list(param_grid.keys())
params_combinations = results['params']
mean_test_scores = results['mean_test_score']

# 打印出最佳超参数
print("最佳超参数:", grid_search.best_params_)

# 获取最佳模型
best_clf = grid_search.best_estimator_

# 评估最佳模型
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("准确率:", accuracy)

# 计算混淆矩阵
conf_matrix = confusion_matrix(y_test, y_pred)

# 生成折线图
plt.figure(figsize=(12, 8))

for i, param in enumerate(param_names):
    param_values = [x[param] for x in params_combinations]  # 获取对应超参数的取值
    plt.plot(param_values, mean_test_scores, marker='o', label=f'{param}')

plt.title('Hyperparameter Tuning Results')
plt.xlabel('Hyperparameter Values')
plt.ylabel('Mean Test Score (Accuracy)')
plt.legend()
plt.grid(True)
plt.show()

# 生成真实值和预测值对比的图表
plt.figure(figsize=(10, 6))

plt.scatter(np.arange(len(y_test)), y_test, color='blue', label='True Values', alpha=0.5)
plt.scatter(np.arange(len(y_test)), y_pred, color='red', label='Predicted Values', alpha=1,marker='*')
plt.title('True vs Predicted Values')
plt.xlabel('Sample Index')
plt.ylabel('Class')
plt.legend()
plt.show()

# 输出混淆矩阵
print("混淆矩阵:")
print(conf_matrix)
相关推荐
天飓8 分钟前
基于OpenCV的自制Python访客识别程序
人工智能·python·opencv
lapiii3588 分钟前
图论-代码随想录刷题记录[JAVA]
java·数据结构·算法·图论
檀越剑指大厂9 分钟前
开源AI大模型工作流神器Flowise本地部署与远程访问
人工智能·开源
声网12 分钟前
「人眼视觉不再是视频消费的唯一形式」丨智能编解码和 AI 视频生成专场回顾@RTE2024
人工智能·音视频
newxtc21 分钟前
【AiPPT-注册/登录安全分析报告-无验证方式导致安全隐患】
人工智能·安全·ai写作·极验·行为验证
技术仔QAQ36 分钟前
【tokenization分词】WordPiece, Byte-Pair Encoding(BPE), Byte-level BPE(BBPE)的原理和代码
人工智能·python·gpt·语言模型·自然语言处理·开源·nlp
陌上阳光1 小时前
动手学深度学习70 BERT微调
人工智能·深度学习·bert
Dontla1 小时前
Rust泛型系统类型推导原理(Rust类型推导、泛型类型推导、泛型推导)为什么在某些情况必须手动添加泛型特征约束?(泛型trait约束)
开发语言·算法·rust
Ttang231 小时前
Leetcode:118. 杨辉三角——Java数学法求解
算法·leetcode