# CatBoost regression: automatic hyperparameter tuning

import os

import time

import optuna

import pandas as pd

from catboost import CatBoostRegressor

from sklearn.metrics import r2_score, mean_squared_error

from sklearn.model_selection import train_test_split

# NOTE(review): `data` is not defined anywhere in the visible source. It is
# assumed to be a pandas DataFrame, loaded beforehand, that contains the
# columns 'label', 'b1' and 'b2' -- confirm against the data-loading code.

# Features: every column except the target and the excluded 'b1'/'b2' columns.
X_train = data.drop(['label', 'b1', 'b2'], axis=1).values
# Target: the 'label' column.
y_train = data['label'].values

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

def epoch_time(start_time, end_time):
    """Return the elapsed time between two timestamps.

    Args:
        start_time: Start timestamp in seconds (e.g. from ``time.time()``).
        end_time: End timestamp in seconds.

    Returns:
        A ``(elapsed_mins, elapsed_secs)`` tuple. Both values express the
        same full duration: ``elapsed_mins`` is ``elapsed_secs / 60``, not a
        separate whole-minutes component.
    """
    elapsed_secs = end_time - start_time
    elapsed_mins = elapsed_secs / 60
    return elapsed_mins, elapsed_secs

def objective(trial):
    """Optuna objective: fit one CatBoost regressor and return its MSE.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    arrays.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean squared error of the fitted model's predictions on ``X_test``
        (lower is better).
    """
    # Custom search space.
    depth = trial.suggest_int('depth', 1, 16)
    border_count = trial.suggest_int('border_count', 1, 222)
    l2_leaf_reg = trial.suggest_int('l2_leaf_reg', 1, 222)
    # suggest_float replaces the deprecated suggest_uniform; same bounds.
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.9)
    iterations = trial.suggest_int('iterations', 1, 100)

    estimator = CatBoostRegressor(
        loss_function='RMSE',
        random_seed=22,
        learning_rate=learning_rate,
        iterations=iterations,
        l2_leaf_reg=l2_leaf_reg,
        border_count=border_count,
        depth=depth,
        verbose=0,
    )
    estimator.fit(X_train, y_train)

    # NOTE(review): the hold-out split doubles as the validation set, so the
    # best score found by the search is an optimistic estimate.
    val_pred = estimator.predict(X_test)
    mse = mean_squared_error(y_test, val_pred)
    return mse

""" Run optimize.

Set n_trials and/or timeout (in sec) for optimization by Optuna

"""

# Random search is the sampler in effect. Pass optuna.samplers.TPESampler()
# instead to run the Tree-structured Parzen Estimator.
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(), direction='minimize')

# Time the whole search.
start_time = time.time()
study.optimize(objective, n_trials=10)
end_time = time.time()

elapsed_mins, elapsed_secs = epoch_time(start_time, end_time)
print('elapsed_secs:', elapsed_secs)
print('Best value:', study.best_trial.value)

import os

import time

import pandas as pd

from catboost import CatBoostRegressor

from hyperopt import fmin, hp, partial, Trials, tpe,rand

from sklearn.metrics import r2_score, mean_squared_error

from sklearn.model_selection import train_test_split

# Hyperopt search space.
# hp.randint(label, low, high) samples integers in [low, high). The bounds
# mirror the integer ranges of the Optuna objective in this file, so zero is
# never sampled for depth, l2_leaf_reg or border_count, and the value stored
# in the trial records is the value the model was trained with.
# NOTE(review): the two-bound form of hp.randint needs a hyperopt release
# that supports it -- confirm the installed version.
space = {
    "iterations": hp.randint("iterations", 1, 101),
    "depth": hp.randint("depth", 1, 17),
    "l2_leaf_reg": hp.randint("l2_leaf_reg", 1, 223),
    "border_count": hp.randint("border_count", 1, 223),
    "learning_rate": hp.uniform("learning_rate", 0.001, 0.9),
}

# NOTE(review): `data` is not defined anywhere in the visible source. It is
# assumed to be a pandas DataFrame, loaded beforehand, that contains the
# columns 'label', 'b1' and 'b2' -- confirm against the data-loading code.

# Features: every column except the target and the excluded 'b1'/'b2' columns.
X_train = data.drop(['label', 'b1', 'b2'], axis=1).values
# Target: the 'label' column.
y_train = data['label'].values

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

def epoch_time(start_time, end_time):
    """Return the elapsed time between two timestamps.

    Args:
        start_time: Start timestamp in seconds (e.g. from ``time.time()``).
        end_time: End timestamp in seconds.

    Returns:
        A ``(elapsed_mins, elapsed_secs)`` tuple. Both values express the
        same full duration: ``elapsed_mins`` is ``elapsed_secs / 60``, not a
        separate whole-minutes component.
    """
    elapsed_secs = end_time - start_time
    elapsed_mins = elapsed_secs / 60
    return elapsed_mins, elapsed_secs

# Objective for the automated search: train with the sampled parameters.
def cat_factory(argsDict):
    """Hyperopt objective: fit one CatBoost regressor and return its MSE.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    arrays.

    Args:
        argsDict: Mapping of sampled hyperparameters with the keys
            'learning_rate', 'iterations', 'l2_leaf_reg', 'border_count'
            and 'depth'.

    Returns:
        Mean squared error of the fitted model's predictions on ``X_test``
        (lower is better).
    """
    estimator = CatBoostRegressor(
        loss_function='RMSE',
        random_seed=22,
        learning_rate=argsDict['learning_rate'],
        # Cast to built-in int in case the sampler yields NumPy integer
        # scalars for the integer-valued parameters.
        iterations=int(argsDict['iterations']),
        l2_leaf_reg=int(argsDict['l2_leaf_reg']),
        border_count=int(argsDict['border_count']),
        depth=int(argsDict['depth']),
        verbose=0,
    )
    estimator.fit(X_train, y_train)

    val_pred = estimator.predict(X_test)
    mse = mean_squared_error(y_test, val_pred)
    return mse

# Search algorithm: random search is the one in effect. Use
# partial(tpe.suggest) instead to run the Tree-structured Parzen Estimator.
algo = partial(rand.suggest)

# Container that records every evaluation of the search.
trials = Trials()

# Run the automatic parameter search and time it.
start_time = time.time()
best = fmin(cat_factory, space, algo=algo, max_evals=10, trials=trials)
end_time = time.time()

elapsed_mins, elapsed_secs = epoch_time(start_time, end_time)
print('elapsed_secs:', elapsed_secs)

# Local import: space_eval is not part of the file's existing hyperopt import.
from hyperopt import space_eval

# Collect the loss and the sampled hyperparameters of every trial.
records = []
for one in trials:
    # misc.vals keeps each sampled value in a one-element list and, for
    # hp.choice parameters, keeps the index of the chosen option rather than
    # the option itself. space_eval maps both back to the values the
    # objective actually received.
    sampled = {name: vals[0] for name, vals in one['misc']['vals'].items()}
    params = space_eval(space, sampled)
    records.append((
        one['result']['loss'],
        params['learning_rate'],
        params['iterations'],
        params['depth'],
        params['l2_leaf_reg'],
        params['border_count'],
    ))

parameters = pd.DataFrame(
    records,
    columns=['value', 'learning_rate', 'iterations', 'depth', 'l2_leaf_reg', 'border_count'],
)

# Pick the trial with the lowest loss. The loss is an MSE (non-negative), so
# the smallest value is the best one.
best = parameters.loc[parameters['value'].idxmin()]
print("best: {}".format(best))

相关推荐
网易云信10 分钟前
AI硬件的下一程,这场分论坛给你答案
人工智能·产品
网易云信25 分钟前
听说,我们搞了个 AI 编程"电子宠物"?
人工智能·aigc·ai编程
Lion0926 分钟前
【03】Function Calling:让 LLM 拥有双手
人工智能·ai编程
冬哥聊AI33 分钟前
多模态诅咒:给大模型装上眼睛,文本推理为什么反而变笨了?
人工智能
东风破_34 分钟前
LLM 是怎么预测下一个词的?从 Token 到 Transformer 的完整过程
人工智能
日是故乡明37 分钟前
Claude Code 正在用隐写术标记请求
人工智能
网易云信40 分钟前
Anthropic研究百万对话,情感陪伴AI正在成为基础设施
人工智能·aigc·agent
掘金一周42 分钟前
对车完全小白,不知买油买电还是买混动,求建议| 沸点周刊 7.2
前端·人工智能·后端
转转技术团队1 小时前
从神经元到大语言模型,回顾机器学习发展史
人工智能
MeixianAgent2 小时前
Python 回测数据入口怎么验?历史 K 线入库前先做 5 个检查
后端·python