使用optimtool训练符号神经网络

基于sympy构造符号神经网络

本文的代码来自 linjing-lab/optimtool:分类任务的代码在这里,回归任务的代码在这里,通用符号神经网络的代码在这里(原文中的三处“这里”均为超链接,链接地址在转载时已丢失)。

```bash
pip install "optimtool>=2.8.0"
```

分类任务示例

python 复制代码
import optimtool.unconstrain as ou
from optimtool.base import sp, np
# Trainable parameters of the 2-feature, 3-class linear model, declared as
# real-valued symbols. In symbolic_loss, w<k>1 and w<k>2 multiply the first and
# second feature when forming logit z<k>, and b<k> is that logit's bias.
w11, w21, w31 = sp.symbols('w11 w21 w31', real=True)
w12, w22, w32 = sp.symbols('w12 w22 w32', real=True)
b1, b2, b3 = sp.symbols('b1 b2 b3', real=True)
# Nine 2-D samples: three groups of three points each.
X = np.array([
    [1.0, 1.0], [1.5, 1.0], [1.0, 1.5],
    [4.0, 1.0], [4.5, 1.0], [4.0, 1.5],
    [1.0, 4.0], [1.5, 4.0], [1.0, 4.5],
])

# Standardise every feature column to zero mean and unit variance; a constant
# column would have std == 0, so its divisor is replaced by 1.
mean = np.mean(X, axis=0)
std = np.std(X, axis=0)
std[std == 0] = 1.0
X = (X - mean) / std

# One-hot targets: rows 0-2 belong to class 0, rows 3-5 to class 1 and
# rows 6-8 to class 2.
Y = np.repeat(np.eye(3), 3, axis=0)
def symbolic_loss():
    """Build the mean softmax cross-entropy of the 3-class linear model.

    Takes no arguments: it reads the module-level samples ``X``, the one-hot
    targets ``Y`` and the nine weight/bias symbols.

    Returns:
        A symbolic expression: the per-sample cross-entropy terms summed over
        all samples and divided by ``len(Y)``.
    """
    total = 0.0
    for (x1, x2), target in zip(X, Y):
        # One linear score (logit) per class.
        scores = (
            w11 * x1 + w12 * x2 + b1,
            w21 * x1 + w22 * x2 + b2,
            w31 * x1 + w32 * x2 + b3,
        )
        exps = [sp.exp(score) for score in scores]
        normaliser = exps[0] + exps[1] + exps[2]
        p1, p2, p3 = [e / normaliser for e in exps]
        # Cross-entropy of the softmax probabilities against the one-hot row.
        total += -(
            target[0] * sp.log(p1)
            + target[1] * sp.log(p2)
            + target[2] * sp.log(p3)
        )
    return total / len(Y)
# Build the symbolic loss once and pass it to optimtool's barzilar_borwein
# routine together with the nine symbols and an all-zero starting point.
f_sym = symbolic_loss()
ou.gradient_descent.barzilar_borwein(
    f_sym,
    [w11, w21, w31, w12, w22, w32, b1, b2, b3],
    [0] * 9,
    verbose=True,
    epsilon=1e-2,  # presumably the stopping tolerance -- confirm in optimtool docs
)

回归任务示例

python 复制代码
import optimtool.unconstrain as ou
from optimtool.base import sp, np
# Real-valued symbols of the regression model: in symbolic_loss, w1, w2 and w3
# weight x1, x2 and x1**2 respectively, and b is the intercept.
w1, w2, w3, b = sp.symbols('w1 w2 w3 b', real=True)
# Seed NumPy's global generator so the synthetic data set is reproducible.
np.random.seed(0)
n = 20  # number of samples

# Two features: an evenly spaced x1 and an x2 built from x1 plus Gaussian noise.
x1 = np.linspace(-3, 3, num=n)
x2 = x1 * 0.8 + 0.5 * np.random.randn(n)
X = np.column_stack((x1, x2))

# Noise-free target (linear in x1 and x2, quadratic in x1), then the observed
# target with unit-variance Gaussian noise added.
y_true = x1 * 2 - x2 * 3 + (x1**2) * 1.5
noise = np.random.randn(n)
y = noise + y_true
def symbolic_loss():
    """Build the mean squared error of the quadratic-feature regression model.

    Takes no arguments: it reads the module-level samples ``X``, the targets
    ``y`` and the symbols ``w1``, ``w2``, ``w3`` and ``b``.

    Returns:
        A symbolic expression: the squared residuals summed over all samples
        and divided by ``len(y)``.
    """
    squared_residuals = (
        (w1 * u + w2 * v + w3 * u**2 + b - target) ** 2
        for (u, v), target in zip(X, y)
    )
    return sum(squared_residuals) / len(y)
# Build the symbolic loss once and pass it to optimtool's newton_quasi.bfgs
# routine together with the four symbols and an all-zero starting point.
f_sym = symbolic_loss()
ou.newton_quasi.bfgs(
    f_sym,
    [w1, w2, w3, b],
    [0] * 4,
    verbose=True,
)

通用的神经网络

python 复制代码
import optimtool.unconstrain as ou
from optimtool.base import sp, np

def _encode_targets(y_data, n_samples, task):
    """Validate the targets and return ``(targets_2d, n_outputs)``."""
    if task not in ('classification', 'regression'):
        raise ValueError(f"Support classification or regression. not support {task}.")
    y_arr = np.asarray(y_data)
    if y_arr.ndim not in (1, 2):
        raise ValueError(f"y_data must be 1-D or 2-D, got {y_arr.ndim}-D.")
    if y_arr.shape[0] != n_samples:
        raise ValueError(
            f"X_data has {n_samples} samples but y_data has {y_arr.shape[0]}."
        )
    if y_arr.ndim == 2:
        # Already one column per output (one-hot rows or multi-output targets).
        return y_arr, y_arr.shape[1]
    if task == 'regression':
        return y_arr.reshape(-1, 1), 1
    # Map each distinct label to its rank among the sorted unique labels, so
    # labels need not be the contiguous integers 0..K-1. For labels that are
    # already 0..K-1 this yields the same columns as indexing by the label.
    classes, class_idx = np.unique(y_arr, return_inverse=True)
    one_hot = np.zeros((n_samples, len(classes)))
    one_hot[np.arange(n_samples), class_idx] = 1
    return one_hot, len(classes)


def _build_layers(layer_dims):
    """Create the weight and bias symbols; return ``(layers, flat_params)``."""
    layers = []
    flat_params = []
    for i, (fan_in, fan_out) in enumerate(zip(layer_dims, layer_dims[1:])):
        weights = [
            [sp.symbols(f'W{i}_{j}_{k}', real=True) for k in range(fan_in)]
            for j in range(fan_out)
        ]
        biases = [sp.symbols(f'b{i}_{j}', real=True) for j in range(fan_out)]
        # Flat order per layer: weights row by row, then the biases.
        flat_params.extend(w for row in weights for w in row)
        flat_params.extend(biases)
        layers.append((weights, biases))
    return layers, flat_params


def _forward(layers, inputs):
    """Propagate one sample through the layers; ReLU on all but the last."""
    activations = list(inputs)
    last = len(layers) - 1
    for depth, (weights, biases) in enumerate(layers):
        pre_activations = [
            sum(w * a for w, a in zip(row, activations)) + bias
            for row, bias in zip(weights, biases)
        ]
        if depth == last:
            activations = pre_activations  # output layer stays linear
        else:
            activations = [sp.Max(0, z) for z in pre_activations]  # ReLU
    return activations


def gen_nn(X_data, y_data, hidden_dims=None, task='classification'):
    """Build the symbolic training loss of a fully connected network.

    Every weight and bias is a real symbol, the data values are substituted as
    numbers, and the loss over all samples is returned as a single expression.

    Parameters
    ----------
    X_data : array-like, shape (n_samples, n_features)
        Input samples.
    y_data : array-like, 1-D or 2-D
        Targets with one entry/row per sample. For classification a 1-D array
        holds class labels (one-hot encoded internally, columns ordered by the
        sorted unique labels) and a 2-D array is used as-is as one-hot rows.
        For regression a 1-D array is treated as a single output column.
    hidden_dims : sequence of int, optional
        Width of each hidden layer; ``None`` or empty means no hidden layer.
    task : {'classification', 'regression'}
        Selects softmax cross-entropy or mean squared error.

    Returns
    -------
    f_sym
        Symbolic loss: cross-entropy averaged over samples, or squared error
        averaged over samples and outputs.
    params : list
        All symbols, layer by layer: ``W{i}_{j}_{k}`` row by row (``j`` is the
        output unit, ``k`` the input unit) followed by ``b{i}_{j}``.

    Raises
    ------
    ValueError
        If ``task`` is unsupported, ``X_data`` is not 2-D or is empty,
        ``y_data`` is not 1-D/2-D, or the sample counts of ``X_data`` and
        ``y_data`` differ.
    """
    X_arr = np.asarray(X_data)
    if X_arr.ndim != 2:
        raise ValueError(
            f"X_data must be 2-D (n_samples, n_features), got {X_arr.ndim}-D."
        )
    n_samples, n_features = X_arr.shape
    if n_samples == 0:
        raise ValueError("X_data must contain at least one sample.")
    targets, n_outputs = _encode_targets(y_data, n_samples, task)

    hidden = [] if hidden_dims is None else list(hidden_dims)
    layers, params = _build_layers([n_features] + hidden + [n_outputs])

    f_sym = 0
    epsilon = 1e-10  # keeps the arguments of the logarithms strictly positive
    if task == 'classification':
        for s in range(n_samples):
            logits = _forward(layers, X_arr[s].tolist())
            y_true = targets[s].tolist()
            log_sum_exp = sp.log(sum(sp.exp(z) for z in logits) + epsilon)
            probs = [sp.exp(z - log_sum_exp) for z in logits]  # Softmax
            for c in range(n_outputs):
                f_sym += -y_true[c] * sp.log(probs[c] + epsilon)
        f_sym /= n_samples
    else:  # regression
        for s in range(n_samples):
            pred = _forward(layers, X_arr[s].tolist())
            for o in range(n_outputs):
                diff = pred[o] - targets[s, o]
                f_sym += diff ** 2  # MSE
        f_sym /= (n_samples * n_outputs)
    return f_sym, params

# # classification
# X = np.array([
#     [2.0, 1.0, 0.5, 0.1],
#     [5.1, 3.1, 2.1, 1.05],
#     [5.0, 3.0, 2.0, 1.0],
#     [2.1, 0.9, 0.55, 0.15],
#     [8.0, 5.0, 4.0, 2.0],
#     [8.2, 5.2, 4.2, 2.1],
# ], dtype=np.float64)
# y = np.array([0, 1, 1, 0, 2, 2], dtype=np.int64)
# f_sym, params = gen_nn(X, y, hidden_dims=[2,])
# ou.gradient_descent.barzilar_borwein(f_sym, params, np.ones(len(params)).tolist(), verbose=True, epsilon=1e-2)

# regression
def gen_reg(n_samples=20, seed=0):
    """Generate a small noisy one-input, three-output regression data set.

    Seeds NumPy's global random generator with ``seed`` as a side effect.

    Args:
        n_samples: Number of evenly spaced inputs taken from [-5, 5].
        seed: Seed passed to ``np.random.seed``.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, 1)`` and ``y`` has shape
        ``(n_samples, 3)`` with columns ``sin(x)``, ``0.5 * x`` and
        ``cos(x) + 0.2 * x``, each with Gaussian noise scaled by 0.2 added.
    """
    np.random.seed(seed)
    inputs = np.linspace(-5, 5, n_samples)[:, None]
    jitter = np.random.randn(n_samples, 3) * 0.2
    clean_columns = (
        np.sin(inputs),
        inputs * 0.5,
        inputs * 0.2 + np.cos(inputs),
    )
    return inputs, np.hstack(clean_columns) + jitter

# Generate a 5-sample data set, build the symbolic loss of a network with one
# hidden layer of two units, and pass it to optimtool's newton_quasi.bfgs
# routine with every parameter starting at 1.0.
X, y = gen_reg(n_samples=5)
f_sym, params = gen_nn(X, y, hidden_dims=[2], task='regression')
ou.newton_quasi.bfgs(
    f_sym,
    params,
    [1.0] * len(params),
    verbose=True,
    epsilon=1e-4,  # presumably the stopping tolerance -- confirm in optimtool docs
)
相关推荐
米小虾1 小时前
2026 年多模态大模型全面爆发:从「看懂图」到「听懂世界」的技术跃迁
人工智能
米小虾1 小时前
AI Agent 进入协议时代:MCP、A2A、AG-UI 三大协议全景解析
人工智能·agent
蝎子莱莱爱打怪1 小时前
🚀 🚀🚀2026年5月GitHub月榜精选:17个项目中挑出10个推荐,实操4个!
人工智能·后端·ai编程
升鲜宝供应链及收银系统源代码服务1 小时前
升鲜宝AI助手项目源码集成开发步骤(一)---升鲜宝生鲜配送供应链管理系统源代码服务
人工智能·生鲜配送系统·生鲜物流线路规划·生鲜电商订单系统·生鲜供应链系统·生鲜系统架构设计·生鲜配送ai功能集成
yjcode7891 小时前
探索游戏充值新纪元:友价源码技术革新之旅
大数据·人工智能·游戏·游戏交易
冬奇Lab2 小时前
Agent 系列(11):A2A 协议——Agent 与 Agent 如何协作
人工智能·agent
snow@li2 小时前
AI:理解 大数据、算法、算力、电力、生成式AI、token 之间的关系
大数据·人工智能·算法
冬奇Lab2 小时前
每日一个开源项目(第120篇):SkillLens - 微软出品,照亮 AI Agent 技能生命周期的“显微镜”
人工智能·开源·资讯
qingyulee2 小时前
深度学习介绍、pytorch框架
人工智能·深度学习
oort1232 小时前
VLStream:全开源决策式AI视频平台,赋能企业构建自主可控、降本增效的智能视觉应用介绍
大数据·开发语言·人工智能·开源·音视频·数据库架构