使用optimtool训练符号神经网络

基于sympy构造符号神经网络

本文的代码来自linjing-lab/optimtool,分类任务的代码在这里,回归任务的代码在这里,通用符号神经网络的代码在这里

python 复制代码
pip install optimtool>=2.8.0

分类任务示例

python 复制代码
import optimtool.unconstrain as ou
from optimtool.base import sp, np
w11, w21, w31 = sp.symbols('w11 w21 w31', real=True)
w12, w22, w32 = sp.symbols('w12 w22 w32', real=True)
b1, b2, b3 = sp.symbols('b1 b2 b3', real=True)
X = np.array([
    [1.0, 1.0], 
    [1.5, 1.0],   
    [1.0, 1.5],   
    [4.0, 1.0],   
    [4.5, 1.0],   
    [4.0, 1.5],   
    [1.0, 4.0], 
    [1.5, 4.0],   
    [1.0, 4.5],   
])
mean = X.mean(axis=0)
std = X.std(axis=0)
std[std == 0] = 1
X = (X - mean) / std
Y = np.zeros((X.shape[0], 3))
Y[:3, 0] = 1
Y[3:6, 1] = 1
Y[6:, 2] = 1
def symbolic_loss():
    loss = 0.0
    for i in range(len(Y)):
        x1, x2 = X[i]
        z1 = w11 * x1 + w12 * x2 + b1
        z2 = w21 * x1 + w22 * x2 + b2
        z3 = w31 * x1 + w32 * x2 + b3
        exp_sum = sp.exp(z1) + sp.exp(z2) + sp.exp(z3)
        p1 = sp.exp(z1) / exp_sum
        p2 = sp.exp(z2) / exp_sum
        p3 = sp.exp(z3) / exp_sum
        loss_expr = - (Y[i, 0] * sp.log(p1) + Y[i, 1] * sp.log(p2) + Y[i, 2] * sp.log(p3))
        loss += loss_expr
    return loss / len(Y)
f_sym = symbolic_loss()
ou.gradient_descent.barzilar_borwein(f_sym, [w11, w21, w31, w12, w22, w32, b1, b2, b3], [0, 0, 0, 0, 0, 0, 0, 0, 0], verbose=True, epsilon=1e-2)

回归任务示例

python 复制代码
import optimtool.unconstrain as ou
from optimtool.base import sp, np
w1, w2, w3, b = sp.symbols('w1 w2 w3 b', real=True)
np.random.seed(0)
n = 20 # samples
x1 = np.linspace(-3, 3, n)
x2 = 0.8 * x1 + np.random.randn(n) * 0.5
X = np.column_stack([x1, x2])
y_true = 2 * x1 - 3 * x2 + 1.5 * x1**2
noise = np.random.randn(n)
y = y_true + noise
def symbolic_loss():
    loss = 0
    for i in range(len(y)):
        x1_, x2_ = X[i]
        y_pred = w1 * x1_ + w2 * x2_ + w3 * x1_**2 + b
        loss += (y_pred - y[i]) ** 2
    return loss / len(y)
f_sym = symbolic_loss()
ou.newton_quasi.bfgs(f_sym, [w1, w2, w3, b], [0, 0, 0, 0], verbose=True)

通用的神经网络

python 复制代码
import optimtool.unconstrain as ou
from optimtool.base import sp, np

def gen_nn(X_data, y_data, hidden_dims=[], task='classification'):
    n_samples, n_features = X_data.shape
    if task == 'classification':
        if len(y_data.shape) == 1:
            n_outputs = len(np.unique(y_data))
            y_one_hot = np.zeros((n_samples, n_outputs))
            for i, label in enumerate(y_data):
                y_one_hot[i, int(label)] = 1
        else:
            n_outputs = y_data.shape[1]
            y_one_hot = y_data
        y_processed = y_one_hot
    elif task == 'regression':
        if len(y_data.shape) == 1:
            n_outputs = 1
            y_processed = y_data.reshape(-1, 1)
        else:
            n_outputs = y_data.shape[1]
            y_processed = y_data
    else:
        raise ValueError(f"Support classification or regression. not support {task}.")
    params = []
    layer_dims = [n_features] + hidden_dims + [n_outputs]
    for i in range(len(layer_dims)-1):
        prev_dim = layer_dims[i]
        curr_dim = layer_dims[i+1]
        for j in range(curr_dim):
            for k in range(prev_dim):
                params.append(sp.symbols(f'W{i}_{j}_{k}', real=True))
        for j in range(curr_dim):
            params.append(sp.symbols(f'b{i}_{j}', real=True))
    def forward(X_vec):
        idx = 0
        x = X_vec.copy()
        current_dim = n_features
        for i, h_dim in enumerate(hidden_dims):
            W_mat = []
            for j in range(h_dim):
                row = []
                for k in range(current_dim):
                    row.append(params[idx])
                    idx += 1
                W_mat.append(row)
            b_vec = []
            for j in range(h_dim):
                b_vec.append(params[idx])
                idx += 1
            z = []
            for j in range(h_dim):
                sum_val = 0
                for k in range(current_dim):
                    sum_val += W_mat[j][k] * x[k]
                sum_val += b_vec[j]
                z.append(sp.Max(0, sum_val)) # ReLU
            x = z
            current_dim = h_dim
        output_dim = n_outputs
        W_mat = []
        for j in range(output_dim):
            row = []
            for k in range(current_dim):
                row.append(params[idx])
                idx += 1
            W_mat.append(row)
        b_vec = []
        for j in range(output_dim):
            b_vec.append(params[idx])
            idx += 1
        out = []
        for j in range(output_dim):
            sum_val = 0
            for k in range(current_dim):
                sum_val += W_mat[j][k] * x[k]
            sum_val += b_vec[j]
            out.append(sum_val)
        return out
    f_sym = 0
    epsilon = 1e-10
    if task == 'classification':
        for s in range(n_samples):
            logits = forward(X_data[s].tolist())
            y_true = y_processed[s].tolist()
            log_sum_exp = sp.log(sum(sp.exp(l) for l in logits) + epsilon)
            probs = [sp.exp(l - log_sum_exp) for l in logits]
            for c in range(n_outputs):
                f_sym += -y_true[c] * sp.log(probs[c] + epsilon) # Softmax
        f_sym /= n_samples 
    else:  # regression
        predictions = []
        for s in range(n_samples):
            pred = forward(X_data[s].tolist())
            predictions.append(pred)
        for s in range(n_samples):
            for o in range(n_outputs):
                diff = predictions[s][o] - y_processed[s, o]
                f_sym += diff ** 2 # MSE
        f_sym /= (n_samples * n_outputs)
    return f_sym, params

# # classification
# X = np.array([
#     [2.0, 1.0, 0.5, 0.1],
#     [5.1, 3.1, 2.1, 1.05],
#     [5.0, 3.0, 2.0, 1.0],
#     [2.1, 0.9, 0.55, 0.15],
#     [8.0, 5.0, 4.0, 2.0],
#     [8.2, 5.2, 4.2, 2.1],
# ], dtype=np.float64)
# y = np.array([0, 1, 1, 0, 2, 2], dtype=np.int64)
# f_sym, params = gen_nn(X, y, hidden_dims=[2,])
# ou.gradient_descent.barzilar_borwein(f_sym, params, np.ones(len(params)).tolist(), verbose=True, epsilon=1e-2)

# regression
def gen_reg(n_samples=20, seed=0):
    np.random.seed(seed)
    X = np.linspace(-5, 5, n_samples).reshape(-1, 1)
    noise = 0.2 * np.random.randn(n_samples, 3)
    y1 = np.sin(X)
    y2 = 0.5 * X
    y3 = np.cos(X) + 0.2 * X
    y = np.hstack([y1, y2, y3]) + noise
    return X, y

X, y = gen_reg(n_samples=5)
f_sym, params = gen_nn(X, y, hidden_dims=[2,], task='regression')
ou.newton_quasi.bfgs(f_sym, params, np.ones(len(params)).tolist(), verbose=True, epsilon=1e-4)
相关推荐
武子康2 小时前
调查研究-198 Agent 到底该记住什么?读懂《What Must Generalist Agents Remember?》
人工智能·openai·agent
aqi003 小时前
15天学会AI应用开发(九)利用Chroma持久化向量数据
人工智能·python·大模型·ai编程·ai应用
武子康4 小时前
调查研究-197 FAISS vs Elasticsearch 全面对比:从向量检索、全文搜索到 RAG 选型指南
人工智能·elasticsearch·agent
青禾网络4 小时前
Web 前端如何接入 AI 音效生成:从零到可用的完整方案
人工智能·设计模式
用户252736278144 小时前
【技术实战】用 Spring Boot + Vue3 + LM Studio 在本地跑通 RAG 知识库
人工智能
用户5191495848454 小时前
VBScript随机数生成器内部机制:从时间种子到密码令牌破解
人工智能·aigc
米小虾5 小时前
Context Engineering —— 知识与记忆的窗口
人工智能·agent
IT_陈寒5 小时前
Python里这个赋值坑,连老司机都能翻车
前端·人工智能·后端
Shockang15 小时前
AI 设计工作流全景拆解:Figma MCP / Claude Design / Codex / Google Stitch
人工智能