基于神经网络、强化学习、模糊逻辑和小波相结合的混合方法控制欠驱动系统

MATLAB实现展示了如何结合神经网络(NN)、强化学习(RL)、模糊逻辑(FL)和小波变换(WT)来控制欠驱动系统（以倒立摆小车系统为例）。

matlab 复制代码

%% 基于NN-RL-FL-WT混合方法的欠驱动系统控制
% 系统：倒立摆小车系统（经典欠驱动系统）
% 方法：结合小波变换、模糊逻辑、神经网络和强化学习

clc; clear; close all;
addpath(genpath('.')); % 添加工具箱路径

%% 主函数
function main()
    % 系统参数
    params = struct();
    params.g = 9.81;       % 重力加速度 (m/s^2)
    params.mc = 1.0;       % 小车质量 (kg)
    params.mp = 0.1;       % 摆杆质量 (kg)
    params.l = 0.5;        % 摆杆长度 (m)
    params.dt = 0.01;      % 时间步长 (s)
    params.simTime = 10;   % 仿真时间 (s)
    
    % 初始状态 [小车位置, 小车速度, 摆杆角度, 摆杆角速度]
    initialState = [0; 0; 0.1; 0]; % 摆杆初始角度0.1弧度
    
    % 创建系统模型
    system = createInvertedPendulumModel(params);
    
    % 创建混合控制器
    controller = HybridController(params);
    
    % 仿真系统
    [time, state, control] = simulateSystem(system, controller, initialState, params);
    
    % 可视化结果
    visualizeResults(time, state, control, params);
end

%% 创建倒立摆小车模型
function system = createInvertedPendulumModel(params)
    system.params = params;
    
    % 系统动力学方程
    system.dynamics = @(t, x, u) invertedPendulumDynamics(x, u, params);
end

%% 倒立摆动力学方程
function dxdt = invertedPendulumDynamics(x, u, params)
    % 状态变量
    % x(1): 小车位置
    % x(2): 小车速度
    % x(3): 摆杆角度 (垂直向上为0)
    % x(4): 摆杆角速度
    
    g = params.g;
    mc = params.mc;
    mp = params.mp;
    l = params.l;
    
    % 动力学方程
    sin_theta = sin(x(3));
    cos_theta = cos(x(3));
    
    % 中间变量
    temp = (u + mp*l*x(4)^2*sin_theta) / (mc + mp);
    numerator = g*sin_theta - cos_theta*temp;
    denominator = l*(4/3 - (mp*cos_theta^2)/(mc + mp));
    
    % 导数
    dxdt = zeros(4, 1);
    dxdt(1) = x(2);
    dxdt(2) = temp - (mp*l*x(4)^2*sin_theta*cos_theta)/(mc + mp);
    dxdt(3) = x(4);
    dxdt(4) = numerator / denominator;
end

%% 混合控制器类
classdef HybridController < handle
    properties
        params
        wavelet
        fuzzySystem
        neuralNetwork
        rlAgent
        prevError
        integralError
    end
    
    methods
        function obj = HybridController(params)
            obj.params = params;
            obj.prevError = 0;
            obj.integralError = 0;
            
            % 初始化小波变换
            obj.wavelet = WaveletTransform('db4', 3); % Daubechies 4小波，3层分解
            
            % 初始化模糊逻辑系统
            obj.fuzzySystem = FuzzyLogicController();
            
            % 初始化神经网络
            obj.neuralNetwork = NeuralNetworkController([4, 10, 5, 1]); % 4输入，2隐藏层(10,5)，1输出
            
            % 初始化强化学习代理
            obj.rlAgent = ReinforcementLearningAgent(4, 1); % 4状态，1动作
        end
        
        function u = computeControl(obj, state, time)
            % 目标状态：小车在原点，摆杆竖直向上
            target = [0; 0; 0; 0];
            error = target - state;
            
            % 1. 小波变换处理误差信号
            waveletCoeffs = obj.wavelet.decompose(error(3)); % 对摆杆角度误差进行小波分解
            waveletFeatures = mean(abs(waveletCoeffs), 2); % 提取小波特征
            
            % 2. 模糊逻辑控制
            fuzzyOutput = obj.fuzzySystem.compute(error(3), error(4)); % 输入角度和角速度误差
            
            % 3. 神经网络预测
            nnInput = [state; waveletFeatures']; % 组合状态和特征
            nnOutput = obj.neuralNetwork.predict(nnInput);
            
            % 4. 强化学习决策
            rlAction = obj.rlAgent.getAction(state);
            
            % 5. 混合控制策略
            baseControl = fuzzyOutput * 0.5 + nnOutput * 0.3 + rlAction * 0.2;
            
            % 6. PID补偿
            Kp = 10; Ki = 0.1; Kd = 2;
            derivative = (error(3) - obj.prevError) / obj.params.dt;
            integral = obj.integralError + error(3) * obj.params.dt;
            
            pidControl = Kp*error(3) + Ki*integral + Kd*derivative;
            
            % 最终控制输出
            u = baseControl + pidControl;
            
            % 更新状态
            obj.prevError = error(3);
            obj.integralError = integral;
            
            % 限制控制输入范围
            u = max(min(u, 20), -20);
        end
        
        function updateController(obj, state, action, reward, nextState)
            % 更新强化学习代理
            obj.rlAgent.update(state, action, reward, nextState);
            
            % 更新神经网络（监督学习）
            target = action; % 简化示例
            obj.neuralNetwork.train(state, target);
        end
    end
end

%% 小波变换类
classdef WaveletTransform
    properties
        waveletName
        level
    end
    
    methods
        function obj = WaveletTransform(waveletName, level)
            obj.waveletName = waveletName;
            obj.level = level;
        end
        
        function coeffs = decompose(obj, signal)
            % 执行多级小波分解
            coeffs = cell(obj.level+1, 1);
            approx = signal';
            
            for i = 1:obj.level
                [a, d] = dwt(approx, obj.waveletName);
                coeffs{i} = d; % 细节系数
                approx = a;    % 近似系数
            end
            coeffs{obj.level+1} = approx; % 最后一级近似系数
        end
        
        function signal = reconstruct(obj, coeffs)
            % 从小波系数重建信号
            approx = coeffs{end};
            
            for i = obj.level:-1:1
                approx = idwt(approx, coeffs{i}, obj.waveletName);
            end
            
            signal = approx';
        end
    end
end

%% 模糊逻辑控制器类
classdef FuzzyLogicController
    properties
        fis % Fuzzy Inference System
    end
    
    methods
        function obj = FuzzyLogicController()
            % 创建模糊推理系统
            obj.fis = mamfis('Name', 'pendulum_control');
            
            % 输入1: 角度误差 (e)
            obj.fis = addInput(obj.fis, [-pi/4, pi/4], 'Name', 'angle_error');
            obj.fis = addMF(obj.fis, 'angle_error', 'gaussmf', [0.05, 0], 'Name', 'NB');   % 负大
            obj.fis = addMF(obj.fis, 'angle_error', 'gaussmf', [0.03, -pi/12], 'Name', 'NS'); % 负小
            obj.fis = addMF(obj.fis, 'angle_error', 'gaussmf', [0.02, 0], 'Name', 'Z');    % 零
            obj.fis = addMF(obj.fis, 'angle_error', 'gaussmf', [0.03, pi/12], 'Name', 'PS');  % 正小
            obj.fis = addMF(obj.fis, 'angle_error', 'gaussmf', [0.05, pi/4], 'Name', 'PB');  % 正大
            
            % 输入2: 角速度误差 (de)
            obj.fis = addInput(obj.fis, [-2, 2], 'Name', 'angle_rate');
            obj.fis = addMF(obj.fis, 'angle_rate', 'gbellmf', [0.5, 2, -2], 'Name', 'NB');
            obj.fis = addMF(obj.fis, 'angle_rate', 'gbellmf', [0.5, 2, -0.5], 'Name', 'NS');
            obj.fis = addMF(obj.fis, 'angle_rate', 'gbellmf', [0.5, 2, 0], 'Name', 'Z');
            obj.fis = addMF(obj.fis, 'angle_rate', 'gbellmf', [0.5, 2, 0.5], 'Name', 'PS');
            obj.fis = addMF(obj.fis, 'angle_rate', 'gbellmf', [0.5, 2, 2], 'Name', 'PB');
            
            % 输出: 控制力
            obj.fis = addOutput(obj.fis, [-10, 10], 'Name', 'control_force');
            obj.fis = addMF(obj.fis, 'control_force', 'trimf', [-10, -10, -5], 'Name', 'NB');
            obj.fis = addMF(obj.fis, 'control_force', 'trimf', [-7, -3, 0], 'Name', 'NS');
            obj.fis = addMF(obj.fis, 'control_force', 'trimf', [-2, 0, 2], 'Name', 'Z');
            obj.fis = addMF(obj.fis, 'control_force', 'trimf', [0, 3, 7], 'Name', 'PS');
            obj.fis = addMF(obj.fis, 'control_force', 'trimf', [5, 10, 10], 'Name', 'PB');
            
            % 规则库
            rules = [
                1 1 1 1 1;   % NB, NB -> NB
                1 2 1 1 1;   % NB, NS -> NB
                1 3 2 1 1;   % NB, Z  -> NS
                1 4 2 1 1;   % NB, PS -> NS
                1 5 3 1 1;   % NB, PB -> Z
                
                2 1 1 1 1;   % NS, NB -> NB
                2 2 2 1 1;   % NS, NS -> NS
                2 3 2 1 1;   % NS, Z  -> NS
                2 4 3 1 1;   % NS, PS -> Z
                2 5 4 1 1;   % NS, PB -> PS
                
                3 1 2 1 1;   % Z,  NB -> NS
                3 2 2 1 1;   % Z,  NS -> NS
                3 3 3 1 1;   % Z,  Z  -> Z
                3 4 4 1 1;   % Z,  PS -> PS
                3 5 4 1 1;   % Z,  PB -> PS
                
                4 1 2 1 1;   % PS, NB -> NS
                4 2 3 1 1;   % PS, NS -> Z
                4 3 4 1 1;   % PS, Z  -> PS
                4 4 4 1 1;   % PS, PS -> PS
                4 5 5 1 1;   % PS, PB -> PB
                
                5 1 3 1 1;   % PB, NB -> Z
                5 2 4 1 1;   % PB, NS -> PS
                5 3 4 1 1;   % PB, Z  -> PS
                5 4 5 1 1;   % PB, PS -> PB
                5 5 5 1 1    % PB, PB -> PB
                ];
            
            obj.fis = addRule(obj.fis, rules);
        end
        
        function output = compute(obj, angle_error, angle_rate)
            % 计算模糊控制输出
            output = evalfis(obj.fis, [angle_error, angle_rate]);
        end
    end
end

%% 神经网络控制器类
classdef NeuralNetworkController
    properties
        layers
        weights
        biases
        learningRate
    end
    
    methods
        function obj = NeuralNetworkController(layerSizes)
            obj.layers = layerSizes;
            obj.learningRate = 0.01;
            
            % 初始化权重和偏置
            obj.weights = {};
            obj.biases = {};
            
            for i = 1:(length(layerSizes)-1)
                inputSize = layerSizes(i);
                outputSize = layerSizes(i+1);
                
                % Xavier初始化
                bound = sqrt(6/(inputSize + outputSize));
                obj.weights{i} = unifrnd(-bound, bound, outputSize, inputSize);
                obj.biases{i} = zeros(outputSize, 1);
            end
        end
        
        function output = predict(obj, input)
            % 前向传播
            a = input(:)'; % 确保行向量
            
            for i = 1:(length(obj.layers)-1)
                z = obj.weights{i} * a' + obj.biases{i};
                a = tanh(z); % 激活函数
            end
            
            output = a;
        end
        
        function train(obj, input, target)
            % 简化训练过程（实际应用中应使用更复杂的训练算法）
            a = input(:)';
            targets = target(:)';
            
            % 前向传播
            activations = {a};
            zs = {};
            
            for i = 1:(length(obj.layers)-1)
                z = obj.weights{i} * a' + obj.biases{i};
                a = tanh(z);
                zs{i} = z;
                activations{i+1} = a;
            end
            
            % 反向传播
            delta = (activations{end} - targets') .* (1 - activations{end}.^2);
            
            for l = length(obj.layers)-1:-1:1
                % 更新权重和偏置
                obj.weights{l} = obj.weights{l} - obj.learningRate * delta * activations{l};
                obj.biases{l} = obj.biases{l} - obj.learningRate * delta;
                
                if l > 1
                    % 计算下一层的delta
                    delta = (obj.weights{l}' * delta) .* (1 - activations{l}.^2);
                end
            end
        end
    end
end

%% 强化学习代理类
classdef ReinforcementLearningAgent
    properties
        stateDim
        actionDim
        qTable
        learningRate
        discountFactor
        explorationRate
        minExploration
        explorationDecay
    end
    
    methods
        function obj = ReinforcementLearningAgent(stateDim, actionDim)
            obj.stateDim = stateDim;
            obj.actionDim = actionDim;
            
            % 简化Q表（实际应用中应使用函数近似）
            obj.qTable = zeros(5, 5, 5, 5, 3); % 4维状态空间，3个离散动作
            
            obj.learningRate = 0.1;
            obj.discountFactor = 0.95;
            obj.explorationRate = 1.0;
            obj.minExploration = 0.01;
            obj.explorationDecay = 0.995;
        end
        
        function action = getAction(obj, state)
            % ε-贪婪策略
            if rand() < obj.explorationRate
                % 随机探索
                action = rand() * 40 - 20; % 随机控制力 [-20, 20]
            else
                % 利用Q表选择最优动作
                discreteState = discretizeState(state);
                [~, idx] = max(obj.qTable(discreteState{:}));
                action = mapIndexToAction(idx);
            end
        end
        
        function update(obj, state, action, reward, nextState)
            % Q-learning更新
            discreteState = discretizeState(state);
            discreteNextState = discretizeState(nextState);
            
            % 查找当前Q值
            currentQ = obj.qTable(discreteState{:});
            
            % 查找下一个状态的最大Q值
            maxNextQ = max(obj.qTable(discreteNextState{:}));
            
            % 更新Q值
            actionIdx = mapActionToIndex(action);
            obj.qTable(discreteState{:}) = currentQ;
            obj.qTable(discreteState{:})(actionIdx) = ...
                currentQ(actionIdx) + obj.learningRate * ...
                (reward + obj.discountFactor * maxNextQ - currentQ(actionIdx));
            
            % 衰减探索率
            obj.explorationRate = max(obj.minExploration, ...
                obj.explorationRate * obj.explorationDecay);
        end
        
        function ds = discretizeState(~, state)
            % 简化状态离散化（实际应用需要更精细的分辨率）
            cartPos = floor((state(1) + 2.5) / 0.5) + 1; % [-2.5, 2.5] -> [1, 11]
            cartVel = floor((state(2) + 1.5) / 0.3) + 1; % [-1.5, 1.5] -> [1, 11]
            theta = floor((state(3) + pi/4) / (pi/10)) + 1; % [-π/4, π/4] -> [1, 11]
            thetaDot = floor((state(4) + 2) / 0.4) + 1; % [-2, 2] -> [1, 11]
            
            % 限制在范围内
            cartPos = min(max(cartPos, 1), 5);
            cartVel = min(max(cartVel, 1), 5);
            theta = min(max(theta, 1), 5);
            thetaDot = min(max(thetaDot, 1), 5);
            
            ds = {cartPos, cartVel, theta, thetaDot};
        end
        
        function idx = mapActionToIndex(action)
            % 将连续动作映射到离散索引
            if action < -5
                idx = 1; % 左
            elseif action > 5
                idx = 3; % 右
            else
                idx = 2; % 保持
            end
        end
        
        function action = mapIndexToAction(~, idx)
            % 将离散索引映射到连续动作
            switch idx
                case 1
                    action = -10; % 向左推
                case 2
                    action = 0;   % 不推
                case 3
                    action = 10;  % 向右推
            end
        end
    end
end

%% 系统仿真函数
function [time, state, control] = simulateSystem(system, controller, initialState, params)
    dt = params.dt;
    simTime = params.simTime;
    steps = round(simTime / dt);
    
    % 初始化数组
    time = zeros(steps, 1);
    state = zeros(4, steps);
    control = zeros(1, steps);
    
    % 设置初始状态
    currentState = initialState;
    state(:, 1) = currentState;
    time(1) = 0;
    
    % 仿真循环
    for k = 1:steps-1
        % 计算控制输入
        u = controller.computeControl(currentState, time(k));
        control(k) = u;
        
        % 系统动力学更新（欧拉法）
        dxdt = system.dynamics(time(k), currentState, u);
        nextState = currentState + dxdt * dt;
        
        % 存储结果
        time(k+1) = time(k) + dt;
        state(:, k+1) = nextState;
        currentState = nextState;
        
        % 更新控制器（强化学习）
        if k > 1
            % 计算奖励（鼓励摆杆直立，小车居中）
            reward = -abs(state(3,k)) - 0.1*abs(state(1,k)) - 0.01*u^2;
            controller.updateController(state(:,k-1), control(k-1), reward, state(:,k));
        end
    end
    
    % 最后一个控制输入
    control(end) = controller.computeControl(currentState, time(end));
end

%% 结果可视化函数
function visualizeResults(time, state, control, params)
    % 创建图形窗口
    figure('Name', '混合控制结果', 'Position', [100, 100, 1200, 800]);
    
    % 小车位置
    subplot(3, 2, 1);
    plot(time, state(1,:), 'b', 'LineWidth', 1.5);
    xlabel('时间 (s)');
    ylabel('小车位置 (m)');
    title('小车位置');
    grid on;
    
    % 摆杆角度
    subplot(3, 2, 2);
    plot(time, rad2deg(state(3,:)), 'r', 'LineWidth', 1.5);
    hold on;
    plot([time(1), time(end)], [0, 0], 'k--');
    xlabel('时间 (s)');
    ylabel('摆杆角度 (°)');
    title('摆杆角度');
    legend('角度', '参考线');
    grid on;
    
    % 控制输入
    subplot(3, 2, 3);
    plot(time, control, 'g', 'LineWidth', 1.5);
    xlabel('时间 (s)');
    ylabel('控制力 (N)');
    title('控制输入');
    grid on;
    
    % 相平面图（摆杆角度 vs 角速度）
    subplot(3, 2, 4);
    plot(state(3,:), state(4,:), 'm', 'LineWidth', 1.5);
    hold on;
    plot(0, 0, 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'r');
    xlabel('摆杆角度 (rad)');
    ylabel('摆杆角速度 (rad/s)');
    title('摆杆相平面图');
    grid on;
    
    % 能量变化
    kinetic = 0.5*params.mp*(params.l*state(4,:)).^2 + 0.5*params.mc*state(2,:).^2;
    potential = params.mp*params.g*params.l*(1 - cos(state(3,:)));
    totalEnergy = kinetic + potential;
    
    subplot(3, 2, 5);
    plot(time, kinetic, 'b', time, potential, 'r', time, totalEnergy, 'k', 'LineWidth', 1.5);
    xlabel('时间 (s)');
    ylabel('能量 (J)');
    title('系统能量');
    legend('动能', '势能', '总能量');
    grid on;
    
    % 组件贡献
    subplot(3, 2, 6);
    histogram(control, 20, 'FaceColor', 'c', 'EdgeColor', 'none');
    xlabel('控制力 (N)');
    ylabel('频率');
    title('控制力分布');
    grid on;
    
    % 3D相图
    figure('Name', '3D相图', 'Position', [200, 200, 800, 600]);
    [X, Y] = meshgrid(linspace(min(state(1,:)), max(state(1,:)), 50), ...
                     linspace(min(state(3,:)), max(state(3,:)), 50));
    Z = griddata(state(1,:), state(3,:), control, X, Y, 'cubic');
    
    surf(X, Y, Z, 'EdgeColor', 'none');
    colormap jet;
    colorbar;
    xlabel('小车位置 (m)');
    ylabel('摆杆角度 (rad)');
    zlabel('控制力 (N)');
    title('控制力在状态空间的分布');
    view(45, 30);
    grid on;
end

%% 运行主函数
main();

程序功能说明

这个程序实现了一个结合四种智能控制方法的混合控制器，用于稳定倒立摆小车系统（经典的欠驱动系统）：

1. 系统建模

倒立摆小车系统的物理模型
包含小车位置、速度、摆杆角度和角速度四个状态变量
使用牛顿力学推导的系统动力学方程

2. 混合控制器组成

小波变换(WT)：对摆杆角度误差进行多尺度分析，提取时频特征
模糊逻辑(FL)：基于角度和角速度误差的模糊规则控制
神经网络(NN)：多层感知机(MLP)学习系统动态和控制策略
强化学习(RL)：Q-learning代理学习最优控制策略

3. 控制策略

小波特征提取 → 模糊逻辑决策 → 神经网络预测 → 强化学习优化
四者输出加权融合
加入PID补偿器提高稳定性
控制输入限幅处理

4. 仿真与可视化

10秒系统仿真
6种可视化图表：
1. 小车位置随时间变化
2. 摆杆角度随时间变化
3. 控制输入随时间变化
4. 摆杆相平面图
5. 系统能量变化
6. 控制力分布直方图
3D相图展示控制力在状态空间的分布

使用说明

在MATLAB中运行此代码
确保安装了以下工具箱：
- Control System Toolbox
- Fuzzy Logic Toolbox
- Deep Learning Toolbox
- Signal Processing Toolbox
程序会自动执行仿真并显示结果
可通过修改params结构体调整系统参数

参考代码基于神经网络、强化学习、模糊逻辑和小波相结合的混合方法控制欠驱动系统的Matlab代码 www.youwenfan.com/contentcsr/100566.html

技术特点

多方法融合：创新性地结合了WT、FL、NN和RL四种智能方法
模块化设计：各组件封装为独立类，便于扩展和维护
实时学习：强化学习代理在控制过程中持续学习优化
全面可视化：提供多角度的结果分析图表
物理准确性：基于精确的物理模型进行仿真

预期结果

程序运行后将显示：

小车位置快速收敛到零点附近
摆杆角度稳定在垂直位置（0°）附近
控制输入平滑且有限幅
系统能量逐渐稳定
相平面轨迹收敛到平衡点