前言
-
Unitree RL GYM是一个开源的基于 Unitree 机器人强化学习(Reinforcement Learning, RL)控制示例项目,用于训练、测试和部署四足机器人控制策略。该仓库支持多种 Unitree 机器人型号,包括Go2、H1、H1_2 和 G1。仓库地址
-
本系列将着手解析整个仓库的核心代码与算法实现和训练教程。此系列默认读者拥有一定的强化学习基础和代码基础,故在部分原理和基础代码逻辑不做解释,对强化学习基础感兴趣的读者可以阅读我的入门系列:
- 第一期: 【浅显易懂理解强化学习】(一)Q-Learning原来是查表法-CSDN博客
- 第二期: 【浅显易懂理解强化学习】(二):Sarsa,保守派的胜利-CSDN博客
- 第三期:【浅显易懂理解强化学习】(三):DQN:当查表法装上大脑-CSDN博客
- 第四期:【浅显易懂理解强化学习】(四):Policy Gradients玩转策略采样-CSDN博客
- 第五期:【浅显易懂理解强化学习】(五):Actor-Critic与A3C,多线程的完全胜利-CSDN博客
- 第六期:【浅显易懂理解强化学习】(六):DDPG与TD3集百家之长-CSDN博客
- 第七期:【浅显易懂理解强化学习】(七):PPO,策略更新的安全阀-CSDN博客
-
阅读本系列的前置知识:
python语法,明白面向对象的封装pytorch基础使用- 神经网络基础知识
- 强化学习基础知识,至少了解
Policy Gradient、Actor-Critic和PPO
-
本系列:
-
我们在第四期讲解
LeggedRobotCfg的地形参数terrain提到过一嘴,目前官方实现的代码只包含了'plane'的平地 -
本期我们就来解读一下
legged_gym/envs/base/legged_robot.py有关地形生成的代码,以及补上复杂地形实现的逻辑,最终实现的效果如图:
1 训练环境地形:平地生成逻辑
1-1 代码一栏
- 我们先来整体看看
legged_gym/envs/base/legged_robot.py中有关地形生成的全部代码
python
from legged_gym import LEGGED_GYM_ROOT_DIR, envs
import time
from warnings import WarningMessage
import numpy as np
import os
from isaacgym.torch_utils import *
from isaacgym import gymtorch, gymapi, gymutil
import torch
from torch import Tensor
from typing import Tuple, Dict
from legged_gym import LEGGED_GYM_ROOT_DIR
from legged_gym.envs.base.base_task import BaseTask
from legged_gym.utils.math import wrap_to_pi
from legged_gym.utils.isaacgym_utils import get_euler_xyz as get_euler_xyz_in_tensor
from legged_gym.utils.helpers import class_to_dict
from .legged_robot_config import LeggedRobotCfg
class LeggedRobot(BaseTask):
def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless):
""" Parses the provided config file,
calls create_sim() (which creates, simulation and environments),
initilizes pytorch buffers used during training
Args:
cfg (Dict): Environment config file
sim_params (gymapi.SimParams): simulation parameters
physics_engine (gymapi.SimType): gymapi.SIM_PHYSX (must be PhysX)
device_type (string): 'cuda' or 'cpu'
device_id (int): 0, 1, ...
headless (bool): Run without rendering if True
"""
self.cfg = cfg
self.sim_params = sim_params
self.height_samples = None
self.debug_viz = False
self.init_done = False
self._parse_cfg(self.cfg)
super().__init__(self.cfg, sim_params, physics_engine, sim_device, headless)
if not self.headless:
self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat)
self._init_buffers()
self._prepare_reward_function()
self.init_done = True
def create_sim(self):
""" Creates simulation, terrain and evironments
"""
self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
self.sim = self.gym.create_sim(self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
self._create_ground_plane()
self._create_envs()
def _create_ground_plane(self):
""" Adds a ground plane to the simulation, sets friction and restitution based on the cfg.
"""
plane_params = gymapi.PlaneParams()
plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
plane_params.static_friction = self.cfg.terrain.static_friction
plane_params.dynamic_friction = self.cfg.terrain.dynamic_friction
plane_params.restitution = self.cfg.terrain.restitution
self.gym.add_ground(self.sim, plane_params)
def _create_envs(self):
""" Creates environments:
1. loads the robot URDF/MJCF asset,
2. For each environment
2.1 creates the environment,
2.2 calls DOF and Rigid shape properties callbacks,
2.3 create actor with these properties and add them to the env
3. Store indices of different bodies of the robot
"""
asset_path = self.cfg.asset.file.format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
asset_root = os.path.dirname(asset_path)
asset_file = os.path.basename(asset_path)
asset_options = gymapi.AssetOptions()
asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode
asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints
asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule
asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments
asset_options.fix_base_link = self.cfg.asset.fix_base_link
asset_options.density = self.cfg.asset.density
asset_options.angular_damping = self.cfg.asset.angular_damping
asset_options.linear_damping = self.cfg.asset.linear_damping
asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity
asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity
asset_options.armature = self.cfg.asset.armature
asset_options.thickness = self.cfg.asset.thickness
asset_options.disable_gravity = self.cfg.asset.disable_gravity
robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
self.num_dof = self.gym.get_asset_dof_count(robot_asset)
self.num_bodies = self.gym.get_asset_rigid_body_count(robot_asset)
dof_props_asset = self.gym.get_asset_dof_properties(robot_asset)
rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(robot_asset)
# save body names from the asset
body_names = self.gym.get_asset_rigid_body_names(robot_asset)
self.dof_names = self.gym.get_asset_dof_names(robot_asset)
self.num_bodies = len(body_names)
self.num_dofs = len(self.dof_names)
feet_names = [s for s in body_names if self.cfg.asset.foot_name in s]
penalized_contact_names = []
for name in self.cfg.asset.penalize_contacts_on:
penalized_contact_names.extend([s for s in body_names if name in s])
termination_contact_names = []
for name in self.cfg.asset.terminate_after_contacts_on:
termination_contact_names.extend([s for s in body_names if name in s])
base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel
self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False)
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
self._get_env_origins()
env_lower = gymapi.Vec3(0., 0., 0.)
env_upper = gymapi.Vec3(0., 0., 0.)
self.actor_handles = []
self.envs = []
for i in range(self.num_envs):
# create env instance
env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs)))
pos = self.env_origins[i].clone()
pos[:2] += torch_rand_float(-1., 1., (2,1), device=self.device).squeeze(1)
start_pose.p = gymapi.Vec3(*pos)
rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i)
self.gym.set_asset_rigid_shape_properties(robot_asset, rigid_shape_props)
actor_handle = self.gym.create_actor(env_handle, robot_asset, start_pose, self.cfg.asset.name, i, self.cfg.asset.self_collisions, 0)
dof_props = self._process_dof_props(dof_props_asset, i)
self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props)
body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle)
body_props = self._process_rigid_body_props(body_props, i)
self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=True)
self.envs.append(env_handle)
self.actor_handles.append(actor_handle)
self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(feet_names)):
self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], feet_names[i])
self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(penalized_contact_names)):
self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], penalized_contact_names[i])
self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(termination_contact_names)):
self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], termination_contact_names[i])
def _get_env_origins(self):
""" Sets environment origins. On rough terrain the origins are defined by the terrain platforms.
Otherwise create a grid.
"""
self.custom_origins = False
self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False)
# create a grid of robots
num_cols = np.floor(np.sqrt(self.num_envs))
num_rows = np.ceil(self.num_envs / num_cols)
xx, yy = torch.meshgrid(torch.arange(num_rows), torch.arange(num_cols))
spacing = self.cfg.env.env_spacing
self.env_origins[:, 0] = spacing * xx.flatten()[:self.num_envs]
self.env_origins[:, 1] = spacing * yy.flatten()[:self.num_envs]
self.env_origins[:, 2] = 0.
- 其中
create_sim()函数在LeggedRobot类初始化时候调用父类BaseTask的构造函数时被调用
1-2 create_sim()函数
python
def create_sim(self):
""" Creates simulation, terrain and evironments
"""
self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
self.sim = self.gym.create_sim(self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
self._create_ground_plane()
self._create_envs()
self.up_axis_idx = 2,表示Z 轴是向上,规定地面朝向和重力方向- 紧接着这里创建了
Isaac Gym Sim实例,然后调用了两个函数
1-3 _create_ground_plane()
python
def _create_ground_plane(self):
""" Adds a ground plane to the simulation, sets friction and restitution based on the cfg.
"""
plane_params = gymapi.PlaneParams()
plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
plane_params.static_friction = self.cfg.terrain.static_friction
plane_params.dynamic_friction = self.cfg.terrain.dynamic_friction
plane_params.restitution = self.cfg.terrain.restitution
self.gym.add_ground(self.sim, plane_params)
- 这个函数顾名思义就是给仿真世界加一个"地板",并定义摩擦 + 弹性
1-4 _create_envs()核心
python
def _create_envs(self):
""" Creates environments:
1. loads the robot URDF/MJCF asset,
2. For each environment
2.1 creates the environment,
2.2 calls DOF and Rigid shape properties callbacks,
2.3 create actor with these properties and add them to the env
3. Store indices of different bodies of the robot
"""
asset_path = self.cfg.asset.file.format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
asset_root = os.path.dirname(asset_path)
asset_file = os.path.basename(asset_path)
asset_options = gymapi.AssetOptions()
asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode
asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints
asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule
asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments
asset_options.fix_base_link = self.cfg.asset.fix_base_link
asset_options.density = self.cfg.asset.density
asset_options.angular_damping = self.cfg.asset.angular_damping
asset_options.linear_damping = self.cfg.asset.linear_damping
asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity
asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity
asset_options.armature = self.cfg.asset.armature
asset_options.thickness = self.cfg.asset.thickness
asset_options.disable_gravity = self.cfg.asset.disable_gravity
robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
self.num_dof = self.gym.get_asset_dof_count(robot_asset)
self.num_bodies = self.gym.get_asset_rigid_body_count(robot_asset)
dof_props_asset = self.gym.get_asset_dof_properties(robot_asset)
rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(robot_asset)
# save body names from the asset
body_names = self.gym.get_asset_rigid_body_names(robot_asset)
self.dof_names = self.gym.get_asset_dof_names(robot_asset)
self.num_bodies = len(body_names)
self.num_dofs = len(self.dof_names)
feet_names = [s for s in body_names if self.cfg.asset.foot_name in s]
penalized_contact_names = []
for name in self.cfg.asset.penalize_contacts_on:
penalized_contact_names.extend([s for s in body_names if name in s])
termination_contact_names = []
for name in self.cfg.asset.terminate_after_contacts_on:
termination_contact_names.extend([s for s in body_names if name in s])
base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel
self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False)
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
self._get_env_origins()
env_lower = gymapi.Vec3(0., 0., 0.)
env_upper = gymapi.Vec3(0., 0., 0.)
self.actor_handles = []
self.envs = []
for i in range(self.num_envs):
# create env instance
env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs)))
pos = self.env_origins[i].clone()
pos[:2] += torch_rand_float(-1., 1., (2,1), device=self.device).squeeze(1)
start_pose.p = gymapi.Vec3(*pos)
rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i)
self.gym.set_asset_rigid_shape_properties(robot_asset, rigid_shape_props)
actor_handle = self.gym.create_actor(env_handle, robot_asset, start_pose, self.cfg.asset.name, i, self.cfg.asset.self_collisions, 0)
dof_props = self._process_dof_props(dof_props_asset, i)
self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props)
body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle)
body_props = self._process_rigid_body_props(body_props, i)
self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=True)
self.envs.append(env_handle)
self.actor_handles.append(actor_handle)
self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(feet_names)):
self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], feet_names[i])
self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(penalized_contact_names)):
self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], penalized_contact_names[i])
self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(termination_contact_names)):
self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], termination_contact_names[i])
- 这个函数的作用:批量创建 N 个机器人环境 + 绑定物理属性 + 建立所有索引(脚、碰撞、终止)
- 下面我们一部分一部分看
1-4-1 配置机器人物理属性
python
asset_options.fix_base_link
asset_options.disable_gravity
asset_options.density
...
- 这里一开始配置机器人属性,这些属性都定义在
legged_gym/envs/base/legged_robot_config.py的asset类中
1-4-2 加载机器人模型并读取读取机器人结构信息
python
robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
self.num_dof = self.gym.get_asset_dof_count(robot_asset)
self.num_bodies = self.gym.get_asset_rigid_body_count(robot_asset)
dof_props_asset = self.gym.get_asset_dof_properties(robot_asset)
rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(robot_asset)
# save body names from the asset
body_names = self.gym.get_asset_rigid_body_names(robot_asset)
self.dof_names = self.gym.get_asset_dof_names(robot_asset)
self.num_bodies = len(body_names)
self.num_dofs = len(self.dof_names)
feet_names = [s for s in body_names if self.cfg.asset.foot_name in s]
penalized_contact_names = []
for name in self.cfg.asset.penalize_contacts_on:
penalized_contact_names.extend([s for s in body_names if name in s])
termination_contact_names = []
for name in self.cfg.asset.terminate_after_contacts_on:
termination_contact_names.extend([s for s in body_names if name in s])
- 这里先加载了机器人模型,并读取机器人结构信息
- 同时这里初始化了两个用于计算奖励的列表
penalized_contact_names,termination_contact_names。
1-4-3 初始化机器人位置(核心)
python
base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel
self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False)
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
self._get_env_origins()
- 这里非常关键,定义了机器人出生时候的位姿和速度
- 其中
_get_env_origins定义如下:
python
def _get_env_origins(self):
""" Sets environment origins. On rough terrain the origins are defined by the terrain platforms.
Otherwise create a grid.
"""
self.custom_origins = False
self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False)
# create a grid of robots
num_cols = np.floor(np.sqrt(self.num_envs))
num_rows = np.ceil(self.num_envs / num_cols)
xx, yy = torch.meshgrid(torch.arange(num_rows), torch.arange(num_cols))
spacing = self.cfg.env.env_spacing
self.env_origins[:, 0] = spacing * xx.flatten()[:self.num_envs]
self.env_origins[:, 1] = spacing * yy.flatten()[:self.num_envs]
self.env_origins[:, 2] = 0.
- 他的作用是给每个 env 分配一个"出生坐标",让所有机器人排成网格。
- 这个函数一会我们生成复杂地形的时候需要进行修改。
1-4-4 根据配置参数初始化环境
python
for i in range(self.num_envs):
# create env instance
env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs)))
pos = self.env_origins[i].clone()
pos[:2] += torch_rand_float(-1., 1., (2,1), device=self.device).squeeze(1)
start_pose.p = gymapi.Vec3(*pos)
rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i)
self.gym.set_asset_rigid_shape_properties(robot_asset, rigid_shape_props)
actor_handle = self.gym.create_actor(env_handle, robot_asset, start_pose, self.cfg.asset.name, i, self.cfg.asset.self_collisions, 0)
dof_props = self._process_dof_props(dof_props_asset, i)
self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props)
body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle)
body_props = self._process_rigid_body_props(body_props, i)
self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=True)
self.envs.append(env_handle)
self.actor_handles.append(actor_handle)
self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(feet_names)):
self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], feet_names[i])
self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(penalized_contact_names)):
self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], penalized_contact_names[i])
self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(termination_contact_names)):
self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], termination_contact_names[i])
- 然后这里就是根据前面配置的参数进行初始化。
1-5 小结
- 不难看出,如果我们需要自定义复杂地形,我们只需要修改:
create_sim():添加新地形_get_env_origins():修改机器人初始化函数
2 Terrain类
2-1 代码一栏
- 在正式修改函数前,我们需要看一下这个类,这是一个高度场生成类,用来生成机器人训练环境中的复杂地形(台阶、坡道、坑、石头等),并最终生成 Isaac Gym 可用的
height_samples或三角网格vertices/triangles。 - 位置:
legged_gym/utils/terrain.py
python
import numpy as np
from numpy.random import choice
from scipy import interpolate
from isaacgym import terrain_utils
from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg
class Terrain:
def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None:
self.cfg = cfg
self.num_robots = num_robots
self.type = cfg.mesh_type
if self.type in ["none", 'plane']:
return
self.env_length = cfg.terrain_length
self.env_width = cfg.terrain_width
self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))]
self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols
self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3))
self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale)
self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale)
self.border = int(cfg.border_size/self.cfg.horizontal_scale)
self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border
self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border
self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.int16)
if cfg.curriculum:
self.curiculum()
elif cfg.selected:
self.selected_terrain()
else:
self.randomized_terrain()
self.heightsamples = self.height_field_raw
if self.type=="trimesh":
self.vertices, self.triangles = terrain_utils.convert_heightfield_to_trimesh( self.height_field_raw,
self.cfg.horizontal_scale,
self.cfg.vertical_scale,
self.cfg.slope_treshold)
def randomized_terrain(self):
for k in range(self.cfg.num_sub_terrains):
# Env coordinates in the world
(i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols))
choice = np.random.uniform(0, 1)
difficulty = np.random.choice([0.5, 0.75, 0.9])
terrain = self.make_terrain(choice, difficulty)
self.add_terrain_to_map(terrain, i, j)
def curiculum(self):
for j in range(self.cfg.num_cols):
for i in range(self.cfg.num_rows):
difficulty = i / self.cfg.num_rows
choice = j / self.cfg.num_cols + 0.001
terrain = self.make_terrain(choice, difficulty)
self.add_terrain_to_map(terrain, i, j)
def selected_terrain(self):
terrain_type = self.cfg.terrain_kwargs.pop('type')
for k in range(self.cfg.num_sub_terrains):
# Env coordinates in the world
(i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols))
terrain = terrain_utils.SubTerrain("terrain",
width=self.width_per_env_pixels,
length=self.width_per_env_pixels,
vertical_scale=self.vertical_scale,
horizontal_scale=self.horizontal_scale)
eval(terrain_type)(terrain, **self.cfg.terrain_kwargs.terrain_kwargs)
self.add_terrain_to_map(terrain, i, j)
def make_terrain(self, choice, difficulty):
terrain = terrain_utils.SubTerrain( "terrain",
width=self.width_per_env_pixels,
length=self.width_per_env_pixels,
vertical_scale=self.cfg.vertical_scale,
horizontal_scale=self.cfg.horizontal_scale)
slope = difficulty * 0.4
step_height = 0.05 + 0.18 * difficulty
discrete_obstacles_height = 0.05 + difficulty * 0.2
stepping_stones_size = 1.5 * (1.05 - difficulty)
stone_distance = 0.05 if difficulty==0 else 0.1
gap_size = 1. * difficulty
pit_depth = 1. * difficulty
if choice < self.proportions[0]:
if choice < self.proportions[0]/ 2:
slope *= -1
terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.)
elif choice < self.proportions[1]:
terrain_utils.pyramid_sloped_terrain(terrain, slope=slope, platform_size=3.)
terrain_utils.random_uniform_terrain(terrain, min_height=-0.05, max_height=0.05, step=0.005, downsampled_scale=0.2)
elif choice < self.proportions[3]:
if choice<self.proportions[2]:
step_height *= -1
terrain_utils.pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.)
elif choice < self.proportions[4]:
num_rectangles = 20
rectangle_min_size = 1.
rectangle_max_size = 2.
terrain_utils.discrete_obstacles_terrain(terrain, discrete_obstacles_height, rectangle_min_size, rectangle_max_size, num_rectangles, platform_size=3.)
elif choice < self.proportions[5]:
terrain_utils.stepping_stones_terrain(terrain, stone_size=stepping_stones_size, stone_distance=stone_distance, max_height=0., platform_size=4.)
elif choice < self.proportions[6]:
gap_terrain(terrain, gap_size=gap_size, platform_size=3.)
else:
pit_terrain(terrain, depth=pit_depth, platform_size=4.)
return terrain
def add_terrain_to_map(self, terrain, row, col):
i = row
j = col
# map coordinate system
start_x = self.border + i * self.length_per_env_pixels
end_x = self.border + (i + 1) * self.length_per_env_pixels
start_y = self.border + j * self.width_per_env_pixels
end_y = self.border + (j + 1) * self.width_per_env_pixels
self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw
env_origin_x = (i + 0.5) * self.env_length
env_origin_y = (j + 0.5) * self.env_width
x1 = int((self.env_length/2. - 1) / terrain.horizontal_scale)
x2 = int((self.env_length/2. + 1) / terrain.horizontal_scale)
y1 = int((self.env_width/2. - 1) / terrain.horizontal_scale)
y2 = int((self.env_width/2. + 1) / terrain.horizontal_scale)
env_origin_z = np.max(terrain.height_field_raw[x1:x2, y1:y2])*terrain.vertical_scale
self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z]
def gap_terrain(terrain, gap_size, platform_size=1.):
gap_size = int(gap_size / terrain.horizontal_scale)
platform_size = int(platform_size / terrain.horizontal_scale)
center_x = terrain.length // 2
center_y = terrain.width // 2
x1 = (terrain.length - platform_size) // 2
x2 = x1 + gap_size
y1 = (terrain.width - platform_size) // 2
y2 = y1 + gap_size
terrain.height_field_raw[center_x-x2 : center_x + x2, center_y-y2 : center_y + y2] = -1000
terrain.height_field_raw[center_x-x1 : center_x + x1, center_y-y1 : center_y + y1] = 0
def pit_terrain(terrain, depth, platform_size=1.):
depth = int(depth / terrain.vertical_scale)
platform_size = int(platform_size / terrain.horizontal_scale / 2)
x1 = terrain.length // 2 - platform_size
x2 = terrain.length // 2 + platform_size
y1 = terrain.width // 2 - platform_size
y2 = terrain.width // 2 + platform_size
terrain.height_field_raw[x1:x2, y1:y2] = -depth
2-2 初始化函数
python
def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None:
self.cfg = cfg
self.num_robots = num_robots
self.type = cfg.mesh_type
if self.type in ["none", 'plane']:
return
self.env_length = cfg.terrain_length
self.env_width = cfg.terrain_width
self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))]
self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols
self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3))
self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale)
self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale)
self.border = int(cfg.border_size/self.cfg.horizontal_scale)
self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border
self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border
self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.int16)
if cfg.curriculum:
self.curiculum()
elif cfg.selected:
self.selected_terrain()
else:
self.randomized_terrain()
self.heightsamples = self.height_field_raw
if self.type=="trimesh":
self.vertices, self.triangles =
2-1-1 函数输入与初始化
python
def __init__(self, cfg: LeggedRobotCfg.terrain, num_robots) -> None:
self.cfg = cfg
self.num_robots = num_robots
self.type = cfg.mesh_type
if self.type in ["none", 'plane']:
return
cfg:地形配置(高度、宽度、水平/垂直比例、台阶/坡比例等)num_robots:用于训练的机器人数量
2-1-2 计算整个训练环境大小
python
self.env_length = cfg.terrain_length
self.env_width = cfg.terrain_width
self.proportions = [np.sum(cfg.terrain_proportions[:i+1]) for i in range(len(cfg.terrain_proportions))]
self.proportions用于决定生成地形的类型(坡、台阶、坑等)的概率累积和
2-1-3 地形初始化计算
python
# Patch 与总格子数计算
self.cfg.num_sub_terrains = cfg.num_rows * cfg.num_cols # 总 patch 数
self.env_origins = np.zeros((cfg.num_rows, cfg.num_cols, 3))# 每个 patch 在世界坐标系的中心位置
# 每个 patch 的分辨率(以格子数计)
# 每个 patch 的宽度/长度的格子数 = 实际尺寸 ÷ 水平比例(horizontal_scale)
self.width_per_env_pixels = int(self.env_width / cfg.horizontal_scale)
self.length_per_env_pixels = int(self.env_length / cfg.horizontal_scale)
# 边界格子
# 为 heightfield 增加边界格子,避免机器人掉出训练区
self.border = int(cfg.border_size / self.cfg.horizontal_scale)
# 总格子数(最终 heightfield 的尺寸)
self.tot_cols = int(cfg.num_cols * self.width_per_env_pixels) + 2 * self.border
self.tot_rows = int(cfg.num_rows * self.length_per_env_pixels) + 2 * self.border
# 高度场数组初始化
# 存放整个训练环境的高度值(整数,单位是格子高度)
self.height_field_raw = np.zeros((self.tot_rows, self.tot_cols), dtype=np.int16)
2-1-4 地形生成策略
python
if cfg.curriculum:
self.curiculum()
elif cfg.selected:
self.selected_terrain()
else:
self.randomized_terrain()
curriculum→ 按列/行逐步增加难度selected→ 用户指定特定地形类型randomized→ 随机生成
2-2 curiculum()
python
def curiculum(self):
for j in range(self.cfg.num_cols):
for i in range(self.cfg.num_rows):
difficulty = i / self.cfg.num_rows
choice = j / self.cfg.num_cols + 0.001
terrain = self.make_terrain(choice, difficulty)
self.add_terrain_to_map(terrain, i, j)
- 按列/行顺序生成地形
- 难度随行数递增
- choice 决定地形类型
- 用于训练逐渐增加难度
2-3 selected_terrain()
python
def selected_terrain(self):
terrain_type = self.cfg.terrain_kwargs.pop('type')
for k in range(self.cfg.num_sub_terrains):
# Env coordinates in the world
(i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols))
terrain = terrain_utils.SubTerrain("terrain",
width=self.width_per_env_pixels,
length=self.width_per_env_pixels,
vertical_scale=self.vertical_scale,
horizontal_scale=self.horizontal_scale)
eval(terrain_type)(terrain, **self.cfg.terrain_kwargs.terrain_kwargs)
self.add_terrain_to_map(terrain, i, j)
- 指定地形类型和参数,生成固定 patch
2-4 randomized_terrain()
python
def randomized_terrain(self):
for k in range(self.cfg.num_sub_terrains):
# Env coordinates in the world
(i, j) = np.unravel_index(k, (self.cfg.num_rows, self.cfg.num_cols))
choice = np.random.uniform(0, 1)
difficulty = np.random.choice([0.5, 0.75, 0.9])
terrain = self.make_terrain(choice, difficulty)
self.add_terrain_to_map(terrain, i, j)
- 遍历每个 patch(按行列展开)
- 随机选择地形类型和难度
- 调用
make_terrain()生成单个 patch - 调用
add_terrain_to_map()拼接到总 heightfield
3 自定义复杂地形
3-1 地形生成代码一栏
3-1-1 自定义地形
- 我们在
legged_gym/envs/base/legged_robot.py添加如下函数
python
def _create_heightfield(self):
""" Adds a heightfield terrain to the simulation, sets parameters based on the cfg.
"""
hf_params = gymapi.HeightFieldParams()
hf_params.column_scale = self.terrain.cfg.horizontal_scale
hf_params.row_scale = self.terrain.cfg.horizontal_scale
hf_params.vertical_scale = self.terrain.cfg.vertical_scale
hf_params.nbRows = self.terrain.tot_rows
hf_params.nbColumns = self.terrain.tot_cols
hf_params.transform.p.x = -self.terrain.cfg.border_size
hf_params.transform.p.y = -self.terrain.cfg.border_size
hf_params.transform.p.z = 0.0
hf_params.static_friction = self.cfg.terrain.static_friction
hf_params.dynamic_friction = self.cfg.terrain.dynamic_friction
hf_params.restitution = self.cfg.terrain.restitution
self.gym.add_heightfield(self.sim, self.terrain.heightsamples, hf_params)
self.height_samples = torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows, self.terrain.tot_cols).to(self.device)
def _create_trimesh(self):
""" Adds a triangle mesh terrain to the simulation, sets parameters based on the cfg.
"""
tm_params = gymapi.TriangleMeshParams()
tm_params.nb_vertices = self.terrain.vertices.shape[0]
tm_params.nb_triangles = self.terrain.triangles.shape[0]
tm_params.transform.p.x = -self.terrain.cfg.border_size
tm_params.transform.p.y = -self.terrain.cfg.border_size
tm_params.transform.p.z = 0.0
tm_params.static_friction = self.cfg.terrain.static_friction
tm_params.dynamic_friction = self.cfg.terrain.dynamic_friction
tm_params.restitution = self.cfg.terrain.restitution
self.gym.add_triangle_mesh(self.sim, self.terrain.vertices.flatten(order='C'), self.terrain.triangles.flatten(order='C'), tm_params)
self.height_samples = torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows, self.terrain.tot_cols).to(self.device)
-
注意
nbRows和nbColumns要和tot_rows以及tot_cols对上,不然会生成这样的地:
-
正常的应该是这样的

3-1-2 修改生成函数
- 同时我们修改
create_sim函数:
python
def create_sim(self):
""" Creates simulation, terrain and evironments
"""
# self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
# self.sim = self.gym.create_sim(self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
# self._create_ground_plane()
# self._create_envs()
self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
self.sim = self.gym.create_sim(self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
mesh_type = self.cfg.terrain.mesh_type
if mesh_type in ['heightfield', 'trimesh']:
self.terrain = Terrain(self.cfg.terrain, self.num_envs)
if mesh_type in ['heightfield', 'trimesh']:
self.terrain = Terrain(self.cfg.terrain, self.num_envs)
if mesh_type=='plane':
self._create_ground_plane()
elif mesh_type=='heightfield':
self._create_heightfield()
elif mesh_type=='trimesh':
self._create_trimesh()
elif mesh_type is not None:
raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]")
self._create_envs()
- 注意我们需要再这里添加头文集
python
from legged_gym.utils.terrain import Terrain
3-1-3 修改配置项
- 同时我们需要修改
legged_gym/envs/base/legged_robot_config.py
python
class LeggedRobotCfg(BaseConfig):
class terrain:
mesh_type = 'heightfield' # "heightfield" # none, plane, heightfield or trimesh
3-1-4 测试
-
修改好代码后,我们运行
train.py,可以看到新的地形已经被生成,但是机器人生产并没有在地形中心
-
那么接下来我们需要修改
_get_env_origins()
3-2 修改机器人初始化位姿
legged_gym/envs/base/legged_robot.py- 这里我们直接根据地形数据,吧机器人初始化在地形中心即可
- 注:这里的x,y坐标需要翻转(
python
def _get_env_origins(self):
self.custom_origins = False
self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False)
if hasattr(self, 'terrain') and self.terrain.type in ['heightfield', 'trimesh']:
env_count = 0
num_cols = self.terrain.cfg.num_cols
num_rows = self.terrain.cfg.num_rows
assert self.num_envs == num_rows * num_cols, \
f"num_envs ({self.num_envs}) must equal num_rows*num_cols ({num_rows*num_cols}) for terrain placement"
for i in range(num_rows):
for j in range(num_cols):
if env_count >= self.num_envs:
break
x = self.terrain.env_origins[i, j, 1]
y = self.terrain.env_origins[i, j, 0]
z = self.terrain.env_origins[i, j, 2]
self.env_origins[env_count] = torch.tensor([x, y, z], device=self.device)
env_count += 1
else:
num_cols = int(np.floor(np.sqrt(self.num_envs)))
num_rows = int(np.ceil(self.num_envs / num_cols))
xx, yy = torch.meshgrid(torch.arange(num_rows), torch.arange(num_cols), indexing='ij') # 行优先
spacing = self.cfg.env.env_spacing
self.env_origins[:, 0] = yy.flatten()[:self.num_envs] * spacing # x
self.env_origins[:, 1] = xx.flatten()[:self.num_envs] * spacing # y
self.env_origins[:, 2] = 0.0
- 值得一提的是我们为地形数据加入了断言限制
python
assert self.num_envs == num_rows * num_cols, \
f"num_envs ({self.num_envs}) must equal num_rows*num_cols ({num_rows*num_cols}) for terrain placement"
- 这要确保你在配置环境参数和地形参数时候需要保持数量一致:
legged_gym/envs/base/legged_robot_config.py
python
class LeggedRobotCfg(BaseConfig):
class env:
num_envs = 100
class terrain:
num_rows= 10 # number of terrain rows (levels)
num_cols = 10 # number of terrain cols (types)
- 你需要保证
num_rows*num_cols=num_envs - 当然你在使用
play.py的时候请务必也设置这样
3-3 测试
-
修改好代码后再次运行
train.py
-
可以看到每个机器狗都在对应的环境内进行训练,且每次
reset都能回到原地
小结
- 本期我们分析了
LeggedRobot类的环境创建逻辑,并且修改代码实现了复杂地形的生成以及对应地形机器人的初始化。 - 下一期我们将开始修改奖励函数来适应复杂地形的训练(已经拖了两期实在抱歉,前置的工作需要做完先)
- 如有错误,欢迎指出!感谢观看
参考
- 本文部分代码参考修改环境训练不生效 #66(注:里头的代码有误,如需要达成本文效果请务必跟着本文走)
