使用 EGPO 运行 train_egpo 训练时报错 KeyError: "replay_sequence_length"

def execution_plan(workers: WorkerSet,
                   config: TrainerConfigDict) -> LocalIterator[dict]:
    """Build the replay-buffer based training pipeline.

    Rollouts are collected in "bulk_sync" mode, passed through
    ``UpdateSaverPenalty`` and stored in a local replay buffer. Concurrently,
    batches are replayed from that buffer, optionally transformed by the
    ``before_learn_on_batch`` hook, trained on with ``TrainOneStep``, used to
    refresh replay priorities (when prioritized replay is enabled) and
    finally handed to ``UpdateTargetNetwork``.

    Args:
        workers: Worker set used for sampling, training and target updates.
        config: Trainer config. Keys read here: ``prioritized_replay`` (and,
            when truthy, ``prioritized_replay_alpha`` / ``_beta`` / ``_eps``),
            ``learning_starts``, ``buffer_size``, ``train_batch_size``,
            ``multiagent.replay_mode``, ``target_network_update_freq``, and
            the optional ``replay_sequence_length`` and
            ``before_learn_on_batch``. The config is also passed on to
            ``calculate_rr_weights`` and ``StandardMetricsReporting``.

    Returns:
        The iterator produced by ``StandardMetricsReporting`` wrapping the
        combined store/replay operator.
    """
    if config.get("prioritized_replay"):
        prio_args = {
            "prioritized_replay_alpha": config["prioritized_replay_alpha"],
            "prioritized_replay_beta": config["prioritized_replay_beta"],
            "prioritized_replay_eps": config["prioritized_replay_eps"],
        }
    else:
        prio_args = {}

    # Indexing config["replay_sequence_length"] unconditionally raised
    # KeyError when the key is absent from the config, so the argument is
    # only forwarded when the config actually defines it. When it is absent
    # the replay buffer falls back to its own default.
    # NOTE(review): the original author suspected Ray 1.4.1 lacks this
    # option entirely -- confirm against the installed Ray version.
    buffer_kwargs = dict(prio_args)
    if "replay_sequence_length" in config:
        buffer_kwargs["replay_sequence_length"] = \
            config["replay_sequence_length"]

    local_replay_buffer = LocalReplayBuffer(
        num_shards=1,
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        replay_batch_size=config["train_batch_size"],
        replay_mode=config["multiagent"]["replay_mode"],
        **buffer_kwargs)

    rollouts = ParallelRollouts(workers, mode="bulk_sync")

    # Update penalty
    rollouts = rollouts.for_each(UpdateSaverPenalty(workers))

    # We execute the following steps concurrently:
    # (1) Generate rollouts and store them in our local replay buffer.
    # Calling next() on store_op drives this.
    store_op = rollouts.for_each(
        StoreToReplayBuffer(local_buffer=local_replay_buffer))

    def update_prio(item):
        """Push new TD-error based priorities to the buffer; pass info on."""
        samples, info_dict = item
        if config.get("prioritized_replay"):
            prio_dict = {}
            for policy_id, info in info_dict.items():
                # TODO(sven): This is currently structured differently for
                #  torch/tf. Clean up these results/info dicts across
                #  policies (note: fixing this in torch_policy.py will
                #  break e.g. DDPPO!).
                td_error = info.get("td_error",
                                    info[LEARNER_STATS_KEY].get("td_error"))
                prio_dict[policy_id] = (samples.policy_batches[policy_id]
                                        .data.get("batch_indexes"), td_error)
            local_replay_buffer.update_priorities(prio_dict)
        return info_dict

    # (2) Read and train on experiences from the replay buffer. Every batch
    # returned from the LocalReplay() iterator is passed to TrainOneStep to
    # take a SGD step, and then we decide whether to update the target
    # network.
    # Fall back to an identity hook when no "before_learn_on_batch" is set.
    post_fn = config.get("before_learn_on_batch") or (lambda b, *a: b)
    replay_op = Replay(local_buffer=local_replay_buffer) \
        .for_each(lambda x: post_fn(x, workers, config)) \
        .for_each(TrainOneStep(workers)) \
        .for_each(update_prio) \
        .for_each(UpdateTargetNetwork(
            workers, config["target_network_update_freq"]))

    # Alternate deterministically between (1) and (2). Only return the output
    # of (2) since training metrics are not available until (2) runs.
    train_op = Concurrently(
        [store_op, replay_op],
        mode="round_robin",
        output_indexes=[1],
        round_robin_weights=calculate_rr_weights(config))

    return StandardMetricsReporting(train_op, workers, config)
相关推荐
Python×CATIA工业智造1 小时前
Frida RPC高级应用:动态模拟执行Android so文件实战指南
开发语言·python·pycharm
onceco1 小时前
领域LLM九讲——第5讲 为什么选择OpenManus而不是QwenAgent(附LLM免费api邀请码)
人工智能·python·深度学习·语言模型·自然语言处理·自动化
狐凄2 小时前
Python实例题:基于 Python 的简单聊天机器人
开发语言·python
悦悦子a啊3 小时前
Python之--基本知识
开发语言·前端·python
笑稀了的野生俊5 小时前
在服务器中下载 HuggingFace 模型:终极指南
linux·服务器·python·bash·gpu算力
Naiva5 小时前
【小技巧】Python+PyCharm IDE 配置解释器出错,环境配置不完整或不兼容。(小智AI、MCP、聚合数据、实时新闻查询、NBA赛事查询)
ide·python·pycharm
路来了5 小时前
Python小工具之PDF合并
开发语言·windows·python
蓝婷儿5 小时前
Python 机器学习核心入门与实战进阶 Day 3 - 决策树 & 随机森林模型实战
人工智能·python·机器学习
AntBlack6 小时前
拖了五个月 ,不当韭菜体验版算是正式发布了
前端·后端·python
.30-06Springfield6 小时前
决策树(Decision tree)算法详解(ID3、C4.5、CART)
人工智能·python·算法·决策树·机器学习