egpo进行train_egpo训练时,keyvalueError:“replay_sequence_length“

def execution_plan(workers: WorkerSet,

config: TrainerConfigDict) -> LocalIterator[dict]:

if config.get("prioritized_replay"):

prio_args = {

"prioritized_replay_alpha": config["prioritized_replay_alpha"],

"prioritized_replay_beta": config["prioritized_replay_beta"],

"prioritized_replay_eps": config["prioritized_replay_eps"],

}

else:

prio_args = {}

复制代码
local_replay_buffer = LocalReplayBuffer(
    num_shards=1,
    learning_starts=config["learning_starts"],
    buffer_size=config["buffer_size"],
    replay_batch_size=config["train_batch_size"],
    replay_mode=config["multiagent"]["replay_mode"],
    #这一行需要注释掉,如果不注释掉,整个代码就跑不起来,可能是因为ray1.4.1版本没有这个参数
    # replay_sequence_length=config["replay_sequence_length"],
    **prio_args)

rollouts = ParallelRollouts(workers, mode="bulk_sync")

# Update penalty
rollouts = rollouts.for_each(UpdateSaverPenalty(workers))
# We execute the following steps concurrently:
# (1) Generate rollouts and store them in our local replay buffer. Calling
# next() on store_op drives this.
store_op = rollouts.for_each(StoreToReplayBuffer(local_buffer=local_replay_buffer))

def update_prio(item):
    samples, info_dict = item
    if config.get("prioritized_replay"):
        prio_dict = {}
        for policy_id, info in info_dict.items():
            # TODO(sven): This is currently structured differently for
            #  torch/tf. Clean up these results/info dicts across
            #  policies (note: fixing this in torch_policy.py will
            #  break e.g. DDPPO!).
            td_error = info.get("td_error",
                                info[LEARNER_STATS_KEY].get("td_error"))
            prio_dict[policy_id] = (samples.policy_batches[policy_id]
                                    .data.get("batch_indexes"), td_error)
        local_replay_buffer.update_priorities(prio_dict)
    return info_dict

# (2) Read and train on experiences from the replay buffer. Every batch
# returned from the LocalReplay() iterator is passed to TrainOneStep to
# take a SGD step, and then we decide whether to update the target network.
post_fn = config.get("before_learn_on_batch") or (lambda b, *a: b)
replay_op = Replay(local_buffer=local_replay_buffer) \
    .for_each(lambda x: post_fn(x, workers, config)) \
    .for_each(TrainOneStep(workers)) \
    .for_each(update_prio) \
    .for_each(UpdateTargetNetwork(
    workers, config["target_network_update_freq"]))

# Alternate deterministically between (1) and (2). Only return the output
# of (2) since training metrics are not available until (2) runs.
train_op = Concurrently(
    [store_op, replay_op],
    mode="round_robin",
    output_indexes=[1],
    round_robin_weights=calculate_rr_weights(config))

return StandardMetricsReporting(train_op, workers, config)
相关推荐
极客数模1 分钟前
【2026美赛赛题初步翻译F题】2026_ICM_Problem_F
大数据·c语言·python·数学建模·matlab
小鸡吃米…1 小时前
机器学习中的代价函数
人工智能·python·机器学习
Li emily2 小时前
如何通过外汇API平台快速实现实时数据接入?
开发语言·python·api·fastapi·美股
m0_561359673 小时前
掌握Python魔法方法(Magic Methods)
jvm·数据库·python
Ulyanov3 小时前
顶层设计——单脉冲雷达仿真器的灵魂蓝图
python·算法·pyside·仿真系统·单脉冲
2401_838472513 小时前
使用Python进行图像识别:CNN卷积神经网络实战
jvm·数据库·python
CoLiuRs4 小时前
语义搜索系统原理与实现
redis·python·向量·es
zhihuaba4 小时前
使用PyTorch构建你的第一个神经网络
jvm·数据库·python
u0109272714 小时前
Python Web爬虫入门:使用Requests和BeautifulSoup
jvm·数据库·python
Stream_Silver5 小时前
【Agent学习笔记3:使用Python开发简单MCP服务】
笔记·python