EGPO 运行 train_egpo 训练时报错 KeyError: "replay_sequence_length"

def execution_plan(workers: WorkerSet,
                   config: TrainerConfigDict) -> LocalIterator[dict]:
    """Build the training iterator: collect rollouts, replay, and train.

    Two operations are created and interleaved round-robin:
      (1) store_op: sample rollouts from ``workers``, apply
          ``UpdateSaverPenalty`` and store them in a local replay buffer.
      (2) replay_op: read batches from that buffer, run
          ``before_learn_on_batch`` (if configured), take one training
          step, update replay priorities and update the target network.

    Args:
        workers: The worker set used for sampling and training.
        config: The trainer config. Keys read here: ``prioritized_replay``
            (and the three ``prioritized_replay_*`` values when it is
            truthy), ``learning_starts``, ``buffer_size``,
            ``train_batch_size``, ``multiagent.replay_mode``,
            ``before_learn_on_batch`` and ``target_network_update_freq``.

    Returns:
        The result of ``StandardMetricsReporting`` wrapped around the
        combined train op.
    """
    # Only forward the prioritized-replay hyperparameters when the feature
    # is switched on; otherwise the buffer is built with its own defaults.
    if config.get("prioritized_replay"):
        prio_args = {
            "prioritized_replay_alpha": config["prioritized_replay_alpha"],
            "prioritized_replay_beta": config["prioritized_replay_beta"],
            "prioritized_replay_eps": config["prioritized_replay_eps"],
        }
    else:
        prio_args = {}

    local_replay_buffer = LocalReplayBuffer(
        num_shards=1,
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        replay_batch_size=config["train_batch_size"],
        replay_mode=config["multiagent"]["replay_mode"],
        # NOTE: the following argument must stay commented out, otherwise
        # the code fails with KeyError: "replay_sequence_length".
        # Possibly Ray 1.4.1 does not provide this setting -- TODO confirm.
        # replay_sequence_length=config["replay_sequence_length"],
        **prio_args)

    rollouts = ParallelRollouts(workers, mode="bulk_sync")

    # Update penalty
    rollouts = rollouts.for_each(UpdateSaverPenalty(workers))

    # We execute the following steps concurrently:
    # (1) Generate rollouts and store them in our local replay buffer.
    # Calling next() on store_op drives this.
    store_op = rollouts.for_each(
        StoreToReplayBuffer(local_buffer=local_replay_buffer))

    def update_prio(item):
        """Push new TD-error priorities to the buffer; return the info dict."""
        samples, info_dict = item
        if config.get("prioritized_replay"):
            prio_dict = {}
            for policy_id, info in info_dict.items():
                # TODO(sven): This is currently structured differently for
                #  torch/tf. Clean up these results/info dicts across
                #  policies (note: fixing this in torch_policy.py will
                #  break e.g. DDPPO!).
                td_error = info.get("td_error",
                                    info[LEARNER_STATS_KEY].get("td_error"))
                prio_dict[policy_id] = (samples.policy_batches[policy_id]
                                        .data.get("batch_indexes"), td_error)
            local_replay_buffer.update_priorities(prio_dict)
        return info_dict

    # (2) Read and train on experiences from the replay buffer. Every batch
    # returned from the LocalReplay() iterator is passed to TrainOneStep to
    # take a SGD step, and then we decide whether to update the target
    # network.
    post_fn = config.get("before_learn_on_batch") or (lambda b, *a: b)
    replay_op = Replay(local_buffer=local_replay_buffer) \
        .for_each(lambda x: post_fn(x, workers, config)) \
        .for_each(TrainOneStep(workers)) \
        .for_each(update_prio) \
        .for_each(UpdateTargetNetwork(
            workers, config["target_network_update_freq"]))

    # Alternate deterministically between (1) and (2). Only return the output
    # of (2) since training metrics are not available until (2) runs.
    train_op = Concurrently(
        [store_op, replay_op],
        mode="round_robin",
        output_indexes=[1],
        round_robin_weights=calculate_rr_weights(config))

    return StandardMetricsReporting(train_op, workers, config)
相关推荐
程序员三藏2 分钟前
Web自动化测试详解
自动化测试·软件测试·python·selenium·测试工具·职场和发展·测试用例
在放️28 分钟前
Python 爬虫 · 第三方代理接入与合规使用
开发语言·爬虫·python
财经资讯数据_灵砚智能1 小时前
基于全球经济类多源新闻的NLP情感分析与数据可视化(夜间-次晨)2026年6月14日
大数据·人工智能·python·ai·信息可视化·自然语言处理·灵砚智能
JOJO数据科学3 小时前
JupyterLab Electron 鸿蒙 PC 适配全记录:从 Python 原生崩溃到 node-static 本地工作台
python·electron·harmonyos
xufengzhu4 小时前
第三方 Python 库 redis-py + hiredis 的使用
开发语言·redis·python
llxxyy卢4 小时前
polar夏季赛部分题目
开发语言·python
闵孚龙4 小时前
PyTorch 系列 之 nn.Module:所有模型的骨架
人工智能·pytorch·python
AI玫瑰助手4 小时前
Python模块:from...import...导入指定内容
开发语言·python·信息可视化
小森林之主4 小时前
Python re 模块速查:从实战对比中掌握正则表达式
python·正则表达式·性能测试·re模块·编程实战
郭wes代码4 小时前
Win10 拒绝访问、长期关机自动维护与声音图标灰色故障解决记录
windows·python·开源