egpo进行train_egpo训练时,keyvalueError:“replay_sequence_length“

def execution_plan(workers: WorkerSet,

config: TrainerConfigDict) -> LocalIterator[dict]:

if config.get("prioritized_replay"):

prio_args = {

"prioritized_replay_alpha": config["prioritized_replay_alpha"],

"prioritized_replay_beta": config["prioritized_replay_beta"],

"prioritized_replay_eps": config["prioritized_replay_eps"],

}

else:

prio_args = {}

复制代码
local_replay_buffer = LocalReplayBuffer(
    num_shards=1,
    learning_starts=config["learning_starts"],
    buffer_size=config["buffer_size"],
    replay_batch_size=config["train_batch_size"],
    replay_mode=config["multiagent"]["replay_mode"],
    #这一行需要注释掉,如果不注释掉,整个代码就跑不起来,可能是因为ray1.4.1版本没有这个参数
    # replay_sequence_length=config["replay_sequence_length"],
    **prio_args)

rollouts = ParallelRollouts(workers, mode="bulk_sync")

# Update penalty
rollouts = rollouts.for_each(UpdateSaverPenalty(workers))
# We execute the following steps concurrently:
# (1) Generate rollouts and store them in our local replay buffer. Calling
# next() on store_op drives this.
store_op = rollouts.for_each(StoreToReplayBuffer(local_buffer=local_replay_buffer))

def update_prio(item):
    samples, info_dict = item
    if config.get("prioritized_replay"):
        prio_dict = {}
        for policy_id, info in info_dict.items():
            # TODO(sven): This is currently structured differently for
            #  torch/tf. Clean up these results/info dicts across
            #  policies (note: fixing this in torch_policy.py will
            #  break e.g. DDPPO!).
            td_error = info.get("td_error",
                                info[LEARNER_STATS_KEY].get("td_error"))
            prio_dict[policy_id] = (samples.policy_batches[policy_id]
                                    .data.get("batch_indexes"), td_error)
        local_replay_buffer.update_priorities(prio_dict)
    return info_dict

# (2) Read and train on experiences from the replay buffer. Every batch
# returned from the LocalReplay() iterator is passed to TrainOneStep to
# take a SGD step, and then we decide whether to update the target network.
post_fn = config.get("before_learn_on_batch") or (lambda b, *a: b)
replay_op = Replay(local_buffer=local_replay_buffer) \
    .for_each(lambda x: post_fn(x, workers, config)) \
    .for_each(TrainOneStep(workers)) \
    .for_each(update_prio) \
    .for_each(UpdateTargetNetwork(
    workers, config["target_network_update_freq"]))

# Alternate deterministically between (1) and (2). Only return the output
# of (2) since training metrics are not available until (2) runs.
train_op = Concurrently(
    [store_op, replay_op],
    mode="round_robin",
    output_indexes=[1],
    round_robin_weights=calculate_rr_weights(config))

return StandardMetricsReporting(train_op, workers, config)
相关推荐
大江东去浪淘尽千古风流人物9 分钟前
【VLN】VLN(Vision-and-Language Navigation视觉语言导航)算法本质,范式难点及解决方向(1)
人工智能·python·算法
Swift社区10 分钟前
Gunicorn 与 Uvicorn 部署 Python 后端详解
开发语言·python·gunicorn
Coinsheep14 分钟前
SSTI-flask靶场搭建及通关
python·flask·ssti
IT实战课堂小元酱15 分钟前
大数据深度学习|计算机毕设项目|计算机毕设答辩|flask露天矿爆破效果分析系统开发及应用
人工智能·python·flask
码农阿豪16 分钟前
Flask应用上下文问题解析与解决方案:从错误日志到完美修复
后端·python·flask
wqq631085518 分钟前
Python基于Vue的实验室管理系统 django flask pycharm
vue.js·python·django
Q_Q196328847520 分钟前
python大学生爱心校园互助代购网站_nyvlx_django Flask vue pycharm项目
python·django·flask
码农阿豪22 分钟前
Python Flask应用中文件处理与异常处理的实践指南
开发语言·python·flask
xcLeigh23 分钟前
Python 项目实战:用 Flask 实现 MySQL 数据库增删改查 API
数据库·python·mysql·flask·教程·python3
威迪斯特24 分钟前
Flask:轻量级Web框架的技术本质与工程实践
前端·数据库·后端·python·flask·开发框架·核心架构