Fix bug when resetting envs at different times
This commit is contained in:
parent
22e9f8de7b
commit
43e1b2ab88
8
tools.py
8
tools.py
@@ -150,15 +150,15 @@ def simulate(
|
|||||||
indices = [index for index, d in enumerate(done) if d]
|
indices = [index for index, d in enumerate(done) if d]
|
||||||
results = [envs[i].reset() for i in indices]
|
results = [envs[i].reset() for i in indices]
|
||||||
results = [r() for r in results]
|
results = [r() for r in results]
|
||||||
for i in indices:
|
for index, result in zip(indices, results):
|
||||||
t = results[i].copy()
|
t = result.copy()
|
||||||
t = {k: convert(v) for k, v in t.items()}
|
t = {k: convert(v) for k, v in t.items()}
|
||||||
# action will be added to transition in add_to_cache
|
# action will be added to transition in add_to_cache
|
||||||
t["reward"] = 0.0
|
t["reward"] = 0.0
|
||||||
t["discount"] = 1.0
|
t["discount"] = 1.0
|
||||||
# initial state should be added to cache
|
# initial state should be added to cache
|
||||||
add_to_cache(cache, envs[i].id, t)
|
add_to_cache(cache, envs[index].id, t)
|
||||||
for index, result in zip(indices, results):
|
# replace obs with done by initial state
|
||||||
obs[index] = result
|
obs[index] = result
|
||||||
# step agents
|
# step agents
|
||||||
obs = {k: np.stack([o[k] for o in obs]) for k in obs[0]}
|
obs = {k: np.stack([o[k] for o in obs]) for k in obs[0]}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user