fixed bug where only the first n_envs samples of metrics were used

Cheng Chi 2023-06-01 11:08:12 -04:00
parent 27395b7500
commit 5e36d50603
6 changed files with 54 additions and 6 deletions

@@ -235,7 +235,15 @@ class BlockPushLowdimRunner(BaseLowdimRunner):
         prefix_counts = collections.defaultdict(lambda : 0)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds
+        # fortunately this won't invalidate our conclusions, since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line:
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             this_rewards = all_rewards[i]
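For context, a minimal sketch of the bug these hunks fix (synthetic numbers; the names mirror, but are not, the runner's real attributes): episodes are rolled out for n_inits initial conditions in chunks of len(self.env_fns) parallel environments, but the logging loop only visited the first chunk.

    import numpy as np

    n_envs = 8     # parallel environments, i.e. len(self.env_fns)
    n_inits = 56   # total initial conditions / seeds rolled out
    rng = np.random.default_rng(0)
    all_rewards = [rng.random(10) for _ in range(n_inits)]  # stand-in per-step rewards

    # buggy aggregation: only the first n_envs episodes are logged and averaged
    buggy = np.mean([np.max(r) for r in all_rewards[:n_envs]])

    # fixed aggregation: all n_inits episodes contribute
    fixed = np.mean([np.max(r) for r in all_rewards])

    print(buggy, fixed)  # same quantity, estimated from 8 vs. 56 episodes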

@@ -279,7 +279,15 @@ class KitchenLowdimRunner(BaseLowdimRunner):
         log_data = dict()
         prefix_total_reward_map = collections.defaultdict(list)
         prefix_n_completed_map = collections.defaultdict(list)
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds
+        # fortunately this won't invalidate our conclusions, since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line:
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             this_rewards = all_rewards[i]

@@ -221,7 +221,15 @@ class PushTImageRunner(BaseImageRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds
+        # fortunately this won't invalidate our conclusions, since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line:
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])

@@ -243,7 +243,15 @@ class PushTKeypointsRunner(BaseLowdimRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds
+        # fortunately this won't invalidate our conclusions, since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line:
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])

@@ -324,7 +324,15 @@ class RobomimicImageRunner(BaseImageRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds
+        # fortunately this won't invalidate our conclusions, since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line:
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])

@@ -317,7 +317,15 @@ class RobomimicLowdimRunner(BaseLowdimRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds
+        # fortunately this won't invalidate our conclusions, since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line:
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
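As a sanity check of point 1 in the comments above (the bug widens the spread of the reported numbers but leaves their expected value unchanged), a purely synthetic simulation, not tied to the repo's code:

    import numpy as np

    rng = np.random.default_rng(0)
    n_envs, n_inits, trials = 8, 56, 5000
    # synthetic per-episode scores for many repeated evaluations
    scores = rng.normal(loc=0.8, scale=0.1, size=(trials, n_inits))

    buggy = scores[:, :n_envs].mean(axis=1)  # average over the first n_envs only
    fixed = scores.mean(axis=1)              # average over all n_inits

    print(buggy.mean(), fixed.mean())  # both ~0.8: the estimate stays unbiased
    print(buggy.std(), fixed.std())    # buggy spread is ~sqrt(n_inits/n_envs) ≈ 2.6x larger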