diff --git a/diffusion_policy/env_runner/blockpush_lowdim_runner.py b/diffusion_policy/env_runner/blockpush_lowdim_runner.py
index b3f31bf..5193daf 100644
--- a/diffusion_policy/env_runner/blockpush_lowdim_runner.py
+++ b/diffusion_policy/env_runner/blockpush_lowdim_runner.py
@@ -235,7 +235,15 @@ class BlockPushLowdimRunner(BaseLowdimRunner):
         prefix_counts = collections.defaultdict(lambda : 0)
 
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds.
+        # fortunately, this does not invalidate our conclusions, since
+        # 1. This bug only affects the variance of the metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             this_rewards = all_rewards[i]
diff --git a/diffusion_policy/env_runner/kitchen_lowdim_runner.py b/diffusion_policy/env_runner/kitchen_lowdim_runner.py
index 417294b..39f5623 100644
--- a/diffusion_policy/env_runner/kitchen_lowdim_runner.py
+++ b/diffusion_policy/env_runner/kitchen_lowdim_runner.py
@@ -279,7 +279,15 @@ class KitchenLowdimRunner(BaseLowdimRunner):
         log_data = dict()
         prefix_total_reward_map = collections.defaultdict(list)
         prefix_n_completed_map = collections.defaultdict(list)
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds.
+        # fortunately, this does not invalidate our conclusions, since
+        # 1. This bug only affects the variance of the metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             this_rewards = all_rewards[i]
diff --git a/diffusion_policy/env_runner/pusht_image_runner.py b/diffusion_policy/env_runner/pusht_image_runner.py
index 3c7fa09..f65c06a 100644
--- a/diffusion_policy/env_runner/pusht_image_runner.py
+++ b/diffusion_policy/env_runner/pusht_image_runner.py
@@ -221,7 +221,15 @@ class PushTImageRunner(BaseImageRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds.
+        # fortunately, this does not invalidate our conclusions, since
+        # 1. This bug only affects the variance of the metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
diff --git a/diffusion_policy/env_runner/pusht_keypoints_runner.py b/diffusion_policy/env_runner/pusht_keypoints_runner.py
index b9c107c..a16bd58 100644
--- a/diffusion_policy/env_runner/pusht_keypoints_runner.py
+++ b/diffusion_policy/env_runner/pusht_keypoints_runner.py
@@ -243,7 +243,15 @@ class PushTKeypointsRunner(BaseLowdimRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds.
+        # fortunately, this does not invalidate our conclusions, since
+        # 1. This bug only affects the variance of the metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
diff --git a/diffusion_policy/env_runner/robomimic_image_runner.py b/diffusion_policy/env_runner/robomimic_image_runner.py
index ade77ea..cbae74e 100644
--- a/diffusion_policy/env_runner/robomimic_image_runner.py
+++ b/diffusion_policy/env_runner/robomimic_image_runner.py
@@ -324,7 +324,15 @@ class RobomimicImageRunner(BaseImageRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds.
+        # fortunately, this does not invalidate our conclusions, since
+        # 1. This bug only affects the variance of the metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
diff --git a/diffusion_policy/env_runner/robomimic_lowdim_runner.py b/diffusion_policy/env_runner/robomimic_lowdim_runner.py
index 7246943..f3ba642 100644
--- a/diffusion_policy/env_runner/robomimic_lowdim_runner.py
+++ b/diffusion_policy/env_runner/robomimic_lowdim_runner.py
@@ -317,7 +317,15 @@ class RobomimicLowdimRunner(BaseLowdimRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper were generated using the commented-out line below,
+        # which only reports and averages metrics from the first n_envs initial conditions and seeds.
+        # fortunately, this does not invalidate our conclusions, since
+        # 1. This bug only affects the variance of the metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to exactly reproduce the reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
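For context on what this one-line fix changes in every runner: when `n_inits > n_envs`, episodes are evaluated in chunks across the parallel workers, so `all_rewards` holds one entry per initial condition, but the old loop only aggregated the first `len(self.env_fns)` of them. The following minimal sketch (not taken from the repository; the reward values are invented, and `n_envs`, `n_inits`, and `all_rewards` merely mirror the names in the diff) illustrates the effect:

```python
# Toy sketch of the aggregation bug fixed above. Assumed/invented values:
# the rewards below; n_envs, n_inits, all_rewards mirror names in the diff.
import numpy as np

n_envs = 2    # parallel simulator workers, i.e. len(self.env_fns)
n_inits = 6   # total initial conditions / seeds, evaluated in chunks
# one reward trace per evaluated episode (toy values)
all_rewards = [np.array([r]) for r in (1.0, 0.0, 1.0, 1.0, 0.0, 1.0)]

# buggy: averages only the first n_envs episodes -> noisier estimate
buggy_mean = np.mean([np.max(all_rewards[i]) for i in range(n_envs)])

# fixed: averages all n_inits evaluated episodes
fixed_mean = np.mean([np.max(all_rewards[i]) for i in range(n_inits)])

print(buggy_mean)  # 0.5   (only 2 episodes counted)
print(fixed_mean)  # 0.666... (all 6 episodes counted)
```

Both loops estimate the same underlying success rate; the buggy one just averages fewer episodes, so any single reported number is a noisier estimate. That is why the comments in the diff state that only the variance of the metrics is affected, not their mean.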