Merge pull request #7 from columbia-ai-robotics/cchi/bug_fix_eval_sample
fixed bug where only the first n_envs samples of metrics were used when aggregating evaluation results
commit 74b6391737
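The six hunks below apply the same one-line fix to each env runner: the metric aggregation loop now iterates over all n_inits rollouts instead of only the first len(self.env_fns) of them. As a rough illustration of the difference, here is a minimal sketch, not the runner code itself; the sizes, prefix layout, and the aggregate helper are made up for this example:

import collections
import numpy as np

n_envs = 28                                    # parallel envs, i.e. len(self.env_fns)
n_inits = 56                                   # total initial conditions / seeds rolled out
all_rewards = [np.random.rand(10) for _ in range(n_inits)]
env_prefixs = ['train/'] * (n_inits // 2) + ['test/'] * (n_inits // 2)

def aggregate(loop_bound):
    # mirrors the runners' logging loop: bucket the per-rollout max reward by prefix
    max_rewards = collections.defaultdict(list)
    for i in range(loop_bound):
        max_rewards[env_prefixs[i]].append(np.max(all_rewards[i]))
    return {k: np.mean(v) for k, v in max_rewards.items()}

buggy = aggregate(n_envs)    # old loop bound: only the first n_envs rollouts are counted
fixed = aggregate(n_inits)   # new loop bound: every rollout contributes to the averages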
@@ -235,7 +235,15 @@ class BlockPushLowdimRunner(BaseLowdimRunner):
         prefix_counts = collections.defaultdict(lambda : 0)

         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper are generated using the commented out line below
+        # which will only report and average metrics from first n_envs initial condition and seeds
+        # fortunately this won't invalidate our conclusion since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to completely reproduce reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             this_rewards = all_rewards[i]
@@ -279,7 +279,15 @@ class KitchenLowdimRunner(BaseLowdimRunner):
         log_data = dict()
         prefix_total_reward_map = collections.defaultdict(list)
         prefix_n_completed_map = collections.defaultdict(list)
-        for i in range(len(self.env_fns)):
+        # results reported in the paper are generated using the commented out line below
+        # which will only report and average metrics from first n_envs initial condition and seeds
+        # fortunately this won't invalidate our conclusion since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to completely reproduce reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             this_rewards = all_rewards[i]
@@ -221,7 +221,15 @@ class PushTImageRunner(BaseImageRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper are generated using the commented out line below
+        # which will only report and average metrics from first n_envs initial condition and seeds
+        # fortunately this won't invalidate our conclusion since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to completely reproduce reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
@@ -243,7 +243,15 @@ class PushTKeypointsRunner(BaseLowdimRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper are generated using the commented out line below
+        # which will only report and average metrics from first n_envs initial condition and seeds
+        # fortunately this won't invalidate our conclusion since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to completely reproduce reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
@@ -324,7 +324,15 @@ class RobomimicImageRunner(BaseImageRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper are generated using the commented out line below
+        # which will only report and average metrics from first n_envs initial condition and seeds
+        # fortunately this won't invalidate our conclusion since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to completely reproduce reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
@@ -317,7 +317,15 @@ class RobomimicLowdimRunner(BaseLowdimRunner):
         # log
         max_rewards = collections.defaultdict(list)
         log_data = dict()
-        for i in range(len(self.env_fns)):
+        # results reported in the paper are generated using the commented out line below
+        # which will only report and average metrics from first n_envs initial condition and seeds
+        # fortunately this won't invalidate our conclusion since
+        # 1. This bug only affects the variance of metrics, not their mean
+        # 2. All baseline methods are evaluated using the same code
+        # to completely reproduce reported numbers, uncomment this line:
+        # for i in range(len(self.env_fns)):
+        # and comment out this line
+        for i in range(n_inits):
             seed = self.env_seeds[i]
             prefix = self.env_prefixs[i]
             max_reward = np.max(all_rewards[i])
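As a closing aside, the comments' first point (the bug changes the variance of the reported numbers, not their expected value) can be sanity-checked with a quick synthetic experiment. This is a minimal sketch with made-up metric values, not a reproduction of any benchmark:

import numpy as np

rng = np.random.default_rng(0)
n_envs, n_inits, n_trials = 28, 56, 2000
# made-up per-rollout metrics; each row is one hypothetical evaluation run
rewards = rng.uniform(0.0, 1.0, size=(n_trials, n_inits))

mean_first_chunk = rewards[:, :n_envs].mean(axis=1)   # old aggregation (first n_envs only)
mean_all = rewards.mean(axis=1)                        # fixed aggregation (all n_inits)

print(mean_first_chunk.mean(), mean_all.mean())  # both close to 0.5: same expected value
print(mean_first_chunk.std(), mean_all.std())    # the truncated average fluctuates more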