diff --git a/examples/dqn_example.py b/examples/dqn_example.py
index 6998373..70c9e4b 100644
--- a/examples/dqn_example.py
+++ b/examples/dqn_example.py
@@ -79,4 +79,4 @@ if __name__ == '__main__':
                 feed_dict = data_collector.next_batch(batch_size)
                 sess.run(train_op, feed_dict=feed_dict)
 
-        print('Elapsed time: {:.1f} min'.format((time.time() - start_time) / 60))
\ No newline at end of file
+        print('Elapsed time: {:.1f} min'.format((time.time() - start_time) / 60))
diff --git a/examples/ppo_cartpole.py b/examples/ppo_cartpole.py
index 46f7fad..e88a379 100755
--- a/examples/ppo_cartpole.py
+++ b/examples/ppo_cartpole.py
@@ -83,4 +83,4 @@ if __name__ == '__main__':
             # assigning actor to pi_old
             pi.update_weights()
 
-        print('Elapsed time: {:.1f} min'.format((time.time() - start_time) / 60))
\ No newline at end of file
+        print('Elapsed time: {:.1f} min'.format((time.time() - start_time) / 60))
diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py
index 1dce932..01ca78d 100644
--- a/tianshou/data/batch.py
+++ b/tianshou/data/batch.py
@@ -39,10 +39,10 @@ class Batch(object):
         if num_timesteps > 0:
             # YouQiaoben: finish this implementation, the following code are just from openai/baselines
             t = 0
-            ac = self.env.action_space.sample()  # not used, just so we have the datatype
+            ac = self._env.action_space.sample()  # not used, just so we have the datatype
             new = True  # marks if we're on first timestep of an episode
             if self.is_first_collect:
-                ob = self.env.reset()
+                ob = self._env.reset()
                 self.is_first_collect = False
             else:
                 ob = self.raw_data['observations'][0]  # last observation!
@@ -69,7 +69,7 @@ class Batch(object):
                 actions[i] = ac
                 prevacs[i] = prevac
 
-                ob, rew, new, _ = env.step(ac)
+                ob, rew, new, _ = self._env.step(ac)
                 rewards[i] = rew
 
                 cur_ep_ret += rew
@@ -79,7 +79,7 @@ class Batch(object):
                     ep_lens.append(cur_ep_len)
                     cur_ep_ret = 0
                     cur_ep_len = 0
-                    ob = env.reset()
+                    ob = self._env.reset()
                 t += 1
         if num_episodes > 0:
             # YouQiaoben: fix memory growth, both del and gc.collect() fail