added minecraft environment
This commit is contained in:
		
							parent
							
								
									8fa2274cfc
								
							
						
					
					
						commit
						036e9a8028
					
				| @ -172,6 +172,15 @@ atari100k: | ||||
|   imag_gradient: 'reinforce' | ||||
|   time_limit: 108000 | ||||
| 
 | ||||
| minecraft: | ||||
|   task: minecraft_diamond | ||||
|   break_speed: 100.0 | ||||
|   envs: 16 | ||||
|   train_ratio: 16 | ||||
|   log_keys_max: '^log_inventory.*' | ||||
|   encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|reward', cnn_keys: 'image'} | ||||
|   decoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath', cnn_keys: 'image'} | ||||
|   time_limit: 36000 | ||||
| 
 | ||||
| debug: | ||||
|   debug: True | ||||
|  | ||||
| @ -218,9 +218,12 @@ def make_env(config, logger, mode, train_eps, eval_eps): | ||||
|         env = wrappers.OneHotAction(env) | ||||
|     elif suite == "crafter": | ||||
|         import envs.crafter as crafter | ||||
| 
 | ||||
|         env = crafter.Crafter(task, config.size) | ||||
|         env = wrappers.OneHotAction(env) | ||||
|     elif suite == "minecraft": | ||||
|         import envs.minecraft as minecraft | ||||
|         env = minecraft.make_env(task, size=config.size, break_speed=config.break_speed) | ||||
|         env = wrappers.OneHotAction(env) | ||||
|     else: | ||||
|         raise NotImplementedError(suite) | ||||
|     env = wrappers.TimeLimit(env, config.time_limit) | ||||
|  | ||||
							
								
								
									
										154
									
								
								envs/minecraft.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										154
									
								
								envs/minecraft.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,154 @@ | ||||
| import numpy as np | ||||
| from . import minecraft_base | ||||
| 
 | ||||
| import gym | ||||
| 
 | ||||
| def make_env(task, *args, **kwargs): | ||||
|     return { | ||||
|         'wood': MinecraftWood, | ||||
|         'climb': MinecraftClimb, | ||||
|         'diamond': MinecraftDiamond, | ||||
|         }[task](*args, **kwargs) | ||||
| 
 | ||||
| 
 | ||||
| class MinecraftWood: | ||||
| 
 | ||||
|   def __init__(self, *args, **kwargs): | ||||
|     actions = BASIC_ACTIONS | ||||
|     self.rewards = [ | ||||
|         CollectReward('log', repeated=1), | ||||
|         HealthReward(), | ||||
|     ] | ||||
|     env = minecraft_base.MinecraftBase(actions, *args, **kwargs) | ||||
| 
 | ||||
|   def step(self, action): | ||||
|     obs, reward, done, info = self.env.step(action) | ||||
|     reward = sum([fn(obs, self.env.inventory) for fn in self.rewards]) | ||||
|     obs['reward'] = reward | ||||
|     return obs, reward, done, info | ||||
| 
 | ||||
| 
 | ||||
| class MinecraftClimb: | ||||
| 
 | ||||
|   def __init__(self, *args, **kwargs): | ||||
|     actions = BASIC_ACTIONS | ||||
|     env = minecraft_base.MinecraftBase(actions, *args, **kwargs) | ||||
|     self._previous = None | ||||
|     self._health_reward = HealthReward() | ||||
| 
 | ||||
|   def step(self, action): | ||||
|     obs, reward, done, info = self.env.step(action) | ||||
|     x, y, z = obs['log_player_pos'] | ||||
|     height = np.float32(y) | ||||
|     if obs['is_first']: | ||||
|       self._previous = height | ||||
|     reward = height - self._previous | ||||
|     reward += self._health_reward(obs) | ||||
|     obs['reward'] = reward | ||||
|     self._previous = height | ||||
|     return obs, reward, done, info | ||||
| 
 | ||||
| 
 | ||||
| class MinecraftDiamond(gym.Wrapper): | ||||
| 
 | ||||
|   def __init__(self, *args, **kwargs): | ||||
|     actions = { | ||||
|         **BASIC_ACTIONS, | ||||
|         'craft_planks': dict(craft='planks'), | ||||
|         'craft_stick': dict(craft='stick'), | ||||
|         'craft_crafting_table': dict(craft='crafting_table'), | ||||
|         'place_crafting_table': dict(place='crafting_table'), | ||||
|         'craft_wooden_pickaxe': dict(nearbyCraft='wooden_pickaxe'), | ||||
|         'craft_stone_pickaxe': dict(nearbyCraft='stone_pickaxe'), | ||||
|         'craft_iron_pickaxe': dict(nearbyCraft='iron_pickaxe'), | ||||
|         'equip_stone_pickaxe': dict(equip='stone_pickaxe'), | ||||
|         'equip_wooden_pickaxe': dict(equip='wooden_pickaxe'), | ||||
|         'equip_iron_pickaxe': dict(equip='iron_pickaxe'), | ||||
|         'craft_furnace': dict(nearbyCraft='furnace'), | ||||
|         'place_furnace': dict(place='furnace'), | ||||
|         'smelt_iron_ingot': dict(nearbySmelt='iron_ingot'), | ||||
|     } | ||||
|     self.rewards = [ | ||||
|         CollectReward('log', once=1), | ||||
|         CollectReward('planks', once=1), | ||||
|         CollectReward('stick', once=1), | ||||
|         CollectReward('crafting_table', once=1), | ||||
|         CollectReward('wooden_pickaxe', once=1), | ||||
|         CollectReward('cobblestone', once=1), | ||||
|         CollectReward('stone_pickaxe', once=1), | ||||
|         CollectReward('iron_ore', once=1), | ||||
|         CollectReward('furnace', once=1), | ||||
|         CollectReward('iron_ingot', once=1), | ||||
|         CollectReward('iron_pickaxe', once=1), | ||||
|         CollectReward('diamond', once=1), | ||||
|         HealthReward(), | ||||
|     ] | ||||
|     env = minecraft_base.MinecraftBase(actions, *args, **kwargs) | ||||
|     super().__init__(env) | ||||
| 
 | ||||
|   def step(self, action): | ||||
|     obs, reward, done, info  = self.env.step(action) | ||||
|     reward = sum([fn(obs, self.env.inventory) for fn in self.rewards]) | ||||
|     obs['reward'] = reward | ||||
|     return obs, reward, done, info | ||||
| 
 | ||||
|   def reset(self): | ||||
|     obs = self.env.reset() | ||||
|     # called for reset of reward calculations | ||||
|     _ = sum([fn(obs, self.env.inventory) for fn in self.rewards]) | ||||
|     return obs | ||||
| 
 | ||||
| 
 | ||||
| class CollectReward: | ||||
| 
 | ||||
|   def __init__(self, item, once=0, repeated=0): | ||||
|     self.item = item | ||||
|     self.once = once | ||||
|     self.repeated = repeated | ||||
|     self.previous = 0 | ||||
|     self.maximum = 0 | ||||
| 
 | ||||
|   def __call__(self, obs, inventory): | ||||
|     current = inventory[self.item] | ||||
|     if obs['is_first']: | ||||
|       self.previous = current | ||||
|       self.maximum = current | ||||
|       return 0 | ||||
|     reward = self.repeated * max(0, current - self.previous) | ||||
|     if self.maximum == 0 and current > 0: | ||||
|       reward += self.once | ||||
|     self.previous = current | ||||
|     self.maximum = max(self.maximum, current) | ||||
|     return reward | ||||
| 
 | ||||
| 
 | ||||
| class HealthReward: | ||||
| 
 | ||||
|   def __init__(self, scale=0.01): | ||||
|     self.scale = scale | ||||
|     self.previous = None | ||||
| 
 | ||||
|   def __call__(self, obs, inventory=None): | ||||
|     health = obs['health'] | ||||
|     if obs['is_first']: | ||||
|       self.previous = health | ||||
|       return 0 | ||||
|     reward = self.scale * (health - self.previous) | ||||
|     self.previous = health | ||||
|     return np.float32(reward) | ||||
| 
 | ||||
| 
 | ||||
| BASIC_ACTIONS = { | ||||
|     'noop': dict(), | ||||
|     'attack': dict(attack=1), | ||||
|     'turn_up': dict(camera=(-15, 0)), | ||||
|     'turn_down': dict(camera=(15, 0)), | ||||
|     'turn_left': dict(camera=(0, -15)), | ||||
|     'turn_right': dict(camera=(0, 15)), | ||||
|     'forward': dict(forward=1), | ||||
|     'back': dict(back=1), | ||||
|     'left': dict(left=1), | ||||
|     'right': dict(right=1), | ||||
|     'jump': dict(jump=1, forward=1), | ||||
|     'place_dirt': dict(place='dirt'), | ||||
| } | ||||
							
								
								
									
										219
									
								
								envs/minecraft_base.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										219
									
								
								envs/minecraft_base.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,219 @@ | ||||
| import logging | ||||
| import threading | ||||
| 
 | ||||
| import numpy as np | ||||
| import gym | ||||
| 
 | ||||
| class MinecraftBase(gym.Env): | ||||
| 
 | ||||
|   _LOCK = threading.Lock() | ||||
| 
 | ||||
|   def __init__( | ||||
|       self, actions, | ||||
|       repeat=1, | ||||
|       size=(64, 64), | ||||
|       break_speed=100.0, | ||||
|       gamma=10.0, | ||||
|       sticky_attack=30, | ||||
|       sticky_jump=10, | ||||
|       pitch_limit=(-60, 60), | ||||
|       logs=True, | ||||
|   ): | ||||
|     if logs: | ||||
|       logging.basicConfig(level=logging.DEBUG) | ||||
|     self._repeat = repeat | ||||
|     self._size = size | ||||
|     if break_speed != 1.0: | ||||
|       sticky_attack = 0 | ||||
| 
 | ||||
|     # Make env | ||||
|     with self._LOCK: | ||||
|         from .import minecraft_minerl | ||||
|         self._env = minecraft_minerl.MineRLEnv(size, break_speed, gamma).make() | ||||
|     self._inventory = {} | ||||
| 
 | ||||
|     # Observations | ||||
|     self._inv_keys = [ | ||||
|         k for k in self._flatten(self._env.observation_space.spaces) if k.startswith('inventory/') | ||||
|         if k != 'inventory/log2'] | ||||
|     self._step = 0 | ||||
|     self._max_inventory = None | ||||
|     self._equip_enum = self._env.observation_space[ | ||||
|         'equipped_items']['mainhand']['type'].values.tolist() | ||||
| 
 | ||||
|     # Actions | ||||
|     self._noop_action = minecraft_minerl.NOOP_ACTION | ||||
|     actions = self._insert_defaults(actions) | ||||
|     self._action_names = tuple(actions.keys()) | ||||
|     self._action_values = tuple(actions.values()) | ||||
|     message = f'Minecraft action space ({len(self._action_values)}):' | ||||
|     print(message, ', '.join(self._action_names)) | ||||
|     self._sticky_attack_length = sticky_attack | ||||
|     self._sticky_attack_counter = 0 | ||||
|     self._sticky_jump_length = sticky_jump | ||||
|     self._sticky_jump_counter = 0 | ||||
|     self._pitch_limit = pitch_limit | ||||
|     self._pitch = 0 | ||||
| 
 | ||||
|   @property | ||||
|   def observation_space(self): | ||||
|     return gym.spaces.Dict( | ||||
|         { | ||||
|         'image': gym.spaces.Box(0, 255, self._size + (3,), np.uint8), | ||||
|         'inventory': gym.spaces.Box(-np.inf, np.inf, (len(self._inv_keys),), dtype=np.float32), | ||||
|         'inventory_max': gym.spaces.Box(-np.inf, np.inf, (len(self._inv_keys),), dtype=np.float32), | ||||
|         'equipped': gym.spaces.Box(-np.inf, np.inf, (len(self._equip_enum),), dtype=np.float32), | ||||
|         'reward': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.float32), | ||||
|         'health': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.float32), | ||||
|         'hunger': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.float32), | ||||
|         'breath': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.float32), | ||||
|         'is_first': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.uint8), | ||||
|         'is_last': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.uint8), | ||||
|         'is_terminal': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.uint8), | ||||
|         **{f'log_{k}': gym.spaces.Box(-np.inf, np.inf, (1,), dtype=np.int64) for k in self._inv_keys}, | ||||
|         'log_player_pos': gym.spaces.Box(-np.inf, np.inf, (3,), dtype=np.float32), | ||||
|         } | ||||
|     ) | ||||
| 
 | ||||
|   @property | ||||
|   def action_space(self): | ||||
|     space = gym.spaces.discrete.Discrete(len(self._action_values)) | ||||
|     space.discrete = True | ||||
|     return space | ||||
| 
 | ||||
|   def step(self, action): | ||||
|     action = action.copy() | ||||
|     print(self._step, action) | ||||
|     action = self._action_values[action] | ||||
|     action = self._action(action) | ||||
|     following = self._noop_action.copy() | ||||
|     for key in ('attack', 'forward', 'back', 'left', 'right'): | ||||
|         following[key] = action[key] | ||||
|     for act in [action] + ([following] * (self._repeat - 1)): | ||||
|         obs, reward, done, info = self._env.step(act) | ||||
|         if 'error' in info: | ||||
|             done = True | ||||
|             break | ||||
|         obs['is_first'] = False | ||||
|         obs['is_last'] = bool(done) | ||||
|         obs['is_terminal'] = bool(info.get('is_terminal', done)) | ||||
| 
 | ||||
|     obs = self._obs(obs) | ||||
|     self._step += 1 | ||||
|     assert 'pov' not in obs, list(obs.keys()) | ||||
|     return obs, reward, done, info | ||||
| 
 | ||||
|   @property | ||||
|   def inventory(self): | ||||
|     return self._inventory | ||||
| 
 | ||||
|   def reset(self): | ||||
|     # inventory will be added in _obs | ||||
|     self._inventory = {} | ||||
|     self._max_inventory = None | ||||
| 
 | ||||
|     with self._LOCK: | ||||
|       obs = self._env.reset() | ||||
|     obs['is_first'] = True | ||||
|     obs['is_last'] = False | ||||
|     obs['is_terminal'] = False | ||||
|     obs = self._obs(obs) | ||||
| 
 | ||||
|     self._step = 0 | ||||
|     self._sticky_attack_counter = 0 | ||||
|     self._sticky_jump_counter = 0 | ||||
|     self._pitch = 0 | ||||
|     return obs | ||||
| 
 | ||||
|   def _obs(self, obs): | ||||
|     obs = self._flatten(obs) | ||||
|     obs['inventory/log'] += obs.pop('inventory/log2') | ||||
|     self._inventory = { | ||||
|         k.split('/', 1)[1]: obs[k] for k in self._inv_keys | ||||
|         if k != 'inventory/air'} | ||||
|     inventory = np.array([obs[k] for k in self._inv_keys], np.float32) | ||||
|     if self._max_inventory is None: | ||||
|       self._max_inventory = inventory | ||||
|     else: | ||||
|       self._max_inventory = np.maximum(self._max_inventory, inventory) | ||||
|     index = self._equip_enum.index(obs['equipped_items/mainhand/type']) | ||||
|     equipped = np.zeros(len(self._equip_enum), np.float32) | ||||
|     equipped[index] = 1.0 | ||||
|     player_x = obs['location_stats/xpos'] | ||||
|     player_y = obs['location_stats/ypos'] | ||||
|     player_z = obs['location_stats/zpos'] | ||||
|     obs = { | ||||
|         'image': obs['pov'], | ||||
|         'inventory': inventory, | ||||
|         'inventory_max': self._max_inventory.copy(), | ||||
|         'equipped': equipped, | ||||
|         'health': np.float32(obs['life_stats/life'] / 20), | ||||
|         'hunger': np.float32(obs['life_stats/food'] / 20), | ||||
|         'breath': np.float32(obs['life_stats/air'] / 300), | ||||
|         'reward': 0.0, | ||||
|         'is_first': obs['is_first'], | ||||
|         'is_last': obs['is_last'], | ||||
|         'is_terminal': obs['is_terminal'], | ||||
|         **{f'log_{k}': np.int64(obs[k]) for k in self._inv_keys}, | ||||
|         'log_player_pos': np.array([player_x, player_y, player_z], np.float32), | ||||
|     } | ||||
|     for key, value in obs.items(): | ||||
|       space = self.observation_space[key] | ||||
|       if not isinstance(value, np.ndarray): | ||||
|         value = np.array(value) | ||||
|       assert (key, value, value.dtype, value.shape, space) | ||||
|     return obs | ||||
| 
 | ||||
|   def _action(self, action): | ||||
|     if self._sticky_attack_length: | ||||
|       if action['attack']: | ||||
|         self._sticky_attack_counter = self._sticky_attack_length | ||||
|       if self._sticky_attack_counter > 0: | ||||
|         action['attack'] = 1 | ||||
|         action['jump'] = 0 | ||||
|         self._sticky_attack_counter -= 1 | ||||
|     if self._sticky_jump_length: | ||||
|       if action['jump']: | ||||
|         self._sticky_jump_counter = self._sticky_jump_length | ||||
|       if self._sticky_jump_counter > 0: | ||||
|         action['jump'] = 1 | ||||
|         action['forward'] = 1 | ||||
|         self._sticky_jump_counter -= 1 | ||||
|     if self._pitch_limit and action['camera'][0]: | ||||
|       lo, hi = self._pitch_limit | ||||
|       if not (lo <= self._pitch + action['camera'][0] <= hi): | ||||
|         action['camera'] = (0, action['camera'][1]) | ||||
|       self._pitch += action['camera'][0] | ||||
|     return action | ||||
| 
 | ||||
|   def _insert_defaults(self, actions): | ||||
|     actions = {name: action.copy() for name, action in actions.items()} | ||||
|     for key, default in self._noop_action.items(): | ||||
|       for action in actions.values(): | ||||
|         if key not in action: | ||||
|           action[key] = default | ||||
|     return actions | ||||
| 
 | ||||
|   def _flatten(self, nest, prefix=None): | ||||
|     result = {} | ||||
|     for key, value in nest.items(): | ||||
|       key = prefix + '/' + key if prefix else key | ||||
|       if isinstance(value, gym.spaces.Dict): | ||||
|         value = value.spaces | ||||
|       if isinstance(value, dict): | ||||
|         result.update(self._flatten(value, key)) | ||||
|       else: | ||||
|         result[key] = value | ||||
|     return result | ||||
| 
 | ||||
|   def _unflatten(self, flat): | ||||
|     result = {} | ||||
|     for key, value in flat.items(): | ||||
|       parts = key.split('/') | ||||
|       node = result | ||||
|       for part in parts[:-1]: | ||||
|         if part not in node: | ||||
|           node[part] = {} | ||||
|         node = node[part] | ||||
|       node[parts[-1]] = value | ||||
|     return result | ||||
							
								
								
									
										150
									
								
								envs/minecraft_minerl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								envs/minecraft_minerl.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,150 @@ | ||||
| from minerl.herobraine.env_spec import EnvSpec | ||||
| from minerl.herobraine.hero import handler | ||||
| from minerl.herobraine.hero import handlers | ||||
| from minerl.herobraine.hero import mc | ||||
| from minerl.herobraine.hero.mc import INVERSE_KEYMAP | ||||
| 
 | ||||
| 
 | ||||
| def edit_options(**kwargs): | ||||
|   import os, pathlib, re | ||||
|   for word in os.popen('pip3 --version').read().split(' '): | ||||
|     if '-packages/pip' in word: | ||||
|       break | ||||
|   else: | ||||
|     raise RuntimeError('Could not found python package directory.') | ||||
|   packages = pathlib.Path(word).parent | ||||
|   filename = packages / 'minerl/Malmo/Minecraft/run/options.txt' | ||||
|   options = filename.read_text() | ||||
|   if 'fovEffectScale:' not in options: | ||||
|     options += 'fovEffectScale:1.0\n' | ||||
|   if 'simulationDistance:' not in options: | ||||
|     options += 'simulationDistance:12\n' | ||||
|   for key, value in kwargs.items(): | ||||
|     assert f'{key}:' in options, key | ||||
|     assert isinstance(value, str), (value, type(value)) | ||||
|     options = re.sub(f'{key}:.*\n', f'{key}:{value}\n', options) | ||||
|   filename.write_text(options) | ||||
| 
 | ||||
| 
 | ||||
| edit_options( | ||||
|     difficulty='2', | ||||
|     renderDistance='6', | ||||
|     simulationDistance='6', | ||||
|     fovEffectScale='0.0', | ||||
|     ao='1', | ||||
|     gamma='5.0', | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class MineRLEnv(EnvSpec): | ||||
| 
 | ||||
|   def __init__(self, resolution=(64, 64), break_speed=50, gamma=10.0): | ||||
|     self.resolution = resolution | ||||
|     self.break_speed = break_speed | ||||
|     self.gamma = gamma | ||||
|     super().__init__(name='MineRLEnv-v1') | ||||
| 
 | ||||
|   def create_agent_start(self): | ||||
|     return [ | ||||
|         BreakSpeedMultiplier(self.break_speed), | ||||
|     ] | ||||
| 
 | ||||
|   def create_agent_handlers(self): | ||||
|     return [] | ||||
| 
 | ||||
|   def create_server_world_generators(self): | ||||
|     return [handlers.DefaultWorldGenerator(force_reset=True)] | ||||
| 
 | ||||
|   def create_server_quit_producers(self): | ||||
|     return [handlers.ServerQuitWhenAnyAgentFinishes()] | ||||
| 
 | ||||
|   def create_server_initial_conditions(self): | ||||
|     return [ | ||||
|         handlers.TimeInitialCondition( | ||||
|             allow_passage_of_time=True, | ||||
|             start_time=0, | ||||
|         ), | ||||
|         handlers.SpawningInitialCondition( | ||||
|             allow_spawning=True, | ||||
|         ) | ||||
|     ] | ||||
| 
 | ||||
|   def create_observables(self): | ||||
|     return [ | ||||
|         handlers.POVObservation(self.resolution), | ||||
|         handlers.FlatInventoryObservation(mc.ALL_ITEMS), | ||||
|         handlers.EquippedItemObservation( | ||||
|             mc.ALL_ITEMS, _default='air', _other='other'), | ||||
|         handlers.ObservationFromCurrentLocation(), | ||||
|         handlers.ObservationFromLifeStats(), | ||||
|     ] | ||||
| 
 | ||||
|   def create_actionables(self): | ||||
|     kw = dict(_other='none', _default='none') | ||||
|     return [ | ||||
|         handlers.KeybasedCommandAction('forward', INVERSE_KEYMAP['forward']), | ||||
|         handlers.KeybasedCommandAction('back', INVERSE_KEYMAP['back']), | ||||
|         handlers.KeybasedCommandAction('left', INVERSE_KEYMAP['left']), | ||||
|         handlers.KeybasedCommandAction('right', INVERSE_KEYMAP['right']), | ||||
|         handlers.KeybasedCommandAction('jump', INVERSE_KEYMAP['jump']), | ||||
|         handlers.KeybasedCommandAction('sneak', INVERSE_KEYMAP['sneak']), | ||||
|         handlers.KeybasedCommandAction('attack', INVERSE_KEYMAP['attack']), | ||||
|         handlers.CameraAction(), | ||||
|         handlers.PlaceBlock(['none'] + mc.ALL_ITEMS, **kw), | ||||
|         handlers.EquipAction(['none'] + mc.ALL_ITEMS, **kw), | ||||
|         handlers.CraftAction(['none'] + mc.ALL_ITEMS, **kw), | ||||
|         handlers.CraftNearbyAction(['none'] + mc.ALL_ITEMS, **kw), | ||||
|         handlers.SmeltItemNearby(['none'] + mc.ALL_ITEMS, **kw), | ||||
|     ] | ||||
| 
 | ||||
|   def is_from_folder(self, folder): | ||||
|     return folder == 'none' | ||||
| 
 | ||||
|   def get_docstring(self): | ||||
|     return '' | ||||
| 
 | ||||
|   def determine_success_from_rewards(self, rewards): | ||||
|     return True | ||||
| 
 | ||||
|   def create_rewardables(self): | ||||
|     return [] | ||||
| 
 | ||||
|   def create_server_decorators(self): | ||||
|     return [] | ||||
| 
 | ||||
|   def create_mission_handlers(self): | ||||
|     return [] | ||||
| 
 | ||||
|   def create_monitors(self): | ||||
|     return [] | ||||
| 
 | ||||
| 
 | ||||
| class BreakSpeedMultiplier(handler.Handler): | ||||
| 
 | ||||
|   def __init__(self, multiplier=1.0): | ||||
|     self.multiplier = multiplier | ||||
| 
 | ||||
|   def to_string(self): | ||||
|     return f'break_speed({self.multiplier})' | ||||
| 
 | ||||
|   def xml_template(self): | ||||
|     return '<BreakSpeedMultiplier>{{multiplier}}</BreakSpeedMultiplier>' | ||||
| 
 | ||||
| 
 | ||||
| class Gamma(handler.Handler): | ||||
| 
 | ||||
|   def __init__(self, gamma=2.0): | ||||
|     self.gamma = gamma | ||||
| 
 | ||||
|   def to_string(self): | ||||
|     return f'gamma({self.gamma})' | ||||
| 
 | ||||
|   def xml_template(self): | ||||
|     return '<GammaSetting>{{gamma}}</GammaSetting>' | ||||
| 
 | ||||
| 
 | ||||
| NOOP_ACTION = dict( | ||||
|     camera=(0, 0), forward=0, back=0, left=0, right=0, attack=0, sprint=0, | ||||
|     jump=0, sneak=0, craft='none', nearbyCraft='none', nearbySmelt='none', | ||||
|     place='none', equip='none', | ||||
| ) | ||||
| @ -1,8 +1,7 @@ | ||||
| setuptools==60.0.0 | ||||
| torch==2.0.0 | ||||
| torchvision==0.15.1 | ||||
| numpy==1.20.1 | ||||
| tensorboard==2.5.0 | ||||
| tensorboard==2.10.0 | ||||
| pandas==1.2.4 | ||||
| matplotlib==3.5.0 | ||||
| ruamel.yaml==0.17.4 | ||||
| @ -11,8 +10,12 @@ einops==0.3.0 | ||||
| protobuf==3.20.0 | ||||
| gym==0.19.0 | ||||
| dm_control==1.0.9 | ||||
| scipy==1.7.0 | ||||
| scipy==1.8.0 | ||||
| memory_maze==1.0.2 | ||||
| atari-py==0.2.9 | ||||
| crafter==1.8.0 | ||||
| opencv-python==4.7.0.72 | ||||
| numpy==1.21.0 | ||||
| # minerl==0.4.4 | ||||
| # This was needed for minerl | ||||
| # conda install -c conda-forge openjdk=8 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user