"""Minecraft task environments (wood, climb, diamond) built on MinecraftBase.

Each task class wraps ``minecraft_base.MinecraftBase`` and replaces the reward
returned by the base environment with a task-specific shaped reward.
"""

import gym
import numpy as np

from . import minecraft_base


def make_env(task, *args, **kwargs):
    """Build the environment registered under ``task``.

    Args:
        task: One of ``'wood'``, ``'climb'`` or ``'diamond'``.
        *args: Positional arguments forwarded to the environment class.
        **kwargs: Keyword arguments forwarded to the environment class.

    Returns:
        An instance of the selected task environment.

    Raises:
        KeyError: If ``task`` is not one of the known task names.
    """
    return {
        'wood': MinecraftWood,
        'climb': MinecraftClimb,
        'diamond': MinecraftDiamond,
    }[task](*args, **kwargs)


class MinecraftWood(gym.Wrapper):
    """Task that rewards every increase of the 'log' inventory count.

    The reward is the sum of a repeated ``CollectReward`` for 'log' and a
    ``HealthReward``.
    """

    def __init__(self, *args, **kwargs):
        """Create the base environment with the basic action set.

        ``*args`` and ``**kwargs`` are forwarded to ``MinecraftBase`` after
        the action mapping.
        """
        actions = BASIC_ACTIONS
        self.rewards = [
            CollectReward('log', repeated=1),
            HealthReward(),
        ]
        env = minecraft_base.MinecraftBase(actions, *args, **kwargs)
        # Register the env with the wrapper so that ``self.env`` exists;
        # previously the env was created but never stored, so ``step``
        # failed with AttributeError. Mirrors MinecraftDiamond.
        super().__init__(env)

    def step(self, action):
        """Step the base env and replace its reward with the task reward.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is the sum of all
            reward functions; the same value is stored in ``obs['reward']``.
        """
        obs, _, done, info = self.env.step(action)
        reward = sum(fn(obs, self.env.inventory) for fn in self.rewards)
        obs['reward'] = reward
        return obs, reward, done, info

    def reset(self):
        """Reset the base env and let the reward trackers see the first obs.

        The returned rewards are discarded; the calls only give each reward
        function the chance to re-initialise its per-episode state, the same
        way MinecraftDiamond.reset does.
        """
        obs = self.env.reset()
        for fn in self.rewards:
            fn(obs, self.env.inventory)
        return obs


class MinecraftClimb(gym.Wrapper):
    """Task that rewards gaining height, plus a health-change term.

    The height is the second component of ``obs['log_player_pos']``.
    """

    def __init__(self, *args, **kwargs):
        """Create the base environment with the basic action set.

        ``*args`` and ``**kwargs`` are forwarded to ``MinecraftBase`` after
        the action mapping.
        """
        actions = BASIC_ACTIONS
        env = minecraft_base.MinecraftBase(actions, *args, **kwargs)
        # Register the env with the wrapper so that ``self.env`` exists;
        # previously the env was created but never stored.
        super().__init__(env)
        self._previous = None  # Height seen on the previous step.
        self._health_reward = HealthReward()

    def step(self, action):
        """Step the base env and reward the change in height.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is the height
            difference to the previous step plus the health reward; the same
            value is stored in ``obs['reward']``.
        """
        obs, _, done, info = self.env.step(action)
        _, y, _ = obs['log_player_pos']
        height = np.float32(y)
        # Start a new baseline on the first observation. The ``None`` check
        # covers the case where no observation flagged ``is_first`` has been
        # seen by ``step`` yet, which would otherwise raise a TypeError.
        if obs['is_first'] or self._previous is None:
            self._previous = height
        reward = height - self._previous
        reward += self._health_reward(obs)
        obs['reward'] = reward
        self._previous = height
        return obs, reward, done, info

    def reset(self):
        """Reset the base env and clear the per-episode reward state.

        The height baseline is cleared so the next ``step`` re-initialises
        it, and the health tracker is shown the reset observation (its
        return value is discarded).
        """
        obs = self.env.reset()
        self._previous = None
        self._health_reward(obs)
        return obs


class MinecraftDiamond(gym.Wrapper):
    """Task with one-time rewards for each milestone item up to 'diamond'.

    Extends the basic action set with crafting, placing, equipping and
    smelting actions. The reward is the sum of one ``CollectReward`` (with
    ``once=1``) per milestone item and a ``HealthReward``.
    """

    def __init__(self, *args, **kwargs):
        """Create the base environment with the extended action set.

        ``*args`` and ``**kwargs`` are forwarded to ``MinecraftBase`` after
        the action mapping.
        """
        actions = {
            **BASIC_ACTIONS,
            'craft_planks': dict(craft='planks'),
            'craft_stick': dict(craft='stick'),
            'craft_crafting_table': dict(craft='crafting_table'),
            'place_crafting_table': dict(place='crafting_table'),
            'craft_wooden_pickaxe': dict(nearbyCraft='wooden_pickaxe'),
            'craft_stone_pickaxe': dict(nearbyCraft='stone_pickaxe'),
            'craft_iron_pickaxe': dict(nearbyCraft='iron_pickaxe'),
            'equip_stone_pickaxe': dict(equip='stone_pickaxe'),
            'equip_wooden_pickaxe': dict(equip='wooden_pickaxe'),
            'equip_iron_pickaxe': dict(equip='iron_pickaxe'),
            'craft_furnace': dict(nearbyCraft='furnace'),
            'place_furnace': dict(place='furnace'),
            'smelt_iron_ingot': dict(nearbySmelt='iron_ingot'),
        }
        self.rewards = [
            CollectReward('log', once=1),
            CollectReward('planks', once=1),
            CollectReward('stick', once=1),
            CollectReward('crafting_table', once=1),
            CollectReward('wooden_pickaxe', once=1),
            CollectReward('cobblestone', once=1),
            CollectReward('stone_pickaxe', once=1),
            CollectReward('iron_ore', once=1),
            CollectReward('furnace', once=1),
            CollectReward('iron_ingot', once=1),
            CollectReward('iron_pickaxe', once=1),
            CollectReward('diamond', once=1),
            HealthReward(),
        ]
        env = minecraft_base.MinecraftBase(actions, *args, **kwargs)
        super().__init__(env)

    def step(self, action):
        """Step the base env and replace its reward with the task reward.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is the sum of all
            reward functions; the same value is stored in ``obs['reward']``.
        """
        obs, _, done, info = self.env.step(action)
        reward = sum(fn(obs, self.env.inventory) for fn in self.rewards)
        obs['reward'] = reward
        return obs, reward, done, info

    def reset(self):
        """Reset the base env and let the reward trackers see the first obs.

        The reward functions are called only for their side effect of
        resetting the reward calculations; their return values are discarded.
        """
        obs = self.env.reset()
        for fn in self.rewards:
            fn(obs, self.env.inventory)
        return obs


class CollectReward:
    """Reward for collecting an inventory item.

    Args:
        item: Inventory key to track.
        once: Reward granted the first time the count becomes positive after
            the tracked maximum was zero.
        repeated: Reward granted per unit of increase in the count between
            consecutive calls.
    """

    def __init__(self, item, once=0, repeated=0):
        self.item = item
        self.once = once
        self.repeated = repeated
        self.previous = 0  # Count seen on the previous call.
        self.maximum = 0  # Largest count seen since the last first-obs.

    def __call__(self, obs, inventory):
        """Return the reward for the current inventory count.

        Args:
            obs: Observation mapping; only ``obs['is_first']`` is read.
            inventory: Mapping indexed by item name, giving the current count.

        Returns:
            ``0`` on a first observation (which also resets the tracked
            state), otherwise the repeated reward plus the one-time reward
            where applicable.
        """
        current = inventory[self.item]
        if obs['is_first']:
            self.previous = current
            self.maximum = current
            return 0
        # Only increases are rewarded; dropping items never yields reward.
        reward = self.repeated * max(0, current - self.previous)
        if self.maximum == 0 and current > 0:
            reward += self.once
        self.previous = current
        self.maximum = max(self.maximum, current)
        return reward


class HealthReward:
    """Reward proportional to the change in ``obs['health']``.

    Args:
        scale: Factor applied to the health difference between calls.
    """

    def __init__(self, scale=0.01):
        self.scale = scale
        self.previous = None  # Health seen on the previous call.

    def __call__(self, obs, inventory=None):
        """Return the scaled health change since the previous call.

        Args:
            obs: Observation mapping; ``obs['health']`` and
                ``obs['is_first']`` are read.
            inventory: Unused; accepted so that the call signature matches
                ``CollectReward``.

        Returns:
            ``0`` when there is no previous health to compare against,
            otherwise ``np.float32(scale * (health - previous))``.
        """
        health = obs['health']
        # Treat a missing baseline like a first observation instead of
        # failing on ``health - None``.
        if obs['is_first'] or self.previous is None:
            self.previous = health
            return 0
        reward = self.scale * (health - self.previous)
        self.previous = health
        return np.float32(reward)


# Action-name -> keyword mapping handed to MinecraftBase as its first
# argument; MinecraftDiamond extends it with crafting-related actions.
BASIC_ACTIONS = {
    'noop': dict(),
    'attack': dict(attack=1),
    'turn_up': dict(camera=(-15, 0)),
    'turn_down': dict(camera=(15, 0)),
    'turn_left': dict(camera=(0, -15)),
    'turn_right': dict(camera=(0, 15)),
    'forward': dict(forward=1),
    'back': dict(back=1),
    'left': dict(left=1),
    'right': dict(right=1),
    'jump': dict(jump=1, forward=1),
    'place_dirt': dict(place='dirt'),
}