dreamerv3-torch/envs/minecraft.py
2023-07-02 11:29:48 +09:00

155 lines
4.5 KiB
Python

import numpy as np
from . import minecraft_base
import gym
def make_env(task, *args, **kwargs):
    """Instantiate the Minecraft environment registered under ``task``.

    Args:
        task: Task name; one of ``'wood'``, ``'climb'`` or ``'diamond'``.
        *args: Positional arguments passed through to the environment class.
        **kwargs: Keyword arguments passed through to the environment class.

    Returns:
        A new instance of the environment class selected by ``task``.

    Raises:
        KeyError: If ``task`` is not one of the registered names.
    """
    registry = {
        'wood': MinecraftWood,
        'climb': MinecraftClimb,
        'diamond': MinecraftDiamond,
    }
    env_cls = registry[task]
    return env_cls(*args, **kwargs)
class MinecraftWood:
    """Minecraft task that rewards collecting logs and changes in health.

    Wraps a ``minecraft_base.MinecraftBase`` built with ``BASIC_ACTIONS`` and
    replaces the reward of every step with the sum of the reward functions
    stored in ``self.rewards``.
    """

    def __init__(self, *args, **kwargs):
        """Create the reward functions and the wrapped base environment.

        Args:
            *args: Positional arguments forwarded to ``MinecraftBase``.
            **kwargs: Keyword arguments forwarded to ``MinecraftBase``.
        """
        actions = BASIC_ACTIONS
        self.rewards = [
            CollectReward('log', repeated=1),
            HealthReward(),
        ]
        # The base environment must be stored on the instance: step() and
        # reset() both go through ``self.env``.
        self.env = minecraft_base.MinecraftBase(actions, *args, **kwargs)

    def __getattr__(self, name):
        """Forward unknown public attributes to the wrapped environment.

        Raises:
            AttributeError: For private names and for ``env`` itself, which
                prevents infinite recursion while ``self.env`` is not set.
        """
        # Only invoked when normal attribute lookup has already failed.
        if name.startswith('_') or name == 'env':
            raise AttributeError(name)
        return getattr(self.env, name)

    def reset(self):
        """Reset the wrapped environment and re-baseline the reward functions.

        Returns:
            The observation returned by the wrapped environment's ``reset``.
        """
        obs = self.env.reset()
        # Results are discarded; the calls only let each reward function
        # record its starting state for the new episode.
        for fn in self.rewards:
            fn(obs, self.env.inventory)
        return obs

    def step(self, action):
        """Advance the wrapped environment and recompute the reward.

        Args:
            action: Action passed unchanged to the wrapped environment.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``reward`` is the sum of
            all reward functions and is also stored in ``obs['reward']``.
        """
        # The reward reported by the base environment is replaced below.
        obs, _, done, info = self.env.step(action)
        reward = sum(fn(obs, self.env.inventory) for fn in self.rewards)
        obs['reward'] = reward
        return obs, reward, done, info
class MinecraftClimb:
    """Minecraft task that rewards gaining height and changes in health.

    Wraps a ``minecraft_base.MinecraftBase`` built with ``BASIC_ACTIONS``.
    The per-step reward is the change of the second component of
    ``obs['log_player_pos']`` since the previous step, plus the health reward.
    """

    def __init__(self, *args, **kwargs):
        """Create the wrapped base environment and the reward state.

        Args:
            *args: Positional arguments forwarded to ``MinecraftBase``.
            **kwargs: Keyword arguments forwarded to ``MinecraftBase``.
        """
        actions = BASIC_ACTIONS
        # The base environment must be stored on the instance: step() and
        # reset() both go through ``self.env``.
        self.env = minecraft_base.MinecraftBase(actions, *args, **kwargs)
        # Height seen on the previous step; None means "no baseline yet".
        self._previous = None
        self._health_reward = HealthReward()

    def __getattr__(self, name):
        """Forward unknown public attributes to the wrapped environment.

        Raises:
            AttributeError: For private names and for ``env`` itself, which
                prevents infinite recursion while ``self.env`` is not set.
        """
        # Only invoked when normal attribute lookup has already failed.
        if name.startswith('_') or name == 'env':
            raise AttributeError(name)
        return getattr(self.env, name)

    def reset(self):
        """Reset the wrapped environment and clear the reward baselines.

        Returns:
            The observation returned by the wrapped environment's ``reset``.
        """
        obs = self.env.reset()
        # Drop the height of the previous episode; the next step() takes its
        # own height as the new baseline and therefore yields no height reward.
        self._previous = None
        # Result discarded: the call only records the starting health.
        self._health_reward(obs)
        return obs

    def step(self, action):
        """Advance the wrapped environment and compute the climbing reward.

        Args:
            action: Action passed unchanged to the wrapped environment.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``reward`` is the height
            gained since the previous step plus the health reward; it is also
            stored in ``obs['reward']``.
        """
        # The reward reported by the base environment is replaced below.
        obs, _, done, info = self.env.step(action)
        _, y, _ = obs['log_player_pos']
        height = np.float32(y)
        # Without a baseline (new episode or very first call) the height
        # difference is defined as zero instead of failing on ``None``.
        if obs['is_first'] or self._previous is None:
            self._previous = height
        reward = height - self._previous
        reward += self._health_reward(obs)
        obs['reward'] = reward
        self._previous = height
        return obs, reward, done, info
class MinecraftDiamond(gym.Wrapper):
    """Minecraft task with one-time rewards along the path to a diamond.

    Extends ``BASIC_ACTIONS`` with crafting, placing, equipping and smelting
    actions, and replaces the reward of every step with the sum of the reward
    functions stored in ``self.rewards``.
    """

    def __init__(self, *args, **kwargs):
        """Build the action table, the reward functions and the base env.

        Args:
            *args: Positional arguments forwarded to ``MinecraftBase``.
            **kwargs: Keyword arguments forwarded to ``MinecraftBase``.
        """
        # Basic actions first, then the task-specific ones, in a fixed order.
        actions = dict(BASIC_ACTIONS)
        actions.update({
            'craft_planks': {'craft': 'planks'},
            'craft_stick': {'craft': 'stick'},
            'craft_crafting_table': {'craft': 'crafting_table'},
            'place_crafting_table': {'place': 'crafting_table'},
            'craft_wooden_pickaxe': {'nearbyCraft': 'wooden_pickaxe'},
            'craft_stone_pickaxe': {'nearbyCraft': 'stone_pickaxe'},
            'craft_iron_pickaxe': {'nearbyCraft': 'iron_pickaxe'},
            'equip_stone_pickaxe': {'equip': 'stone_pickaxe'},
            'equip_wooden_pickaxe': {'equip': 'wooden_pickaxe'},
            'equip_iron_pickaxe': {'equip': 'iron_pickaxe'},
            'craft_furnace': {'nearbyCraft': 'furnace'},
            'place_furnace': {'place': 'furnace'},
            'smelt_iron_ingot': {'nearbySmelt': 'iron_ingot'},
        })
        # Each listed item pays a one-time reward of 1 per episode.
        milestones = (
            'log',
            'planks',
            'stick',
            'crafting_table',
            'wooden_pickaxe',
            'cobblestone',
            'stone_pickaxe',
            'iron_ore',
            'furnace',
            'iron_ingot',
            'iron_pickaxe',
            'diamond',
        )
        self.rewards = [CollectReward(item, once=1) for item in milestones]
        self.rewards.append(HealthReward())
        base_env = minecraft_base.MinecraftBase(actions, *args, **kwargs)
        super().__init__(base_env)

    def step(self, action):
        """Advance the wrapped environment and recompute the reward.

        Args:
            action: Action passed unchanged to the wrapped environment.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``reward`` is the sum of
            all reward functions and is also stored in ``obs['reward']``.
        """
        # The reward reported by the base environment is replaced below.
        obs, _, done, info = self.env.step(action)
        total = sum(fn(obs, self.env.inventory) for fn in self.rewards)
        obs['reward'] = total
        return obs, total, done, info

    def reset(self):
        """Reset the wrapped environment and re-baseline the reward functions.

        Returns:
            The observation returned by the wrapped environment's ``reset``.
        """
        obs = self.env.reset()
        # Results are discarded; the calls only reset the reward calculations.
        for fn in self.rewards:
            fn(obs, self.env.inventory)
        return obs
class CollectReward:
    """Reward for gathering one inventory item during an episode.

    ``once`` is paid the first time the item count becomes positive in an
    episode that started without the item; ``repeated`` is paid per unit of
    increase in the item count between consecutive calls.
    """

    def __init__(self, item, once=0, repeated=0):
        """Store the reward configuration.

        Args:
            item: Key looked up in the inventory passed to ``__call__``.
            once: Reward paid a single time per episode.
            repeated: Reward paid for every additional unit collected.
        """
        self.item = item
        self.once = once
        self.repeated = repeated
        # Item count at the previous call and the highest count this episode.
        self.previous = 0
        self.maximum = 0

    def __call__(self, obs, inventory):
        """Return the reward for the current inventory state.

        Args:
            obs: Observation mapping; only ``obs['is_first']`` is read.
            inventory: Mapping from item name to the current count.

        Returns:
            ``0`` on the first observation of an episode, otherwise the sum of
            the repeated and one-time rewards earned since the previous call.
        """
        count = inventory[self.item]
        if obs['is_first']:
            # New episode: whatever is already held becomes the baseline.
            self.previous = self.maximum = count
            return 0
        gained = max(0, count - self.previous)
        reward = self.repeated * gained
        first_time = self.maximum == 0 and count > 0
        if first_time:
            reward += self.once
        self.maximum = max(self.maximum, count)
        self.previous = count
        return reward
class HealthReward:
    """Reward proportional to the change in health between calls."""

    def __init__(self, scale=0.01):
        """Store the scale factor.

        Args:
            scale: Multiplier applied to the change in ``obs['health']``.
        """
        self.scale = scale
        # Health seen at the previous call; None means "no baseline yet".
        self.previous = None

    def __call__(self, obs, inventory=None):
        """Return the scaled health change since the previous call.

        Args:
            obs: Observation mapping; ``obs['health']`` and ``obs['is_first']``
                are read.
            inventory: Unused; accepted so the call signature matches
                ``CollectReward``.

        Returns:
            ``0`` when the observation starts an episode or no baseline has
            been recorded yet, otherwise the change as ``np.float32``.
        """
        health = obs['health']
        # A missing baseline is handled like the first observation, so a call
        # made before any ``is_first`` observation yields 0 instead of
        # failing on ``health - None``.
        if obs['is_first'] or self.previous is None:
            self.previous = health
            return 0
        reward = self.scale * (health - self.previous)
        self.previous = health
        return np.float32(reward)
# Named actions shared by all tasks in this module. Each value is the set of
# fields that the action sets; the table is handed to MinecraftBase as-is.
BASIC_ACTIONS = {
    'noop': {},
    'attack': {'attack': 1},
    # Camera moves in steps of 15; the tuple layout is presumably
    # (pitch, yaw) -- confirm against MinecraftBase.
    'turn_up': {'camera': (-15, 0)},
    'turn_down': {'camera': (15, 0)},
    'turn_left': {'camera': (0, -15)},
    'turn_right': {'camera': (0, 15)},
    # Movement.
    'forward': {'forward': 1},
    'back': {'back': 1},
    'left': {'left': 1},
    'right': {'right': 1},
    # Jumping also sets the forward field.
    'jump': {'jump': 1, 'forward': 1},
    'place_dirt': {'place': 'dirt'},
}