parent
57bca16f94
commit
6237cc0d52
@@ -150,9 +150,10 @@ class DQNPolicy(BasePolicy):
|
|||||||
# add eps to act
|
# add eps to act
|
||||||
if eps is None:
|
if eps is None:
|
||||||
eps = self.eps
|
eps = self.eps
|
||||||
for i in range(len(q)):
|
if not np.isclose(eps, 0):
|
||||||
if np.random.rand() < eps:
|
for i in range(len(q)):
|
||||||
act[i] = np.random.randint(q.shape[1])
|
if np.random.rand() < eps:
|
||||||
|
act[i] = np.random.randint(q.shape[1])
|
||||||
return Batch(logits=q, act=act, state=h)
|
return Batch(logits=q, act=act, state=h)
|
||||||
|
|
||||||
def learn(self, batch: Batch, **kwargs) -> Dict[str, float]:
|
def learn(self, batch: Batch, **kwargs) -> Dict[str, float]:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user