fix #77

2020-06-10 12:06:56 +08:00 · 2020-06-10 12:06:56 +08:00 · 397e92b0fc
commit 397e92b0fc
parent f1951780ab
3 changed files with 6 additions and 8 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,3 @@
+# Contributing to Tianshou
+
+Please refer to [tianshou.readthedocs.io/en/latest/contributing.html](https://tianshou.readthedocs.io/en/latest/contributing.html).
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -1,5 +0,0 @@
-========================
-Contributing to Tianshou
-========================
-
-Please refer to https://tianshou.readthedocs.io/en/latest/contributing.html 
--- a/tianshou/policy/modelfree/pg.py
+++ b/tianshou/policy/modelfree/pg.py
@@ -3,7 +3,7 @@ import numpy as np
 from typing import Dict, List, Union, Optional

 from tianshou.policy import BasePolicy
-from tianshou.data import Batch, ReplayBuffer, to_torch
+from tianshou.data import Batch, ReplayBuffer, to_torch_as


 class PGPolicy(BasePolicy):
@@ -88,8 +88,8 @@ class PGPolicy(BasePolicy):
            for b in batch.split(batch_size):
                self.optim.zero_grad()
                dist = self(b).dist
-                a = to_torch(b.act, device=dist.logits.device)
-                r = to_torch(b.returns, device=dist.logits.device)
+                a = to_torch_as(b.act, dist.logits)
+                r = to_torch_as(b.returns, dist.logits)
                loss = -(dist.log_prob(a) * r).sum()
                loss.backward()
                self.optim.step()