2017-11-06 13:50:35 +08:00
|
|
|
# Optimizer for policy gradient methods
|
|
|
|
TODO:
|
2017-11-06 15:15:44 +08:00
|
|
|
|
2017-11-06 13:50:35 +08:00
|
|
|
vanilla policy gradient
|
2017-11-06 15:15:44 +08:00
|
|
|
|
|
|
|
policy gradient with a baseline
|
|
|
|
|
2017-11-06 13:50:35 +08:00
|
|
|
REINFORCE
|
2017-11-06 15:15:44 +08:00
|
|
|
|
2017-11-06 13:50:35 +08:00
|
|
|
TRPO (Trust Region Policy Optimization)
|
2017-11-06 15:15:44 +08:00
|
|
|
|
2017-11-06 13:50:35 +08:00
|
|
|
PPO (Proximal Policy Optimization)
|
2017-11-06 15:15:44 +08:00
|
|
|
|
2017-11-06 13:50:35 +08:00
|
|
|
GAE (Generalized Advantage Estimation)
|
2017-11-06 15:15:44 +08:00
|
|
|
|
2017-11-06 13:50:35 +08:00
|
|
|
NAF (Normalized Advantage Functions)
|
2017-11-06 15:15:44 +08:00
|
|
|
|
2017-11-06 13:50:35 +08:00
|
|
|
DPG (Deterministic Policy Gradient)
|
2017-11-06 15:15:44 +08:00
|
|
|
|
|
|
|
ACKTR (Actor-Critic using Kronecker-Factored Trust Region)
|