From e9cb2ae157c7f3c51c622132d2066ab68e1f0d01 Mon Sep 17 00:00:00 2001 From: Dmitry Chichkov Date: Thu, 10 Jan 2019 20:46:56 -0800 Subject: [PATCH] Assuring a non-zero increase of _penalty Does it make sense? --- agents/algorithms/ppo/ppo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/algorithms/ppo/ppo.py b/agents/algorithms/ppo/ppo.py index e62ce94..6359b84 100644 --- a/agents/algorithms/ppo/ppo.py +++ b/agents/algorithms/ppo/ppo.py @@ -530,7 +530,7 @@ def _adjust_penalty(self, observ, old_policy_params, length): kl_change > 1.3 * self._config.kl_target, # pylint: disable=g-long-lambda lambda: tf.Print(self._penalty.assign( - self._penalty * 1.5), [0], 'increase penalty '), + self._penalty * 1.5 + 1e-8), [0], 'increase penalty '), float) maybe_decrease = tf.cond( kl_change < 0.7 * self._config.kl_target,