
Commit b1da810

Author: piotrmilos
Commit message: stylistic fixes
1 parent: d59fa26

File tree

1 file changed (+5, -5 lines)


tensor2tensor/rl/ppo.py

Lines changed: 5 additions & 5 deletions
@@ -20,15 +20,15 @@
 
 import tensorflow as tf
 
-def get_otimiser(config):
+
+def get_optimiser(config):
 
   if config.optimizer=='Adam':
     return tf.train.AdamOptimizer(config.learning_rate)
 
   return config.optimizer(config.learning_rate)
 
 
-
 
 def define_ppo_step(observation, action, reward, done, value, old_pdf,
                     policy_factory, config):
 
@@ -58,7 +58,7 @@ def define_ppo_step(observation, action, reward, done, value, old_pdf,
   entropy = new_policy_dist.entropy()
   entropy_loss = -config.entropy_loss_coef * tf.reduce_mean(entropy)
 
-  optimizer = get_otimiser(config)
+  optimizer = get_optimiser(config)
   losses = [policy_loss, value_loss, entropy_loss]
 
   gradients = [list(zip(*optimizer.compute_gradients(loss))) for loss in losses]
@@ -86,8 +86,8 @@ def define_ppo_epoch(memory, policy_factory, config):
   old_pdf = tf.stop_gradient(old_pdf)
 
   ppo_step_rets = tf.scan(
-      lambda a, x: define_ppo_step(observation, action, reward, done, value,
-                                   old_pdf, policy_factory, config),
+      lambda _1, _2: define_ppo_step(observation, action, reward, done, value,
+                                     old_pdf, policy_factory, config),
       tf.range(config.optimization_epochs),
       [0., 0., 0., 0., 0., 0.],
       parallel_iterations=1)
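
For context on the renamed lambda arguments in the last hunk: the callable passed to tf.scan ignores both the accumulator and the current element of tf.range(config.optimization_epochs), so define_ppo_step is simply re-run once per optimization epoch and its outputs are stacked. Below is a minimal sketch of that pattern; the names _fake_ppo_step and num_epochs are stand-ins and not part of the commit.

import tensorflow as tf

def _fake_ppo_step():
  # Stand-in for define_ppo_step: returns a single scalar "loss" per call.
  return tf.constant(1.0)

num_epochs = 3  # stand-in for config.optimization_epochs

# The lambda ignores both the running accumulator and the epoch index,
# so the same step runs once per element of tf.range(num_epochs) and the
# per-epoch results are stacked along axis 0.
per_epoch_losses = tf.scan(
    lambda _accumulator, _epoch_index: _fake_ppo_step(),
    tf.range(num_epochs),
    initializer=tf.constant(0.0),
    parallel_iterations=1)
# per_epoch_losses has shape [num_epochs]; in ppo.py the initializer is a
# list of six zeros, so six stacked tensors come back instead of one.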
