Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 2cc873a

Browse files
Lukasz Kaiser and Copybara-Service
authored and committed
Make Transformer decoding stable (don't fail on each Unicode error, work around missing hparams).
PiperOrigin-RevId: 219209875
1 parent 42778f4 commit 2cc873a

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

tensor2tensor/data_generators/text_encoder.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,14 @@
6060

6161
# Unicode utility functions that work with Python 2 and 3
6262
def native_to_unicode(s):
63-
return s if is_unicode(s) else to_unicode(s)
63+
if is_unicode(s):
64+
return s
65+
try:
66+
return to_unicode(s)
67+
except UnicodeDecodeError:
68+
res = to_unicode(s, ignore_errors=True)
69+
tf.logging.info("Ignoring Unicode error, outputting: %s" % res)
70+
return res
6471

6572

6673
def unicode_to_native(s):
@@ -1057,4 +1064,3 @@ def decode(self, ids, strip_extraneous=False):
10571064
"""
10581065
del strip_extraneous
10591066
return " ".join([str(i) for i in ids])
1060-

tensor2tensor/data_generators/translate.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,8 @@ def compute_bleu_summaries(hook_args):
8181
decode_hparams = hook_args.decode_hparams
8282
estimator = hook_args.estimator
8383
current_step = estimator.get_variable_value(tf.GraphKeys.GLOBAL_STEP)
84-
if current_step and decode_hparams.iterations_per_loop:
84+
has_iters = hasattr(decode_hparams, "iterations_per_loop")
85+
if current_step and has_iters and decode_hparams.iterations_per_loop:
8586
iterations_per_loop = decode_hparams.iterations_per_loop
8687
current_epoch = np.asscalar(current_step) // iterations_per_loop
8788
else:

0 commit comments

Comments
 (0)