 from collections import deque

 import functools
-import os

 # Dependency imports

 import gym

-from tensor2tensor.data_generators import generator_utils
 from tensor2tensor.data_generators import problem
 from tensor2tensor.data_generators import video_utils

 from tensor2tensor.rl.envs import tf_atari_wrappers as atari
 from tensor2tensor.rl.envs.utils import batch_env_factory

+from tensor2tensor.utils import metrics
 from tensor2tensor.utils import registry

 import tensorflow as tf
@@ -63,6 +62,12 @@ def num_target_frames(self):
     """Number of frames to batch on one target."""
     return 1

+  def eval_metrics(self):
+    eval_metrics = [
+        metrics.Metrics.ACC, metrics.Metrics.ACC_PER_SEQ,
+        metrics.Metrics.NEG_LOG_PERPLEXITY]
+    return eval_metrics
+
   @property
   def extra_reading_spec(self):
     """Additional data fields to store on disk and their decoders."""
@@ -116,7 +121,8 @@ def hparams(self, defaults, unused_model_hparams):
     p.input_modality = {"inputs": ("video", 256),
                         "input_reward": ("symbol", self.num_rewards),
                         "input_action": ("symbol", self.num_actions)}
-    p.target_modality = ("video", 256)
+    p.target_modality = {"targets": ("video", 256),
+                         "target_reward": ("symbol", self.num_rewards)}
     p.input_space_id = problem.SpaceID.IMAGE
     p.target_space_id = problem.SpaceID.IMAGE
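
With target_modality now a dict, every example supervises two streams: the next frame under "targets" (a video modality over 256 pixel values) and a reward id under "target_reward" (a symbol modality over num_rewards values), so a model trained on this problem must predict both. A rough sketch of inspecting the resulting features, assuming tensor2tensor's standard Problem.dataset() API, TF 1.x sessions, and the snake_case registry name derived from the class defined further below; the data directory is illustrative:

# Sketch only: peek at one example produced by the registered problem.
import tensorflow as tf
from tensor2tensor.utils import registry

gym_problem = registry.problem("gym_discrete_problem_with_agent")  # assumed registry name
dataset = gym_problem.dataset(tf.estimator.ModeKeys.TRAIN, data_dir="/tmp/t2t_data")
features = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
  example = sess.run(features)
  # Both target streams are present after this change.
  print(example["targets"].shape, example["target_reward"])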
@@ -174,34 +180,27 @@ def num_steps(self):
     return 50000


-def moviepy_editor():
-  """Access to moviepy that fails gracefully without a moviepy install."""
-  try:
-    from moviepy import editor  # pylint: disable=g-import-not-at-top
-  except ImportError:
-    raise ImportError("pip install moviepy to record videos")
-  return editor
-
-
 @registry.register_problem
-class GymDiscreteProblemWithAgent(problem.Problem):
-  """Gym environment with discrete actions and rewards."""
+class GymDiscreteProblemWithAgent(GymPongRandom5k):
+  """Gym environment with discrete actions and rewards and an agent."""

   def __init__(self, *args, **kwargs):
     super(GymDiscreteProblemWithAgent, self).__init__(*args, **kwargs)
-    self.num_channels = 3
+    self._env = None
     self.history_size = 2

     # defaults
-    self.environment_spec = lambda: gym.make("PongNoFrameskip-v4")
+    self.environment_spec = lambda: gym.make("PongDeterministic-v4")
     self.in_graph_wrappers = [(atari.MaxAndSkipWrapper, {"skip": 4})]
     self.collect_hparams = rl.atari_base()
-    self.num_steps = 1000
-    self.movies = False
-    self.movies_fps = 24
+    self.settable_num_steps = 1000
     self.simulated_environment = None
     self.warm_up = 70

+  @property
+  def num_steps(self):
+    return self.settable_num_steps
+
   def _setup(self):
     in_graph_wrappers = [(atari.ShiftRewardWrapper, {"add_value": 2}),
                          (atari.MemoryWrapper, {})] + self.in_graph_wrappers
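
Replacing the writable num_steps attribute with settable_num_steps behind a read-only property keeps the Problem-level num_steps interface while letting callers change the rollout length before data generation. A small usage sketch under that assumption; the step count and directories are illustrative:

# Sketch only: lengthen the rollout before generating data.
# generate_data() is the standard tensor2tensor Problem entry point.
gym_problem = GymDiscreteProblemWithAgent()
gym_problem.settable_num_steps = 20000   # default in this change is 1000
assert gym_problem.num_steps == 20000
gym_problem.generate_data(data_dir="/tmp/t2t_data", tmp_dir="/tmp/t2t_tmp")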
@@ -234,85 +233,23 @@ def _setup(self):
     self.data_get_op = atari.MemoryWrapper.singleton.speculum.dequeue()
     self.history_buffer = deque(maxlen=self.history_size + 1)

-  def example_reading_spec(self, label_repr=None):
-    data_fields = {
-        "targets_encoded": tf.FixedLenFeature((), tf.string),
-        "image/format": tf.FixedLenFeature((), tf.string),
-        "action": tf.FixedLenFeature([1], tf.int64),
-        "reward": tf.FixedLenFeature([1], tf.int64),
-        # "done": tf.FixedLenFeature([1], tf.int64)
-    }
-
-    for x in range(self.history_size):
-      data_fields["inputs_encoded_{}".format(x)] = tf.FixedLenFeature(
-          (), tf.string)
-
-    data_items_to_decoders = {
-        "targets": tf.contrib.slim.tfexample_decoder.Image(
-            image_key="targets_encoded",
-            format_key="image/format",
-            shape=[210, 160, 3],
-            channels=3),
-        # Just do a pass through.
-        "action": tf.contrib.slim.tfexample_decoder.Tensor(tensor_key="action"),
-        "reward": tf.contrib.slim.tfexample_decoder.Tensor(tensor_key="reward"),
-    }
-
-    for x in range(self.history_size):
-      key = "inputs_{}".format(x)
-      data_items_to_decoders[key] = tf.contrib.slim.tfexample_decoder.Image(
-          image_key="inputs_encoded_{}".format(x),
-          format_key="image/format",
-          shape=[210, 160, 3],
-          channels=3)
-
-    return data_fields, data_items_to_decoders
-
-  @property
-  def num_actions(self):
-    return 4
-
-  @property
-  def num_rewards(self):
-    return 2
-
-  @property
-  def num_shards(self):
-    return 10
-
-  @property
-  def num_dev_shards(self):
-    return 1
-
-  def get_action(self, observation=None):
-    return self.env.action_space.sample()
-
-  def hparams(self, defaults, unused_model_hparams):
-    p = defaults
-    # The hard coded +1 after "symbol" refers to the fact
-    # that 0 is a special symbol meaning padding
-    # when symbols are e.g. 0, 1, 2, 3 we
-    # shift them to 0, 1, 2, 3, 4.
-    p.input_modality = {"action": ("symbol:identity", self.num_actions)}
-
-    for x in range(self.history_size):
-      p.input_modality["inputs_{}".format(x)] = ("image", 256)
-
-    p.target_modality = {"targets": ("image", 256),
-                         "reward": ("symbol", self.num_rewards + 1)}
-
-    p.input_space_id = problem.SpaceID.IMAGE
-    p.target_space_id = problem.SpaceID.IMAGE
-
-
   def restore_networks(self, sess):
     model_saver = tf.train.Saver(
         tf.global_variables(".*network_parameters.*"))
     if FLAGS.agent_policy_path:
       model_saver.restore(sess, FLAGS.agent_policy_path)

-  def generator(self, data_dir, tmp_dir):
+  def generate_encoded_samples(self, data_dir, tmp_dir, unused_dataset_split):
     self._setup()
-    clip_files = []
+
+    # When no agent_policy_path is set, just generate random samples.
+    if not FLAGS.agent_policy_path:
+      for sample in super(GymDiscreteProblemWithAgent,
+                          self).generate_encoded_samples(
+                              data_dir, tmp_dir, unused_dataset_split):
+        yield sample
+      return
+
     with tf.Session() as sess:
       sess.run(tf.global_variables_initializer())
       self.restore_networks(sess)
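
Replacing the hand-rolled generator/generate_data pair with generate_encoded_samples hands sharding, serialization, and shuffling back to the parent video problem, and adds a fallback: when the agent_policy_path flag is empty, the problem yields the parent class's random-agent samples instead of rolling out a restored policy. A sketch of setting that flag programmatically, assuming TF 1.x flags and a policy checkpoint trained elsewhere; the path is purely illustrative:

# Sketch only: the flag that gates the two code paths above is defined in this
# module and read via FLAGS.agent_policy_path; leave it unset to get random
# rollouts from the parent problem.
import tensorflow as tf

FLAGS = tf.flags.FLAGS
FLAGS.agent_policy_path = "/tmp/ppo_policy/model.ckpt-1000"  # hypothetical checkpoint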
@@ -324,61 +261,33 @@ def generator(self, data_dir, tmp_dir):
           observ, reward, action, _ = sess.run(self.data_get_op)
           self.history_buffer.append(observ)

-          if self.movies and pieces_generated > self.warm_up:
-            file_name = os.path.join(tmp_dir,
-                                     "output_{}.png".format(pieces_generated))
-            clip_files.append(file_name)
-            with open(file_name, "wb") as f:
-              f.write(observ)
-
-          if len(self.history_buffer) == self.history_size + 1:
+          if len(self.history_buffer) == self.history_size + 1:
             pieces_generated += 1
-            ret_dict = {
-                "targets_encoded": [observ],
-                "image/format": ["png"],
-                "action": [int(action)],
-                # "done": [bool(done)],
-                "reward": [int(reward)],
-            }
-            for i, v in enumerate(list(self.history_buffer)[:-1]):
-              ret_dict["inputs_encoded_{}".format(i)] = [v]
+            ret_dict = {"image/encoded": [observ],
+                        "image/format": ["png"],
+                        "image/height": [self.frame_height],
+                        "image/width": [self.frame_width],
+                        "action": [int(action)],
+                        "done": [int(False)],
+                        "reward": [int(reward) - self.min_reward]}
             if pieces_generated > self.warm_up:
               yield ret_dict
         else:
           sess.run(self.collect_trigger_op)

-    if self.movies:
-      clip = moviepy_editor().ImageSequenceClip(clip_files, fps=self.movies_fps)
-      clip_path = os.path.join(data_dir, "output_{}.mp4".format(self.name))
-      clip.write_videofile(clip_path, fps=self.movies_fps, codec="mpeg4")
-
-  def generate_data(self, data_dir, tmp_dir, task_id=-1):
-    train_paths = self.training_filepaths(
-        data_dir, self.num_shards, shuffled=False)
-    dev_paths = self.dev_filepaths(
-        data_dir, self.num_dev_shards, shuffled=False)
-    all_paths = train_paths + dev_paths
-    generator_utils.generate_files(
-        self.generator(data_dir, tmp_dir), all_paths)
-    generator_utils.shuffle_dataset(all_paths)
-

 @registry.register_problem
 class GymSimulatedDiscreteProblemWithAgent(GymDiscreteProblemWithAgent):
   """Simulated gym environment with discrete actions and rewards."""

   def __init__(self, *args, **kwargs):
     super(GymSimulatedDiscreteProblemWithAgent, self).__init__(*args, **kwargs)
-    # TODO(lukaszkaiser): pull it outside
-    self.in_graph_wrappers = [(atari.TimeLimitWrapper, {"timelimit": 150}),
-                              (atari.MaxAndSkipWrapper, {"skip": 4})]
     self.simulated_environment = True
-    self.movies_fps = 2
+    self.debug_dump_frames_path = "/tmp/t2t_debug_dump_frames"

   def restore_networks(self, sess):
     super(GymSimulatedDiscreteProblemWithAgent, self).restore_networks(sess)
-
-    # TODO(lukaszkaiser): adjust regexp for different models
+    # TODO(blazej): adjust regexp for different models.
     env_model_loader = tf.train.Saver(tf.global_variables(".*basic_conv_gen.*"))
     sess = tf.get_default_session()