internal merge of PR #872

Ryan Sepassi · Copybara-Service · commit ac8f6e3bea7b · 2018-08-03T10:53:04.000-07:00
PiperOrigin-RevId: 207290554
diff --git a/.travis.yml b/.travis.yml
@@ -60,7 +60,6 @@ script:
   #   * visualization_test
   #   * model_rl_experiment_test
   #   * allen_brain_test
-  #   * allen_brain_utils_test
   #   * model_rl_experiment_stochastic_test
   #   * models/research
   # algorithmic_math_test: flaky
@@ -74,14 +73,12 @@ script:
     --ignore=tensor2tensor/models/research/universal_transformer_test.py
     --ignore=tensor2tensor/rl/model_rl_experiment_test.py
     --ignore=tensor2tensor/data_generators/allen_brain_test.py
-    --ignore=tensor2tensor/data_generators/allen_brain_utils_test.py
     --ignore=tensor2tensor/rl/model_rl_experiment_stochastic_test.py
     --ignore=tensor2tensor/models/research
   - pytest tensor2tensor/utils/registry_test.py
   - pytest tensor2tensor/utils/trainer_lib_test.py
   - pytest tensor2tensor/visualization/visualization_test.py
   - pytest tensor2tensor/data_generators/allen_brain_test.py
-  - pytest tensor2tensor/data_generators/allen_brain_utils_test.py
   - if [[ "$TF_VERSION" == "$TF_LATEST"  ]] || [[ "$TF_VERSION" == "tf-nightly"  ]];
     then
       pytest tensor2tensor/models/research;
diff --git a/setup.py b/setup.py
@@ -58,7 +58,7 @@
             # explicit pip install gym[atari] for the tests.
             # 'gym[atari]',
         ],
-        'allen': ['Pillow==5.1.0', 'pandas==0.23.0']
+        'allen': ['Pillow==5.1.0', 'pandas==0.23.0'],
     },
     classifiers=[
         'Development Status :: 4 - Beta',
diff --git a/tensor2tensor/data_generators/allen_brain.py b/tensor2tensor/data_generators/allen_brain.py
@@ -1,16 +1,17 @@
 # coding=utf-8
+# Copyright 2018 The Tensor2Tensor Authors.
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#   http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Problem definitions for Allen Brain Atlas problems.
 
 Notes:
@@ -28,18 +29,17 @@
 
 from io import BytesIO
 import math
-import numpy as np
 import os
+
+import numpy as np
 import requests
 
 from tensor2tensor.data_generators import generator_utils
 from tensor2tensor.data_generators import image_utils
 from tensor2tensor.data_generators import problem
 from tensor2tensor.data_generators import text_encoder
-from tensor2tensor.utils import registry
 from tensor2tensor.utils import metrics
-
-from tensor2tensor.data_generators.allen_brain_utils import try_importing_pil_image
+from tensor2tensor.utils import registry
 
 import tensorflow as tf
 
@@ -52,23 +52,28 @@
 # the steps described here: http://help.brain-map.org/display/api,
 # e.g. https://gist.github.com/cwbeitel/5dffe90eb561637e35cdf6aa4ee3e704
 _IMAGE_IDS = [
-    '74887117', '71894997', '69443979', '79853548', '101371232', '77857182',
-    '70446772', '68994990', '69141561', '70942310', '70942316', '68298378',
-    '69690156', '74364867', '77874134', '75925043', '73854431', '69206601',
-    '71771457', '101311379', '74777533', '70960269', '71604493', '102216720',
-    '74776437', '75488723', '79815814', '77857132', '77857138', '74952778',
-    '69068486', '648167', '75703410', '74486118', '77857098', '637407',
-    '67849516', '69785503', '71547630', '69068504', '69184074', '74853078',
-    '74890694', '74890698', '75488687', '71138602', '71652378', '68079764',
-    '70619061', '68280153', '73527042', '69764608', '68399025', '244297',
-    '69902658', '68234159', '71495521', '74488395', '73923026', '68280155',
-    '75488747', '69589140', '71342189', '75119214', '79455452', '71774294',
-    '74364957', '68031779', '71389422', '67937572', '69912671', '73854471',
-    '75008183', '101371376', '75703290', '69533924', '79853544', '77343882',
-    '74887133', '332587', '69758622', '69618413', '77929999', '244293',
-    '334792', '75825136', '75008103', '70196678', '71883965', '74486130',
-    '74693566', '76107119', '76043858', '70252433', '68928364', '74806345',
-    '67848661', '75900326', '71773690', '75008171']
+    "74887117", "71894997", "69443979", "79853548", "101371232", "77857182",
+    "70446772", "68994990", "69141561", "70942310", "70942316", "68298378",
+    "69690156", "74364867", "77874134", "75925043", "73854431", "69206601",
+    "71771457", "101311379", "74777533", "70960269", "71604493", "102216720",
+    "74776437", "75488723", "79815814", "77857132", "77857138", "74952778",
+    "69068486", "648167", "75703410", "74486118", "77857098", "637407",
+    "67849516", "69785503", "71547630", "69068504", "69184074", "74853078",
+    "74890694", "74890698", "75488687", "71138602", "71652378", "68079764",
+    "70619061", "68280153", "73527042", "69764608", "68399025", "244297",
+    "69902658", "68234159", "71495521", "74488395", "73923026", "68280155",
+    "75488747", "69589140", "71342189", "75119214", "79455452", "71774294",
+    "74364957", "68031779", "71389422", "67937572", "69912671", "73854471",
+    "75008183", "101371376", "75703290", "69533924", "79853544", "77343882",
+    "74887133", "332587", "69758622", "69618413", "77929999", "244293",
+    "334792", "75825136", "75008103", "70196678", "71883965", "74486130",
+    "74693566", "76107119", "76043858", "70252433", "68928364", "74806345",
+    "67848661", "75900326", "71773690", "75008171"]
+
+
+def PIL_Image():  # pylint: disable=invalid-name
+  from PIL import Image  # pylint: disable=g-import-not-at-top
+  return Image
 
 
 def _get_case_file_paths(tmp_dir, case, training_fraction=0.95):
@@ -77,14 +82,17 @@ def _get_case_file_paths(tmp_dir, case, training_fraction=0.95):
   Args:
     tmp_dir: str, the root path to which raw images were written, at the
       top level having meta/ and raw/ subdirs.
-    size: int, the size of sub-images to consider (`size`x`size`).
     case: bool, whether obtaining file paths for training (true) or eval
       (false).
     training_fraction: float, the fraction of the sub-image path list to
       consider as the basis for training examples.
 
   Returns:
     list: A list of file paths.
+
+  Raises:
+    ValueError: if images are not found in tmp_dir, or if training_fraction
+      would leave no examples for eval.
   """
 
   paths = tf.gfile.Glob("%s/*.jpg" % tmp_dir)
@@ -146,7 +154,7 @@ def maybe_download_image_dataset(image_ids, target_dir):
 
     response.raise_for_status()
 
-    with open(tmp_destination, "w") as f:
+    with tf.gfile.Open(tmp_destination, "w") as f:
       for block in response.iter_content(1024):
         f.write(block)
 
@@ -159,7 +167,6 @@ def random_square_mask(shape, fraction):
   Args:
     shape: tuple, shape of the mask to create.
     fraction: float, fraction of the mask area to populate with `mask_scalar`.
-    mask_scalar: float, the scalar to apply to the otherwise 1-valued mask.
 
   Returns:
     numpy.array: A numpy array storing the mask.
@@ -191,6 +198,8 @@ def _generator(tmp_dir, training, size=_BASE_EXAMPLE_IMAGE_SIZE,
       alternatively, evaluation), determining whether examples in tmp_dir
       prefixed with train or dev will be used.
     size: int, the image size to add to the example annotation.
+    training_fraction: float, the fraction of the sub-image path list to
+      consider as the basis for training examples.
 
   Yields:
     A dictionary representing the images with the following fields:
@@ -207,7 +216,7 @@ def _generator(tmp_dir, training, size=_BASE_EXAMPLE_IMAGE_SIZE,
                                      case=training,
                                      training_fraction=training_fraction)
 
-  image_obj = try_importing_pil_image()
+  image_obj = PIL_Image()
 
   tf.logging.info("Loaded case file paths (n=%s)" % len(image_files))
   height = size
@@ -230,8 +239,7 @@ def _generator(tmp_dir, training, size=_BASE_EXAMPLE_IMAGE_SIZE,
         v_end = v_offset + size - 1
 
         # Extract a sub-image tile.
-        # pylint: disable=invalid-sequence-index
-        subimage = np.uint8(img[h_offset:h_end, v_offset:v_end])
+        subimage = np.uint8(img[h_offset:h_end, v_offset:v_end])  # pylint: disable=invalid-sequence-index
 
         # Filter images that are likely background (not tissue).
         if np.amax(subimage) < 230:
diff --git a/tensor2tensor/data_generators/allen_brain_test.py b/tensor2tensor/data_generators/allen_brain_test.py
@@ -1,29 +1,103 @@
 # coding=utf-8
+# Copyright 2018 The Tensor2Tensor Authors.
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#   http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Tests of the Allen Brain Atlas problems."""
 
-import tensorflow as tf
-from tensorflow.contrib.eager.python import tfe
+import os
+import shutil
+import tempfile
+
+import numpy as np
 
 from tensor2tensor.data_generators import allen_brain
-from tensor2tensor.data_generators.allen_brain import _generator
-from tensor2tensor.data_generators.allen_brain_utils import mock_raw_data
-from tensor2tensor.data_generators.allen_brain_utils import TemporaryDirectory
 from tensor2tensor.models import image_transformer_2d
 
+import tensorflow as tf
+
+tfe = tf.contrib.eager
 tfe.enable_eager_execution()
-Modes = tf.estimator.ModeKeys
+Modes = tf.estimator.ModeKeys  # pylint: disable=invalid-name
+
+
+def mock_raw_image(x_dim=1024, y_dim=1024, num_channels=3,
+                   output_path=None, write_image=True):
+  """Generate a random `x_dim` by `y_dim` image, optionally saved as a JPEG.
+
+  Args:
+    x_dim: int, the x dimension of generated raw image.
+    y_dim: int, the y dimension of generated raw image.
+    num_channels: int, number of channels in image.
+    output_path: str, path to which to write image.
+    write_image: bool, whether to write the image to output_path.
+
+  Returns:
+    numpy.array: The random `x_dim` by `y_dim` image (i.e. array).
+  """
+
+  rand_shape = (x_dim, y_dim, num_channels)
+
+  if num_channels != 3:
+    raise NotImplementedError("mock_raw_image for channels != 3 not yet "
+                              "implemented.")
+
+  img = np.random.random(rand_shape)
+  img = np.uint8(img*255)
+
+  if write_image:
+    image_obj = allen_brain.PIL_Image()
+    pil_img = image_obj.fromarray(img, mode="RGB")
+    with tf.gfile.Open(output_path, "w") as f:
+      pil_img.save(f, "jpeg")
+
+  return img
+
+
+def mock_raw_data(tmp_dir, raw_dim=1024, num_channels=3, num_images=1):
+  """Mock a raw data download directory with meta and raw subdirs.
+
+  Notes:
+
+    * This utility was previously imported from allen_brain_utils; it now
+      lives here alongside the tests that use it.
+
+  Args:
+    tmp_dir: str, temporary dir in which to mock data.
+    raw_dim: int, the x and y dimension of generated raw imgs.
+    num_channels: int, number of channels in image.
+    num_images: int, number of images to mock.
+  """
+
+  tf.gfile.MakeDirs(tmp_dir)
+
+  for image_id in range(num_images):
+
+    raw_image_path = os.path.join(tmp_dir, "%s.jpg" % image_id)
+
+    mock_raw_image(x_dim=raw_dim, y_dim=raw_dim,
+                   num_channels=num_channels,
+                   output_path=raw_image_path)
+
+
+class TemporaryDirectory(object):
+  """For py2 support of `with tempfile.TemporaryDirectory() as name:`."""
+
+  def __enter__(self):
+    self.name = tempfile.mkdtemp()
+    return self.name
+
+  def __exit__(self, exc_type, exc_value, traceback):
+    shutil.rmtree(self.name)
 
 
 class TestAllenBrain(tf.test.TestCase):
@@ -32,10 +106,6 @@ class TestAllenBrain(tf.test.TestCase):
   def setUp(self):
 
     self.all_problems = [
-        #allen_brain.Img2imgAllenBrain,
-        #allen_brain.Img2imgAllenBrainDim48to64,
-        #allen_brain.Img2imgAllenBrainDim8to32,
-        #allen_brain.Img2imgAllenBrainDim16to32,
         allen_brain.Img2imgAllenBrainDim16to16Paint1
     ]
 
@@ -45,7 +115,7 @@ def test_generator_produces_examples(self):
     for is_training in [True, False]:
       with TemporaryDirectory() as tmp_dir:
         mock_raw_data(tmp_dir, raw_dim=256, num_images=100)
-        for example in _generator(tmp_dir, is_training):
+        for example in allen_brain._generator(tmp_dir, is_training):
           for key in ["image/encoded", "image/format",
                       "image/height", "image/width"]:
             self.assertTrue(key in example.keys())
@@ -170,5 +240,48 @@ def loss_fn(features):
                           256))
 
 
+class TestImageMock(tf.test.TestCase):
+  """Tests of image mocking utility."""
+
+  def test_image_mock_produces_expected_shape(self):
+    """Test that the image mocking utility produces expected shape output."""
+
+    with TemporaryDirectory() as tmp_dir:
+
+      cases = [
+          {
+              "x_dim": 8,
+              "y_dim": 8,
+              "num_channels": 3,
+              "output_path": "/foo",
+              "write_image": True
+          }
+      ]
+
+      for cid, case in enumerate(cases):
+        output_path = os.path.join(tmp_dir, "dummy%s.jpg" % cid)
+        img = mock_raw_image(x_dim=case["x_dim"],
+                             y_dim=case["y_dim"],
+                             num_channels=case["num_channels"],
+                             output_path=output_path,
+                             write_image=case["write_image"])
+
+        self.assertEqual(img.shape, (case["x_dim"], case["y_dim"],
+                                     case["num_channels"]))
+        if case["write_image"]:
+          self.assertTrue(tf.gfile.Exists(output_path))
+
+
+class TestMockRawData(tf.test.TestCase):
+  """Tests of raw data mocking utility."""
+
+  def test_runs(self):
+    """Test that data mocking utility runs for cases expected to succeed."""
+
+    with TemporaryDirectory() as tmp_dir:
+
+      mock_raw_data(tmp_dir, raw_dim=256, num_channels=3, num_images=40)
+
+
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensor2tensor/data_generators/allen_brain_utils.py b/tensor2tensor/data_generators/allen_brain_utils.py
diff --git a/tensor2tensor/data_generators/allen_brain_utils_test.py b/tensor2tensor/data_generators/allen_brain_utils_test.py