Writing of summaries for TensorBoard is now activated through summaries_path.

FrederikHeber · FrederikHeber · commit 3f2d551347ec · 2018-06-15T23:39:18.000+01:00
- also avoided summary evaluation when not requested. This is quite a gain in
  performance.
- this holds for both sample() and train().
- TESTS: Added regression test on summaries_path for TATiSampler and
  TATiOptimizer.
- DOCU: Added explanation for summaries_path to userguide.
diff --git a/doc/userguide/userguide.xml b/doc/userguide/userguide.xml
@@ -1794,6 +1794,26 @@ sys.path.insert(1,"&lt;path_to_TATi&gt;/lib/python3.5/site-packages/")</programl
       an estimate of the remaining run time is given.</command></note>
     </section>
 
+    <section xml:id="reference.miscellaneous.summaries">
+      <title>Tensorflow summaries</title>
+      <para>Tensorflow delivers a powerful instrument for inspecting the inner
+      workings of its computational graph: TensorBoard.</para>
+      <para>This tool also allows inspecting values such as the activation
+      histogram, the loss, the accuracy, and many other parameters and
+      values internal to TATi.</para>
+      <para>When a path such as "/foo/bar" that is present in the file system
+      is supplied via the <command>summaries_path</command> variable, summaries
+      are automatically written to that path and can be inspected with the
+      following call to tensorboard.</para>
+      <programlisting>tensorboard --logdir /foo/bar</programlisting>
+      <para>TensorBoard essentially comprises a web server that renders
+      the nodes of the graph and figures of the inspected values inside a
+      web page. On execution it provides a URL that needs to be opened in a
+      web browser to access that page.</para>
+      <note>The accumulation and writing of the summaries has quite an impact
+      on TATi's overall performance and is therefore switched off by default.</note>
+    </section>
+
   </section>
 </chapter>
 
diff --git a/src/TATi/common.py b/src/TATi/common.py
@@ -182,6 +182,8 @@ def add_model_options_to_parser(parser):
         help='Step(s) to parse from parse_parameters_file assuming multiple are present')
     parser.add_argument('--seed', type=int, default=None,
         help='Seed to use for random number generators.')
+    parser.add_argument('--summaries_path', type=str, default=None,
+        help='path to write TensorBoard summaries to')
 
 
 def add_common_options_to_parser(parser):
diff --git a/src/TATi/models/mock_flags.py b/src/TATi/models/mock_flags.py
@@ -49,6 +49,7 @@ def __init__(self,
                  sigma=1.,
                  sigmaA=1.,
                  step_width=0.03,
+                 summaries_path=None,
                  trajectory_file=None,
                  use_reweighting=False,
                  verbose=0
@@ -101,6 +102,7 @@ def __init__(self,
         :param sigma: Scale of noise injected to momentum per step for CCaDL.
         :param sigmaA: Scale of noise in convex combination for CCaDL.
         :param step_width: step width \Delta t to use, e.g. 0.01
+        :param summaries_path: path to write summaries (for TensorBoard) to
         :param trajectory_file: CSV file name to output trajectories of sampling, i.e. weights and evaluated loss function.
         :param use_reweighting:
         :param verbose: how much (debugging) information to print
@@ -150,6 +152,7 @@ def __init__(self,
         self.sigma = sigma
         self.sigmaA = sigmaA
         self.step_width = step_width
+        self.summaries_path = summaries_path
         self.trajectory_file = trajectory_file
         self.use_reweighting = use_reweighting
         self.verbose = verbose
diff --git a/src/TATi/models/model.py b/src/TATi/models/model.py
@@ -314,6 +314,7 @@ def setup_parameters(
             sigma=1.,
             sigmaA=1.,
             step_width=0.03,
+            summaries_path=None,
             trajectory_file=None,
             use_reweighting=False,
             verbose=0):
@@ -361,6 +362,7 @@ def setup_parameters(
                 sigma=sigma,
                 sigmaA=sigmaA,
                 step_width=step_width,
+                summaries_path=summaries_path,
                 trajectory_file=trajectory_file,
                 use_reweighting=use_reweighting,
                 verbose=verbose)
@@ -790,7 +792,10 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
             for walker_index in range(self.FLAGS.number_walkers)]
 
         list_of_nodes = ["sample_step", "accuracy", "global_step", "loss"]
-        test_nodes = [[self.summary]*self.FLAGS.number_walkers]
+        if self.FLAGS.summaries_path is not None:
+            test_nodes = [self.summary]*self.FLAGS.number_walkers
+        else:
+            test_nodes = []
         for item in list_of_nodes:
             test_nodes.append([self.nn[walker_index].get(item) \
                                for walker_index in range(self.FLAGS.number_walkers)])
@@ -889,6 +894,10 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
                 assert(check_accepted[walker_index] == 0)
                 assert(check_rejected[walker_index] == 0)
 
+        # prepare summaries for TensorBoard
+        if self.FLAGS.summaries_path is not None:
+            summary_writer = tf.summary.FileWriter(self.FLAGS.summaries_path, self.sess.graph)
+
         logging.info("Starting to sample")
         logging.info_intervals = max(1, int(self.FLAGS.max_steps / 100))
         last_time = time.process_time()
@@ -980,8 +989,12 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
             # or
             #   tf.run([loss_eval, train_step], ...)
             # is not important. Only a subsequent, distinct tf.run() call would produce a different loss_eval.
-            summary, _, acc, global_step, loss_eval = \
-                self.sess.run(test_nodes, feed_dict=feed_dict)
+            if self.FLAGS.summaries_path is not None:
+                summary, _, acc, global_step, loss_eval = \
+                    self.sess.run(test_nodes, feed_dict=feed_dict)
+            else:
+                _, acc, global_step, loss_eval = \
+                    self.sess.run(test_nodes, feed_dict=feed_dict)
 
             if self.FLAGS.sampler in ["StochasticGradientLangevinDynamics",
                                       "GeometricLangevinAlgorithm_1stOrder",
@@ -1012,6 +1025,12 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
                             self.static_vars["virials"],
                             self.static_vars["noise"]])
 
+            if self.FLAGS.summaries_path is not None:
+                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
+                run_metadata = tf.RunMetadata()
+                summary_writer.add_run_metadata(run_metadata, 'step%d' % current_step)
+                summary_writer.add_summary(summary, current_step)
+
             for walker_index in range(self.FLAGS.number_walkers):
                 if current_step >= self.FLAGS.burn_in_steps:
                     accumulated_loss_nominator[walker_index] += loss_eval[walker_index] * exp(- self.FLAGS.inverse_temperature * loss_eval[walker_index])
@@ -1168,6 +1187,10 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
 
         logging.info("SAMPLED.")
 
+        # close summaries file
+        if self.FLAGS.summaries_path is not None:
+            summary_writer.close()
+
         return run_info, trajectory, averages
 
     def _create_default_feed_dict_with_constants(self, walker_index=0):
@@ -1229,9 +1252,13 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
         assert( walker_index < self.FLAGS.number_walkers)
 
         placeholder_nodes = self.nn[walker_index].get_dict_of_nodes(["learning_rate", "y_"])
-        test_nodes = [self.summary]+self.nn[walker_index].get_list_of_nodes(
+        if self.FLAGS.summaries_path is not None:
+            test_nodes = [self.summary]
+        else:
+            test_nodes = []
+        test_nodes.extend(self.nn[walker_index].get_list_of_nodes(
             ["train_step", "accuracy", "global_step", "loss", "y_", "y"]) \
-                     +[self.static_vars["gradients"]]
+                     +[self.static_vars["gradients"]])
 
         output_width = 8
         output_precision = 8
@@ -1269,8 +1296,13 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
                 np.zeros((steps, no_params)),
                 columns=header)
 
+
         default_feed_dict = self._create_default_feed_dict_with_constants(walker_index)
 
+        # prepare summaries for TensorBoard
+        if self.FLAGS.summaries_path is not None:
+            summary_writer = tf.summary.FileWriter(self.FLAGS.summaries_path, self.sess.graph)
+
         logging.info("Starting to train")
         last_time = time.process_time()
         elapsed_time = 0
@@ -1306,14 +1338,24 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
                     weights_eval = self.weights[walker_index].evaluate(self.sess)
                     biases_eval = self.biases[walker_index].evaluate(self.sess)
 
-            summary, _, acc, global_step, loss_eval, y_true_eval, y_eval, scaled_grad = \
-                self.sess.run(test_nodes, feed_dict=feed_dict)
+            if self.FLAGS.summaries_path is not None:
+                summary, _, acc, global_step, loss_eval, y_true_eval, y_eval, scaled_grad = \
+                    self.sess.run(test_nodes, feed_dict=feed_dict)
+            else:
+                _, acc, global_step, loss_eval, y_true_eval, y_eval, scaled_grad = \
+                    self.sess.run(test_nodes, feed_dict=feed_dict)
 
             gradients, virials = self.sess.run([self.static_vars["gradients"][walker_index],
                                                 self.static_vars["virials"][walker_index]])
             if current_step >= self.FLAGS.burn_in_steps:
                 accumulated_virials += virials
 
+            if self.FLAGS.summaries_path is not None:
+                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
+                run_metadata = tf.RunMetadata()
+                summary_writer.add_run_metadata(run_metadata, 'step%d' % current_step)
+                summary_writer.add_summary(summary, current_step)
+
             if current_step % self.FLAGS.every_nth == 0:
                 current_time = time.process_time()
                 time_elapsed_per_nth_step = current_time - last_time
@@ -1377,6 +1419,10 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
             #logging.debug('y at step %s: %s' % (i, str(y_eval[0:9].transpose())))
         logging.info("TRAINED down to loss %s and accuracy %s." % (loss_eval, acc))
 
+        # close summaries file
+        if self.FLAGS.summaries_path is not None:
+            summary_writer.close()
+
         return run_info, trajectory, averages
 
     def compute_optimal_stepwidth(self, walker_index=0):
diff --git a/src/TATi/models/neuralnetwork.py b/src/TATi/models/neuralnetwork.py
@@ -148,9 +148,6 @@ def create(self, input_layer,
         self.add_losses(y, labels)
         loss = self.set_loss_function(loss_name)
 
-        merged = tf.summary.merge_all()  # Merge all the summaries
-        self.summary_nodes['merged'] = merged
-
         return loss
 
     @staticmethod
diff --git a/tests/regression/DataDrivenSampler/Options/testsuite-datadrivensampler-options-summaries_path.at b/tests/regression/DataDrivenSampler/Options/testsuite-datadrivensampler-options-summaries_path.at
@@ -0,0 +1,43 @@
+#
+#    ThermodynamicAnalyticsToolkit - explore high-dimensional manifold of neural networks
+#    Copyright (C) 2018 The University of Edinburgh
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+### test on option summaries_path
+
+AT_SETUP([Sampler Options - writing summaries])
+AT_KEYWORDS([options tatisampler summaries_path])
+
+# dataset prepared as follows:
+# TATiDatasetWriter --data_type 2 --dimension 10 --seed 426 --noise 0. --train_test_ratio 0 --test_data_file dataset-twoclusters.csv
+
+AT_CHECK([../../../TATiSampler \
+    --batch_data_files ${abs_top_srcdir}/tests/regression/DataDrivenSampler/Options/pre/dataset-twoclusters.csv \
+    --batch_size 10 \
+	--friction_constant 1 \
+	--inverse_temperature 1 \
+	--max_steps 20 \
+	--run_file run-step_width_1e-2.csv \
+	--sampler GeometricLangevinAlgorithm_1stOrder\
+	--seed 426 \
+	--sql_db sqlite.db \
+	--step_width 1e-2 \
+	--summaries_path `pwd`/summaries
+], 0, [stdout], [stderr])
+
+AT_CHECK([test -d summaries], 0, [ignore], [ignore])
+
+AT_CLEANUP
+
diff --git a/tests/regression/DataDrivenSampler/testsuite-datadrivensampler.at b/tests/regression/DataDrivenSampler/testsuite-datadrivensampler.at
@@ -48,6 +48,7 @@ m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-invers
 m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-progress.at])
 m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-sql_db.at])
 m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-step_width.at])
+m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-summaries_path.at])
 
 # tests on saving and restoring
 m4_include([DataDrivenSampler/SavingRestoring/testsuite-datadrivensampler-saving_restoring.at])
diff --git a/tests/regression/DataOptimizer/Options/testsuite-dataoptimizer-options-summaries_path.at b/tests/regression/DataOptimizer/Options/testsuite-dataoptimizer-options-summaries_path.at
@@ -0,0 +1,41 @@
+#
+#    ThermodynamicAnalyticsToolkit - explore high-dimensional manifold of neural networks
+#    Copyright (C) 2018 The University of Edinburgh
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+### test on option summaries_path
+
+AT_SETUP([Optimizer Options - writing summaries])
+AT_KEYWORDS([tatioptimizer summaries_path])
+
+# dataset prepared as follows:
+# TATiDatasetWriter --data_type 2 --dimension 10 --seed 426 --noise 0. --train_test_ratio 0 --test_data_file dataset-twoclusters.csv
+
+AT_CHECK([../../../TATiOptimizer \
+    --batch_data_files ${abs_top_srcdir}/tests/regression/DataOptimizer/Options/pre/dataset-twoclusters.csv \
+    --batch_size 10 \
+	--every_nth 1 \
+	--max_steps 20 \
+	--optimizer GradientDescent \
+	--run_file run-step_width_1e-1.csv \
+	--seed 426 \
+	--sql_db sqlite.db \
+	--step_width 1e-1 \
+	--summaries_path `pwd`/summaries
+], 0, [stdout], [stderr])
+
+AT_CHECK([test -d summaries], 0, [ignore], [ignore])
+
+AT_CLEANUP
diff --git a/tests/regression/DataOptimizer/testsuite-dataoptimizer.at b/tests/regression/DataOptimizer/testsuite-dataoptimizer.at
@@ -36,6 +36,7 @@ m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-every_nth.at])
 m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-progress.at])
 m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-sql_db.at])
 m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-step_width.at])
+m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-summaries_path.at])
 
 # Saving and restoring models
 m4_include([DataOptimizer/SavingRestoring/testsuite-dataoptimizer-saving_restoring.at])
diff --git a/tests/regression/Makefile.am b/tests/regression/Makefile.am
@@ -29,6 +29,7 @@ TESTSCRIPTS += \
 	$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-progress.at \
 	$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-sql_db.at \
 	$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-step_width.at \
+	$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-summaries_path.at \
 	$(srcdir)/DataDrivenSampler/Pipelines/testsuite-datadrivensampler-pipelines.at \
 	$(srcdir)/DataDrivenSampler/Prior/testsuite-datadrivensampler-prior.at \
 	$(srcdir)/DataDrivenSampler/Prior/testsuite-datadrivensampler-prior-lower_boundary.at \
@@ -68,6 +69,7 @@ TESTSCRIPTS += \
 	$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-progress.at \
 	$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-sql_db.at \
 	$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-step_width.at \
+	$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-summaries_path.at \
 	$(srcdir)/DataOptimizer/Prior/testsuite-dataoptimizer-prior.at \
 	$(srcdir)/DataOptimizer/Prior/testsuite-dataoptimizer-prior-lower_upper_boundary.at \
 	$(srcdir)/DataOptimizer/Prior/testsuite-dataoptimizer-prior-tethering.at \
diff --git a/tests/tensorflow/pre/train_MNIST_pipeline_in_graph.py b/tests/tensorflow/pre/train_MNIST_pipeline_in_graph.py
@@ -76,7 +76,7 @@
 		virials_t = tf.get_variable("virials", dtype=dds_basetype)
 		zero_virials = virials_t.assign(0.)
 
-train_nodes = nn.nn[0].get_list_of_nodes(["merged", "train_step", "accuracy", "global_step","loss"])
+train_nodes = nn.nn[0].get_list_of_nodes(["train_step", "accuracy", "global_step","loss"])
 
 with open(params.output_file, "w") as of:
 	of.write("step,loss,accuracy,gradients\n")
@@ -100,7 +100,7 @@
 		check_gradients, check_virials = nn.sess.run([zero_gradients, zero_virials])
 		assert (abs(check_gradients) < 1e-10)
 		assert (abs(check_virials) < 1e-10)
-		summary, _, acc, global_step, loss_eval = \
+		_, acc, global_step, loss_eval = \
 			nn.sess.run(train_nodes, feed_dict=feed_dict)
 
 		# evaluate loss and gradients accurately