Skip to content

Commit 3f2d551

Browse files
committed
Writing of summaries for TensorBoard is now activated through summaries_path.
- also avoided summary evaluation when not requested. This is quite a gain in performance. - this holds for both sample() and train(). - TESTS: Added regression test on summaries_path for TATiSampler and TATiOptimizer. - DOCU: Added explanation for summaries_path to userguide.
1 parent db21f0f commit 3f2d551

11 files changed

+168
-12
lines changed

doc/userguide/userguide.xml

+20
Original file line numberDiff line numberDiff line change
@@ -1794,6 +1794,26 @@ sys.path.insert(1,"&lt;path_to_TATi&gt;/lib/python3.5/site-packages/")</programl
17941794
an estimate of the remaining run time is given.</command></note>
17951795
</section>
17961796

1797+
<section xml:id="reference.miscellaneous.summaries">
1798+
<title>Tensorflow summaries</title>
1799+
<para>Tensorflow delivers a powerful instrument for inspecting the inner
1800+
workings of its computational graph: TensorBoard.</para>
1801+
<para>This tool also allows inspecting values such as the activation
1802+
histogram, the loss and accuracy and many other parameters and
1803+
values internal to TATi.</para>
1804+
<para>When a path "/foo/bar" present in the file system is supplied using the
1805+
<command>summaries_path</command> variable, summaries are
1806+
automatically written to the path and can be inspected with the following
1807+
call to tensorboard:</para>
1808+
<programlisting>tensorboard --logdir /foo/bar</programlisting>
1809+
<para>TensorBoard essentially comprises a web server for rendering
1810+
the nodes of the graph and figures of the inspected values inside a
1811+
webpage. On execution it provides a URL that needs to be entered in any
1812+
web browser to access the webpage.</para>
1813+
<note>The accumulation and writing of the summaries has quite an impact
1814+
on TATi's overall performance and is therefore switched off by default.</note>
1815+
</section>
1816+
17971817
</section>
17981818
</chapter>
17991819

src/TATi/common.py

+2
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ def add_model_options_to_parser(parser):
182182
help='Step(s) to parse from parse_parameters_file assuming multiple are present')
183183
parser.add_argument('--seed', type=int, default=None,
184184
help='Seed to use for random number generators.')
185+
parser.add_argument('--summaries_path', type=str, default=None,
186+
help='path to write TensorBoard summaries to')
185187

186188

187189
def add_common_options_to_parser(parser):

src/TATi/models/mock_flags.py

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def __init__(self,
4949
sigma=1.,
5050
sigmaA=1.,
5151
step_width=0.03,
52+
summaries_path=None,
5253
trajectory_file=None,
5354
use_reweighting=False,
5455
verbose=0
@@ -101,6 +102,7 @@ def __init__(self,
101102
:param sigma: Scale of noise injected to momentum per step for CCaDL.
102103
:param sigmaA: Scale of noise in convex combination for CCaDL.
103104
:param step_width: step width \Delta t to use, e.g. 0.01
105+
:param summaries_path: path to write summaries (for TensorBoard) to
104106
:param trajectory_file: CSV file name to output trajectories of sampling, i.e. weights and evaluated loss function.
105107
:param use_reweighting:
106108
:param verbose: how much (debugging) information to print
@@ -150,6 +152,7 @@ def __init__(self,
150152
self.sigma = sigma
151153
self.sigmaA = sigmaA
152154
self.step_width = step_width
155+
self.summaries_path = summaries_path
153156
self.trajectory_file = trajectory_file
154157
self.use_reweighting = use_reweighting
155158
self.verbose = verbose

src/TATi/models/model.py

+53-7
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ def setup_parameters(
314314
sigma=1.,
315315
sigmaA=1.,
316316
step_width=0.03,
317+
summaries_path=None,
317318
trajectory_file=None,
318319
use_reweighting=False,
319320
verbose=0):
@@ -361,6 +362,7 @@ def setup_parameters(
361362
sigma=sigma,
362363
sigmaA=sigmaA,
363364
step_width=step_width,
365+
summaries_path=summaries_path,
364366
trajectory_file=trajectory_file,
365367
use_reweighting=use_reweighting,
366368
verbose=verbose)
@@ -790,7 +792,10 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
790792
for walker_index in range(self.FLAGS.number_walkers)]
791793

792794
list_of_nodes = ["sample_step", "accuracy", "global_step", "loss"]
793-
test_nodes = [[self.summary]*self.FLAGS.number_walkers]
795+
if self.FLAGS.summaries_path is not None:
796+
test_nodes = [self.summary]*self.FLAGS.number_walkers
797+
else:
798+
test_nodes = []
794799
for item in list_of_nodes:
795800
test_nodes.append([self.nn[walker_index].get(item) \
796801
for walker_index in range(self.FLAGS.number_walkers)])
@@ -889,6 +894,10 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
889894
assert(check_accepted[walker_index] == 0)
890895
assert(check_rejected[walker_index] == 0)
891896

897+
# prepare summaries for TensorBoard
898+
if self.FLAGS.summaries_path is not None:
899+
summary_writer = tf.summary.FileWriter(self.FLAGS.summaries_path, self.sess.graph)
900+
892901
logging.info("Starting to sample")
893902
logging.info_intervals = max(1, int(self.FLAGS.max_steps / 100))
894903
last_time = time.process_time()
@@ -980,8 +989,12 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
980989
# or
981990
# tf.run([loss_eval, train_step], ...)
982991
# is not important. Only a subsequent, distinct tf.run() call would produce a different loss_eval.
983-
summary, _, acc, global_step, loss_eval = \
984-
self.sess.run(test_nodes, feed_dict=feed_dict)
992+
if self.FLAGS.summaries_path is not None:
993+
summary, _, acc, global_step, loss_eval = \
994+
self.sess.run(test_nodes, feed_dict=feed_dict)
995+
else:
996+
_, acc, global_step, loss_eval = \
997+
self.sess.run(test_nodes, feed_dict=feed_dict)
985998

986999
if self.FLAGS.sampler in ["StochasticGradientLangevinDynamics",
9871000
"GeometricLangevinAlgorithm_1stOrder",
@@ -1012,6 +1025,12 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
10121025
self.static_vars["virials"],
10131026
self.static_vars["noise"]])
10141027

1028+
if self.FLAGS.summaries_path is not None:
1029+
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
1030+
run_metadata = tf.RunMetadata()
1031+
summary_writer.add_run_metadata(run_metadata, 'step%d' % current_step)
1032+
summary_writer.add_summary(summary, current_step)
1033+
10151034
for walker_index in range(self.FLAGS.number_walkers):
10161035
if current_step >= self.FLAGS.burn_in_steps:
10171036
accumulated_loss_nominator[walker_index] += loss_eval[walker_index] * exp(- self.FLAGS.inverse_temperature * loss_eval[walker_index])
@@ -1168,6 +1187,10 @@ def sample(self, return_run_info = False, return_trajectories = False, return_av
11681187

11691188
logging.info("SAMPLED.")
11701189

1190+
# close summaries file
1191+
if self.FLAGS.summaries_path is not None:
1192+
summary_writer.close()
1193+
11711194
return run_info, trajectory, averages
11721195

11731196
def _create_default_feed_dict_with_constants(self, walker_index=0):
@@ -1229,9 +1252,13 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
12291252
assert( walker_index < self.FLAGS.number_walkers)
12301253

12311254
placeholder_nodes = self.nn[walker_index].get_dict_of_nodes(["learning_rate", "y_"])
1232-
test_nodes = [self.summary]+self.nn[walker_index].get_list_of_nodes(
1255+
if self.FLAGS.summaries_path is not None:
1256+
test_nodes = [self.summary]
1257+
else:
1258+
test_nodes = []
1259+
test_nodes.extend(self.nn[walker_index].get_list_of_nodes(
12331260
["train_step", "accuracy", "global_step", "loss", "y_", "y"]) \
1234-
+[self.static_vars["gradients"]]
1261+
+[self.static_vars["gradients"]])
12351262

12361263
output_width = 8
12371264
output_precision = 8
@@ -1269,8 +1296,13 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
12691296
np.zeros((steps, no_params)),
12701297
columns=header)
12711298

1299+
12721300
default_feed_dict = self._create_default_feed_dict_with_constants(walker_index)
12731301

1302+
# prepare summaries for TensorBoard
1303+
if self.FLAGS.summaries_path is not None:
1304+
summary_writer = tf.summary.FileWriter(self.FLAGS.summaries_path, self.sess.graph)
1305+
12741306
logging.info("Starting to train")
12751307
last_time = time.process_time()
12761308
elapsed_time = 0
@@ -1306,14 +1338,24 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
13061338
weights_eval = self.weights[walker_index].evaluate(self.sess)
13071339
biases_eval = self.biases[walker_index].evaluate(self.sess)
13081340

1309-
summary, _, acc, global_step, loss_eval, y_true_eval, y_eval, scaled_grad = \
1310-
self.sess.run(test_nodes, feed_dict=feed_dict)
1341+
if self.FLAGS.summaries_path is not None:
1342+
summary, _, acc, global_step, loss_eval, y_true_eval, y_eval, scaled_grad = \
1343+
self.sess.run(test_nodes, feed_dict=feed_dict)
1344+
else:
1345+
_, acc, global_step, loss_eval, y_true_eval, y_eval, scaled_grad = \
1346+
self.sess.run(test_nodes, feed_dict=feed_dict)
13111347

13121348
gradients, virials = self.sess.run([self.static_vars["gradients"][walker_index],
13131349
self.static_vars["virials"][walker_index]])
13141350
if current_step >= self.FLAGS.burn_in_steps:
13151351
accumulated_virials += virials
13161352

1353+
if self.FLAGS.summaries_path is not None:
1354+
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
1355+
run_metadata = tf.RunMetadata()
1356+
summary_writer.add_run_metadata(run_metadata, 'step%d' % current_step)
1357+
summary_writer.add_summary(summary, current_step)
1358+
13171359
if current_step % self.FLAGS.every_nth == 0:
13181360
current_time = time.process_time()
13191361
time_elapsed_per_nth_step = current_time - last_time
@@ -1377,6 +1419,10 @@ def train(self, walker_index=0, return_run_info = False, return_trajectories = F
13771419
#logging.debug('y at step %s: %s' % (i, str(y_eval[0:9].transpose())))
13781420
logging.info("TRAINED down to loss %s and accuracy %s." % (loss_eval, acc))
13791421

1422+
# close summaries file
1423+
if self.FLAGS.summaries_path is not None:
1424+
summary_writer.close()
1425+
13801426
return run_info, trajectory, averages
13811427

13821428
def compute_optimal_stepwidth(self, walker_index=0):

src/TATi/models/neuralnetwork.py

-3
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,6 @@ def create(self, input_layer,
148148
self.add_losses(y, labels)
149149
loss = self.set_loss_function(loss_name)
150150

151-
merged = tf.summary.merge_all() # Merge all the summaries
152-
self.summary_nodes['merged'] = merged
153-
154151
return loss
155152

156153
@staticmethod
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#
2+
# ThermodynamicAnalyticsToolkit - explore high-dimensional manifold of neural networks
3+
# Copyright (C) 2018 The University of Edinburgh
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
#
18+
### test on option summaries_path
19+
20+
AT_SETUP([Sampler Options - writing summaries])
21+
AT_KEYWORDS([options tatisampler summaries_path])
22+
23+
# dataset prepared as follows:
24+
# TATiDatasetWriter --data_type 2 --dimension 10 --seed 426 --noise 0. --train_test_ratio 0 --test_data_file dataset-twoclusters.csv
25+
26+
AT_CHECK([../../../TATiSampler \
27+
--batch_data_files ${abs_top_srcdir}/tests/regression/DataDrivenSampler/Options/pre/dataset-twoclusters.csv \
28+
--batch_size 10 \
29+
--friction_constant 1 \
30+
--inverse_temperature 1 \
31+
--max_steps 20 \
32+
--run_file run-step_width_1e-2.csv \
33+
--sampler GeometricLangevinAlgorithm_1stOrder\
34+
--seed 426 \
35+
--sql_db sqlite.db \
36+
--step_width 1e-2 \
37+
--summaries_path `pwd`/summaries
38+
], 0, [stdout], [stderr])
39+
40+
AT_CHECK([test -d summaries], 0, [ignore], [ignore])
41+
42+
AT_CLEANUP
43+

tests/regression/DataDrivenSampler/testsuite-datadrivensampler.at

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-invers
4848
m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-progress.at])
4949
m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-sql_db.at])
5050
m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-step_width.at])
51+
m4_include([DataDrivenSampler/Options/testsuite-datadrivensampler-options-summaries_path.at])
5152

5253
# tests on saving and restoring
5354
m4_include([DataDrivenSampler/SavingRestoring/testsuite-datadrivensampler-saving_restoring.at])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#
2+
# ThermodynamicAnalyticsToolkit - explore high-dimensional manifold of neural networks
3+
# Copyright (C) 2018 The University of Edinburgh
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
#
18+
### test on option summaries_path
19+
20+
AT_SETUP([Optimizer Options - writing summaries])
21+
AT_KEYWORDS([tatioptimizer summaries_path])
22+
23+
# dataset prepared as follows:
24+
# TATiDatasetWriter --data_type 2 --dimension 10 --seed 426 --noise 0. --train_test_ratio 0 --test_data_file dataset-twoclusters.csv
25+
26+
AT_CHECK([../../../TATiOptimizer \
27+
--batch_data_files ${abs_top_srcdir}/tests/regression/DataOptimizer/Options/pre/dataset-twoclusters.csv \
28+
--batch_size 10 \
29+
--every_nth 1 \
30+
--max_steps 20 \
31+
--optimizer GradientDescent \
32+
--run_file run-step_width_1e-1.csv \
33+
--seed 426 \
34+
--sql_db sqlite.db \
35+
--step_width 1e-1 \
36+
--summaries_path `pwd`/summaries
37+
], 0, [stdout], [stderr])
38+
39+
AT_CHECK([test -d summaries], 0, [ignore], [ignore])
40+
41+
AT_CLEANUP

tests/regression/DataOptimizer/testsuite-dataoptimizer.at

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-every_nth.at])
3636
m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-progress.at])
3737
m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-sql_db.at])
3838
m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-step_width.at])
39+
m4_include([DataOptimizer/Options/testsuite-dataoptimizer-options-summaries_path.at])
3940

4041
# Saving and restoring models
4142
m4_include([DataOptimizer/SavingRestoring/testsuite-dataoptimizer-saving_restoring.at])

tests/regression/Makefile.am

+2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ TESTSCRIPTS += \
2929
$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-progress.at \
3030
$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-sql_db.at \
3131
$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-step_width.at \
32+
$(srcdir)/DataDrivenSampler/Options/testsuite-datadrivensampler-options-summaries_path.at \
3233
$(srcdir)/DataDrivenSampler/Pipelines/testsuite-datadrivensampler-pipelines.at \
3334
$(srcdir)/DataDrivenSampler/Prior/testsuite-datadrivensampler-prior.at \
3435
$(srcdir)/DataDrivenSampler/Prior/testsuite-datadrivensampler-prior-lower_boundary.at \
@@ -68,6 +69,7 @@ TESTSCRIPTS += \
6869
$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-progress.at \
6970
$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-sql_db.at \
7071
$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-step_width.at \
72+
$(srcdir)/DataOptimizer/Options/testsuite-dataoptimizer-options-summaries_path.at \
7173
$(srcdir)/DataOptimizer/Prior/testsuite-dataoptimizer-prior.at \
7274
$(srcdir)/DataOptimizer/Prior/testsuite-dataoptimizer-prior-lower_upper_boundary.at \
7375
$(srcdir)/DataOptimizer/Prior/testsuite-dataoptimizer-prior-tethering.at \

tests/tensorflow/pre/train_MNIST_pipeline_in_graph.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
virials_t = tf.get_variable("virials", dtype=dds_basetype)
7777
zero_virials = virials_t.assign(0.)
7878

79-
train_nodes = nn.nn[0].get_list_of_nodes(["merged", "train_step", "accuracy", "global_step","loss"])
79+
train_nodes = nn.nn[0].get_list_of_nodes(["train_step", "accuracy", "global_step","loss"])
8080

8181
with open(params.output_file, "w") as of:
8282
of.write("step,loss,accuracy,gradients\n")
@@ -100,7 +100,7 @@
100100
check_gradients, check_virials = nn.sess.run([zero_gradients, zero_virials])
101101
assert (abs(check_gradients) < 1e-10)
102102
assert (abs(check_virials) < 1e-10)
103-
summary, _, acc, global_step, loss_eval = \
103+
_, acc, global_step, loss_eval = \
104104
nn.sess.run(train_nodes, feed_dict=feed_dict)
105105

106106
# evaluate loss and gradients accurately

0 commit comments

Comments
 (0)