diff --git a/key_value_memory/memn2n_kv.py b/key_value_memory/memn2n_kv.py index 7521f7e..7ea265f 100644 --- a/key_value_memory/memn2n_kv.py +++ b/key_value_memory/memn2n_kv.py @@ -33,7 +33,7 @@ def add_gradient_noise(t, stddev=1e-3, name=None): 0.001 was said to be a good fixed value for memory networks [2]. """ - with tf.op_scope([t, stddev], name, "add_gradient_noise") as name: + with tf.name_scope(name, "add_gradient_noise", [t, stddev]) as name: t = tf.convert_to_tensor(t, name="t") gn = tf.random_normal(tf.shape(t), stddev=stddev) return tf.add(t, gn, name=name) @@ -44,11 +44,11 @@ def zero_nil_slot(t, name=None): The nil_slot is a dummy slot and should not be trained and influence the training algorithm. """ - with tf.op_scope([t], name, "zero_nil_slot") as name: + with tf.name_scope(name, "zero_nil_slot", [t]) as name: t = tf.convert_to_tensor(t, name="t") s = tf.shape(t)[1] - z = tf.zeros(tf.pack([1, s])) - return tf.concat(0, [z, tf.slice(t, [1, 0], [-1, -1])], name=name) + z = tf.zeros(tf.stack([1, s])) + return tf.concat([z, tf.slice(t, [1, 0], [-1, -1])], 0, name=name) class MemN2N_KV(object): """Key Value Memory Network.""" @@ -120,10 +120,10 @@ def __init__(self, batch_size, vocab_size, # Embedding layer with tf.device('/cpu:0'), tf.name_scope("embedding"): nil_word_slot = tf.zeros([1, embedding_size]) - self.W = tf.concat(0, [nil_word_slot, tf.get_variable('W', shape=[vocab_size-1, embedding_size], - initializer=tf.contrib.layers.xavier_initializer())]) - self.W_memory = tf.concat(0, [nil_word_slot, tf.get_variable('W_memory', shape=[vocab_size-1, embedding_size], - initializer=tf.contrib.layers.xavier_initializer())]) + self.W = tf.concat([nil_word_slot, tf.get_variable('W', shape=[vocab_size-1, embedding_size], + initializer=tf.contrib.layers.xavier_initializer())], 0) + self.W_memory = tf.concat([nil_word_slot, tf.get_variable('W_memory', shape=[vocab_size-1, embedding_size], + initializer=tf.contrib.layers.xavier_initializer())], 0) # self.W_memory = self.W self._nil_vars = 
set([self.W.name, self.W_memory.name]) # shape: [batch_size, query_size, embedding_size] @@ -186,7 +186,7 @@ def __init__(self, batch_size, vocab_size, #logits = tf.nn.dropout(tf.matmul(o, self.B) + logits_bias, self.keep_prob) probs = tf.nn.softmax(tf.cast(logits, tf.float32)) - cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, tf.cast(self._labels, tf.float32), name='cross_entropy') + cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(self._labels, tf.float32), name='cross_entropy') cross_entropy_sum = tf.reduce_sum(cross_entropy, name="cross_entropy_sum") # loss op diff --git a/key_value_memory/single.py b/key_value_memory/single.py index 739920b..2d674d8 100644 --- a/key_value_memory/single.py +++ b/key_value_memory/single.py @@ -4,10 +4,11 @@ from __future__ import print_function from data_utils import load_task, vectorize_data -from sklearn import cross_validation, metrics +from sklearn import model_selection, metrics from memn2n_kv import MemN2N_KV from itertools import chain from six.moves import range +from functools import reduce import tensorflow as tf import numpy as np @@ -44,7 +45,7 @@ word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) max_story_size = max(map(len, (s for s, _, _ in data))) -mean_story_size = int(np.mean(map(len, (s for s, _, _ in data)))) +mean_story_size = int(np.mean(list(map(len, (s for s, _, _ in data))))) sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data))) query_size = max(map(len, (q for _, q, _ in data))) memory_size = min(FLAGS.memory_size, max_story_size) @@ -57,7 +58,7 @@ # train/validation/test sets S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size) -trainS, valS, trainQ, valQ, trainA, valA = cross_validation.train_test_split(S, Q, A, test_size=.1) +trainS, valS, trainQ, valQ, trainA, valA = model_selection.train_test_split(S, Q, A, test_size=.1) testS, testQ, testA = vectorize_data(test, word_idx, sentence_size, memory_size) 
print("Training set shape", trainS.shape) @@ -76,7 +77,7 @@ val_labels = np.argmax(valA, axis=1) batch_size = FLAGS.batch_size -batches = zip(range(0, n_train-batch_size, batch_size), range(batch_size, n_train, batch_size)) +batches = list(zip(range(0, n_train-batch_size, batch_size), range(batch_size, n_train, batch_size))) with tf.Graph().as_default(): session_conf = tf.ConfigProto( @@ -110,7 +111,7 @@ nil_grads_and_vars.append((g, v)) train_op = optimizer.apply_gradients(nil_grads_and_vars, name="train_op", global_step=global_step) - sess.run(tf.initialize_all_variables()) + sess.run(tf.global_variables_initializer()) def train_step(s, q, a): feed_dict = {