README.md (10 changes: 5 additions & 5 deletions)

@@ -6,15 +6,15 @@ You can find the pre-processed datasets and the pre-trained word embeddings in [
 
 You can also download the original datasets of Restaurant domain and Beer domain in [[Download]](https://drive.google.com/open?id=1qzbTiJ2IL5ATZYNMp2DRkHvbFYsnOVAQ). For preprocessing, put the decompressed zip file in the main folder and run
 ```
-python word2vec.py
-python preprocess.py
+python3 word2vec.py
+python3 preprocess.py
 ```
 respectively in code/. The preprocessed files and trained word embeddings for each domain will be saved in a folder preprocessed_data/.
 
 ## Train
 Under code/, type the following command for training:
 ```
-THEANO_FLAGS="device=gpu0,floatX=float32" python train.py \
+KERAS_BACKEND=theano THEANO_FLAGS="device=gpu0,floatX=float32" python3 train.py \
 --emb ../preprocessed_data/$domain/w2v_embedding \
 --domain $domain \
 -o output_dir \
@@ -26,7 +26,7 @@ After training, two output files will be saved in code/output_dir/$domain/: 1) *
 ## Evaluation
 Under code/, type the following command:
 ```
-THEANO_FLAGS="device=gpu0,floatX=float32" python evaluation.py \
+KERAS_BACKEND=theano THEANO_FLAGS="device=gpu0,floatX=float32" python3 evaluation.py \
 --domain $domain \
 -o output_dir \
 ```
@@ -40,7 +40,7 @@ One example of trained model for the restaurant domain has been put in pre_train
 
 ## Dependencies
 
-python 2
+python 3
 
 * keras 1.2.1
 * theano 0.9.0
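A side note on the KERAS_BACKEND=theano prefix introduced above: Keras 1.x otherwise reads its backend from ~/.keras/keras.json, so the prefix is what guarantees Theano is used even on machines configured for TensorFlow. A minimal sanity check, assuming the pinned keras==1.2.1 is installed (this script is illustrative, not part of the repository):

```python
# check_backend.py -- hypothetical helper, run as:
#   KERAS_BACKEND=theano python3 check_backend.py
from keras import backend as K

# keras.backend.backend() returns the name of the active backend.
print(K.backend())  # expected output: 'theano'
```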
code/preprocess.py (6 changes: 3 additions & 3 deletions)

@@ -39,12 +39,12 @@ def preprocess_test(domain):
         out2.write(label+'\n')
 
 def preprocess(domain):
-    print '\t'+domain+' train set ...'
+    print('\t'+domain+' train set ...')
     preprocess_train(domain)
-    print '\t'+domain+' test set ...'
+    print('\t'+domain+' test set ...')
     preprocess_test(domain)
 
-print 'Preprocessing raw review sentences ...'
+print('Preprocessing raw review sentences ...')
 preprocess('restaurant')
 preprocess('beer')
 
code/reader.py (22 changes: 11 additions & 11 deletions)

@@ -30,7 +30,7 @@ def create_vocab(domain, maxlen=0, vocab_size=0):
                 word_freqs[w] = 1
             total_words += 1
 
-    print (' %i total words, %i unique words' % (total_words, unique_words))
+    print(' %i total words, %i unique words' % (total_words, unique_words))
     sorted_word_freqs = sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
 
     vocab = {'<pad>':0, '<unk>':1, '<num>':2}
@@ -41,7 +41,7 @@ def create_vocab(domain, maxlen=0, vocab_size=0):
         if vocab_size > 0 and index > vocab_size + 2:
             break
     if vocab_size > 0:
-        print (' keep the top %i words' % vocab_size)
+        print(' keep the top %i words' % vocab_size)
 
     #Write (vocab, frequence) to a txt file
     vocab_file = codecs.open('../preprocessed_data/%s/vocab' % domain, mode='w', encoding='utf8')
@@ -86,19 +86,19 @@ def read_dataset(domain, phase, vocab, maxlen):
         if maxlen_x < len(indices):
             maxlen_x = len(indices)
 
-    print ' <num> hit rate: %.2f%%, <unk> hit rate: %.2f%%' % (100*num_hit/total, 100*unk_hit/total)
+    print(' <num> hit rate: %.2f%%, <unk> hit rate: %.2f%%' % (100*num_hit/total, 100*unk_hit/total))
     return data_x, maxlen_x
 
 
 
 def get_data(domain, vocab_size=0, maxlen=0):
-    print 'Reading data from', domain
-    print ' Creating vocab ...'
+    print('Reading data from', domain)
+    print(' Creating vocab ...')
     vocab = create_vocab(domain, maxlen, vocab_size)
-    print ' Reading dataset ...'
-    print ' train set'
+    print(' Reading dataset ...')
+    print(' train set')
     train_x, train_maxlen = read_dataset(domain, 'train', vocab, maxlen)
-    print ' test set'
+    print(' test set')
     test_x, test_maxlen = read_dataset(domain, 'test', vocab, maxlen)
     maxlen = max(train_maxlen, test_maxlen)
     return vocab, train_x, test_x, maxlen
@@ -107,7 +107,7 @@ def get_data(domain, vocab_size=0, maxlen=0):
 
 if __name__ == "__main__":
     vocab, train_x, test_x, maxlen = get_data('restaurant')
-    print len(train_x)
-    print len(test_x)
-    print maxlen
+    print(len(train_x))
+    print(len(test_x))
+    print(maxlen)
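A nuance worth noting about the hit-rate line in read_dataset above: in Python 3, / is always true division, so 100*num_hit/total produces a float even when both operands are ints, whereas Python 2 would silently floor it for int operands. A minimal sketch of the difference, assuming integer counters (the actual counters in reader.py may already be floats):

```python
# Division semantics across the 2-to-3 port (illustrative values only).
num_hit, total = 7, 50
print(100 * num_hit / total)   # Python 3: 14.0 (true division)
print(100 * num_hit // total)  # 14 -- floors in both Python 2 and 3
```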

code/run_script.sh (4 changes: 1 addition & 3 deletions)

@@ -1,6 +1,4 @@
-
-
-THEANO_FLAGS="device=gpu0,floatX=float32" python train.py \
+KERAS_BACKEND=theano THEANO_FLAGS="device=gpu0,floatX=float32" python3 train.py \
 --emb ../preprocessed_data/restaurant/w2v_embedding \
 --domain restaurant \
 -o output_dir \
code/train.py (16 changes: 8 additions & 8 deletions)

@@ -54,8 +54,8 @@
 train_x = sequence.pad_sequences(train_x, maxlen=overall_maxlen)
 test_x = sequence.pad_sequences(test_x, maxlen=overall_maxlen)
 
-print 'Number of training examples: ', len(train_x)
-print 'Length of vocab: ', len(vocab)
+print('Number of training examples: ', len(train_x))
+print('Length of vocab: ', len(vocab))
 
 def sentence_batch_generator(data, batch_size):
     n_batch = len(data) / batch_size
@@ -129,13 +129,13 @@ def max_margin_loss(y_true, y_pred):
 batches_per_epoch = 1000
 
 min_loss = float('inf')
-for ii in xrange(args.epochs):
+for ii in range(args.epochs):
     t0 = time()
     loss, max_margin_loss = 0., 0.
 
-    for b in tqdm(xrange(batches_per_epoch)):
-        sen_input = sen_gen.next()
-        neg_input = neg_gen.next()
+    for b in tqdm(range(batches_per_epoch)):
+        sen_input = next(sen_gen)
+        neg_input = next(neg_gen)
 
         batch_loss, batch_max_margin_loss = model.train_on_batch([sen_input, neg_input], np.ones((args.batch_size, 1)))
         loss += batch_loss / batches_per_epoch
@@ -157,8 +157,8 @@ def max_margin_loss(y_true, y_pred):
     sims = word_emb.dot(desc.T)
     ordered_words = np.argsort(sims)[::-1]
     desc_list = [vocab_inv[w] for w in ordered_words[:100]]
-    print 'Aspect %d:' % ind
-    print desc_list
+    print('Aspect %d:' % ind)
+    print(desc_list)
     aspect_file.write('Aspect %d:\n' % ind)
     aspect_file.write(' '.join(desc_list) + '\n\n')
 
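One line this port leaves untouched (visible as context in the first train.py hunk) is n_batch = len(data) / batch_size in sentence_batch_generator: under Python 3 that expression now yields a float rather than an int. If n_batch is used as a loop bound or index later in the generator, it would need floor division; a sketch of the likely fix, without claiming how n_batch is consumed in the rest of the file:

```python
# Python 2: len(data) / batch_size -> int (floored) for int operands.
# Python 3: the same expression    -> float, which breaks range()/indexing.
data_len, batch_size = 4000, 32
n_batch = data_len // batch_size  # 125 -- the usual Python 3 replacement
```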
code/w2vEmbReader.py (2 changes: 1 addition & 1 deletion)

@@ -41,7 +41,7 @@ def get_emb_given_word(self, word):
 
     def get_emb_matrix_given_vocab(self, vocab, emb_matrix):
         counter = 0.
-        for word, index in vocab.iteritems():
+        for word, index in vocab.items():
             try:
                 emb_matrix[index] = self.embeddings[word]
                 counter += 1
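The iteritems() to items() swap is the right one here: Python 3's items() returns a lightweight view, matching the lazy behavior iteritems() had in Python 2, where items() would have built a full list of the whole vocabulary. A small illustration:

```python
# vocab.items() in Python 3 iterates (word, index) pairs without
# materializing a list, just as vocab.iteritems() did in Python 2.
vocab = {'<pad>': 0, '<unk>': 1, '<num>': 2}
for word, index in vocab.items():
    print(word, index)
```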
code/word2vec.py (2 changes: 1 addition & 1 deletion)

@@ -18,7 +18,7 @@ def main(domain):
     model.save(model_file)
 
 
-print 'Pre-training word embeddings ...'
+print('Pre-training word embeddings ...')
 main('restaurant')
 main('beer')
 
requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -1,6 +1,6 @@
 keras==1.2.1
 theano==0.9.0
-numpy==1.13.3
+numpy==1.17.0
 scikit-learn
 gensim==0.12.4
 tqdm
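The numpy bump matches the port: 1.17.0 was the first numpy release to drop Python 2 support entirely, so the pin itself guards against accidentally installing under Python 2. A quick environment check, offered as a sketch rather than part of the change:

```python
# Confirm the interpreter/numpy pairing implied by requirements.txt.
import sys
import numpy

assert sys.version_info[0] == 3, "this codebase now targets Python 3"
print(numpy.__version__)  # expected: 1.17.0 per the pin above
```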