
Commit 30172ad

Author: Peter Izsak

Merge branch 'master' into add-non-.py-files-to-MANIFEST

2 parents: 4806bc0 + 7d7ece4

12 files changed, +66 -106 lines

doc/source/absa.rst (+3 -3)

@@ -153,8 +153,8 @@ Arguments:
 References
 ==========
 
-.. [1] `Simple and Accurate Dependency Parsing Using Bidirectional LSTM Feature Representations <https://transacl.org/ojs/index.php/tacl/article/view/885/198>`__, Kiperwasser, E., & Goldberg, Y, Transactions Of The Association For Computational Linguistics (2106), 4, 313-327.
-.. [2] `Opinion word expansion and target extraction through double propagation <https://dl.acm.org/citation.cfm?id=1970422>`__, Guang Qiu, Bing Liu, Jiajun Bu, and Chun Chen, In Computational Linguistics, volume 37(1).
-.. [3] `Mining and Summarizing Customer Reviews <http://dx.doi.org/10.1145/1014052.1014073>`__, Minqing Hu and Bing Liu, Proceedings of the tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2004), pp. 168-177, 2004.
+.. [1] `Simple and Accurate Dependency Parsing Using Bidirectional LSTM Feature Representations <https://transacl.org/ojs/index.php/tacl/article/view/885/198>`__, Eliyahu Kiperwasser and Yoav Goldberg. 2016. Transactions of the Association of Computational Linguistics, 4:313–327.
+.. [2] `Opinion Word Expansion and Target Extraction through Double Propagation <https://dl.acm.org/citation.cfm?id=1970422>`__, Guang Qiu, Bing Liu, Jiajun Bu, and Chun Chen. 2011. Computational Linguistics, 37(1):9–27.
+.. [3] `Mining and Summarizing Customer Reviews <http://dx.doi.org/10.1145/1014052.1014073>`__, Minqing Hu and Bing Liu. 2004. In Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD ’04, pages 168–177.
 
 .. _Spacy: https://spacy.io

doc/source/absa_solution.rst (+11 -10)

@@ -31,7 +31,7 @@ a statistical analysis of sentiment towards specific aspects in the inference da
 
 The solution uses the opinion and aspect lexicons that are generated using NLP Architect's ABSA
 training model (see blue blocks in 'flow' diagram below). For more details regarding the training step
-see :doc:`ABSA <absa>`
+see :doc:`ABSA <absa>`.
 
 The solution encapsulates the ABSA inference model (green blocks) and adds on top of it a
 statistical analysis module (grey block) for calculating the amount of positive and negative sentiment

@@ -61,23 +61,24 @@ blue and opinion terms are colored in green/red:
 Solution execution
 ==================
 
-The solution execution is divided to two parts - training A and inference:
+The solution execution is divided to two parts - training and inference:
 
 Training
 ========
 
-See training under :doc:`ABSA <absa>`
+See training under :doc:`ABSA <absa>`.
 
 
 Inference
 =========
 
+
 Full code example is available at ``examples/absa/solution/absa_solution.py``.
 There are two training modes:
 
-1. Providing solution data in a raw text format. In this case the solution flow will
-apply the dependency parser to the data:
 
+**1.** Providing solution data in a raw text format. In this case the solution flow will
+apply the dependency parser to the data:
 
 .. code:: python
 

@@ -86,7 +87,7 @@ apply the dependency parser to the data:
        data='/path/to/text/file/or/directory')
 
 
-Arguments:
+**Arguments:**
 
 ``aspect_lex'=/path/to/aspects.csv'`` - path to aspect lexicon (csv file) that was produced by the training phase.
 aspect.csv may be manually edited for grouping alias aspect names (e.g. 'drinks' and 'beverages')

@@ -98,7 +99,7 @@ together. Simply copy all alias names to the same line in the csv file.
 separated by newlines or a single csv file containing one doc per line or a directory containing one raw
 text file per document.
 
-Notes:
+**Notes:**
 
 a. For demonstration purposes we provide a sample of tripadvisor.co.uk
 restaurants reviews under the `Creative Commons Attribution-Share-Alike 3.0 License <https://creativecommons.org/licenses/by-sa/3.0/>`__ (Copyright 2018 Wikimedia Foundation).

@@ -107,8 +108,8 @@ The dataset can is located at ``datasets/absa/tripadvisor_co_uk-travel_restauran
 b. By default, when the execution terminates, a browser window is opened, displaying the
 visualization UI.
 
-
-2. Providing parsed training data. In this case the solution flow skips the parsing step:
+|
+| **2.** Providing parsed training data. In this case the solution flow skips the parsing step:
 
 .. code:: python
 

@@ -117,7 +118,7 @@ visualization UI.
        parsed_data='/path/to/parsed/directory',
        ui=False)
 
-Note:
+**Note:**
 
 Setting ``ui=False`` disables the UI and enables to get the sentiment statistics as dataframe. This
 enables the user to use those statistics as input to his own custom built visualization.
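
Putting the two documented modes together, end-to-end usage looks roughly like the sketch below. The ``SentimentSolution`` class, its import path, and the ``opinion_lex`` keyword are assumptions inferred from this page; the authoritative version is ``examples/absa/solution/absa_solution.py``.

.. code:: python

    # Minimal sketch; class name, import path and opinion_lex kwarg are assumed.
    from nlp_architect.solutions.absa_solution.sentiment_solution import SentimentSolution

    solution = SentimentSolution()

    # Mode 1: raw text - the solution flow applies the dependency parser itself.
    stats = solution.run(aspect_lex='/path/to/aspects.csv',
                         opinion_lex='/path/to/opinions.csv',  # assumed kwarg
                         data='/path/to/text/file/or/directory')

    # Mode 2: pre-parsed data - parsing is skipped; ui=False returns the
    # sentiment statistics as a dataframe instead of opening the browser UI.
    stats = solution.run(aspect_lex='/path/to/aspects.csv',
                         opinion_lex='/path/to/opinions.csv',  # assumed kwarg
                         parsed_data='/path/to/parsed/directory',
                         ui=False)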

doc/source/index.rst (+3 -9)

@@ -32,7 +32,7 @@
 .. toctree::
    :hidden:
    :maxdepth: 1
-   :caption: NLP/NLU Components
+   :caption: NLP/NLU Models
 
    Aspect Based Sentiment Analysis <absa.rst>
    chunker.rst

@@ -43,20 +43,14 @@
    word_sense.rst
    np2vec.rst
    supervised_sentiment.rst
+   reading_comprehension.rst
+   memn2n.rst
    TCN Language Model <tcn.rst>
    Unsupervised Crosslingual Embeddings <crosslingual_emb.rst>
    Cross Document Co-Reference <cross_doc_coref.rst>
    Semantic Relation Identification <identifying_semantic_relation.rst>
    Sparse Neural Machine Translation <sparse_gnmt.rst>
 
-.. toctree::
-   :hidden:
-   :maxdepth: 1
-   :caption: End to End Models
-
-   reading_comprehension.rst
-   memn2n.rst
-
 .. toctree::
    :hidden:
    :maxdepth: 1

doc/source/main.rst (+1 -0)

@@ -42,6 +42,7 @@ The library contains state-of-art and novel NLP and NLU models in a variety of t
 - Reading comprehension
 - Language modeling using Temporal Convolution Network
 - Unsupervised Crosslingual Word Embedding
+- Aspect Based Sentiment Analysis
 - Supervised sentiment analysis
 - Sparse and quantized neural machine translation
 - Relation Identification and cross document coreference

examples/ner/interactive.py (+1 -0)

@@ -82,6 +82,7 @@ def vectorize(doc, w_vocab, c_vocab):
     doc_vec = vectorize(text_arr, word_vocab, char_vocab)
     seq_len = np.array([len(text_arr)]).reshape(-1, 1)
     inputs = list(doc_vec)
+    # pylint: disable=no-member
     if model.crf_mode == 'pad':
         inputs = list(doc_vec) + [seq_len]
     doc_ner = model.predict(inputs, batch_size=1).argmax(2).flatten()

examples/ner/train.py (+4 -4)

@@ -108,13 +108,13 @@ def validate_input_args(input_args):
     y_train = keras.utils.to_categorical(y_train, num_y_labels)
 
     ner_model = NERCRF(use_cudnn=args.use_cudnn)
+    # pylint: disable=unexpected-keyword-arg
     ner_model.build(args.word_length,
                     num_y_labels,
                     vocabulary_size,
                     char_vocabulary_size,
                     word_embedding_dims=args.word_embedding_dims,
                     char_embedding_dims=args.character_embedding_dims,
-                    word_lstm_dims=args.char_features_lstm_dims,
                     tagger_lstm_dims=args.entity_tagger_lstm_dims,
                     dropout=args.dropout)
 

@@ -126,9 +126,9 @@ def validate_input_args(input_args):
 
     train_inputs = [x_train, x_char_train]
     test_inputs = [x_test, x_char_test]
-    if not args.use_cudnn:
-        train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
-        test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))
+
+    train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
+    test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))
 
     conll_cb = ConllCallback(test_inputs, y_test, dataset.y_labels.vocab,
                              batch_size=args.b)
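
The second hunk drops the CuDNN condition: word-level sequence lengths (per-row token counts derived from the zero padding) are now appended to the model inputs unconditionally. A small self-contained sketch of the exact expression used above:

.. code:: python

    import numpy as np

    # Padded word-id matrix; 0 is the pad token.
    x_train = np.array([[4, 7, 9, 0, 0],
                        [3, 0, 0, 0, 0]])

    # Count non-pad tokens per row and reshape to (batch, 1),
    # the shape the CRF layer expects for sequence lengths.
    lengths = np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1))
    print(lengths)  # [[3]
                    #  [1]]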

nlp_architect/api/ner_api.py (+6 -6)

@@ -31,8 +31,8 @@ class NerApi(AbstractApi):
     NER model API
     """
     model_dir = str(LIBRARY_OUT / 'ner-pretrained')
-    pretrained_model = path.join(model_dir, 'model.h5')
-    pretrained_model_info = path.join(model_dir, 'model_info.dat')
+    pretrained_model = path.join(model_dir, 'model_v4.h5')
+    pretrained_model_info = path.join(model_dir, 'model_info_v4.dat')
 
     def __init__(self, prompt=True):
         self.model = None

@@ -71,10 +71,10 @@ def _download_pretrained_model(self, prompt=True):
             sys.exit(0)
         download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
                                  '/models/ner/',
-                                 'model.h5', self.pretrained_model)
+                                 'model_v4.h5', self.pretrained_model)
         download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
                                  '/models/ner/',
-                                 'model_info.dat', self.pretrained_model_info)
+                                 'model_info_v4.dat', self.pretrained_model_info)
         print('Done.')
 
     def load_model(self):

@@ -129,8 +129,8 @@ def inference(self, doc):
         doc_vec = self.vectorize(text_arr, self.word_vocab, self.char_vocab)
         seq_len = np.array([len(text_arr)]).reshape(-1, 1)
         inputs = list(doc_vec)
-        if self.model.crf_mode == 'pad':
-            inputs = list(doc_vec) + [seq_len]
+        # pylint: disable=no-member
+        inputs = list(doc_vec) + [seq_len]
         doc_ner = self.model.predict(inputs, batch_size=1).argmax(2).flatten()
         tags = [self.y_vocab.get(n, None) for n in doc_ner]
         return self.pretty_print(text_arr, tags)
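
For reference, end-to-end use of this API looks roughly like the sketch below. The constructor, ``load_model()`` and ``inference()`` all appear in this diff, but treat the exact call sequence and output format as assumptions.

.. code:: python

    # Minimal usage sketch based only on the methods visible in this diff.
    from nlp_architect.api.ner_api import NerApi

    api = NerApi(prompt=False)  # fetches model_v4.h5 / model_info_v4.dat if not cached (assumed)
    api.load_model()
    result = api.inference('Intel was founded in Santa Clara by Gordon Moore.')
    print(result)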

nlp_architect/contrib/tensorflow/python/keras/layers/crf.py (+27 -46)

@@ -14,10 +14,9 @@
 # limitations under the License.
 # ******************************************************************************
 import tensorflow as tf
-from tensorflow import convert_to_tensor, keras
 
 
-class CRF(keras.layers.Layer):
+class CRF(tf.keras.layers.Layer):
     """
     Conditional Random Field layer (tf.keras)
     `CRF` can be used as the last layer in a network (as a classifier). Input shape (features)

@@ -29,55 +28,36 @@ class CRF(keras.layers.Layer):
 
     Args:
         num_labels (int): the number of labels to tag each temporal input.
-        mode (string, optional): operation mode, 'reg' for regular full sequence learning (all
-            sequences have equal length), or 'pad' for using with supplied sequence lengths (useful
-            for padded sequences)
 
     Input shape:
-        'reg' mode - nD tensor with shape `(batch_size, sentence length, num_classes)`.
-        'pad' mode - tuple of `(batch_size, sentence length, num_classes)`, `(batch_size, 1)`
+        nD tensor with shape `(batch_size, sentence length, num_classes)`.
 
     Output shape:
         nD tensor with shape: `(batch_size, sentence length, num_classes)`.
     """
-    def __init__(self, num_classes, mode='reg', **kwargs):
+
+    def __init__(self, num_classes, **kwargs):
         self.transitions = None
         super(CRF, self).__init__(**kwargs)
         # num of output labels
         self.output_dim = int(num_classes)
-        self.mode = mode
-        if self.mode == 'pad':
-            self.input_spec = [keras.layers.InputSpec(min_ndim=3),
-                               keras.layers.InputSpec(min_ndim=2)]
-        elif self.mode == 'reg':
-            self.input_spec = keras.layers.InputSpec(min_ndim=3)
-        else:
-            raise ValueError
-        self.supports_masking = True
+        self.input_spec = tf.keras.layers.InputSpec(min_ndim=3)
+        self.supports_masking = False
         self.sequence_lengths = None
 
     def get_config(self):
         config = {
             'output_dim': self.output_dim,
-            'mode': self.mode,
             'supports_masking': self.supports_masking,
             'transitions': tf.keras.backend.eval(self.transitions)
         }
         base_config = super(CRF, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
     def build(self, input_shape):
-        if self.mode == 'pad':
-            assert len(input_shape) == 2
-            assert len(input_shape[0]) == 3
-            assert len(input_shape[1]) == 2
-            f_shape = tf.TensorShape(input_shape[0])
-            input_spec = [keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]}),
-                          keras.layers.InputSpec(min_ndim=2, axes={-1: 1}, dtype=tf.int32)]
-        else:
-            assert len(input_shape) == 3
-            f_shape = tf.TensorShape(input_shape)
-            input_spec = keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
+        assert len(input_shape) == 3
+        f_shape = tf.TensorShape(input_shape)
+        input_spec = tf.keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
 
         if f_shape[-1] is None:
             raise ValueError('The last dimension of the inputs to `CRF` '

@@ -92,21 +72,26 @@ def build(self, input_shape):
                                            trainable=True)
         self.built = True
 
-    def call(self, inputs, **kwargs):
-        if self.mode == 'pad':
-            sequences = convert_to_tensor(inputs[0], dtype=self.dtype)
-            self.sequence_lengths = tf.keras.backend.flatten(inputs[-1])
+    # pylint: disable=arguments-differ
+    def call(self, inputs, sequence_lengths=None, **kwargs):
+        sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
+        if sequence_lengths is not None:
+            assert len(sequence_lengths.shape) == 2
+            assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
+            seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
+            assert seq_len_shape[1] == 1
+            self.sequence_lengths = tf.keras.backend.flatten(sequence_lengths)
         else:
-            sequences = convert_to_tensor(inputs, dtype=self.dtype)
-            shape = tf.shape(inputs)
-            self.sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
+            self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * \
+                                    (tf.shape(inputs)[1])
+
         viterbi_sequence, _ = tf.contrib.crf.crf_decode(sequences, self.transitions,
                                                         self.sequence_lengths)
-        output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
-        return keras.backend.in_train_phase(sequences, output)
+        output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
+        return tf.keras.backend.in_train_phase(sequences, output)
 
     def loss(self, y_true, y_pred):
-        y_pred = convert_to_tensor(y_pred, dtype=self.dtype)
+        y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
         log_likelihood, self.transitions = \
             tf.contrib.crf.crf_log_likelihood(y_pred,
                                               tf.cast(tf.keras.backend.argmax(y_true),

@@ -116,12 +101,8 @@ def loss(self, y_true, y_pred):
         return tf.reduce_mean(-log_likelihood)
 
     def compute_output_shape(self, input_shape):
-        if self.mode == 'pad':
-            data_shape = input_shape[0]
-        else:
-            data_shape = input_shape
-        tf.TensorShape(data_shape).assert_has_rank(3)
-        return data_shape[:2] + (self.output_dim,)
+        tf.TensorShape(input_shape).assert_has_rank(3)
+        return input_shape[:2] + (self.output_dim,)
 
     @property
     def viterbi_accuracy(self):

@@ -130,7 +111,7 @@ def accuracy(y_true, y_pred):
             sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
             viterbi_sequence, _ = tf.contrib.crf.crf_decode(y_pred, self.transitions,
                                                             sequence_lengths)
-            output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
+            output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
             return tf.keras.metrics.categorical_accuracy(y_true, output)
         accuracy.func_name = 'viterbi_accuracy'
         return accuracy
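
After this refactor, padded-sequence support moves from the removed 'pad' constructor mode to an explicit ``sequence_lengths`` keyword on ``call()``. A minimal TF 1.x-era sketch of wiring the layer into a functional model (``tf.contrib`` is required by this layer; the surrounding shapes and layers are illustrative assumptions, while ``loss`` and ``viterbi_accuracy`` are the methods defined above):

.. code:: python

    import tensorflow as tf

    from nlp_architect.contrib.tensorflow.python.keras.layers.crf import CRF

    num_classes = 5

    # Word-level features, e.g. the output of an embedding + BiLSTM stack.
    features = tf.keras.layers.Input(shape=(None, 100))
    # Per-example lengths, shape (batch, 1), dtype int32, as the asserts in call() require.
    seq_lens = tf.keras.layers.Input(shape=(1,), dtype='int32')

    emissions = tf.keras.layers.Dense(num_classes)(features)
    crf = CRF(num_classes)
    # Padded sequences: pass lengths explicitly instead of the removed 'pad' mode.
    predictions = crf(emissions, sequence_lengths=seq_lens)

    model = tf.keras.Model(inputs=[features, seq_lens], outputs=predictions)
    model.compile(optimizer='adam', loss=crf.loss, metrics=[crf.viterbi_accuracy])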

nlp_architect/data/sequential_tagging.py (-1)

@@ -119,7 +119,6 @@ def _read_file(self, filepath):
         with open(filepath, encoding='utf-8') as fp:
             data = fp.readlines()
         data = [d.strip() for d in data]
-        data = [d for d in data if 'DOCSTART' not in d]
         sentences = self._split_into_sentences(data)
         parsed_sentences = [self._parse_sentence(s) for s in sentences if len(s) > 0]
         return parsed_sentences

nlp_architect/models/chunker.py (+1 -1)

@@ -134,7 +134,7 @@ def build(self,
 
         model = tf.keras.Model(input_src, [pos_out, chunks_out])
         if optimizer is None:
-            self.optimizer = tf.train.AdamOptimizer()
+            self.optimizer = tf.keras.optimizers.Adam(0.001, clipnorm=5.)
         else:
             self.optimizer = optimizer
         model.compile(optimizer=self.optimizer,
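
The new default optimizer is Keras-native and adds gradient-norm clipping, which the old TF 1.x ``tf.train.AdamOptimizer()`` default did not. A minimal sketch of the new default on a stand-in model (the chunker builds its own ``tf.keras.Model`` internally):

.. code:: python

    import tensorflow as tf

    # Stand-in model; illustrative only.
    model = tf.keras.Sequential([tf.keras.layers.Dense(8, input_shape=(4,))])

    # New default: Keras-native Adam, lr=0.001, per-gradient L2 norm clipped at 5.
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.),
                  loss='mse')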
