Fixed NER with TF1.12 (#356)

Peter Izsak · web-flow · commit 2acd47b829d7 · 2019-04-02T18:27:11.000+03:00
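* Fixed NER with TF1.12:
    * CRF layer: removed the 'pad'/'reg' `mode` argument; sequence lengths are now passed
      as an optional `sequence_lengths` argument to `call`
    * NERCRF: removed the `word_lstm_dims` and `crf_mode` arguments; the model now always
      takes a `seq_lens` input and is compiled without the viterbi accuracy metric
    * NerApi: pretrained model files renamed to `model_v4.h5` / `model_info_v4.dat`
    * Chunker: default optimizer changed to `tf.keras.optimizers.Adam(0.001, clipnorm=5.)`
    * Sequential tagging reader: lines containing 'DOCSTART' are no longer filtered out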
diff --git a/examples/ner/interactive.py b/examples/ner/interactive.py
@@ -82,6 +82,7 @@ def vectorize(doc, w_vocab, c_vocab):
         doc_vec = vectorize(text_arr, word_vocab, char_vocab)
         seq_len = np.array([len(text_arr)]).reshape(-1, 1)
         inputs = list(doc_vec)
+        # pylint: disable=no-member
         if model.crf_mode == 'pad':
             inputs = list(doc_vec) + [seq_len]
         doc_ner = model.predict(inputs, batch_size=1).argmax(2).flatten()
diff --git a/examples/ner/train.py b/examples/ner/train.py
@@ -108,13 +108,13 @@ def validate_input_args(input_args):
     y_train = keras.utils.to_categorical(y_train, num_y_labels)
 
     ner_model = NERCRF(use_cudnn=args.use_cudnn)
+    # pylint: disable=unexpected-keyword-arg
     ner_model.build(args.word_length,
                     num_y_labels,
                     vocabulary_size,
                     char_vocabulary_size,
                     word_embedding_dims=args.word_embedding_dims,
                     char_embedding_dims=args.character_embedding_dims,
-                    word_lstm_dims=args.char_features_lstm_dims,
                     tagger_lstm_dims=args.entity_tagger_lstm_dims,
                     dropout=args.dropout)
 
@@ -126,9 +126,9 @@ def validate_input_args(input_args):
 
     train_inputs = [x_train, x_char_train]
     test_inputs = [x_test, x_char_test]
-    if not args.use_cudnn:
-        train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
-        test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))
+
+    train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
+    test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))
 
     conll_cb = ConllCallback(test_inputs, y_test, dataset.y_labels.vocab,
                              batch_size=args.b)
diff --git a/nlp_architect/api/ner_api.py b/nlp_architect/api/ner_api.py
@@ -31,8 +31,8 @@ class NerApi(AbstractApi):
     NER model API
     """
     model_dir = str(LIBRARY_OUT / 'ner-pretrained')
-    pretrained_model = path.join(model_dir, 'model.h5')
-    pretrained_model_info = path.join(model_dir, 'model_info.dat')
+    pretrained_model = path.join(model_dir, 'model_v4.h5')
+    pretrained_model_info = path.join(model_dir, 'model_info_v4.dat')
 
     def __init__(self, prompt=True):
         self.model = None
@@ -71,10 +71,10 @@ def _download_pretrained_model(self, prompt=True):
                     sys.exit(0)
             download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
                                      '/models/ner/',
-                                     'model.h5', self.pretrained_model)
+                                     'model_v4.h5', self.pretrained_model)
             download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
                                      '/models/ner/',
-                                     'model_info.dat', self.pretrained_model_info)
+                                     'model_info_v4.dat', self.pretrained_model_info)
             print('Done.')
 
     def load_model(self):
@@ -129,8 +129,8 @@ def inference(self, doc):
         doc_vec = self.vectorize(text_arr, self.word_vocab, self.char_vocab)
         seq_len = np.array([len(text_arr)]).reshape(-1, 1)
         inputs = list(doc_vec)
-        if self.model.crf_mode == 'pad':
-            inputs = list(doc_vec) + [seq_len]
+        # pylint: disable=no-member
+        inputs = list(doc_vec) + [seq_len]
         doc_ner = self.model.predict(inputs, batch_size=1).argmax(2).flatten()
         tags = [self.y_vocab.get(n, None) for n in doc_ner]
         return self.pretty_print(text_arr, tags)
diff --git a/nlp_architect/contrib/tensorflow/python/keras/layers/crf.py b/nlp_architect/contrib/tensorflow/python/keras/layers/crf.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 # ******************************************************************************
 import tensorflow as tf
-from tensorflow import convert_to_tensor, keras
 
 
-class CRF(keras.layers.Layer):
+class CRF(tf.keras.layers.Layer):
     """
     Conditional Random Field layer (tf.keras)
     `CRF` can be used as the last layer in a network (as a classifier). Input shape (features)
@@ -29,55 +28,36 @@ class CRF(keras.layers.Layer):
 
     Args:
         num_labels (int): the number of labels to tag each temporal input.
-        mode (string, optional): operation mode, 'reg' for regular full sequence learning (all
-            sequences have equal length), or 'pad' for using with supplied sequence lengths (useful
-            for padded sequences)
 
     Input shape:
-        'reg' mode - nD tensor with shape `(batch_size, sentence length, num_classes)`.
-        'pad' mode - tuple of `(batch_size, sentence length, num_classes)`, `(batch_size, 1)`
+        nD tensor with shape `(batch_size, sentence length, num_classes)`.
 
     Output shape:
         nD tensor with shape: `(batch_size, sentence length, num_classes)`.
     """
-    def __init__(self, num_classes, mode='reg', **kwargs):
+
+    def __init__(self, num_classes, **kwargs):
         self.transitions = None
         super(CRF, self).__init__(**kwargs)
         # num of output labels
         self.output_dim = int(num_classes)
-        self.mode = mode
-        if self.mode == 'pad':
-            self.input_spec = [keras.layers.InputSpec(min_ndim=3),
-                               keras.layers.InputSpec(min_ndim=2)]
-        elif self.mode == 'reg':
-            self.input_spec = keras.layers.InputSpec(min_ndim=3)
-        else:
-            raise ValueError
-        self.supports_masking = True
+        self.input_spec = tf.keras.layers.InputSpec(min_ndim=3)
+        self.supports_masking = False
         self.sequence_lengths = None
 
     def get_config(self):
         config = {
             'output_dim': self.output_dim,
-            'mode': self.mode,
             'supports_masking': self.supports_masking,
             'transitions': tf.keras.backend.eval(self.transitions)
         }
         base_config = super(CRF, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
     def build(self, input_shape):
-        if self.mode == 'pad':
-            assert len(input_shape) == 2
-            assert len(input_shape[0]) == 3
-            assert len(input_shape[1]) == 2
-            f_shape = tf.TensorShape(input_shape[0])
-            input_spec = [keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]}),
-                          keras.layers.InputSpec(min_ndim=2, axes={-1: 1}, dtype=tf.int32)]
-        else:
-            assert len(input_shape) == 3
-            f_shape = tf.TensorShape(input_shape)
-            input_spec = keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
+        assert len(input_shape) == 3
+        f_shape = tf.TensorShape(input_shape)
+        input_spec = tf.keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
 
         if f_shape[-1] is None:
             raise ValueError('The last dimension of the inputs to `CRF` '
@@ -92,21 +72,26 @@ def build(self, input_shape):
                                            trainable=True)
         self.built = True
 
-    def call(self, inputs, **kwargs):
-        if self.mode == 'pad':
-            sequences = convert_to_tensor(inputs[0], dtype=self.dtype)
-            self.sequence_lengths = tf.keras.backend.flatten(inputs[-1])
+    # pylint: disable=arguments-differ
+    def call(self, inputs, sequence_lengths=None, **kwargs):
+        sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
+        if sequence_lengths is not None:
+            assert len(sequence_lengths.shape) == 2
+            assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
+            seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
+            assert seq_len_shape[1] == 1
+            self.sequence_lengths = tf.keras.backend.flatten(sequence_lengths)
         else:
-            sequences = convert_to_tensor(inputs, dtype=self.dtype)
-            shape = tf.shape(inputs)
-            self.sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
+            self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * \
+                (tf.shape(inputs)[1])
+
         viterbi_sequence, _ = tf.contrib.crf.crf_decode(sequences, self.transitions,
                                                         self.sequence_lengths)
-        output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
-        return keras.backend.in_train_phase(sequences, output)
+        output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
+        return tf.keras.backend.in_train_phase(sequences, output)
 
     def loss(self, y_true, y_pred):
-        y_pred = convert_to_tensor(y_pred, dtype=self.dtype)
+        y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
         log_likelihood, self.transitions = \
             tf.contrib.crf.crf_log_likelihood(y_pred,
                                               tf.cast(tf.keras.backend.argmax(y_true),
@@ -116,12 +101,8 @@ def loss(self, y_true, y_pred):
         return tf.reduce_mean(-log_likelihood)
 
     def compute_output_shape(self, input_shape):
-        if self.mode == 'pad':
-            data_shape = input_shape[0]
-        else:
-            data_shape = input_shape
-        tf.TensorShape(data_shape).assert_has_rank(3)
-        return data_shape[:2] + (self.output_dim,)
+        tf.TensorShape(input_shape).assert_has_rank(3)
+        return input_shape[:2] + (self.output_dim,)
 
     @property
     def viterbi_accuracy(self):
@@ -130,7 +111,7 @@ def accuracy(y_true, y_pred):
             sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
             viterbi_sequence, _ = tf.contrib.crf.crf_decode(y_pred, self.transitions,
                                                             sequence_lengths)
-            output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
+            output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
             return tf.keras.metrics.categorical_accuracy(y_true, output)
         accuracy.func_name = 'viterbi_accuracy'
         return accuracy
diff --git a/nlp_architect/data/sequential_tagging.py b/nlp_architect/data/sequential_tagging.py
@@ -119,7 +119,6 @@ def _read_file(self, filepath):
         with open(filepath, encoding='utf-8') as fp:
             data = fp.readlines()
             data = [d.strip() for d in data]
-            data = [d for d in data if 'DOCSTART' not in d]
             sentences = self._split_into_sentences(data)
             parsed_sentences = [self._parse_sentence(s) for s in sentences if len(s) > 0]
         return parsed_sentences
diff --git a/nlp_architect/models/chunker.py b/nlp_architect/models/chunker.py
@@ -134,7 +134,7 @@ def build(self,
 
         model = tf.keras.Model(input_src, [pos_out, chunks_out])
         if optimizer is None:
-            self.optimizer = tf.train.AdamOptimizer()
+            self.optimizer = tf.keras.optimizers.Adam(0.001, clipnorm=5.)
         else:
             self.optimizer = optimizer
         model.compile(optimizer=self.optimizer,
diff --git a/nlp_architect/models/ner_crf.py b/nlp_architect/models/ner_crf.py
@@ -38,10 +38,8 @@ def __init__(self, use_cudnn=False):
         self.char_vocab_size = None
         self.word_embedding_dims = None
         self.char_embedding_dims = None
-        self.word_lstm_dims = None
         self.tagger_lstm_dims = None
         self.dropout = None
-        self.crf_mode = None
         self.use_cudnn = use_cudnn
 
     def build(self,
@@ -51,10 +49,8 @@ def build(self,
               char_vocab_size,
               word_embedding_dims=100,
               char_embedding_dims=16,
-              word_lstm_dims=20,
               tagger_lstm_dims=200,
-              dropout=0.5,
-              crf_mode='pad'):
+              dropout=0.5):
         """
         Build a NERCRF model
 
@@ -65,24 +61,17 @@ def build(self,
             char_vocab_size (int): character vocabulary size
             word_embedding_dims (int): word embedding dimensions
             char_embedding_dims (int): character embedding dimensions
-            word_lstm_dims (int): character LSTM feature extractor output dimensions
             tagger_lstm_dims (int): word tagger LSTM output dimensions
             dropout (float): dropout rate
-            crf_mode (string): CRF operation mode, select 'pad'/'reg' for supplied sequences in
-                input or full sequence tagging. ('reg' is forced when use_cudnn=True)
         """
         self.word_length = word_length
         self.target_label_dims = target_label_dims
         self.word_vocab_size = word_vocab_size
         self.char_vocab_size = char_vocab_size
         self.word_embedding_dims = word_embedding_dims
         self.char_embedding_dims = char_embedding_dims
-        self.word_lstm_dims = word_lstm_dims
         self.tagger_lstm_dims = tagger_lstm_dims
         self.dropout = dropout
-        self.crf_mode = crf_mode
-
-        assert crf_mode in ('pad', 'reg'), 'crf_mode is invalid'
 
         # build word input
         words_input = tf.keras.layers.Input(shape=(None,), name='words_input')
@@ -117,23 +106,17 @@ def build(self,
 
         inputs = [words_input, word_chars_input]
 
-        if self.use_cudnn:
-            self.crf_mode = 'reg'
-        with tf.device('/cpu:0'):
-            crf = CRF(self.target_label_dims, mode=self.crf_mode, name='ner_crf')
-            if self.crf_mode == 'pad':
-                sequence_lengths = tf.keras.layers.Input(batch_shape=(None, 1), dtype='int32')
-                predictions = crf([bilstm, sequence_lengths])
-                inputs.append(sequence_lengths)
-            else:
-                predictions = crf(bilstm)
+        sequence_lengths = tf.keras.layers.Input(shape=(1,), dtype='int32', name='seq_lens')
+        inputs.append(sequence_lengths)
+        crf = CRF(self.target_label_dims, name='ner_crf')
+        predictions = crf(inputs=bilstm, sequence_lengths=sequence_lengths)
 
         # compile the model
         model = tf.keras.Model(inputs=inputs,
                                outputs=predictions)
         model.compile(loss={'ner_crf': crf.loss},
-                      optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.),
-                      metrics=[crf.viterbi_accuracy])
+                      optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.))
+
         self.model = model
 
     def _rnn_cell(self, units, **kwargs):