Skip to content
This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit 2acd47b

Browse files
Authored by Peter Izsak
Fixed NER with TF1.12 (#356)
* Fixed NER with TF1.12
1 parent 1b9d133 commit 2acd47b

File tree

7 files changed: 46 additions and 82 deletions

examples/ner/interactive.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@ def vectorize(doc, w_vocab, c_vocab):
8282
doc_vec = vectorize(text_arr, word_vocab, char_vocab)
8383
seq_len = np.array([len(text_arr)]).reshape(-1, 1)
8484
inputs = list(doc_vec)
85+
# pylint: disable=no-member
8586
if model.crf_mode == 'pad':
8687
inputs = list(doc_vec) + [seq_len]
8788
doc_ner = model.predict(inputs, batch_size=1).argmax(2).flatten()

examples/ner/train.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -108,13 +108,13 @@ def validate_input_args(input_args):
108108
y_train = keras.utils.to_categorical(y_train, num_y_labels)
109109

110110
ner_model = NERCRF(use_cudnn=args.use_cudnn)
111+
# pylint: disable=unexpected-keyword-arg
111112
ner_model.build(args.word_length,
112113
num_y_labels,
113114
vocabulary_size,
114115
char_vocabulary_size,
115116
word_embedding_dims=args.word_embedding_dims,
116117
char_embedding_dims=args.character_embedding_dims,
117-
word_lstm_dims=args.char_features_lstm_dims,
118118
tagger_lstm_dims=args.entity_tagger_lstm_dims,
119119
dropout=args.dropout)
120120

@@ -126,9 +126,9 @@ def validate_input_args(input_args):
126126

127127
train_inputs = [x_train, x_char_train]
128128
test_inputs = [x_test, x_char_test]
129-
if not args.use_cudnn:
130-
train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
131-
test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))
129+
130+
train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
131+
test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))
132132

133133
conll_cb = ConllCallback(test_inputs, y_test, dataset.y_labels.vocab,
134134
batch_size=args.b)

nlp_architect/api/ner_api.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,8 @@ class NerApi(AbstractApi):
3131
NER model API
3232
"""
3333
model_dir = str(LIBRARY_OUT / 'ner-pretrained')
34-
pretrained_model = path.join(model_dir, 'model.h5')
35-
pretrained_model_info = path.join(model_dir, 'model_info.dat')
34+
pretrained_model = path.join(model_dir, 'model_v4.h5')
35+
pretrained_model_info = path.join(model_dir, 'model_info_v4.dat')
3636

3737
def __init__(self, prompt=True):
3838
self.model = None
@@ -71,10 +71,10 @@ def _download_pretrained_model(self, prompt=True):
7171
sys.exit(0)
7272
download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
7373
'/models/ner/',
74-
'model.h5', self.pretrained_model)
74+
'model_v4.h5', self.pretrained_model)
7575
download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
7676
'/models/ner/',
77-
'model_info.dat', self.pretrained_model_info)
77+
'model_info_v4.dat', self.pretrained_model_info)
7878
print('Done.')
7979

8080
def load_model(self):
@@ -129,8 +129,8 @@ def inference(self, doc):
129129
doc_vec = self.vectorize(text_arr, self.word_vocab, self.char_vocab)
130130
seq_len = np.array([len(text_arr)]).reshape(-1, 1)
131131
inputs = list(doc_vec)
132-
if self.model.crf_mode == 'pad':
133-
inputs = list(doc_vec) + [seq_len]
132+
# pylint: disable=no-member
133+
inputs = list(doc_vec) + [seq_len]
134134
doc_ner = self.model.predict(inputs, batch_size=1).argmax(2).flatten()
135135
tags = [self.y_vocab.get(n, None) for n in doc_ner]
136136
return self.pretty_print(text_arr, tags)

nlp_architect/contrib/tensorflow/python/keras/layers/crf.py

Lines changed: 27 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,9 @@
1414
# limitations under the License.
1515
# ******************************************************************************
1616
import tensorflow as tf
17-
from tensorflow import convert_to_tensor, keras
1817

1918

20-
class CRF(keras.layers.Layer):
19+
class CRF(tf.keras.layers.Layer):
2120
"""
2221
Conditional Random Field layer (tf.keras)
2322
`CRF` can be used as the last layer in a network (as a classifier). Input shape (features)
@@ -29,55 +28,36 @@ class CRF(keras.layers.Layer):
2928
3029
Args:
3130
num_labels (int): the number of labels to tag each temporal input.
32-
mode (string, optional): operation mode, 'reg' for regular full sequence learning (all
33-
sequences have equal length), or 'pad' for using with supplied sequence lengths (useful
34-
for padded sequences)
3531
3632
Input shape:
37-
'reg' mode - nD tensor with shape `(batch_size, sentence length, num_classes)`.
38-
'pad' mode - tuple of `(batch_size, sentence length, num_classes)`, `(batch_size, 1)`
33+
nD tensor with shape `(batch_size, sentence length, num_classes)`.
3934
4035
Output shape:
4136
nD tensor with shape: `(batch_size, sentence length, num_classes)`.
4237
"""
43-
def __init__(self, num_classes, mode='reg', **kwargs):
38+
39+
def __init__(self, num_classes, **kwargs):
4440
self.transitions = None
4541
super(CRF, self).__init__(**kwargs)
4642
# num of output labels
4743
self.output_dim = int(num_classes)
48-
self.mode = mode
49-
if self.mode == 'pad':
50-
self.input_spec = [keras.layers.InputSpec(min_ndim=3),
51-
keras.layers.InputSpec(min_ndim=2)]
52-
elif self.mode == 'reg':
53-
self.input_spec = keras.layers.InputSpec(min_ndim=3)
54-
else:
55-
raise ValueError
56-
self.supports_masking = True
44+
self.input_spec = tf.keras.layers.InputSpec(min_ndim=3)
45+
self.supports_masking = False
5746
self.sequence_lengths = None
5847

5948
def get_config(self):
6049
config = {
6150
'output_dim': self.output_dim,
62-
'mode': self.mode,
6351
'supports_masking': self.supports_masking,
6452
'transitions': tf.keras.backend.eval(self.transitions)
6553
}
6654
base_config = super(CRF, self).get_config()
6755
return dict(list(base_config.items()) + list(config.items()))
6856

6957
def build(self, input_shape):
70-
if self.mode == 'pad':
71-
assert len(input_shape) == 2
72-
assert len(input_shape[0]) == 3
73-
assert len(input_shape[1]) == 2
74-
f_shape = tf.TensorShape(input_shape[0])
75-
input_spec = [keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]}),
76-
keras.layers.InputSpec(min_ndim=2, axes={-1: 1}, dtype=tf.int32)]
77-
else:
78-
assert len(input_shape) == 3
79-
f_shape = tf.TensorShape(input_shape)
80-
input_spec = keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
58+
assert len(input_shape) == 3
59+
f_shape = tf.TensorShape(input_shape)
60+
input_spec = tf.keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
8161

8262
if f_shape[-1] is None:
8363
raise ValueError('The last dimension of the inputs to `CRF` '
@@ -92,21 +72,26 @@ def build(self, input_shape):
9272
trainable=True)
9373
self.built = True
9474

95-
def call(self, inputs, **kwargs):
96-
if self.mode == 'pad':
97-
sequences = convert_to_tensor(inputs[0], dtype=self.dtype)
98-
self.sequence_lengths = tf.keras.backend.flatten(inputs[-1])
75+
# pylint: disable=arguments-differ
76+
def call(self, inputs, sequence_lengths=None, **kwargs):
77+
sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
78+
if sequence_lengths is not None:
79+
assert len(sequence_lengths.shape) == 2
80+
assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
81+
seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
82+
assert seq_len_shape[1] == 1
83+
self.sequence_lengths = tf.keras.backend.flatten(sequence_lengths)
9984
else:
100-
sequences = convert_to_tensor(inputs, dtype=self.dtype)
101-
shape = tf.shape(inputs)
102-
self.sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
85+
self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * \
86+
(tf.shape(inputs)[1])
87+
10388
viterbi_sequence, _ = tf.contrib.crf.crf_decode(sequences, self.transitions,
10489
self.sequence_lengths)
105-
output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
106-
return keras.backend.in_train_phase(sequences, output)
90+
output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
91+
return tf.keras.backend.in_train_phase(sequences, output)
10792

10893
def loss(self, y_true, y_pred):
109-
y_pred = convert_to_tensor(y_pred, dtype=self.dtype)
94+
y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
11095
log_likelihood, self.transitions = \
11196
tf.contrib.crf.crf_log_likelihood(y_pred,
11297
tf.cast(tf.keras.backend.argmax(y_true),
@@ -116,12 +101,8 @@ def loss(self, y_true, y_pred):
116101
return tf.reduce_mean(-log_likelihood)
117102

118103
def compute_output_shape(self, input_shape):
119-
if self.mode == 'pad':
120-
data_shape = input_shape[0]
121-
else:
122-
data_shape = input_shape
123-
tf.TensorShape(data_shape).assert_has_rank(3)
124-
return data_shape[:2] + (self.output_dim,)
104+
tf.TensorShape(input_shape).assert_has_rank(3)
105+
return input_shape[:2] + (self.output_dim,)
125106

126107
@property
127108
def viterbi_accuracy(self):
@@ -130,7 +111,7 @@ def accuracy(y_true, y_pred):
130111
sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
131112
viterbi_sequence, _ = tf.contrib.crf.crf_decode(y_pred, self.transitions,
132113
sequence_lengths)
133-
output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
114+
output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
134115
return tf.keras.metrics.categorical_accuracy(y_true, output)
135116
accuracy.func_name = 'viterbi_accuracy'
136117
return accuracy

nlp_architect/data/sequential_tagging.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,6 @@ def _read_file(self, filepath):
119119
with open(filepath, encoding='utf-8') as fp:
120120
data = fp.readlines()
121121
data = [d.strip() for d in data]
122-
data = [d for d in data if 'DOCSTART' not in d]
123122
sentences = self._split_into_sentences(data)
124123
parsed_sentences = [self._parse_sentence(s) for s in sentences if len(s) > 0]
125124
return parsed_sentences

nlp_architect/models/chunker.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ def build(self,
134134

135135
model = tf.keras.Model(input_src, [pos_out, chunks_out])
136136
if optimizer is None:
137-
self.optimizer = tf.train.AdamOptimizer()
137+
self.optimizer = tf.keras.optimizers.Adam(0.001, clipnorm=5.)
138138
else:
139139
self.optimizer = optimizer
140140
model.compile(optimizer=self.optimizer,

nlp_architect/models/ner_crf.py

Lines changed: 7 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,8 @@ def __init__(self, use_cudnn=False):
3838
self.char_vocab_size = None
3939
self.word_embedding_dims = None
4040
self.char_embedding_dims = None
41-
self.word_lstm_dims = None
4241
self.tagger_lstm_dims = None
4342
self.dropout = None
44-
self.crf_mode = None
4543
self.use_cudnn = use_cudnn
4644

4745
def build(self,
@@ -51,10 +49,8 @@ def build(self,
5149
char_vocab_size,
5250
word_embedding_dims=100,
5351
char_embedding_dims=16,
54-
word_lstm_dims=20,
5552
tagger_lstm_dims=200,
56-
dropout=0.5,
57-
crf_mode='pad'):
53+
dropout=0.5):
5854
"""
5955
Build a NERCRF model
6056
@@ -65,24 +61,17 @@ def build(self,
6561
char_vocab_size (int): character vocabulary size
6662
word_embedding_dims (int): word embedding dimensions
6763
char_embedding_dims (int): character embedding dimensions
68-
word_lstm_dims (int): character LSTM feature extractor output dimensions
6964
tagger_lstm_dims (int): word tagger LSTM output dimensions
7065
dropout (float): dropout rate
71-
crf_mode (string): CRF operation mode, select 'pad'/'reg' for supplied sequences in
72-
input or full sequence tagging. ('reg' is forced when use_cudnn=True)
7366
"""
7467
self.word_length = word_length
7568
self.target_label_dims = target_label_dims
7669
self.word_vocab_size = word_vocab_size
7770
self.char_vocab_size = char_vocab_size
7871
self.word_embedding_dims = word_embedding_dims
7972
self.char_embedding_dims = char_embedding_dims
80-
self.word_lstm_dims = word_lstm_dims
8173
self.tagger_lstm_dims = tagger_lstm_dims
8274
self.dropout = dropout
83-
self.crf_mode = crf_mode
84-
85-
assert crf_mode in ('pad', 'reg'), 'crf_mode is invalid'
8675

8776
# build word input
8877
words_input = tf.keras.layers.Input(shape=(None,), name='words_input')
@@ -117,23 +106,17 @@ def build(self,
117106

118107
inputs = [words_input, word_chars_input]
119108

120-
if self.use_cudnn:
121-
self.crf_mode = 'reg'
122-
with tf.device('/cpu:0'):
123-
crf = CRF(self.target_label_dims, mode=self.crf_mode, name='ner_crf')
124-
if self.crf_mode == 'pad':
125-
sequence_lengths = tf.keras.layers.Input(batch_shape=(None, 1), dtype='int32')
126-
predictions = crf([bilstm, sequence_lengths])
127-
inputs.append(sequence_lengths)
128-
else:
129-
predictions = crf(bilstm)
109+
sequence_lengths = tf.keras.layers.Input(shape=(1,), dtype='int32', name='seq_lens')
110+
inputs.append(sequence_lengths)
111+
crf = CRF(self.target_label_dims, name='ner_crf')
112+
predictions = crf(inputs=bilstm, sequence_lengths=sequence_lengths)
130113

131114
# compile the model
132115
model = tf.keras.Model(inputs=inputs,
133116
outputs=predictions)
134117
model.compile(loss={'ner_crf': crf.loss},
135-
optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.),
136-
metrics=[crf.viterbi_accuracy])
118+
optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.))
119+
137120
self.model = model
138121

139122
def _rnn_cell(self, units, **kwargs):

0 commit comments

Comments
 (0)