This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit 4d8cca4
0.4 SDL patch and update to global library variables (#369)
* Changed global library variable type to Path-like; added missing `with` statements when opening resources
* Fixed library path and style issues
* Converted `Path` to `str` in `bist_parser.py`
1 parent 9c12c76 commit 4d8cca4
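The core of the change: LIBRARY_ROOT (and the other library globals) are now pathlib.Path objects instead of plain strings, so call sites build paths with the / operator and wrap the result in str() wherever a downstream API still expects a string. A minimal sketch of the before/after pattern, assuming a Path-valued LIBRARY_ROOT like the one defined in nlp_architect/__init__.py below:

    from pathlib import Path

    LIBRARY_ROOT = Path(__file__).resolve().parent.parent

    # old style -- only works while LIBRARY_ROOT is a str; on a Path it raises TypeError
    # mentions_file = LIBRARY_ROOT + '/datasets/ecb/ecb_all_event_mentions.json'

    # new style -- join with '/', convert with str() only where a plain string is required
    mentions_file = str(LIBRARY_ROOT / 'datasets' / 'ecb' / 'ecb_all_event_mentions.json')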

File tree

23 files changed: +146 −173 lines changed

examples/cross_doc_coref/cross_doc_coref_sieves.py

Lines changed: 4 additions & 4 deletions

@@ -37,17 +37,17 @@

 def run_example(cdc_settings):
     event_mentions_topics = Topics()
-    event_mentions_topics.create_from_file(LIBRARY_ROOT
-                                           + '/datasets/ecb/ecb_all_event_mentions.json')
+    event_mentions_topics.create_from_file(str(LIBRARY_ROOT / 'datasets' / 'ecb'
+                                               / 'ecb_all_event_mentions.json'))

     event_clusters = None
     if cdc_settings.event_config.run_evaluation:
         logger.info('Running event coreference resolution')
         event_clusters = run_event_coref(event_mentions_topics, cdc_settings)

     entity_mentions_topics = Topics()
-    entity_mentions_topics.create_from_file(LIBRARY_ROOT
-                                            + '/datasets/ecb/ecb_all_entity_mentions.json')
+    entity_mentions_topics.create_from_file(str(LIBRARY_ROOT / 'datasets' / 'ecb'
+                                                / 'ecb_all_entity_mentions.json'))
     entity_clusters = None
     if cdc_settings.entity_config.run_evaluation:
         logger.info('Running entity coreference resolution')

examples/cross_doc_coref/relation_extraction_example.py

Lines changed: 3 additions & 2 deletions

@@ -37,9 +37,10 @@
 def run_example():
     logger.info('Running relation extraction example......')
     computed = ComputedRelationExtraction()
-    ref_dict = ReferentDictRelationExtraction(ref_dict=LIBRARY_ROOT + '/datasets/coref.dict1.tsv')
+    ref_dict = ReferentDictRelationExtraction(ref_dict=str(LIBRARY_ROOT / 'datasets'
+                                                           / 'coref.dict1.tsv'))
     vo = VerboceanRelationExtraction(
-        vo_file=LIBRARY_ROOT + '/datasets/verbocean.unrefined.2004-05-20.txt')
+        vo_file=str(LIBRARY_ROOT / 'datasets' / 'verbocean.unrefined.2004-05-20.txt'))
     wiki = WikipediaRelationExtraction()
     wn = WordnetRelationExtraction()
     embed = WordEmbeddingRelationExtraction(method=EmbeddingMethod.ELMO)

examples/sparse_gnmt/inference.py

Lines changed: 29 additions & 31 deletions

@@ -132,37 +132,35 @@ def inference(ckpt_path,
     if hparams.quantize_ckpt or hparams.from_quantized_ckpt:
         model_helper.add_quatization_variables(infer_model)

-    sess = tf.Session(
-        graph=infer_model.graph, config=utils.get_config_proto())
-    with infer_model.graph.as_default():
-        load_fn = model_helper.load_model if not hparams.from_quantized_ckpt \
-            else model_helper.load_quantized_model
-        if hparams.quantize_ckpt:
-            load_fn(infer_model.model, ckpt_path, sess, 'infer')
-            load_fn = model_helper.load_quantized_model
-            ckpt_path = os.path.join(hparams.out_dir, 'quant_' + os.path.basename(ckpt_path))
-            model_helper.quantize_checkpoint(sess, ckpt_path)
-        loaded_infer_model = load_fn(infer_model.model, ckpt_path, sess, 'infer')
-
-    if num_workers == 1:
-        single_worker_inference(
-            sess,
-            infer_model,
-            loaded_infer_model,
-            inference_input_file,
-            inference_output_file,
-            hparams)
-    else:
-        multi_worker_inference(
-            sess,
-            infer_model,
-            loaded_infer_model,
-            inference_input_file,
-            inference_output_file,
-            hparams,
-            num_workers=num_workers,
-            jobid=jobid)
-    sess.close()
+    with tf.Session(graph=infer_model.graph, config=utils.get_config_proto()) as sess:
+        with infer_model.graph.as_default():
+            load_fn = model_helper.load_model if not hparams.from_quantized_ckpt \
+                else model_helper.load_quantized_model
+            if hparams.quantize_ckpt:
+                load_fn(infer_model.model, ckpt_path, sess, 'infer')
+                load_fn = model_helper.load_quantized_model
+                ckpt_path = os.path.join(hparams.out_dir, 'quant_' + os.path.basename(ckpt_path))
+                model_helper.quantize_checkpoint(sess, ckpt_path)
+            loaded_infer_model = load_fn(infer_model.model, ckpt_path, sess, 'infer')
+
+        if num_workers == 1:
+            single_worker_inference(
+                sess,
+                infer_model,
+                loaded_infer_model,
+                inference_input_file,
+                inference_output_file,
+                hparams)
+        else:
+            multi_worker_inference(
+                sess,
+                infer_model,
+                loaded_infer_model,
+                inference_input_file,
+                inference_output_file,
+                hparams,
+                num_workers=num_workers,
+                jobid=jobid)


 def single_worker_inference(sess,
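In this hunk the session moves into a with block so it is closed even when single- or multi-worker inference raises. tf.Session in TF 1.x is a context manager whose exit hook closes the session; a small self-contained sketch (the graph below is a placeholder, not taken from this example):

    import tensorflow as tf

    graph = tf.Graph()
    with graph.as_default():
        answer = tf.constant(41.0) + 1.0

    # Session.__exit__ calls close(), so no explicit sess.close() is needed
    with tf.Session(graph=graph) as sess:
        print(sess.run(answer))  # 42.0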

examples/sparse_gnmt/train.py

Lines changed: 7 additions & 8 deletions

@@ -645,14 +645,13 @@ def train(hparams, scope=None, target_session=""):

     if avg_ckpts:
         best_model_dir = getattr(hparams, "avg_best_" + metric + "_dir")
-        summary_writer = tf.summary.FileWriter(
-            os.path.join(best_model_dir, summary_name), infer_model.graph)
-        result_summary, best_global_step, _ = run_full_eval(
-            best_model_dir, infer_model, infer_sess, eval_model, eval_sess,
-            hparams, summary_writer, sample_src_data, sample_tgt_data)
-        print_step_info("# Averaged Best %s, " % metric, best_global_step, info,
-                        result_summary, log_f)
-        summary_writer.close()
+        with tf.summary.FileWriter(os.path.join(best_model_dir, summary_name),
+                                   infer_model.graph) as summary_writer:
+            result_summary, best_global_step, _ = run_full_eval(
+                best_model_dir, infer_model, infer_sess, eval_model, eval_sess,
+                hparams, summary_writer, sample_src_data, sample_tgt_data)
+            print_step_info("# Averaged Best %s, " % metric, best_global_step, info,
+                            result_summary, log_f)

     return final_eval_metrics, global_step

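The same resource-management idea applies to the summary writer: the rewrite leans on tf.summary.FileWriter supporting the with protocol in TF 1.x (its exit hook closes the writer), so events are flushed even if evaluation fails. A standalone sketch, assuming TF 1.x and a writable log directory of your choosing:

    import tensorflow as tf

    summary = tf.Summary(value=[tf.Summary.Value(tag='dev_bleu', simple_value=0.25)])

    # the writer is flushed and closed when the block exits
    with tf.summary.FileWriter('/tmp/summaries') as writer:
        writer.add_summary(summary, global_step=100)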

nlp_architect/__init__.py

Lines changed: 5 additions & 2 deletions

@@ -14,6 +14,9 @@
 # limitations under the License.
 # ******************************************************************************
 from os import path
+from pathlib import Path

-LIBRARY_PATH = path.dirname(path.realpath(__file__))
-LIBRARY_ROOT = path.dirname(LIBRARY_PATH)
+LIBRARY_PATH = Path(path.realpath(__file__)).parent
+LIBRARY_ROOT = LIBRARY_PATH.parent
+LIBRARY_OUT = Path(Path.home()) / 'nlp-architect' / 'cache'
+LIBRARY_DATASETS = LIBRARY_ROOT / 'datasets'
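Two side notes on the new globals, for readers porting similar code: Path.home() already returns a Path, so the extra Path(...) wrapper is harmless but not required, and the os.path/pathlib mix can be written in pure pathlib. An equivalent spelling (an illustration, not what the commit ships):

    from pathlib import Path

    LIBRARY_PATH = Path(__file__).resolve().parent         # .../nlp_architect
    LIBRARY_ROOT = LIBRARY_PATH.parent                      # repository root
    LIBRARY_OUT = Path.home() / 'nlp-architect' / 'cache'
    LIBRARY_DATASETS = LIBRARY_ROOT / 'datasets'

Path.resolve() follows symlinks just like os.path.realpath, so both variants point at the same directories.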

nlp_architect/api/intent_extraction_api.py

Lines changed: 2 additions & 2 deletions

@@ -19,14 +19,14 @@

 from nlp_architect.api.abstract_api import AbstractApi
 from nlp_architect.models.intent_extraction import MultiTaskIntentModel, Seq2SeqIntentModel
-from nlp_architect.utils import LIBRARY_STORAGE_PATH
+from nlp_architect import LIBRARY_OUT
 from nlp_architect.utils.generic import pad_sentences
 from nlp_architect.utils.io import download_unlicensed_file
 from nlp_architect.utils.text import SpacyInstance, bio_to_spans


 class IntentExtractionApi(AbstractApi):
-    model_dir = path.join(LIBRARY_STORAGE_PATH, 'intent-pretrained')
+    model_dir = str(LIBRARY_OUT / 'intent-pretrained')
     pretrained_model_info = path.join(model_dir, 'model_info.dat')
     pretrained_model = path.join(model_dir, 'model.h5')


nlp_architect/api/machine_comprehension_api.py

Lines changed: 6 additions & 8 deletions

@@ -28,7 +28,7 @@

 from nlp_architect.api.abstract_api import AbstractApi
 from nlp_architect.models.matchlstm_ansptr import MatchLSTMAnswerPointer
-from nlp_architect.utils import LIBRARY_STORAGE_PATH
+from nlp_architect import LIBRARY_OUT
 from nlp_architect.utils.generic import license_prompt
 from nlp_architect.utils.io import download_unlicensed_file
 from nlp_architect.utils.mrc_utils import (
@@ -39,7 +39,7 @@ class MachineComprehensionApi(AbstractApi):
     """
     Machine Comprehension API
     """
-    dir = os.path.join(LIBRARY_STORAGE_PATH, 'mrc-pretrained')
+    dir = str(LIBRARY_OUT / 'mrc-pretrained')
     data_path = os.path.join(dir, 'mrc_data', 'data')
     data_dir = os.path.join(dir, 'mrc_data')
     model_dir = os.path.join(dir, 'mrc_trained_model')
@@ -97,12 +97,10 @@ def download_model(self):
         download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
                                  '/models/mrc/',
                                  'mrc_model.zip', model_zipfile)
-        data_zip_ref = zipfile.ZipFile(data_zipfile, 'r')
-        data_zip_ref.extractall(self.data_dir)
-        data_zip_ref.close()
-        model_zip_ref = zipfile.ZipFile(model_zipfile, 'r')
-        model_zip_ref.extractall(self.model_dir)
-        model_zip_ref.close()
+        with zipfile.ZipFile(data_zipfile) as data_zip_ref:
+            data_zip_ref.extractall(self.data_dir)
+        with zipfile.ZipFile(model_zipfile) as model_zip_ref:
+            model_zip_ref.extractall(self.model_dir)

     def load_model(self):
         select_device = 'GPU'
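zipfile.ZipFile has been usable as a context manager since Python 2.7/3.2, so the with form guarantees the archive handle is closed even if extractall() throws; 'r' is also the default mode, which is why it can be dropped. A tiny sketch with placeholder paths:

    import zipfile

    with zipfile.ZipFile('mrc_data.zip') as archive:   # closed automatically on exit
        archive.extractall('mrc_data/')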

nlp_architect/api/ner_api.py

Lines changed: 2 additions & 2 deletions

@@ -20,7 +20,7 @@

 from nlp_architect.api.abstract_api import AbstractApi
 from nlp_architect.models.ner_crf import NERCRF
-from nlp_architect.utils import LIBRARY_STORAGE_PATH
+from nlp_architect import LIBRARY_OUT
 from nlp_architect.utils.generic import pad_sentences
 from nlp_architect.utils.io import download_unlicensed_file
 from nlp_architect.utils.text import SpacyInstance, bio_to_spans
@@ -30,7 +30,7 @@ class NerApi(AbstractApi):
     """
     NER model API
     """
-    model_dir = path.join(LIBRARY_STORAGE_PATH, 'ner-pretrained')
+    model_dir = str(LIBRARY_OUT / 'ner-pretrained')
     pretrained_model = path.join(model_dir, 'model.h5')
     pretrained_model_info = path.join(model_dir, 'model_info.dat')


nlp_architect/cmd.py

Lines changed: 17 additions & 18 deletions

@@ -22,8 +22,8 @@

 import pytest

-from nlp_architect import LIBRARY_ROOT, LIBRARY_PATH
-from nlp_architect.utils import LIBRARY_STORAGE_PATH, ansi2html
+from nlp_architect import LIBRARY_ROOT, LIBRARY_PATH, LIBRARY_OUT
+from nlp_architect.utils import ansi2html
 from nlp_architect.version import NLP_ARCHITECT_VERSION


@@ -33,7 +33,7 @@ def run_cmd(command):

 class DocsCommand(object):
     cmd_name = 'doc'
-    docs_source = os.path.join(LIBRARY_ROOT, 'doc')
+    docs_source = LIBRARY_ROOT / 'doc'

     def __init__(self, subparsers):
         parser = subparsers.add_parser(DocsCommand.cmd_name,
@@ -48,8 +48,7 @@ def run_docs(_):
         print('Re-building documentation')
         run_cmd(base_cmd + ' clean')
         run_cmd(base_cmd + ' html')
-        print('Documentation built in: {}'.format(os.path.join(DocsCommand.docs_source,
-                                                                'build', 'html')))
+        print('Documentation built in: {}'.format(DocsCommand.docs_source / 'build' / 'html'))
         print('To view documents point your browser to: http://localhost:8000')

         class HTTPHandler(SimpleHTTPRequestHandler):
@@ -64,7 +63,7 @@ def __init__(self, base_path, server_address, request_handler_class=HTTPHandler)
                 self.base_path = base_path
                 BaseHTTPServer.__init__(self, server_address, request_handler_class)

-        web_dir = os.path.join(os.path.join(DocsCommand.docs_source, 'build', 'html'))
+        web_dir = DocsCommand.docs_source / 'build' / 'html'
         httpd = HTTPServer(web_dir, ("", 8000))
         httpd.serve_forever()

@@ -76,7 +75,7 @@ class StyleCommand(object):
         'nlp_architect',
         'tests'
     ]
-    files_to_check = [os.path.join(LIBRARY_ROOT, f) for f in check_dirs]
+    files_to_check = [str(LIBRARY_ROOT / f) for f in check_dirs]

     def __init__(self, subparsers):
         parser = subparsers.add_parser(StyleCommand.cmd_name,
@@ -109,11 +108,11 @@ def run_check(args):
     @staticmethod
     def run_flake():
         print('Running flake8 ...\n')
-        flake8_config = os.path.join(LIBRARY_ROOT, 'setup.cfg')
-        os.makedirs(LIBRARY_STORAGE_PATH, exist_ok=True)
-        flake8_out = os.path.join(LIBRARY_STORAGE_PATH, 'flake8.txt')
-        flake8_html_out = os.path.join(LIBRARY_STORAGE_PATH, 'flake_html')
-
+        flake8_config = str(LIBRARY_ROOT / 'setup.cfg')
+        os.makedirs(LIBRARY_OUT, exist_ok=True)
+        flake8_out = str(LIBRARY_OUT / 'flake8.txt')
+        flake8_html_out = str(LIBRARY_OUT / 'flake_html')
+        print('HHH:' + flake8_html_out)
         try:
             os.remove(flake8_out)
             shutil.rmtree(flake8_html_out, ignore_errors=True)
@@ -135,10 +134,10 @@ def run_flake():
     @staticmethod
     def run_pylint():
         print('Running pylint ...\n')
-        pylint_config = os.path.join(LIBRARY_ROOT, 'pylintrc')
-        os.makedirs(LIBRARY_STORAGE_PATH, exist_ok=True)
-        pylint_out = os.path.join(LIBRARY_STORAGE_PATH, 'pylint.txt')
-        html_out = os.path.join(LIBRARY_STORAGE_PATH, 'pylint.html')
+        pylint_config = LIBRARY_ROOT / 'pylintrc'
+        os.makedirs(LIBRARY_OUT, exist_ok=True)
+        pylint_out = str(LIBRARY_OUT / 'pylint.txt')
+        html_out = str(LIBRARY_OUT / 'pylint.html')

         cmd = 'pylint -j 4 {} --rcfile {} --score=n'\
             .format(' '.join(StyleCommand.files_to_check), pylint_config)
@@ -167,7 +166,7 @@ def __init__(self, subparsers):
     def run_tests(args):
         # run all tests
         print('\nrunning NLP Architect tests ...')
-        tests_dir = os.path.join(LIBRARY_ROOT, 'tests')
+        tests_dir = str(LIBRARY_ROOT / 'tests')
         tests = None
         if args.f:
             specific_test_file = args.f
@@ -218,7 +217,7 @@ def __init__(self, subparsers):
     @staticmethod
     def run_server(args):
         port = args.port
-        serve_file = os.path.join(LIBRARY_PATH, 'server', 'serve.py')
+        serve_file = LIBRARY_PATH / 'server' / 'serve.py'
         cmd_str = 'hug -p {} -f {}'.format(port, serve_file)
         run_cmd(cmd_str)

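One detail worth noting in the run_flake/run_pylint hunks: os.makedirs(LIBRARY_OUT, exist_ok=True) passes the Path object directly, which works because os functions accept path-like objects since Python 3.6 (PEP 519); str() is only needed where a value ends up in a plain string, e.g. formatted into a shell command. A minimal sketch of the same pattern with an assumed output directory:

    import os
    from pathlib import Path

    out_dir = Path.home() / 'nlp-architect' / 'cache'    # same shape as LIBRARY_OUT
    os.makedirs(out_dir, exist_ok=True)                  # Path accepted directly
    flake8_out = str(out_dir / 'flake8.txt')             # str() only for string-consuming APIs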

nlp_architect/data/cdc_resources/embedding/embed_glove.py

Lines changed: 3 additions & 6 deletions

@@ -34,18 +34,15 @@ def __init__(self, glove_file):
 def load_glove_for_vocab(glove_filename):
     vocab = []
     embd = []
-    with open(glove_filename, 'r') as glove_file:
-        for line in glove_file.readlines():
+    with open(glove_filename) as glove_file:
+        for line in glove_file:
             row = line.strip().split(' ')
             word = row[0]
             vocab.append(word)
             embd.append(row[1:])

     embeddings = np.asarray(embd, dtype=float)
-    word_to_ix = {}
-    for word in vocab:
-        word_to_ix[word] = len(word_to_ix)
-
+    word_to_ix = {word: i for i, word in enumerate(vocab)}
     return word_to_ix, embeddings

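The embed_glove.py change swaps readlines() for direct iteration over the file object (lines are streamed instead of loaded into memory at once) and replaces the manual counter with a dict comprehension over enumerate, which assigns each word its insertion index exactly as len(word_to_ix) did. A tiny sketch with a made-up vocabulary:

    vocab = ['the', 'cat', 'sat']
    word_to_ix = {word: i for i, word in enumerate(vocab)}
    assert word_to_ix == {'the': 0, 'cat': 1, 'sat': 2}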
