This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit 4d8cca4
0.4 SDL patch and update to global library variables (#369)
* Changed global library variable type to Path-like; added missing `with` statements when opening resources
* Fixed library path and style issues
* Converted `Path` to `str` in `bist_parser.py`
1 parent 9c12c76 commit 4d8cca4
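The core of the change: LIBRARY_ROOT (and the other library globals) are now pathlib.Path objects instead of plain strings, so call sites build paths with the / operator and wrap the result in str() wherever a downstream API still expects a string. A minimal sketch of the before/after pattern, assuming a Path-valued LIBRARY_ROOT like the one defined in nlp_architect/__init__.py below:

    from pathlib import Path

    LIBRARY_ROOT = Path(__file__).resolve().parent.parent

    # old style -- only works while LIBRARY_ROOT is a str; on a Path it raises TypeError
    # mentions_file = LIBRARY_ROOT + '/datasets/ecb/ecb_all_event_mentions.json'

    # new style -- join with '/', convert with str() only where a plain string is required
    mentions_file = str(LIBRARY_ROOT / 'datasets' / 'ecb' / 'ecb_all_event_mentions.json')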

File tree

23 files changed: +146 −173 lines changed

examples/cross_doc_coref/cross_doc_coref_sieves.py

Lines changed: 4 additions & 4 deletions

@@ -37,17 +37,17 @@

 def run_example(cdc_settings):
     event_mentions_topics = Topics()
-    event_mentions_topics.create_from_file(LIBRARY_ROOT
-                                           + '/datasets/ecb/ecb_all_event_mentions.json')
+    event_mentions_topics.create_from_file(str(LIBRARY_ROOT / 'datasets' / 'ecb'
+                                               / 'ecb_all_event_mentions.json'))

     event_clusters = None
     if cdc_settings.event_config.run_evaluation:
         logger.info('Running event coreference resolution')
         event_clusters = run_event_coref(event_mentions_topics, cdc_settings)

     entity_mentions_topics = Topics()
-    entity_mentions_topics.create_from_file(LIBRARY_ROOT
-                                            + '/datasets/ecb/ecb_all_entity_mentions.json')
+    entity_mentions_topics.create_from_file(str(LIBRARY_ROOT / 'datasets' / 'ecb'
+                                                / 'ecb_all_entity_mentions.json'))
     entity_clusters = None
     if cdc_settings.entity_config.run_evaluation:
         logger.info('Running entity coreference resolution')

examples/cross_doc_coref/relation_extraction_example.py

Lines changed: 3 additions & 2 deletions

@@ -37,9 +37,10 @@
 def run_example():
     logger.info('Running relation extraction example......')
     computed = ComputedRelationExtraction()
-    ref_dict = ReferentDictRelationExtraction(ref_dict=LIBRARY_ROOT + '/datasets/coref.dict1.tsv')
+    ref_dict = ReferentDictRelationExtraction(ref_dict=str(LIBRARY_ROOT / 'datasets'
+                                                           / 'coref.dict1.tsv'))
     vo = VerboceanRelationExtraction(
-        vo_file=LIBRARY_ROOT + '/datasets/verbocean.unrefined.2004-05-20.txt')
+        vo_file=str(LIBRARY_ROOT / 'datasets' / 'verbocean.unrefined.2004-05-20.txt'))
     wiki = WikipediaRelationExtraction()
     wn = WordnetRelationExtraction()
     embed = WordEmbeddingRelationExtraction(method=EmbeddingMethod.ELMO)

examples/sparse_gnmt/inference.py

Lines changed: 29 additions & 31 deletions

@@ -132,37 +132,35 @@ def inference(ckpt_path,
     if hparams.quantize_ckpt or hparams.from_quantized_ckpt:
         model_helper.add_quatization_variables(infer_model)

-    sess = tf.Session(
-        graph=infer_model.graph, config=utils.get_config_proto())
-    with infer_model.graph.as_default():
-        load_fn = model_helper.load_model if not hparams.from_quantized_ckpt \
-            else model_helper.load_quantized_model
-        if hparams.quantize_ckpt:
-            load_fn(infer_model.model, ckpt_path, sess, 'infer')
-            load_fn = model_helper.load_quantized_model
-            ckpt_path = os.path.join(hparams.out_dir, 'quant_' + os.path.basename(ckpt_path))
-            model_helper.quantize_checkpoint(sess, ckpt_path)
-        loaded_infer_model = load_fn(infer_model.model, ckpt_path, sess, 'infer')
-
-    if num_workers == 1:
-        single_worker_inference(
-            sess,
-            infer_model,
-            loaded_infer_model,
-            inference_input_file,
-            inference_output_file,
-            hparams)
-    else:
-        multi_worker_inference(
-            sess,
-            infer_model,
-            loaded_infer_model,
-            inference_input_file,
-            inference_output_file,
-            hparams,
-            num_workers=num_workers,
-            jobid=jobid)
-    sess.close()
+    with tf.Session(graph=infer_model.graph, config=utils.get_config_proto()) as sess:
+        with infer_model.graph.as_default():
+            load_fn = model_helper.load_model if not hparams.from_quantized_ckpt \
+                else model_helper.load_quantized_model
+            if hparams.quantize_ckpt:
+                load_fn(infer_model.model, ckpt_path, sess, 'infer')
+                load_fn = model_helper.load_quantized_model
+                ckpt_path = os.path.join(hparams.out_dir, 'quant_' + os.path.basename(ckpt_path))
+                model_helper.quantize_checkpoint(sess, ckpt_path)
+            loaded_infer_model = load_fn(infer_model.model, ckpt_path, sess, 'infer')
+
+        if num_workers == 1:
+            single_worker_inference(
+                sess,
+                infer_model,
+                loaded_infer_model,
+                inference_input_file,
+                inference_output_file,
+                hparams)
+        else:
+            multi_worker_inference(
+                sess,
+                infer_model,
+                loaded_infer_model,
+                inference_input_file,
+                inference_output_file,
+                hparams,
+                num_workers=num_workers,
+                jobid=jobid)


 def single_worker_inference(sess,
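In this hunk the session moves into a with block so it is closed even when single- or multi-worker inference raises. tf.Session in TF 1.x is a context manager whose exit hook closes the session; a small self-contained sketch (the graph below is a placeholder, not taken from this example):

    import tensorflow as tf

    graph = tf.Graph()
    with graph.as_default():
        answer = tf.constant(41.0) + 1.0

    # Session.__exit__ calls close(), so no explicit sess.close() is needed
    with tf.Session(graph=graph) as sess:
        print(sess.run(answer))  # 42.0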

examples/sparse_gnmt/train.py

Lines changed: 7 additions & 8 deletions

@@ -645,14 +645,13 @@ def train(hparams, scope=None, target_session=""):

     if avg_ckpts:
         best_model_dir = getattr(hparams, "avg_best_" + metric + "_dir")
-        summary_writer = tf.summary.FileWriter(
-            os.path.join(best_model_dir, summary_name), infer_model.graph)
-        result_summary, best_global_step, _ = run_full_eval(
-            best_model_dir, infer_model, infer_sess, eval_model, eval_sess,
-            hparams, summary_writer, sample_src_data, sample_tgt_data)
-        print_step_info("# Averaged Best %s, " % metric, best_global_step, info,
-                        result_summary, log_f)
-        summary_writer.close()
+        with tf.summary.FileWriter(os.path.join(best_model_dir, summary_name),
+                                   infer_model.graph) as summary_writer:
+            result_summary, best_global_step, _ = run_full_eval(
+                best_model_dir, infer_model, infer_sess, eval_model, eval_sess,
+                hparams, summary_writer, sample_src_data, sample_tgt_data)
+            print_step_info("# Averaged Best %s, " % metric, best_global_step, info,
+                            result_summary, log_f)

     return final_eval_metrics, global_step

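The same resource-management idea applies to the summary writer: the rewrite leans on tf.summary.FileWriter supporting the with protocol in TF 1.x (its exit hook closes the writer), so events are flushed even if evaluation fails. A standalone sketch, assuming TF 1.x and a writable log directory of your choosing:

    import tensorflow as tf

    summary = tf.Summary(value=[tf.Summary.Value(tag='dev_bleu', simple_value=0.25)])

    # the writer is flushed and closed when the block exits
    with tf.summary.FileWriter('/tmp/summaries') as writer:
        writer.add_summary(summary, global_step=100)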

nlp_architect/__init__.py

Lines changed: 5 additions & 2 deletions

@@ -14,6 +14,9 @@
 # limitations under the License.
 # ******************************************************************************
 from os import path
+from pathlib import Path

-LIBRARY_PATH = path.dirname(path.realpath(__file__))
-LIBRARY_ROOT = path.dirname(LIBRARY_PATH)
+LIBRARY_PATH = Path(path.realpath(__file__)).parent
+LIBRARY_ROOT = LIBRARY_PATH.parent
+LIBRARY_OUT = Path(Path.home()) / 'nlp-architect' / 'cache'
+LIBRARY_DATASETS = LIBRARY_ROOT / 'datasets'
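Two side notes on the new globals, for readers porting similar code: Path.home() already returns a Path, so the extra Path(...) wrapper is harmless but not required, and the os.path/pathlib mix can be written in pure pathlib. An equivalent spelling (an illustration, not what the commit ships):

    from pathlib import Path

    LIBRARY_PATH = Path(__file__).resolve().parent         # .../nlp_architect
    LIBRARY_ROOT = LIBRARY_PATH.parent                      # repository root
    LIBRARY_OUT = Path.home() / 'nlp-architect' / 'cache'
    LIBRARY_DATASETS = LIBRARY_ROOT / 'datasets'

Path.resolve() follows symlinks just like os.path.realpath, so both variants point at the same directories.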

nlp_architect/api/intent_extraction_api.py

Lines changed: 2 additions & 2 deletions

@@ -19,14 +19,14 @@

 from nlp_architect.api.abstract_api import AbstractApi
 from nlp_architect.models.intent_extraction import MultiTaskIntentModel, Seq2SeqIntentModel
-from nlp_architect.utils import LIBRARY_STORAGE_PATH
+from nlp_architect import LIBRARY_OUT
 from nlp_architect.utils.generic import pad_sentences
 from nlp_architect.utils.io import download_unlicensed_file
 from nlp_architect.utils.text import SpacyInstance, bio_to_spans


 class IntentExtractionApi(AbstractApi):
-    model_dir = path.join(LIBRARY_STORAGE_PATH, 'intent-pretrained')
+    model_dir = str(LIBRARY_OUT / 'intent-pretrained')
     pretrained_model_info = path.join(model_dir, 'model_info.dat')
     pretrained_model = path.join(model_dir, 'model.h5')


nlp_architect/api/machine_comprehension_api.py

Lines changed: 6 additions & 8 deletions

@@ -28,7 +28,7 @@

 from nlp_architect.api.abstract_api import AbstractApi
 from nlp_architect.models.matchlstm_ansptr import MatchLSTMAnswerPointer
-from nlp_architect.utils import LIBRARY_STORAGE_PATH
+from nlp_architect import LIBRARY_OUT
 from nlp_architect.utils.generic import license_prompt
 from nlp_architect.utils.io import download_unlicensed_file
 from nlp_architect.utils.mrc_utils import (
@@ -39,7 +39,7 @@ class MachineComprehensionApi(AbstractApi):
     """
     Machine Comprehension API
     """
-    dir = os.path.join(LIBRARY_STORAGE_PATH, 'mrc-pretrained')
+    dir = str(LIBRARY_OUT / 'mrc-pretrained')
     data_path = os.path.join(dir, 'mrc_data', 'data')
     data_dir = os.path.join(dir, 'mrc_data')
     model_dir = os.path.join(dir, 'mrc_trained_model')
@@ -97,12 +97,10 @@ def download_model(self):
         download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
                                  '/models/mrc/',
                                  'mrc_model.zip', model_zipfile)
-        data_zip_ref = zipfile.ZipFile(data_zipfile, 'r')
-        data_zip_ref.extractall(self.data_dir)
-        data_zip_ref.close()
-        model_zip_ref = zipfile.ZipFile(model_zipfile, 'r')
-        model_zip_ref.extractall(self.model_dir)
-        model_zip_ref.close()
+        with zipfile.ZipFile(data_zipfile) as data_zip_ref:
+            data_zip_ref.extractall(self.data_dir)
+        with zipfile.ZipFile(model_zipfile) as model_zip_ref:
+            model_zip_ref.extractall(self.model_dir)

     def load_model(self):
         select_device = 'GPU'
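zipfile.ZipFile has been usable as a context manager since Python 2.7/3.2, so the with form guarantees the archive handle is closed even if extractall() throws; 'r' is also the default mode, which is why it can be dropped. A tiny sketch with placeholder paths:

    import zipfile

    with zipfile.ZipFile('mrc_data.zip') as archive:   # closed automatically on exit
        archive.extractall('mrc_data/')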

nlp_architect/api/ner_api.py

Lines changed: 2 additions & 2 deletions

@@ -20,7 +20,7 @@

 from nlp_architect.api.abstract_api import AbstractApi
 from nlp_architect.models.ner_crf import NERCRF
-from nlp_architect.utils import LIBRARY_STORAGE_PATH
+from nlp_architect import LIBRARY_OUT
 from nlp_architect.utils.generic import pad_sentences
 from nlp_architect.utils.io import download_unlicensed_file
 from nlp_architect.utils.text import SpacyInstance, bio_to_spans
@@ -30,7 +30,7 @@ class NerApi(AbstractApi):
     """
     NER model API
     """
-    model_dir = path.join(LIBRARY_STORAGE_PATH, 'ner-pretrained')
+    model_dir = str(LIBRARY_OUT / 'ner-pretrained')
     pretrained_model = path.join(model_dir, 'model.h5')
     pretrained_model_info = path.join(model_dir, 'model_info.dat')


nlp_architect/cmd.py

Lines changed: 17 additions & 18 deletions

@@ -22,8 +22,8 @@

 import pytest

-from nlp_architect import LIBRARY_ROOT, LIBRARY_PATH
-from nlp_architect.utils import LIBRARY_STORAGE_PATH, ansi2html
+from nlp_architect import LIBRARY_ROOT, LIBRARY_PATH, LIBRARY_OUT
+from nlp_architect.utils import ansi2html
 from nlp_architect.version import NLP_ARCHITECT_VERSION


@@ -33,7 +33,7 @@ def run_cmd(command):

 class DocsCommand(object):
     cmd_name = 'doc'
-    docs_source = os.path.join(LIBRARY_ROOT, 'doc')
+    docs_source = LIBRARY_ROOT / 'doc'

     def __init__(self, subparsers):
         parser = subparsers.add_parser(DocsCommand.cmd_name,
@@ -48,8 +48,7 @@ def run_docs(_):
         print('Re-building documentation')
         run_cmd(base_cmd + ' clean')
         run_cmd(base_cmd + ' html')
-        print('Documentation built in: {}'.format(os.path.join(DocsCommand.docs_source,
-                                                                'build', 'html')))
+        print('Documentation built in: {}'.format(DocsCommand.docs_source / 'build' / 'html'))
         print('To view documents point your browser to: http://localhost:8000')

         class HTTPHandler(SimpleHTTPRequestHandler):
@@ -64,7 +63,7 @@ def __init__(self, base_path, server_address, request_handler_class=HTTPHandler)
                 self.base_path = base_path
                 BaseHTTPServer.__init__(self, server_address, request_handler_class)

-        web_dir = os.path.join(os.path.join(DocsCommand.docs_source, 'build', 'html'))
+        web_dir = DocsCommand.docs_source / 'build' / 'html'
         httpd = HTTPServer(web_dir, ("", 8000))
         httpd.serve_forever()

@@ -76,7 +75,7 @@ class StyleCommand(object):
         'nlp_architect',
         'tests'
     ]
-    files_to_check = [os.path.join(LIBRARY_ROOT, f) for f in check_dirs]
+    files_to_check = [str(LIBRARY_ROOT / f) for f in check_dirs]

     def __init__(self, subparsers):
         parser = subparsers.add_parser(StyleCommand.cmd_name,
@@ -109,11 +108,11 @@ def run_check(args):
     @staticmethod
     def run_flake():
         print('Running flake8 ...\n')
-        flake8_config = os.path.join(LIBRARY_ROOT, 'setup.cfg')
-        os.makedirs(LIBRARY_STORAGE_PATH, exist_ok=True)
-        flake8_out = os.path.join(LIBRARY_STORAGE_PATH, 'flake8.txt')
-        flake8_html_out = os.path.join(LIBRARY_STORAGE_PATH, 'flake_html')
-
+        flake8_config = str(LIBRARY_ROOT / 'setup.cfg')
+        os.makedirs(LIBRARY_OUT, exist_ok=True)
+        flake8_out = str(LIBRARY_OUT / 'flake8.txt')
+        flake8_html_out = str(LIBRARY_OUT / 'flake_html')
+        print('HHH:' + flake8_html_out)
         try:
             os.remove(flake8_out)
             shutil.rmtree(flake8_html_out, ignore_errors=True)
@@ -135,10 +134,10 @@ def run_flake():
     @staticmethod
     def run_pylint():
         print('Running pylint ...\n')
-        pylint_config = os.path.join(LIBRARY_ROOT, 'pylintrc')
-        os.makedirs(LIBRARY_STORAGE_PATH, exist_ok=True)
-        pylint_out = os.path.join(LIBRARY_STORAGE_PATH, 'pylint.txt')
-        html_out = os.path.join(LIBRARY_STORAGE_PATH, 'pylint.html')
+        pylint_config = LIBRARY_ROOT / 'pylintrc'
+        os.makedirs(LIBRARY_OUT, exist_ok=True)
+        pylint_out = str(LIBRARY_OUT / 'pylint.txt')
+        html_out = str(LIBRARY_OUT / 'pylint.html')

         cmd = 'pylint -j 4 {} --rcfile {} --score=n'\
             .format(' '.join(StyleCommand.files_to_check), pylint_config)
@@ -167,7 +166,7 @@ def __init__(self, subparsers):
     def run_tests(args):
         # run all tests
         print('\nrunning NLP Architect tests ...')
-        tests_dir = os.path.join(LIBRARY_ROOT, 'tests')
+        tests_dir = str(LIBRARY_ROOT / 'tests')
         tests = None
         if args.f:
             specific_test_file = args.f
@@ -218,7 +217,7 @@ def __init__(self, subparsers):
     @staticmethod
     def run_server(args):
         port = args.port
-        serve_file = os.path.join(LIBRARY_PATH, 'server', 'serve.py')
+        serve_file = LIBRARY_PATH / 'server' / 'serve.py'
         cmd_str = 'hug -p {} -f {}'.format(port, serve_file)
         run_cmd(cmd_str)

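One detail worth noting in the run_flake/run_pylint hunks: os.makedirs(LIBRARY_OUT, exist_ok=True) passes the Path object directly, which works because os functions accept path-like objects since Python 3.6 (PEP 519); str() is only needed where a value ends up in a plain string, e.g. formatted into a shell command. A minimal sketch of the same pattern with an assumed output directory:

    import os
    from pathlib import Path

    out_dir = Path.home() / 'nlp-architect' / 'cache'    # same shape as LIBRARY_OUT
    os.makedirs(out_dir, exist_ok=True)                  # Path accepted directly
    flake8_out = str(out_dir / 'flake8.txt')             # str() only for string-consuming APIs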

nlp_architect/data/cdc_resources/embedding/embed_glove.py

Lines changed: 3 additions & 6 deletions

@@ -34,18 +34,15 @@ def __init__(self, glove_file):
 def load_glove_for_vocab(glove_filename):
     vocab = []
     embd = []
-    with open(glove_filename, 'r') as glove_file:
-        for line in glove_file.readlines():
+    with open(glove_filename) as glove_file:
+        for line in glove_file:
             row = line.strip().split(' ')
             word = row[0]
             vocab.append(word)
             embd.append(row[1:])

     embeddings = np.asarray(embd, dtype=float)
-    word_to_ix = {}
-    for word in vocab:
-        word_to_ix[word] = len(word_to_ix)
-
+    word_to_ix = {word: i for i, word in enumerate(vocab)}
     return word_to_ix, embeddings

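The embed_glove.py change swaps readlines() for direct iteration over the file object (lines are streamed instead of loaded into memory at once) and replaces the manual counter with a dict comprehension over enumerate, which assigns each word its insertion index exactly as len(word_to_ix) did. A tiny sketch with a made-up vocabulary:

    vocab = ['the', 'cat', 'sat']
    word_to_ix = {word: i for i, word in enumerate(vocab)}
    assert word_to_ix == {'the': 0, 'cat': 1, 'sat': 2}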
