
Commit b6acc86

Document the inference tool
1 parent db0237e commit b6acc86

2 files changed: +55 -6 lines


README.md

Lines changed: 47 additions & 0 deletions
@@ -24,6 +24,7 @@ _Predicted labels for some randomly chosen samples. Format: prediction (confiden
 
 ## Table of Contents
 
+[Quickstart](#quickstart)<br />
 [Dataset](#dataset)<br />
 [Development](#development)<br />
 [Development - Quickstart](#development-quickstart)<br />
@@ -32,6 +33,52 @@ _Predicted labels for some randomly chosen samples. Format: prediction (confiden
 [Development - Quickstart - Training and Evaluation](#development-quickstart-training)<br />
 [Development - Tools](#development-tools)
 
+## Quickstart
+<a name="quickstart"></a>
+
+_Note: These instructions are only for inference using the pre-trained model._
+
+First download the latest release from [releases](https://github.com/AlexGustafsson/compdec/releases). The release contains three files: a pre-trained model, a Python script, and a Dockerfile.
+
+If you do not wish to install all the prerequisites mentioned under [Development - Quickstart](#development-quickstart), build the Docker image instead:
+
+```sh
+cd compdec
+docker build -t compdec .
+```
+
+Now you can run the script natively or via Docker:
+
+```sh
+# Docker
+docker run -it -v "/path/to/samples:/samples" compdec /samples/unknown-file1.bin /samples/unknown-file2.bin
+# Native
+python3 ./compdec.py /path/to/samples/unknown-file1.bin /path/to/samples/unknown-file2.bin
+```
+
+The tool will produce output like the following:
+
+```
+/path/to/samples/unknown-file1.bin
+7z       : 0.00%
+brotli   : 0.00%
+bzip2    : 0.00%
+compress : 0.00%
+gzip     : 0.00%
+lz4      : 100.00%
+rar      : 0.00%
+zip      : 0.00%
+/path/to/samples/unknown-file2.bin
+7z       : 0.00%
+brotli   : 0.00%
+bzip2    : 0.00%
+compress : 100.00%
+gzip     : 0.00%
+lz4      : 0.00%
+rar      : 0.00%
+zip      : 0.00%
+```
+
 ## Dataset
 <a name="dataset"></a>
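As an optional sanity check of the Quickstart above, the sketch below creates a gzip-compressed sample and feeds it to the released script. Everything in it is illustrative and not part of this commit: the `samples/` directory, the file name `test.gz`, and running `python3 ./compdec.py` from the release directory are assumptions.

```python
# Hypothetical smoke test for the Quickstart (not part of this commit).
# It writes a gzip stream large enough to yield at least one model chunk,
# then calls the released script the same way the README documents.
import gzip
import os
import subprocess

os.makedirs("samples", exist_ok=True)

# Random bytes do not compress, so the resulting .gz file stays around 1 MiB,
# comfortably above any plausible chunk size used by the model.
payload = os.urandom(1024 * 1024)
with gzip.open("samples/test.gz", "wb") as archive:
    archive.write(payload)

# Same invocation as the "Native" line in the Quickstart.
subprocess.run(["python3", "./compdec.py", "samples/test.gz"], check=True)
```

If the model behaves as in the example output, the gzip row should receive most of the confidence for this file.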

compdec/compdec.py

Lines changed: 8 additions & 6 deletions
@@ -40,6 +40,8 @@ def print_version() -> None:
     print("Model hash: {}".format(hash))
 
 def load_samples_from_file(sample_path):
+    import numpy
+
     with open(sample_path, "rb") as sample_file:
         sample_file.seek(0, 2)
         file_size = sample_file.tell()
@@ -57,13 +59,13 @@ def load_samples_from_file(sample_path):
         samples.append(sample)
     return samples
 
-def predict(file_paths, model_path):
+def predict(sample_paths, model_path):
     import tensorflow
     import numpy
 
     model = tensorflow.keras.models.load_model(model_path)
-    for file_path in file_paths:
-        samples = dataset_utilities.load_samples_from_file(sample_path)
+    for sample_path in sample_paths:
+        samples = load_samples_from_file(sample_path)
 
         if len(samples) == 0:
             print("There are no chunks big enough in the sample file. Expected at least {}B".format(CHUNK_SIZE))
@@ -77,9 +79,9 @@ def softmax(predictions):
         prediction_sum = sum(predictions)
         normalized_predictions = softmax(prediction_sum)
 
-        print(file_path)
-        for i in range(len(dataset_utilities.CLASS_NAMES)):
-            print("{:9}: {:2.2f}%".format(dataset_utilities.CLASS_NAMES[i], normalized_predictions[i] * 100))
+        print(sample_path)
+        for i in range(len(CLASS_NAMES)):
+            print("{:9}: {:2.2f}%".format(CLASS_NAMES[i], normalized_predictions[i] * 100))
 
 def main():
     parser = ArgumentParser(add_help=False)
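For readers following the diff above, here is a minimal, self-contained sketch of the inference flow that `load_samples_from_file` and `predict` implement after this commit. It is an approximation, not the actual compdec.py: `CHUNK_SIZE`, the 1/255 byte scaling, the softmax definition, and the model's input shape are assumptions, while `CLASS_NAMES` mirrors the README's example output.

```python
# Approximate sketch of the inference flow touched by this commit; the real
# compdec.py differs in details. CHUNK_SIZE, the byte scaling and the model
# input shape are assumptions; CLASS_NAMES mirrors the README example output.
import numpy
import tensorflow

CHUNK_SIZE = 4096  # assumed chunk length in bytes
CLASS_NAMES = ["7z", "brotli", "bzip2", "compress", "gzip", "lz4", "rar", "zip"]

def softmax(values):
    # Numerically stable softmax over the summed per-class scores.
    shifted = numpy.asarray(values, dtype=numpy.float64) - numpy.max(values)
    exponentials = numpy.exp(shifted)
    return exponentials / numpy.sum(exponentials)

def load_samples_from_file(sample_path):
    # Split the file into fixed-size chunks; a short trailing chunk is dropped.
    samples = []
    with open(sample_path, "rb") as sample_file:
        while True:
            chunk = sample_file.read(CHUNK_SIZE)
            if len(chunk) < CHUNK_SIZE:
                break
            samples.append(numpy.frombuffer(chunk, dtype=numpy.uint8) / 255.0)
    return samples

def predict(sample_paths, model_path):
    model = tensorflow.keras.models.load_model(model_path)
    for sample_path in sample_paths:
        samples = load_samples_from_file(sample_path)
        if len(samples) == 0:
            print("There are no chunks big enough in the sample file. Expected at least {}B".format(CHUNK_SIZE))
            continue
        # One prediction vector per chunk; summing pools evidence across the file.
        predictions = model.predict(numpy.stack(samples))
        prediction_sum = numpy.sum(predictions, axis=0)
        normalized_predictions = softmax(prediction_sum)
        print(sample_path)
        for i in range(len(CLASS_NAMES)):
            print("{:9}: {:2.2f}%".format(CLASS_NAMES[i], normalized_predictions[i] * 100))
```

Summing the per-chunk prediction vectors before the softmax pools evidence across the whole file, which is why each input file yields a single confidence row per class in the example output above.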
