
retrain the model using 360 netlab #2


Open · wants to merge 6 commits into master
59 changes: 55 additions & 4 deletions README.md
@@ -17,11 +17,14 @@ Pipenv:
pipenv install -r requirements.txt
pipenv shell
```
Make sure h5py version 2.10.0 is installed in your environment (e.g. `pipenv install h5py==2.10.0`).
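
To double-check the pin, a minimal sanity check (a sketch, run inside the environment created above):

```python
# Sanity check: confirm the h5py version pinned above is the one actually installed.
import h5py

assert h5py.__version__ == "2.10.0", f"unexpected h5py version: {h5py.__version__}"
```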

Virtualenv is similar, but there's really no reason to use virtualenv instead of Pipenv anymore.


## Retraining the model
### The model in this repo was already retrained in Oct. 2022.

There is a trained model checked into the `models` directory. If you'd like to train your own, you'll first need to
download the training data from S3:
```
@@ -47,19 +50,67 @@ potentially run it for, say, 5 epochs and still get good accuracy with half the
`python degas/runner train-model --epochs 5 data/processed`


## Making predictions
## Making predictions (EDITED: specified versions of TensorFlow and TensorFlow Serving)

Since this project uses Tensorflow as the underlying deep learning library, the recommended way to use this for
inference is to use [Tensorflow Serving](https://www.tensorflow.org/serving/).

You should be able to serve it using:

```
docker run -p 8501:8501 \
--mount type=bind,source=models/degas,target=/models/degas\
-e MODEL_NAME=degas -t tensorflow/serving
docker run -p 8501:8501 \
--mount type=bind,source=/Users/yourUserName/PycharmProjects/degas/models/degas,target=/models/degas \
-e MODEL_NAME=degas -t tensorflow/serving:1.11.0
```
See [Tensorflow Serving docs](https://www.tensorflow.org/serving/docker) for more information about available options.

The REST API exposes the following endpoints:

- Model info: http://localhost:8501/v1/models/degas
- Model metadata: http://localhost:8501/v1/models/degas/metadata
- Predict (POST): http://localhost:8501/v1/models/degas:predict

The JSON body to POST is:

```
{
"instances": [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,12,21,29,29,21,26,19,12
,17,29,27]
,[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,15,2,38,4,3,36,8,9,19,33,0,1,12
,17,29,27]
,[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,37,37,37,12,34,37,23,34,34,19,32,12
,17,29,27]]
}

```
These instances represent the following domains:
"www.google.com", "a2x43v89es01.com", "www.twitter.com"

The response will be:
```
{
"predictions": [
[
4.54876e-11
],
[
0.723077
],
[
2.9277e-18
]
]
}
```

So the predictions for "www.google.com", "a2x43v89es01.com", and "www.twitter.com" are: false, true, false.
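
For reference, a minimal client sketch that POSTs the first example instance to the predict endpoint (assuming the tensorflow/serving container from the section above is running on localhost:8501; the integer encoding is copied verbatim from the example payload):

```python
# Minimal REST client sketch for the degas model served by TensorFlow Serving.
# The encoded instance below is "www.google.com" from the example payload above.
import json
import urllib.request

PREDICT_URL = "http://localhost:8501/v1/models/degas:predict"

payload = {
    "instances": [
        [0] * 61 + [37, 37, 37, 12, 21, 29, 29, 21, 26, 19, 12, 17, 29, 27],
    ]
}

request = urllib.request.Request(
    PREDICT_URL,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    predictions = json.loads(response.read())["predictions"]

# Scores close to 1.0 mean the domain is predicted to be a DGA.
print(predictions)  # e.g. [[4.54876e-11]] for www.google.com
```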


# About Degas

8 changes: 6 additions & 2 deletions degas/model/predict.py
@@ -34,12 +34,16 @@ def load_model(version=1) -> Model:
    return model


def predict(model: Model, domains: np.ndarray) -> np.ndarray:
def predict(model: Model, domains: np.ndarray, batch_size: int = 32, steps=None, max_queue_size: int = 10, workers: int = 1) -> np.ndarray:
"""
Given a list of domains as input, returns a list of booleans, where True means it is predicted to be a DGA, and
false means it is predicted to be benign
"""
predictions = model.predict_on_batch(prep_data(domains))
# predictions = model.predict(prep_data(domains), batch_size=batch_size,steps=steps, max_queue_size=max_queue_size, workers=workers)

# parameter workers not present in tensorflow 1.11.0
predictions = model.predict(prep_data(domains), batch_size=batch_size, steps=steps, max_queue_size=max_queue_size)
# predictions = model.predict_on_batch(prep_data(domains))
return predictions
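
For reviewers, a hypothetical in-process usage sketch of the updated `predict` (assuming `degas` is importable and that `prep_data` accepts an array of raw domain strings, as the docstring suggests):

```python
# Hypothetical usage of degas.model.predict with the retrained model.
import numpy as np
from degas.model.predict import load_model, predict

model = load_model(version=1)
domains = np.array(["www.google.com", "a2x43v89es01.com", "www.twitter.com"])

scores = predict(model, domains, batch_size=32)
# Scores close to 1.0 indicate a likely DGA domain, close to 0.0 a benign one.
print(scores)
```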


1 change: 1 addition & 0 deletions degas/model/train.py
@@ -153,6 +153,7 @@ def run_kfold(data: pd.DataFrame, num_epochs=100, kfold_splits=2, batch_size=256


def main(input_filepath: str, epochs: int = 100, kfold_splits: int = 3) -> None:
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'  # make GPUs 0-3 visible to TensorFlow for training
    logging.info("load up some data")
    input_path = Path(input_filepath)
    # if the input was a directory, add our default filename.
6 changes: 4 additions & 2 deletions environment.yml
@@ -11,6 +11,8 @@ dependencies:
- seaborn
- matplotlib
- Click
- python-dotenv
- python-dotenv=0.20.0
- typing
- tensorflow=1.12.0
- tensorflow=1.11.0
- h5py=2.10.0

Binary file modified models/degas/1/nyu_model.h5
Binary file not shown.
Binary file modified models/degas/1/saved_model.pb
Binary file not shown.
Binary file modified models/degas/1/variables/variables.data-00000-of-00001
Binary file not shown.
Binary file modified models/degas/1/variables/variables.index
Binary file not shown.
6 changes: 3 additions & 3 deletions requirements.txt
@@ -1,9 +1,9 @@
python>=3.6.7
tensorflow=1.12.0
python >= 3.6.7
tensorflow == 1.11.0
pandas>=0.22
scikit-learn>=0.20
numpy
python-dotenv
python-dotenv==0.20.0
typing
Click
matplotlib
3 changes: 3 additions & 0 deletions tests/model/test_train.py
@@ -1,3 +1,5 @@
import os

from ..context import degas
from tensorflow.python.keras.models import Model

@@ -6,6 +8,7 @@


def test_build_model():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    model: Model = degas.model.train.build_model()
    # this validates that we can build and compile it w/o error, which catches the most common issues in model creation
    print("Model: {}".format(model))