README.md — 37 changes: 27 additions & 10 deletions
@@ -4,7 +4,7 @@ Code for FCHD - A fast and accurate head detector
This is the code for FCHD, a fast and accurate head detector. See [the paper](https://arxiv.org/abs/1809.08766) for details and the [video](https://youtu.be/gRPA7Hqk3VQ) for a demo.

## Dependencies
- The code is tested on Ubuntu 16.04.

- Install PyTorch >= 0.4 with GPU support (the code is GPU-only); refer to the [official website](http://pytorch.org). A quick sanity-check sketch follows below.

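Optionally, you can verify the GPU setup before going further. This is a minimal editor's sketch (not part of the original instructions); it only assumes PyTorch and CuPy are installed:

```Python
# Optional sanity check (editor's sketch): the code is GPU-only, so CUDA must
# be visible to both PyTorch and CuPy (CuPy is used by the NMS/proposal code).
import torch
import cupy as cp

print("torch", torch.__version__, "CUDA available:", torch.cuda.is_available())
print("CuPy devices:", cp.cuda.runtime.getDeviceCount())
```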
@@ -27,10 +27,16 @@ This is the code for FCHD - A Fast and accurate head detector. See [the paper](h

## Training
1) Download the Caffe pre-trained VGG16 from the following [link](https://drive.google.com/open?id=10AwNitG-5gq-YEJcG9iihosiOu7vAnfO). Store this pre-trained model in the `data/pretrained_model` folder.

2) Download the BRAINWASH dataset from the [official website](https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/software-and-datasets/). Unzip it and store the dataset in the `data/` folder.

3) If required, update the paths in `src/config.py`. The default paths set in `src/config.py` are:
```Python
brainwash_dataset_root_path = 'data/brainwash_raw'
hollywood_dataset_root_path = 'data/HollywoodHeads'
caffe_pretrain_path = 'data/pretrained_model/vgg16_caffe.pth'
```
All paths are relative to the repository root. If you place the downloaded files at these paths, you can use the code as-is without changing anything (a small override sketch is given after this list).

4) Start the visdom server for visualization:
```Bash
python -m visdom.server
```
@@ -39,14 +45,25 @@
5) Run the following command to train the model: `python train.py`.
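If you would rather not edit `src/config.py` by hand (step 3), the paths can also be overridden at runtime. This is a minimal editor's sketch, assuming only that `opt` is the shared `Config` instance exported by `src/config.py` (as imported elsewhere in the repo); the paths shown are examples:

```Python
# Editor's sketch: override the default dataset/model paths on the shared
# config object instead of editing src/config.py. Paths below are examples.
from src.config import opt

opt.brainwash_dataset_root_path = 'data/brainwash_raw'
opt.caffe_pretrain_path = 'data/pretrained_model/vgg16_caffe.pth'
print(opt.brainwash_dataset_root_path)
```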

## Demo
1) Download the best performing model from the following [link](https://drive.google.com/open?id=1DbE4tAkaFYOEItwuIQhlbZypuIPDrArM).

2) Store the head detection model in the `checkpoints/` folder.

3) Download the Caffe pre-trained VGG16 from the following [link](https://drive.google.com/open?id=10AwNitG-5gq-YEJcG9iihosiOu7vAnfO). Store this pre-trained model in the `data/pretrained_model` folder.

4) Start the visdom server for visualization:
```Bash
python -m visdom.server
```

5) Run the following Python command from the root folder.
```Shell
python head_detection_demo.py --img_path <test_image_path> --model_path <model_path>
```
_You can drop the `--model_path` argument if you have stored the head detection model under `checkpoints/`._

6) The output of the model will be stored in the `output/` directory under the repository root.
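If you prefer to call the detector from your own script instead of the CLI, here is a minimal editor's sketch. It assumes the updated `detect()` signature from `head_detection_demo.py` in this PR; the image path is a placeholder:

```Python
# Editor's sketch: scripted use of the demo helper. The image path is a
# placeholder; with SAVE_FLAG=1 the annotated image is written to output/.
from head_detection_demo import detect

detect('data/sample.jpg', './checkpoints/head_detector_final', SAVE_FLAG=1, THRESH=0.01)
```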

## Results
@@ -56,8 +73,8 @@
| Method | AP |
| :--------------------------------------: | :---------: |
| ReInspect, Lhungarian [1] | 0.78 |
| **Ours** | **0.70** |

## Runtime
- Runs at 5 fps on an NVIDIA Quadro M1000M GPU with 512 CUDA cores.

## Acknowledgement
This work builds on many excellent works:
head_detection_demo.py — 27 changes: 13 additions & 14 deletions
@@ -8,19 +8,16 @@
from PIL import Image
import numpy as np
from data.dataset import preprocess
import matplotlib.pyplot as plt
import src.array_tool as at
from src.vis_tool import visdom_bbox
import argparse
import src.utils as utils
from src.config import opt
import time

SAVE_FLAG = 0
THRESH = 0.01
IM_RESIZE = False

def read_img(path):
def read_img(path, IM_RESIZE=False):
f = Image.open(path)
if IM_RESIZE:
f = f.resize((640,480), Image.ANTIALIAS)
@@ -29,14 +26,16 @@ def read_img(path):
img_raw = np.asarray(f, dtype=np.uint8)
img_raw_final = img_raw.copy()
img = np.asarray(f, dtype=np.float32)
_, H, W = img.shape
# _, H, W = img.shape
img = img.transpose((2,0,1))
_, H, W = img.shape
img = preprocess(img)
_, o_H, o_W = img.shape
scale = o_H / H
return img, img_raw_final, scale

def detect(img_path, model_path):

def detect(img_path, model_path, SAVE_FLAG=0, THRESH=0.01):
file_id = utils.get_file_id(img_path)
img, img_raw, scale = read_img(img_path)
head_detector = Head_Detector_VGG16(ratios=[1], anchor_scales=[2,4])
@@ -52,21 +51,24 @@ def detect(img_path, model_path):
print ("[INFO] Head detection over. Time taken: {:.4f} s".format(tt))
for i in range(pred_bboxes_.shape[0]):
ymin, xmin, ymax, xmax = pred_bboxes_[i,:]
utils.draw_bounding_box_on_image_array(img_raw,ymin, xmin, ymax, xmax)
utils.draw_bounding_box_on_image_array(img_raw, ymin/scale, xmin/scale, ymax/scale, xmax/scale)
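# Note (editor's comment): the network predicts boxes on the resized image, so
# the coordinates are divided by `scale` (o_H / H from read_img) to map them
# back onto the raw image before drawing.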
plt.axis('off')
plt.imshow(img_raw)
if SAVE_FLAG == 1:
if not os.path.exists(opt.test_output_path): # Create the directory
os.makedirs(opt.test_output_path) # If it doesn't exist

plt.savefig(os.path.join(opt.test_output_path, file_id+'.png'), bbox_inches='tight', pad_inches=0)
else:
plt.show()


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--img_path", type=str, help="test image path")
parser.add_argument("--model_path", type=str, default='./checkpoints/sess:2/head_detector08120858_0.682282441835')
parser.add_argument("--model_path", type=str, default='./checkpoints/head_detector_final')
args = parser.parse_args()
detect(args.img_path, args.model_path)
detect(args.img_path, args.model_path, SAVE_FLAG=1)
# model_path = './checkpoints/sess:2/head_detector08120858_0.682282441835'

# test_data_list_path = os.path.join(opt.data_root_path, 'brainwash_test.idl')
@@ -83,6 +85,3 @@ def detect(img_path, model_path):
# src_path = os.path.join(opt.data_root_path, img_path.replace('"',''))
# detect(src_path, model_path, save_idx)
# save_idx += 1



src/config.py — 13 changes: 7 additions & 6 deletions
@@ -1,15 +1,16 @@
from pprint import pprint

class Config:
brainwash_dataset_root_path = '/home/aditya-tyco/Desktop/aditya_personal_projects/head_detection_v5/data/brainwash_raw'
hollywood_dataset_root_path = '/home/aditya-tyco/Desktop/aditya_personal_projects/head_detection_v5/data/HollywoodHeads'
brainwash_dataset_root_path = './data/'
hollywood_dataset_root_path = './data/'
min_size = 600 # image resize
max_size = 1000 # image resize
caffe_pretrain = True
caffe_pretrain_path = '/home/aditya-tyco/Desktop/aditya_personal_projects/head_detection_v5/data/pretrained_model/vgg16_caffe.pth'
model_save_path = '/home/aditya-tyco/Desktop/aditya_personal_projects/head_detection_v5/checkpoints'
caffe_pretrain_path = './data/pretrained_model/vgg16_caffe.pth'
model_save_path = './checkpoints'
# sigma for l1_smooth_loss
rpn_sigma = 3.
test_output_path = './output'

weight_decay = 0.0005
lr_decay = 0.1 # 1e-3 -> 1e-4
@@ -22,7 +23,7 @@ class Config:
pretrained_model = 'vgg16'

epoch = 15

use_adam = False # Use Adam optimizer
use_chainer = False # try match everything as chainer
def _parse(self, kwargs):
src/creator_tool.py — 7 changes: 5 additions & 2 deletions
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-

import numpy as np
import cupy as cp
@@ -126,12 +126,15 @@ def __call__(self, loc, score,
roi[:, slice(0, 4, 2)], 0, img_size[0])
roi[:, slice(1, 4, 2)] = np.clip(
roi[:, slice(1, 4, 2)], 0, img_size[1])


min_size = self.min_size * scale
hs = roi[:, 2] - roi[:, 0]
ws = roi[:, 3] - roi[:, 1]

min_size = np.array(min_size)
hs = np.array(hs)
ws = np.array(ws)
keep = np.where((hs >= min_size) & (ws >= min_size))[0]
roi = roi[keep, :]
score = score[keep]
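For intuition, here is a tiny self-contained NumPy sketch (editor's example, toy boxes rather than the project's data) of the size filter applied above:

```Python
import numpy as np

# Toy illustration of the proposal size filter: keep only boxes whose height
# and width are both at least min_size. Boxes are (ymin, xmin, ymax, xmax).
roi = np.array([[0., 0., 10., 10.],
                [0., 0., 3., 40.]], dtype=np.float32)
min_size = 5
hs = roi[:, 2] - roi[:, 0]
ws = roi[:, 3] - roi[:, 1]
keep = np.where((hs >= min_size) & (ws >= min_size))[0]
print(roi[keep, :])  # only the first box passes both checks
```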
src/nms/_nms_gpu_post.c — 38 changes: 19 additions & 19 deletions

(Generated file; diff not rendered.)

src/nms/build.py — 9 changes: 8 additions & 1 deletion
@@ -1,8 +1,15 @@
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy

ext_modules = [Extension("_nms_gpu_post", ["_nms_gpu_post.pyx"])]
ext_modules = [
Extension(
"_nms_gpu_post",
["_nms_gpu_post.pyx"],
include_dirs=[numpy.get_include()]
)
]
setup(
name="Hello pyx",
cmdclass={'build_ext': build_ext},
src/nms/non_maximum_suppression.py — 2 changes: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
from ._nms_gpu_post_py import _nms_gpu_post


@cp.util.memoize(for_each_device=True)
@cp.memoize(for_each_device=True)
def _load_kernel(kernel_name, code, options=()):
cp.cuda.runtime.free(0)
assert isinstance(options, tuple)