37 changes: 27 additions & 10 deletions README.md
@@ -4,7 +4,7 @@ Code for FCHD - A fast and accurate head detector
This is the code for FCHD - A fast and accurate head detector. See [the paper](https://arxiv.org/abs/1809.08766) for details and the [video](https://youtu.be/gRPA7Hqk3VQ) for a demo.

## Dependencies
- The code is tested on Ubuntu 16.04.

- Install PyTorch >= 0.4 with GPU support (the code is GPU-only); refer to the [official website](http://pytorch.org).

@@ -27,10 +27,16 @@

## Training
1) Download the Caffe pre-trained VGG16 from the following [link](https://drive.google.com/open?id=10AwNitG-5gq-YEJcG9iihosiOu7vAnfO). Store this pre-trained model in the `data/pretrained_model/` folder.

2) Download the BRAINWASH dataset from the [official website](https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/software-and-datasets/). Unzip it and store the dataset in the `data/` folder.

3) Make appropriate settings in the `src/config.py` file for the updated paths, if required. The default paths set in `src/config.py` are:
```
brainwash_dataset_root_path = 'data/brainwash_raw'
hollywood_dataset_root_path = 'data/HollywoodHeads'
caffe_pretrain_path = 'data/pretrained_model/vgg16_caffe.pth'
```
All paths are relative to the repository root. If you place the downloaded files under these paths, you can use the code as-is without changing anything (a quick sanity check for these paths is sketched right after this list).

4) Start visdom server for visualization:
```Bash
python -m visdom.server
```
@@ -39,14 +45,25 @@
5) Run the following command to train the model: `python train.py`.
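
A minimal sketch for sanity-checking these paths before a training run; the expected locations are taken from the defaults listed in step 3, so adjust them if you changed `src/config.py`:
```python
# Sanity check for the default paths listed in step 3 (paths are taken from
# this README; adjust them if you changed src/config.py).
import os

expected = [
    'data/brainwash_raw',                     # BRAINWASH dataset root
    'data/pretrained_model/vgg16_caffe.pth',  # Caffe-pretrained VGG16 weights
    'checkpoints',                            # where trained models are saved
]

for path in expected:
    status = 'OK' if os.path.exists(path) else 'MISSING'
    print('{:7s} {}'.format(status, path))
```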

## Demo
1) Download the best performing model from the following [link](https://drive.google.com/open?id=1DbE4tAkaFYOEItwuIQhlbZypuIPDrArM).

2) Store the head detection model in the `checkpoints/` folder.

3) Download the Caffe pre-trained VGG16 from the following [link](https://drive.google.com/open?id=10AwNitG-5gq-YEJcG9iihosiOu7vAnfO). Store this pre-trained model in the `data/pretrained_model/` folder.

4) Start the visdom server for visualization:
```Bash
python -m visdom.server
```

5) Run the following Python command from the root folder.
```Shell
python head_detection_demo.py --img_path <test_image_path> --model_path <model_path>
```
_You can drop the `--model_path` argument if you have stored the head detection model under `checkpoints/`._

6) The output of the model will be stored in a directory named `output/` in the same folder. (A sketch of calling the detector from Python follows this list.)
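
If you prefer to drive the detector from Python rather than the command line, here is a minimal sketch based on the `detect()` signature in `head_detection_demo.py` below; the image path is a placeholder:
```python
# Sketch: call the demo's detect() directly; the signature is the one defined
# in head_detection_demo.py below, and the image path is a placeholder.
from head_detection_demo import detect

# SAVE_FLAG=1 writes the visualisation to opt.test_output_path ('./output')
# instead of opening a matplotlib window; THRESH keeps the default of 0.01.
detect('data/sample.jpg',                    # placeholder test image
       './checkpoints/head_detector_final',  # default checkpoint path used below
       SAVE_FLAG=1,
       THRESH=0.01)
```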

## Results
| Method | AP |
| :--------------------------------------: | :---------: |
@@ -56,8 +73,8 @@
| ReInspect, Lhungarian [1] | 0.78 |
| **Ours** | **0.70** |

## Runtime
- Runs at 5 fps on an NVIDIA Quadro M1000M GPU with 512 CUDA cores.

## Acknowledgement
This work builds on many excellent prior works:
27 changes: 13 additions & 14 deletions head_detection_demo.py
@@ -8,19 +8,16 @@
from PIL import Image
import numpy as np
from data.dataset import preprocess
import matplotlib.pyplot as plt
import src.array_tool as at
from src.vis_tool import visdom_bbox
import argparse
import src.utils as utils
from src.config import opt
import time


def read_img(path, IM_RESIZE=False):
f = Image.open(path)
if IM_RESIZE:
f = f.resize((640,480), Image.ANTIALIAS)
@@ -29,14 +26,16 @@ def read_img(path):
img_raw = np.asarray(f, dtype=np.uint8)
img_raw_final = img_raw.copy()
img = np.asarray(f, dtype=np.float32)
# _, H, W = img.shape
img = img.transpose((2,0,1))
_, H, W = img.shape
img = preprocess(img)
_, o_H, o_W = img.shape
scale = o_H / H
return img, img_raw_final, scale


def detect(img_path, model_path, SAVE_FLAG=0, THRESH=0.01):
file_id = utils.get_file_id(img_path)
img, img_raw, scale = read_img(img_path)
head_detector = Head_Detector_VGG16(ratios=[1], anchor_scales=[2,4])
@@ -52,21 +51,24 @@ def detect(img_path, model_path):
print ("[INFO] Head detection over. Time taken: {:.4f} s".format(tt))
for i in range(pred_bboxes_.shape[0]):
ymin, xmin, ymax, xmax = pred_bboxes_[i,:]
utils.draw_bounding_box_on_image_array(img_raw, ymin/scale, xmin/scale, ymax/scale, xmax/scale)
plt.axis('off')
plt.imshow(img_raw)
if SAVE_FLAG == 1:
if not os.path.exists(opt.test_output_path): # Create the directory
os.makedirs(opt.test_output_path) # If it doesn't exist

plt.savefig(os.path.join(opt.test_output_path, file_id+'.png'), bbox_inches='tight', pad_inches=0)
else:
plt.show()


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--img_path", type=str, help="test image path")
parser.add_argument("--model_path", type=str, default='./checkpoints/sess:2/head_detector08120858_0.682282441835')
parser.add_argument("--model_path", type=str, default='./checkpoints/head_detector_final')
args = parser.parse_args()
detect(args.img_path, args.model_path, SAVE_FLAG=1)
# model_path = './checkpoints/sess:2/head_detector08120858_0.682282441835'

# test_data_list_path = os.path.join(opt.data_root_path, 'brainwash_test.idl')
@@ -83,6 +85,3 @@ def detect(img_path, model_path):
# src_path = os.path.join(opt.data_root_path, img_path.replace('"',''))
# detect(src_path, model_path, save_idx)
# save_idx += 1



13 changes: 7 additions & 6 deletions src/config.py
@@ -1,15 +1,16 @@
from pprint import pprint

class Config:
brainwash_dataset_root_path = './data/'
hollywood_dataset_root_path = './data/'
min_size = 600 # image resize
max_size = 1000 # image resize
caffe_pretrain = True
caffe_pretrain_path = './data/pretrained_model/vgg16_caffe.pth'
model_save_path = './checkpoints'
# sigma for l1_smooth_loss
rpn_sigma = 3.
test_output_path = './output'

weight_decay = 0.0005
lr_decay = 0.1 # 1e-3 -> 1e-4
@@ -22,7 +23,7 @@ class Config:
pretrained_model = 'vgg16'

epoch = 15

use_adam = False # Use Adam optimizer
use_chainer = False # try match everything as chainer
def _parse(self, kwargs):
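Since `train.py` and `head_detection_demo.py` read these values from the shared `opt` instance (`from src.config import opt`), a hedged sketch of overriding them at runtime from your own driver script, assuming plain attribute assignment is sufficient:
```python
# Sketch: override Config values at runtime from your own driver script instead
# of editing src/config.py. Assumes plain attribute assignment is enough, since
# the scripts read these values from the shared `opt` instance.
from src.config import opt

opt.brainwash_dataset_root_path = '/datasets/brainwash_raw'  # hypothetical location
opt.caffe_pretrain_path = 'data/pretrained_model/vgg16_caffe.pth'
opt.test_output_path = './output'

print(opt.brainwash_dataset_root_path)
```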
7 changes: 5 additions & 2 deletions src/creator_tool.py
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-

import numpy as np
import cupy as cp
@@ -126,12 +126,15 @@ def __call__(self, loc, score,
roi[:, slice(0, 4, 2)], 0, img_size[0])
roi[:, slice(1, 4, 2)] = np.clip(
roi[:, slice(1, 4, 2)], 0, img_size[1])


min_size = self.min_size * scale
hs = roi[:, 2] - roi[:, 0]
ws = roi[:, 3] - roi[:, 1]

min_size = np.array(min_size)
hs = np.array(hs)
ws = np.array(ws)
keep = np.where((hs >= min_size) & (ws >= min_size))[0]
roi = roi[keep, :]
score = score[keep]
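For reference, a self-contained NumPy sketch of the min-size filter in the hunk above, with made-up proposal boxes in the same `(ymin, xmin, ymax, xmax)` layout as the code:
```python
# Self-contained NumPy illustration of the proposal size filter above;
# the boxes and min_size value are made up for the example.
import numpy as np

roi = np.array([[ 0.,  0., 50., 40.],   # 50 x 40 box -> kept
                [10., 10., 20., 60.],   # height 10   -> dropped
                [ 5.,  5., 80., 12.]])  # width 7     -> dropped
min_size = 16 * 1.0                     # plays the role of self.min_size * scale

hs = roi[:, 2] - roi[:, 0]              # proposal heights
ws = roi[:, 3] - roi[:, 1]              # proposal widths
keep = np.where((hs >= min_size) & (ws >= min_size))[0]

print(keep)          # -> [0]
print(roi[keep, :])  # only the first box survives
```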
9 changes: 8 additions & 1 deletion src/nms/build.py
@@ -1,8 +1,15 @@
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy

ext_modules = [
Extension(
"_nms_gpu_post",
["_nms_gpu_post.pyx"],
include_dirs=[numpy.get_include()]
)
]
setup(
name="Hello pyx",
cmdclass={'build_ext': build_ext},
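A small sketch for checking the rebuilt extension; the in-place build command is an assumption, since the repo may wrap the build in its own script:
```python
# Sketch: after rebuilding the extension in place, e.g. with
#     python build.py build_ext --inplace   # assumed command; the repo may wrap this
# run this from src/nms/ to confirm NumPy headers were found and the module loads.
import numpy
print(numpy.get_include())  # the directory the Extension above now adds to include_dirs

import _nms_gpu_post        # an ImportError here means the extension was not built
print(_nms_gpu_post.__file__)
```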
48 changes: 24 additions & 24 deletions train.py
@@ -1,7 +1,7 @@
from __future__ import division

import os
import numpy as np
from torch.autograd import Variable
from torch.utils import data as data_
import torch
@@ -13,21 +13,21 @@
from src.head_detector_vgg16 import Head_Detector_VGG16
from trainer import Head_Detector_Trainer
from src.config import opt
import src.utils as utils
from data.dataset import Dataset, inverse_normalize
import src.array_tool as at
from src.vis_tool import visdom_bbox
from src.bbox_tools import bbox_iou

dataset_name = 'brainwash'
phases = ['train', 'val']
data_check_flag = False

def eval(dataloader, head_detector):
"""
Given the dataloader of the test split compute the
average corLoc of the dataset using the head detector
model given as the argument to the function.
"""
test_img_num = 0
test_corrLoc = 0.0
@@ -51,44 +51,44 @@ def eval(dataloader, head_detector):
test_img_num += 1
return test_corrLoc / test_img_num

def train():
# Get the dataset
for phase in phases:
if phase == 'train':
if dataset_name == 'hollywood':
train_data_list_path = os.path.join(opt.hollywood_dataset_root_path, 'hollywood_train.idl')
train_data_list = utils.get_phase_data_list(train_data_list_path, dataset_name)
if dataset_name == 'brainwash':
train_data_list_path = os.path.join(opt.brainwash_dataset_root_path, 'brainwash_train.idl')
train_data_list = utils.get_phase_data_list(train_data_list_path, dataset_name)
elif phase == 'val':
if dataset_name == 'hollywood':
val_data_list_path = os.path.join(opt.hollywood_dataset_root_path, 'hollywood_val.idl')
val_data_list = utils.get_phase_data_list(val_data_list_path, dataset_name)
if dataset_name == 'brainwash':
val_data_list_path = os.path.join(opt.brainwash_dataset_root_path, 'brainwash_val.idl')
val_data_list = utils.get_phase_data_list(val_data_list_path, dataset_name)
elif phase == 'test':
if dataset_name == 'hollywood':
test_data_list_path = os.path.join(opt.hollywood_dataset_root_path, 'hollywood_test.idl')
test_data_list = utils.get_phase_data_list(test_data_list_path, dataset_name)
if dataset_name == 'brainwash':
test_data_list_path = os.path.join(opt.brainwash_dataset_root_path, 'brainwash_test.idl')
test_data_list = utils.get_phase_data_list(test_data_list_path, dataset_name)

print "Number of images for training: %s" %(len(train_data_list))
print "Number of images for val: %s" %(len(val_data_list))
print "Number of images for test: %s" %(len(test_data_list))
if 'val' in phase: print "Number of images for val: %s" %(len(val_data_list))
if 'test' in phase: print "Number of images for test: %s" %(len(test_data_list))

if data_check_flag:
utils.check_loaded_data(train_data_list[random.randint(1,len(train_data_list))])
if 'val' in phase: utils.check_loaded_data(val_data_list[random.randint(1,len(val_data_list))])
if 'test' in phase: utils.check_loaded_data(test_data_list[random.randint(1,len(test_data_list))])

# Load the train dataset
train_dataset = Dataset(train_data_list)
test_dataset = Dataset(val_data_list)
print "Load data."
print "Load data."

train_dataloader = data_.DataLoader(train_dataset, batch_size=1,shuffle=True, num_workers=1)
test_dataloader = data_.DataLoader(test_dataset, batch_size=1, shuffle=True, num_workers=1)
@@ -104,7 +104,7 @@ def train():
img, bbox = img.cuda().float(), bbox_.cuda()
img, bbox = Variable(img), Variable(bbox)
_, _, _ = trainer.train_step(img, bbox, scale)
print "Forward and backward pass done."
print "\rImage no. {}: Done!".format(ii),
if (ii+1) % opt.plot_every == 0:
trainer.vis.plot_many(trainer.get_meter_data())
ori_img_ = inverse_normalize(at.tonumpy(img[0]))
@@ -116,16 +116,16 @@ def train():
trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')

avg_test_CorrLoc = eval(test_dataloader, head_detector_vgg16)

print
print("Epoch {} of {}.".format(epoch+1, opt.epoch))
print(" test average corrLoc accuracy:\t\t{:.3f}".format(avg_test_CorrLoc))

model_save_path = trainer.save(best_map=avg_test_CorrLoc)

if epoch == 8:
trainer.load(model_save_path)
trainer.head_detector.scale_lr(opt.lr_decay)
lr_ = lr_ * opt.lr_decay

if __name__ == "__main__":
train()
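
A condensed sketch of what the phase loop above now does for the `brainwash` setting, with the `.idl` file names and the `get_phase_data_list` call taken from the lines shown; only the `train` and `val` lists are loaded after this change:
```python
# Condensed sketch of the phase handling above for dataset_name = 'brainwash'
# after this change (only the 'train' and 'val' lists are loaded).
import os
from src.config import opt
import src.utils as utils

data_lists = {}
for phase in ['train', 'val']:
    idl_path = os.path.join(opt.brainwash_dataset_root_path,
                            'brainwash_{}.idl'.format(phase))
    data_lists[phase] = utils.get_phase_data_list(idl_path, 'brainwash')
    print("Number of images for {}: {}".format(phase, len(data_lists[phase])))
```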