37 changes: 27 additions & 10 deletions README.md
@@ -4,7 +4,7 @@ Code for FCHD - A fast and accurate head detector
This is the code for FCHD - A fast and accurate head detector. See [the paper](https://arxiv.org/abs/1809.08766) for details and the [video](https://youtu.be/gRPA7Hqk3VQ) for a demo.

## Dependencies
- The code is tested on Ubuntu 16.04.

- Install PyTorch >= 0.4 with GPU support (the code is GPU-only); refer to the [official website](http://pytorch.org).

@@ -27,10 +27,16 @@

## Training
1) Download the Caffe pre-trained VGG16 from the following [link](https://drive.google.com/open?id=10AwNitG-5gq-YEJcG9iihosiOu7vAnfO). Store this pre-trained model in the `data/pretrained_model/` folder.

2) Download the BRAINWASH dataset from the [official website](https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/software-and-datasets/). Unzip it and store the dataset in the `data/` folder.

3) Make appropriate settings in the `src/config.py` file for the updated paths, if required. The default paths set in `src/config.py` are:
```
brainwash_dataset_root_path = 'data/brainwash_raw'
hollywood_dataset_root_path = 'data/HollywoodHeads'
caffe_pretrain_path = 'data/pretrained_model/vgg16_caffe.pth'
```
All paths are relative to the repository root. If you place the downloaded files under these paths, you can use the code as-is without changing anything (a quick sanity check for these paths is sketched right after this list).

4) Start visdom server for visualization:
```Bash
python -m visdom.server
```
@@ -39,14 +45,25 @@
5) Run the following command to train the model: `python train.py`.
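
A minimal sketch for sanity-checking these paths before a training run; the expected locations are taken from the defaults listed in step 3, so adjust them if you changed `src/config.py`:
```python
# Sanity check for the default paths listed in step 3 (paths are taken from
# this README; adjust them if you changed src/config.py).
import os

expected = [
    'data/brainwash_raw',                     # BRAINWASH dataset root
    'data/pretrained_model/vgg16_caffe.pth',  # Caffe-pretrained VGG16 weights
    'checkpoints',                            # where trained models are saved
]

for path in expected:
    status = 'OK' if os.path.exists(path) else 'MISSING'
    print('{:7s} {}'.format(status, path))
```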

## Demo
1) Download the best performing model from the following [link](https://drive.google.com/open?id=1DbE4tAkaFYOEItwuIQhlbZypuIPDrArM).

2) Store the head detection model in the `checkpoints/` folder.

3) Download the Caffe pre-trained VGG16 from the following [link](https://drive.google.com/open?id=10AwNitG-5gq-YEJcG9iihosiOu7vAnfO). Store this pre-trained model in the `data/pretrained_model/` folder.

4) Start the visdom server for visualization:
```Bash
python -m visdom.server
```

5) Run the following Python command from the root folder.
```Shell
python head_detection_demo.py --img_path <test_image_path> --model_path <model_path>
```
_You can drop the `--model_path` argument if you have stored the head detection model under `checkpoints/`._

6) The output of the model will be stored in a directory named `output/` in the same folder. (A sketch of calling the detector from Python follows this list.)
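
If you prefer to drive the detector from Python rather than the command line, here is a minimal sketch based on the `detect()` signature in `head_detection_demo.py` below; the image path is a placeholder:
```python
# Sketch: call the demo's detect() directly; the signature is the one defined
# in head_detection_demo.py below, and the image path is a placeholder.
from head_detection_demo import detect

# SAVE_FLAG=1 writes the visualisation to opt.test_output_path ('./output')
# instead of opening a matplotlib window; THRESH keeps the default of 0.01.
detect('data/sample.jpg',                    # placeholder test image
       './checkpoints/head_detector_final',  # default checkpoint path used below
       SAVE_FLAG=1,
       THRESH=0.01)
```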

## Results
| Method | AP |
| :--------------------------------------: | :---------: |
@@ -56,8 +73,8 @@
| ReInspect, Lhungarian [1] | 0.78 |
| **Ours** | **0.70** |

## Runtime
- Runs at 5 fps on an NVIDIA Quadro M1000M GPU with 512 CUDA cores.

## Acknowledgement
This work builds on many excellent prior works:
27 changes: 13 additions & 14 deletions head_detection_demo.py
@@ -8,19 +8,16 @@
from PIL import Image
import numpy as np
from data.dataset import preprocess
import matplotlib.pyplot as plt
import src.array_tool as at
from src.vis_tool import visdom_bbox
import argparse
import src.utils as utils
from src.config import opt
import time


def read_img(path, IM_RESIZE=False):
f = Image.open(path)
if IM_RESIZE:
f = f.resize((640,480), Image.ANTIALIAS)
@@ -29,14 +26,16 @@ def read_img(path):
img_raw = np.asarray(f, dtype=np.uint8)
img_raw_final = img_raw.copy()
img = np.asarray(f, dtype=np.float32)
# _, H, W = img.shape
img = img.transpose((2,0,1))
_, H, W = img.shape
img = preprocess(img)
_, o_H, o_W = img.shape
scale = o_H / H
return img, img_raw_final, scale


def detect(img_path, model_path, SAVE_FLAG=0, THRESH=0.01):
file_id = utils.get_file_id(img_path)
img, img_raw, scale = read_img(img_path)
head_detector = Head_Detector_VGG16(ratios=[1], anchor_scales=[2,4])
@@ -52,21 +51,24 @@ def detect(img_path, model_path):
print ("[INFO] Head detection over. Time taken: {:.4f} s".format(tt))
for i in range(pred_bboxes_.shape[0]):
ymin, xmin, ymax, xmax = pred_bboxes_[i,:]
utils.draw_bounding_box_on_image_array(img_raw, ymin/scale, xmin/scale, ymax/scale, xmax/scale)
plt.axis('off')
plt.imshow(img_raw)
if SAVE_FLAG == 1:
if not os.path.exists(opt.test_output_path): # Create the directory
os.makedirs(opt.test_output_path) # If it doesn't exist

plt.savefig(os.path.join(opt.test_output_path, file_id+'.png'), bbox_inches='tight', pad_inches=0)
else:
plt.show()


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--img_path", type=str, help="test image path")
parser.add_argument("--model_path", type=str, default='./checkpoints/sess:2/head_detector08120858_0.682282441835')
parser.add_argument("--model_path", type=str, default='./checkpoints/head_detector_final')
args = parser.parse_args()
detect(args.img_path, args.model_path, SAVE_FLAG=1)
# model_path = './checkpoints/sess:2/head_detector08120858_0.682282441835'

# test_data_list_path = os.path.join(opt.data_root_path, 'brainwash_test.idl')
@@ -83,6 +85,3 @@ def detect(img_path, model_path):
# src_path = os.path.join(opt.data_root_path, img_path.replace('"',''))
# detect(src_path, model_path, save_idx)
# save_idx += 1



13 changes: 7 additions & 6 deletions src/config.py
@@ -1,15 +1,16 @@
from pprint import pprint

class Config:
brainwash_dataset_root_path = './data/'
hollywood_dataset_root_path = './data/'
min_size = 600 # image resize
max_size = 1000 # image resize
caffe_pretrain = True
caffe_pretrain_path = './data/pretrained_model/vgg16_caffe.pth'
model_save_path = './checkpoints'
# sigma for l1_smooth_loss
rpn_sigma = 3.
test_output_path = './output'

weight_decay = 0.0005
lr_decay = 0.1 # 1e-3 -> 1e-4
@@ -22,7 +23,7 @@ class Config:
pretrained_model = 'vgg16'

epoch = 15

use_adam = False # Use Adam optimizer
use_chainer = False # try match everything as chainer
def _parse(self, kwargs):
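Since `train.py` and `head_detection_demo.py` read these values from the shared `opt` instance (`from src.config import opt`), a hedged sketch of overriding them at runtime from your own driver script, assuming plain attribute assignment is sufficient:
```python
# Sketch: override Config values at runtime from your own driver script instead
# of editing src/config.py. Assumes plain attribute assignment is enough, since
# the scripts read these values from the shared `opt` instance.
from src.config import opt

opt.brainwash_dataset_root_path = '/datasets/brainwash_raw'  # hypothetical location
opt.caffe_pretrain_path = 'data/pretrained_model/vgg16_caffe.pth'
opt.test_output_path = './output'

print(opt.brainwash_dataset_root_path)
```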
7 changes: 5 additions & 2 deletions src/creator_tool.py
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-

import numpy as np
import cupy as cp
@@ -126,12 +126,15 @@ def __call__(self, loc, score,
roi[:, slice(0, 4, 2)], 0, img_size[0])
roi[:, slice(1, 4, 2)] = np.clip(
roi[:, slice(1, 4, 2)], 0, img_size[1])


min_size = self.min_size * scale
hs = roi[:, 2] - roi[:, 0]
ws = roi[:, 3] - roi[:, 1]

min_size = np.array(min_size)
hs = np.array(hs)
ws = np.array(ws)
keep = np.where((hs >= min_size) & (ws >= min_size))[0]
roi = roi[keep, :]
score = score[keep]
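For reference, a self-contained NumPy sketch of the min-size filter in the hunk above, with made-up proposal boxes in the same `(ymin, xmin, ymax, xmax)` layout as the code:
```python
# Self-contained NumPy illustration of the proposal size filter above;
# the boxes and min_size value are made up for the example.
import numpy as np

roi = np.array([[ 0.,  0., 50., 40.],   # 50 x 40 box -> kept
                [10., 10., 20., 60.],   # height 10   -> dropped
                [ 5.,  5., 80., 12.]])  # width 7     -> dropped
min_size = 16 * 1.0                     # plays the role of self.min_size * scale

hs = roi[:, 2] - roi[:, 0]              # proposal heights
ws = roi[:, 3] - roi[:, 1]              # proposal widths
keep = np.where((hs >= min_size) & (ws >= min_size))[0]

print(keep)          # -> [0]
print(roi[keep, :])  # only the first box survives
```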
9 changes: 8 additions & 1 deletion src/nms/build.py
@@ -1,8 +1,15 @@
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy

ext_modules = [
Extension(
"_nms_gpu_post",
["_nms_gpu_post.pyx"],
include_dirs=[numpy.get_include()]
)
]
setup(
name="Hello pyx",
cmdclass={'build_ext': build_ext},
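A small sketch for checking the rebuilt extension; the in-place build command is an assumption, since the repo may wrap the build in its own script:
```python
# Sketch: after rebuilding the extension in place, e.g. with
#     python build.py build_ext --inplace   # assumed command; the repo may wrap this
# run this from src/nms/ to confirm NumPy headers were found and the module loads.
import numpy
print(numpy.get_include())  # the directory the Extension above now adds to include_dirs

import _nms_gpu_post        # an ImportError here means the extension was not built
print(_nms_gpu_post.__file__)
```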
48 changes: 24 additions & 24 deletions train.py
@@ -1,7 +1,7 @@
from __future__ import division

import os
import numpy as np
from torch.autograd import Variable
from torch.utils import data as data_
import torch
@@ -13,21 +13,21 @@
from src.head_detector_vgg16 import Head_Detector_VGG16
from trainer import Head_Detector_Trainer
from src.config import opt
import src.utils as utils
from data.dataset import Dataset, inverse_normalize
import src.array_tool as at
from src.vis_tool import visdom_bbox
from src.bbox_tools import bbox_iou

dataset_name = 'brainwash'
phases = ['train', 'val']
data_check_flag = False

def eval(dataloader, head_detector):
"""
Given the dataloader of the test split compute the
average corLoc of the dataset using the head detector
model given as the argument to the function.
"""
test_img_num = 0
test_corrLoc = 0.0
@@ -51,44 +51,44 @@ def eval(dataloader, head_detector):
test_img_num += 1
return test_corrLoc / test_img_num

def train():
# Get the dataset
for phase in phases:
if phase == 'train':
if dataset_name == 'hollywood':
train_data_list_path = os.path.join(opt.hollywood_dataset_root_path, 'hollywood_train.idl')
train_data_list = utils.get_phase_data_list(train_data_list_path, dataset_name)
if dataset_name == 'brainwash':
train_data_list_path = os.path.join(opt.brainwash_dataset_root_path, 'brainwash_train.idl')
train_data_list = utils.get_phase_data_list(train_data_list_path, dataset_name)
elif phase == 'val':
if dataset_name == 'hollywood':
val_data_list_path = os.path.join(opt.hollywood_dataset_root_path, 'hollywood_val.idl')
val_data_list = utils.get_phase_data_list(val_data_list_path, dataset_name)
if dataset_name == 'brainwash':
val_data_list_path = os.path.join(opt.brainwash_dataset_root_path, 'brainwash_val.idl')
val_data_list = utils.get_phase_data_list(val_data_list_path, dataset_name)
elif phase == 'test':
if dataset_name == 'hollywood':
test_data_list_path = os.path.join(opt.hollywood_dataset_root_path, 'hollywood_test.idl')
test_data_list = utils.get_phase_data_list(test_data_list_path, dataset_name)
if dataset_name == 'brainwash':
test_data_list_path = os.path.join(opt.brainwash_dataset_root_path, 'brainwash_test.idl')
test_data_list = utils.get_phase_data_list(test_data_list_path, dataset_name)

print "Number of images for training: %s" %(len(train_data_list))
print "Number of images for val: %s" %(len(val_data_list))
print "Number of images for test: %s" %(len(test_data_list))
if 'val' in phase: print "Number of images for val: %s" %(len(val_data_list))
if 'test' in phase: print "Number of images for test: %s" %(len(test_data_list))

if data_check_flag:
utils.check_loaded_data(train_data_list[random.randint(1,len(train_data_list))])
if 'val' in phase: utils.check_loaded_data(val_data_list[random.randint(1,len(val_data_list))])
if 'test' in phase: utils.check_loaded_data(test_data_list[random.randint(1,len(test_data_list))])

# Load the train dataset
train_dataset = Dataset(train_data_list)
test_dataset = Dataset(val_data_list)
print "Load data."
print "Load data."

train_dataloader = data_.DataLoader(train_dataset, batch_size=1,shuffle=True, num_workers=1)
test_dataloader = data_.DataLoader(test_dataset, batch_size=1, shuffle=True, num_workers=1)
@@ -104,7 +104,7 @@ def train():
img, bbox = img.cuda().float(), bbox_.cuda()
img, bbox = Variable(img), Variable(bbox)
_, _, _ = trainer.train_step(img, bbox, scale)
print "Forward and backward pass done."
print "\rImage no. {}: Done!".format(ii),
if (ii+1) % opt.plot_every == 0:
trainer.vis.plot_many(trainer.get_meter_data())
ori_img_ = inverse_normalize(at.tonumpy(img[0]))
@@ -116,16 +116,16 @@ def train():
trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')

avg_test_CorrLoc = eval(test_dataloader, head_detector_vgg16)

print
print("Epoch {} of {}.".format(epoch+1, opt.epoch))
print(" test average corrLoc accuracy:\t\t{:.3f}".format(avg_test_CorrLoc))

model_save_path = trainer.save(best_map=avg_test_CorrLoc)

if epoch == 8:
trainer.load(model_save_path)
trainer.head_detector.scale_lr(opt.lr_decay)
lr_ = lr_ * opt.lr_decay

if __name__ == "__main__":
train()
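
A condensed sketch of what the phase loop above now does for the `brainwash` setting, with the `.idl` file names and the `get_phase_data_list` call taken from the lines shown; only the `train` and `val` lists are loaded after this change:
```python
# Condensed sketch of the phase handling above for dataset_name = 'brainwash'
# after this change (only the 'train' and 'val' lists are loaded).
import os
from src.config import opt
import src.utils as utils

data_lists = {}
for phase in ['train', 'val']:
    idl_path = os.path.join(opt.brainwash_dataset_root_path,
                            'brainwash_{}.idl'.format(phase))
    data_lists[phase] = utils.get_phase_data_list(idl_path, 'brainwash')
    print("Number of images for {}: {}".format(phase, len(data_lists[phase])))
```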