From e1f90bb3dba7c960c3a6240f7cec53b9fd3e9cd2 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Fri, 10 Feb 2023 13:02:37 +0800
Subject: [PATCH 001/116] fix tal assigner cr (#7722)
---
ppdet/modeling/assigners/task_aligned_assigner_cr.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ppdet/modeling/assigners/task_aligned_assigner_cr.py b/ppdet/modeling/assigners/task_aligned_assigner_cr.py
index 5c5097604d5..4558d6e8ec7 100644
--- a/ppdet/modeling/assigners/task_aligned_assigner_cr.py
+++ b/ppdet/modeling/assigners/task_aligned_assigner_cr.py
@@ -178,4 +178,4 @@ def forward(self,
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
- return assigned_labels, assigned_bboxes, assigned_scores
+ return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
From d5d8650d3dbaa9479181259a9902af556496fb2e Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Fri, 10 Feb 2023 13:19:56 +0800
Subject: [PATCH 002/116] fix dn_match_indices bug in detr_loss.py (#7724)
---
ppdet/modeling/losses/detr_loss.py | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/ppdet/modeling/losses/detr_loss.py b/ppdet/modeling/losses/detr_loss.py
index 8e9b1294f68..f4291a7a74b 100644
--- a/ppdet/modeling/losses/detr_loss.py
+++ b/ppdet/modeling/losses/detr_loss.py
@@ -163,9 +163,9 @@ def _get_loss_aux(self,
gt_class,
bg_index,
num_gts,
- match_indices=None,
+ dn_match_indices=None,
postfix=""):
- if boxes is None and logits is None:
+ if boxes is None or logits is None:
return {
"loss_class_aux" + postfix: paddle.paddle.zeros([1]),
"loss_bbox_aux" + postfix: paddle.paddle.zeros([1]),
@@ -175,9 +175,11 @@ def _get_loss_aux(self,
loss_bbox = []
loss_giou = []
for aux_boxes, aux_logits in zip(boxes, logits):
- if match_indices is None:
+ if dn_match_indices is None:
match_indices = self.matcher(aux_boxes, aux_logits, gt_bbox,
gt_class)
+ else:
+ match_indices = dn_match_indices
loss_class.append(
self._get_loss_class(aux_logits, gt_class, match_indices,
bg_index, num_gts, postfix)['loss_class' +
@@ -237,11 +239,13 @@ def forward(self,
gt_mask (List(Tensor), optional): list[[n, H, W]]
postfix (str): postfix of loss name
"""
- if "match_indices" in kwargs:
- match_indices = kwargs["match_indices"]
- else:
+ dn_match_indices = kwargs.get("dn_match_indices", None)
+ if dn_match_indices is None and (boxes is not None and
+ logits is not None):
match_indices = self.matcher(boxes[-1].detach(),
logits[-1].detach(), gt_bbox, gt_class)
+ else:
+ match_indices = dn_match_indices
num_gts = sum(len(a) for a in gt_bbox)
num_gts = paddle.to_tensor([num_gts], dtype="float32")
@@ -264,13 +268,11 @@ def forward(self,
gt_mask, match_indices, num_gts, postfix))
if self.aux_loss:
- if "match_indices" not in kwargs:
- match_indices = None
total_loss.update(
self._get_loss_aux(
boxes[:-1] if boxes is not None else None, logits[:-1]
if logits is not None else None, gt_bbox, gt_class,
- self.num_classes, num_gts, match_indices, postfix))
+ self.num_classes, num_gts, dn_match_indices, postfix))
return total_loss
@@ -292,7 +294,6 @@ def forward(self,
total_loss = super(DINOLoss, self).forward(boxes, logits, gt_bbox,
gt_class)
- # denoising training loss
if dn_meta is not None:
dn_positive_idx, dn_num_group = \
dn_meta["dn_positive_idx"], dn_meta["dn_num_group"]
@@ -315,13 +316,14 @@ def forward(self,
else:
dn_match_indices, dn_num_group = None, 1.
+ # compute denoising training loss
dn_loss = super(DINOLoss, self).forward(
dn_out_bboxes,
dn_out_logits,
gt_bbox,
gt_class,
postfix="_dn",
- match_indices=dn_match_indices,
+ dn_match_indices=dn_match_indices,
dn_num_group=dn_num_group)
total_loss.update(dn_loss)
From 9e4e0ad0a208cb3cbce07b75cae2882a8d0fd8bc Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Fri, 10 Feb 2023 16:57:56 +0800
Subject: [PATCH 003/116] update pphuman ppvehicle tiny models (#7726)
* update pphuman ppvehicle new models, test=document_fix
* update docs, test=document_fix
* update docs, test=document_fix
* fix configs, test=document_fix
---
configs/pphuman/README.md | 4 +-
...oe_plus_crn_t_auxhead_320_60e_pphuman.yml} | 6 +-
..._plus_crn_t_p2_auxhead_320_60e_pphuman.yml | 78 ++++++++++++++++++
configs/ppvehicle/README.md | 4 +-
..._plus_crn_t_auxhead_320_60e_ppvehicle.yml} | 6 +-
...lus_crn_t_p2_auxhead_320_60e_ppvehicle.yml | 79 +++++++++++++++++++
configs/ppyoloe/README.md | 6 +-
configs/ppyoloe/README_cn.md | 6 +-
...reader.yml => ppyoloe_plus_reader_320.yml} | 0
configs/ppyoloe/distill/README.md | 5 +-
...yml => ppyoloe_crn_t_p2_320_300e_coco.yml} | 4 +-
...e_plus_crn_t_p2_auxhead_320_300e_coco.yml} | 4 +-
ppdet/modeling/heads/ppyoloe_head.py | 11 ++-
13 files changed, 187 insertions(+), 26 deletions(-)
rename configs/pphuman/{ppyoloe_plus_crn_t_p2_60e_pphuman.yml => ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml} (85%)
create mode 100644 configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml
rename configs/ppvehicle/{ppyoloe_plus_crn_t_p2_60e_ppvehicle.yml => ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml} (85%)
create mode 100644 configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml
rename configs/ppyoloe/_base_/{ppyoloe_plus_tiny_reader.yml => ppyoloe_plus_reader_320.yml} (100%)
rename configs/ppyoloe/{ppyoloe_crn_t_p2_300e_coco.yml => ppyoloe_crn_t_p2_320_300e_coco.yml} (94%)
rename configs/ppyoloe/{ppyoloe_plus_crn_t_p2_auxhead_300e_coco.yml => ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml} (91%)
diff --git a/configs/pphuman/README.md b/configs/pphuman/README.md
index 6a713b6ce39..e267bbd73e0 100644
--- a/configs/pphuman/README.md
+++ b/configs/pphuman/README.md
@@ -11,8 +11,8 @@ PaddleDetection团队提供了针对行人的基于PP-YOLOE的检测模型,用
|PP-YOLOE-l| CrowdHuman | 48.0 | 81.9 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_36e_crowdhuman.pdparams) | [配置文件](./ppyoloe_crn_l_36e_crowdhuman.yml) |
|PP-YOLOE-s| 业务数据集 | 53.2 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | [配置文件](./ppyoloe_crn_s_36e_pphuman.yml) |
|PP-YOLOE-l| 业务数据集 | 57.8 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | [配置文件](./ppyoloe_crn_l_36e_pphuman.yml) |
-|PP-YOLOE+_t-P2(320)| 业务数据集 | 49.8 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_t_p2_60e_pipeline.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_60e_pphuman.yml) |
-|PP-YOLOE+_t-P2(416)| 业务数据集 | 52.2 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_t_p2_60e_pipeline.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_60e_pphuman.yml) |
+|PP-YOLOE+_t-P2-aux(320)| 业务数据集 | 49.8 | 85.0 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml) |
+|PP-YOLOE+_t-aux(320)| 业务数据集 | 45.7 | 81.2 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.zip) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml) |
**注意:**
diff --git a/configs/pphuman/ppyoloe_plus_crn_t_p2_60e_pphuman.yml b/configs/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml
similarity index 85%
rename from configs/pphuman/ppyoloe_plus_crn_t_p2_60e_pphuman.yml
rename to configs/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml
index b13f19c5564..9d542fe6b4f 100644
--- a/configs/pphuman/ppyoloe_plus_crn_t_p2_60e_pphuman.yml
+++ b/configs/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml
@@ -3,14 +3,14 @@ _BASE_: [
'../runtime.yml',
'../ppyoloe/_base_/optimizer_300e.yml',
'../ppyoloe/_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
- '../ppyoloe/_base_/ppyoloe_plus_reader_tiny.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_reader_320.yml',
]
log_iter: 100
snapshot_epoch: 4
-weights: output/ppyoloe_plus_crn_tiny_60e_pphuman/model_final
+weights: output/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman/model_final
-pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_tiny_auxhead_300e_coco.pdparams
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams # 640*640 COCO mAP 39.7
depth_mult: 0.33
width_mult: 0.375
diff --git a/configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml b/configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml
new file mode 100644
index 00000000000..fd0605cfe9e
--- /dev/null
+++ b/configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml
@@ -0,0 +1,78 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../ppyoloe/_base_/optimizer_300e.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_reader_320.yml',
+]
+
+log_iter: 100
+snapshot_epoch: 4
+weights: output/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams # 320*320 COCO mAP 36.3
+depth_mult: 0.33
+width_mult: 0.375
+
+
+num_classes: 1
+TrainDataset:
+ !COCODataSet
+ image_dir: ""
+ anno_path: annotations/train.json
+ dataset_dir: dataset/pphuman
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+ !COCODataSet
+ image_dir: ""
+ anno_path: annotations/val.json
+ dataset_dir: dataset/pphuman
+
+TestDataset:
+ !ImageFolder
+ anno_path: annotations/val.json
+ dataset_dir: dataset/pphuman
+
+
+TrainReader:
+ batch_size: 8
+
+
+epoch: 60
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !CosineDecay
+ max_epochs: 72
+ - !LinearWarmup
+ start_factor: 0.
+ epochs: 1
+
+
+architecture: PPYOLOEWithAuxHead
+PPYOLOEWithAuxHead:
+ backbone: CSPResNet
+ neck: CustomCSPPAN
+ yolo_head: PPYOLOEHead
+ aux_head: SimpleConvHead
+ post_process: ~
+
+CSPResNet:
+ return_idx: [0, 1, 2, 3] # index 0 stands for P2
+
+CustomCSPPAN:
+ out_channels: [384, 384, 384, 384]
+
+SimpleConvHead:
+ fpn_strides: [32, 16, 8, 4]
+
+PPYOLOEHead:
+ fpn_strides: [32, 16, 8, 4]
+ static_assigner_epoch: -1
+ nms:
+ name: MultiClassNMS
+ nms_top_k: 1000
+ keep_top_k: 300
+ score_threshold: 0.01
+ nms_threshold: 0.7
diff --git a/configs/ppvehicle/README.md b/configs/ppvehicle/README.md
index 91811629000..0c09ce7bbbe 100644
--- a/configs/ppvehicle/README.md
+++ b/configs/ppvehicle/README.md
@@ -19,8 +19,8 @@ PaddleDetection团队提供了针对自动驾驶场景的基于PP-YOLOE的检测
|PP-YOLOE-s| PPVehicle9cls | 9 | 35.3 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle9cls.pdparams) | [配置文件](./mot_ppyoloe_s_36e_ppvehicle9cls.yml) |
|PP-YOLOE-l| PPVehicle | 1 | 63.9 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_l_36e_ppvehicle.pdparams) | [配置文件](./mot_ppyoloe_l_36e_ppvehicle.yml) |
|PP-YOLOE-s| PPVehicle | 1 | 61.3 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle.pdparams) | [配置文件](./mot_ppyoloe_s_36e_ppvehicle.yml) |
-|PP-YOLOE+_t-P2(320)| PPVehicle | 1 | 58.2 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_t_p2_60e_ppvehicle.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_60e_ppvehicle.yml) |
-|PP-YOLOE+_t-P2(416)| PPVehicle | 1 | 60.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_t_p2_60e_ppvehicle.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_60e_ppvehicle.yml) |
+|PP-YOLOE+_t-P2-aux(320)| PPVehicle | 1 | 58.2 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml) |
+|PP-YOLOE+_t-aux(320)| PPVehicle | 1 | 53.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.zip) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml) |
**注意:**
diff --git a/configs/ppvehicle/ppyoloe_plus_crn_t_p2_60e_ppvehicle.yml b/configs/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml
similarity index 85%
rename from configs/ppvehicle/ppyoloe_plus_crn_t_p2_60e_ppvehicle.yml
rename to configs/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml
index 815935f92f8..7ed888d7a4e 100644
--- a/configs/ppvehicle/ppyoloe_plus_crn_t_p2_60e_ppvehicle.yml
+++ b/configs/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml
@@ -3,14 +3,14 @@ _BASE_: [
'../runtime.yml',
'../ppyoloe/_base_/optimizer_300e.yml',
'../ppyoloe/_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
- '../ppyoloe/_base_/ppyoloe_plus_reader_tiny.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_reader_320.yml',
]
log_iter: 100
snapshot_epoch: 4
-weights: output/ppyoloe_plus_crn_tiny_60e_ppvehicle/model_final
+weights: output/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle/model_final
-pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_tiny_auxhead_300e_coco.pdparams
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams # 640*640 COCO mAP 39.7
depth_mult: 0.33
width_mult: 0.375
diff --git a/configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml b/configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml
new file mode 100644
index 00000000000..7fbf9bcdd0a
--- /dev/null
+++ b/configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml
@@ -0,0 +1,79 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../ppyoloe/_base_/optimizer_300e.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_reader_320.yml',
+]
+
+log_iter: 100
+snapshot_epoch: 4
+weights: output/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams # 320*320 COCO mAP 36.3
+depth_mult: 0.33
+width_mult: 0.375
+
+
+num_classes: 1
+TrainDataset:
+ !COCODataSet
+ image_dir: ""
+ anno_path: annotations/train_all.json
+ dataset_dir: dataset/ppvehicle
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+ allow_empty: true
+
+EvalDataset:
+ !COCODataSet
+ image_dir: ""
+ anno_path: annotations/val_all.json
+ dataset_dir: dataset/ppvehicle
+
+TestDataset:
+ !ImageFolder
+ anno_path: annotations/val_all.json
+ dataset_dir: dataset/ppvehicle
+
+
+TrainReader:
+ batch_size: 8
+
+
+epoch: 60
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !CosineDecay
+ max_epochs: 72
+ - !LinearWarmup
+ start_factor: 0.
+ epochs: 1
+
+
+architecture: PPYOLOEWithAuxHead
+PPYOLOEWithAuxHead:
+ backbone: CSPResNet
+ neck: CustomCSPPAN
+ yolo_head: PPYOLOEHead
+ aux_head: SimpleConvHead
+ post_process: ~
+
+CSPResNet:
+ return_idx: [0, 1, 2, 3] # index 0 stands for P2
+
+CustomCSPPAN:
+ out_channels: [384, 384, 384, 384]
+
+SimpleConvHead:
+ fpn_strides: [32, 16, 8, 4]
+
+PPYOLOEHead:
+ fpn_strides: [32, 16, 8, 4]
+ static_assigner_epoch: -1
+ nms:
+ name: MultiClassNMS
+ nms_top_k: 1000
+ keep_top_k: 300
+ score_threshold: 0.01
+ nms_threshold: 0.7
diff --git a/configs/ppyoloe/README.md b/configs/ppyoloe/README.md
index 258ea4c0a8e..f7d876f6766 100644
--- a/configs/ppyoloe/README.md
+++ b/configs/ppyoloe/README.md
@@ -48,9 +48,9 @@ PP-YOLOE is composed of following methods:
| Model | Epoch | GPU number | images/GPU | backbone | input shape | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config |
|:--------------:|:-----:|:-------:|:----------:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
-| PP-YOLOE+_t(aux)| 300 | 8 | 8 | cspresnet-t | 640 | 39.5 | 51.7 | 4.85 | 19.15 | - | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
-| PP-YOLOE-t-P2 | 300 | 8 | 8 | cspresnet-t | 320 | 34.7 | 50.0 | 6.82 | 4.78 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_t_p2_300e_coco.pdparams) | [config](./ppyoloe_crn_t_p2_300e_coco.yml) |
-| PP-YOLOE+_t-P2(aux) | 300 | 8 | 8 | cspresnet-t | 320 | 36.3 | 51.7 | 6.00 | 15.46 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_p2_auxhead_300e_coco.yml) |
+| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | - | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
+| PP-YOLOE-t-P2(320) | 300 | 8 | 8 | cspresnet-t | 320 | 34.7 | 50.0 | 6.82 | 4.78 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_t_p2_320_300e_coco.pdparams) | [config](./ppyoloe_crn_t_p2_320_300e_coco.yml) |
+| PP-YOLOE+_t-P2-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 36.3 | 51.7 | 6.00 | 15.46 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml) |
### Comprehensive Metrics
diff --git a/configs/ppyoloe/README_cn.md b/configs/ppyoloe/README_cn.md
index a599a8e582a..43abf385182 100644
--- a/configs/ppyoloe/README_cn.md
+++ b/configs/ppyoloe/README_cn.md
@@ -47,9 +47,9 @@ PP-YOLOE由以下方法组成
| 模型 | Epoch | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
|:---------------:|:-----:|:---------:|:--------:|:----------:|:----------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
-| PP-YOLOE+_t(aux)| 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | - | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
-| PP-YOLOE-t-P2 | 300 | 8 | 8 | cspresnet-t | 320 | 34.7 | 50.0 | 6.82 | 4.78 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_t_p2_300e_coco.pdparams) | [config](./ppyoloe_crn_t_p2_300e_coco.yml) |
-| PP-YOLOE+_t-P2(aux) | 300 | 8 | 8 | cspresnet-t | 320 | 36.3 | 51.7 | 6.00 | 15.46 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_p2_auxhead_300e_coco.yml) |
+| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | - | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
+| PP-YOLOE-t-P2(320) | 300 | 8 | 8 | cspresnet-t | 320 | 34.7 | 50.0 | 6.82 | 4.78 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_t_p2_320_300e_coco.pdparams) | [config](./ppyoloe_crn_t_p2_320_300e_coco.yml) |
+| PP-YOLOE+_t-P2-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 36.3 | 51.7 | 6.00 | 15.46 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml) |
### 综合指标
diff --git a/configs/ppyoloe/_base_/ppyoloe_plus_tiny_reader.yml b/configs/ppyoloe/_base_/ppyoloe_plus_reader_320.yml
similarity index 100%
rename from configs/ppyoloe/_base_/ppyoloe_plus_tiny_reader.yml
rename to configs/ppyoloe/_base_/ppyoloe_plus_reader_320.yml
diff --git a/configs/ppyoloe/distill/README.md b/configs/ppyoloe/distill/README.md
index 85b55ff5c7e..9f8761d83bd 100644
--- a/configs/ppyoloe/distill/README.md
+++ b/configs/ppyoloe/distill/README.md
@@ -8,11 +8,10 @@ PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logit
| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: |
| PP-YOLOE+_x | teacher | 640 | 80e | 54.7 | [config](../ppyoloe_plus_crn_x_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_x_80e_coco.pdparams) |
| PP-YOLOE+_l | student | 640 | 80e | 52.9 | [config](../ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) |
-| PP-YOLOE+_l | distill | 640 | 80e | 54.0(+1.1) | [config](./ppyoloe_plus_crn_l_80e_coco_distill.yml),[slim_config](../../slim/distill/ppyoloe_plus_distill_x_distill_l.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) |
+| PP-YOLOE+_l | distill | 640 | 80e | **54.0(+1.1)** | [config](./ppyoloe_plus_crn_l_80e_coco_distill.yml),[slim_config](../../slim/distill/ppyoloe_plus_distill_x_distill_l.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) |
| PP-YOLOE+_l | teacher | 640 | 80e | 52.9 | [config](../ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) |
| PP-YOLOE+_m | student | 640 | 80e | 49.8 | [config](../ppyoloe_plus_crn_m_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco.pdparams) |
-| PP-YOLOE+_m | distill | 640 | 80e | 50.7(+0.9) | [config](./ppyoloe_plus_crn_m_80e_coco_distill.yml),[slim_config](../../slim/distill/ppyoloe_plus_distill_l_distill_m.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) |
-
+| PP-YOLOE+_m | distill | 640 | 80e | **51.0(+1.2)** | [config](./ppyoloe_plus_crn_m_80e_coco_distill.yml),[slim_config](../../slim/distill/ppyoloe_plus_distill_l_distill_m.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) |
## 快速开始
diff --git a/configs/ppyoloe/ppyoloe_crn_t_p2_300e_coco.yml b/configs/ppyoloe/ppyoloe_crn_t_p2_320_300e_coco.yml
similarity index 94%
rename from configs/ppyoloe/ppyoloe_crn_t_p2_300e_coco.yml
rename to configs/ppyoloe/ppyoloe_crn_t_p2_320_300e_coco.yml
index b056ea7b962..37db828e07f 100644
--- a/configs/ppyoloe/ppyoloe_crn_t_p2_300e_coco.yml
+++ b/configs/ppyoloe/ppyoloe_crn_t_p2_320_300e_coco.yml
@@ -3,12 +3,12 @@ _BASE_: [
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/ppyoloe_crn.yml',
- './_base_/ppyoloe_plus_tiny_reader.yml', # 320*320
+ './_base_/ppyoloe_plus_reader_320.yml',
]
log_iter: 100
snapshot_epoch: 10
-weights: output/ppyoloe_crn_t_p2_300e_coco/model_final
+weights: output/ppyoloe_crn_t_p2_320_300e_coco/model_final
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
depth_mult: 0.33
diff --git a/configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_300e_coco.yml b/configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml
similarity index 91%
rename from configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_300e_coco.yml
rename to configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml
index 434b6f4249f..0aef4bcfebf 100644
--- a/configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_300e_coco.yml
+++ b/configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml
@@ -3,12 +3,12 @@ _BASE_: [
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
- './_base_/ppyoloe_plus_tiny_reader.yml', # 320*320
+ './_base_/ppyoloe_plus_reader_320.yml',
]
log_iter: 100
snapshot_epoch: 10
-weights: output/ppyoloe_plus_crn_t_p2_auxhead_300e_coco/model_final
+weights: output/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco/model_final
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
depth_mult: 0.33
diff --git a/ppdet/modeling/heads/ppyoloe_head.py b/ppdet/modeling/heads/ppyoloe_head.py
index 60d7bbc2f83..261c0c4933b 100644
--- a/ppdet/modeling/heads/ppyoloe_head.py
+++ b/ppdet/modeling/heads/ppyoloe_head.py
@@ -37,8 +37,10 @@ def __init__(self, feat_channels, act='swish', attn_conv='convbn'):
self.fc = nn.Conv2D(feat_channels, feat_channels, 1)
if attn_conv == 'convbn':
self.conv = ConvBNLayer(feat_channels, feat_channels, 1, act=act)
- else:
+ elif attn_conv == 'repvgg':
self.conv = RepVggBlock(feat_channels, feat_channels, act=act)
+ else:
+ self.conv = None
self._init_weights()
def _init_weights(self):
@@ -46,7 +48,10 @@ def _init_weights(self):
def forward(self, feat, avg_feat):
weight = F.sigmoid(self.fc(avg_feat))
- return self.conv(feat * weight)
+ if self.conv:
+ return self.conv(feat * weight)
+ else:
+ return feat * weight
@register
@@ -526,7 +531,7 @@ def post_process(self, head_outs, scale_factor):
return pred_bboxes, pred_scores, None
else:
bbox_pred, bbox_num, nms_keep_idx = self.nms(pred_bboxes,
- pred_scores)
+ pred_scores)
return bbox_pred, bbox_num, nms_keep_idx
From 5ba111900b1bd5c6b582c1713bb141a493501ca4 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Mon, 13 Feb 2023 19:02:52 +0800
Subject: [PATCH 004/116] update ppyoloe tiny relu model (#7747)
---
configs/pphuman/README.md | 9 ++-
..._plus_crn_t_p2_auxhead_320_60e_pphuman.yml | 78 ------------------
configs/ppvehicle/README.md | 4 +-
...lus_crn_t_p2_auxhead_320_60e_ppvehicle.yml | 79 ------------------
configs/ppyoloe/README_cn.md | 4 +-
.../ppyoloe_crn_t_p2_320_300e_coco.yml | 81 -------------------
.../ppyoloe_plus_crn_t_auxhead_300e_coco.yml | 26 +-----
...oloe_plus_crn_t_auxhead_relu_300e_coco.yml | 26 ++++++
...oe_plus_crn_t_p2_auxhead_320_300e_coco.yml | 60 --------------
9 files changed, 35 insertions(+), 332 deletions(-)
delete mode 100644 configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml
delete mode 100644 configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml
delete mode 100644 configs/ppyoloe/ppyoloe_crn_t_p2_320_300e_coco.yml
create mode 100644 configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml
delete mode 100644 configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml
diff --git a/configs/pphuman/README.md b/configs/pphuman/README.md
index e267bbd73e0..e7bf9538c62 100644
--- a/configs/pphuman/README.md
+++ b/configs/pphuman/README.md
@@ -5,14 +5,15 @@
PaddleDetection团队提供了针对行人的基于PP-YOLOE的检测模型,用户可以下载模型进行使用。PP-Human中使用模型为业务数据集模型,我们同时提供CrowdHuman训练配置,可以使用开源数据进行训练。
其中整理后的COCO格式的CrowdHuman数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/crowdhuman.zip),检测类别仅一类 `pedestrian(1)`,原始数据集[下载链接](http://www.crowdhuman.org/download.html)。
+相关模型的部署模型均在[PP-Human](../../deploy/pipeline/)项目中使用。
+
| 模型 | 数据集 | mAPval
0.5:0.95 | mAPval
0.5 | 下载 | 配置文件 |
|:---------|:-------:|:------:|:------:| :----: | :------:|
|PP-YOLOE-s| CrowdHuman | 42.5 | 77.9 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_36e_crowdhuman.pdparams) | [配置文件](./ppyoloe_crn_s_36e_crowdhuman.yml) |
|PP-YOLOE-l| CrowdHuman | 48.0 | 81.9 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_36e_crowdhuman.pdparams) | [配置文件](./ppyoloe_crn_l_36e_crowdhuman.yml) |
-|PP-YOLOE-s| 业务数据集 | 53.2 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | [配置文件](./ppyoloe_crn_s_36e_pphuman.yml) |
-|PP-YOLOE-l| 业务数据集 | 57.8 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | [配置文件](./ppyoloe_crn_l_36e_pphuman.yml) |
-|PP-YOLOE+_t-P2-aux(320)| 业务数据集 | 49.8 | 85.0 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml) |
-|PP-YOLOE+_t-aux(320)| 业务数据集 | 45.7 | 81.2 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.zip) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml) |
+|PP-YOLOE-s| 业务数据集 | 53.2 | - | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_36e_pphuman.pdparams) | [配置文件](./ppyoloe_crn_s_36e_pphuman.yml) |
+|PP-YOLOE-l| 业务数据集 | 57.8 | - | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_36e_pphuman.pdparams) | [配置文件](./ppyoloe_crn_l_36e_pphuman.yml) |
+|PP-YOLOE+_t-aux(320)| 业务数据集 | 45.7 | 81.2 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.pdparams) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml) |
**注意:**
diff --git a/configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml b/configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml
deleted file mode 100644
index fd0605cfe9e..00000000000
--- a/configs/pphuman/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman.yml
+++ /dev/null
@@ -1,78 +0,0 @@
-_BASE_: [
- '../datasets/coco_detection.yml',
- '../runtime.yml',
- '../ppyoloe/_base_/optimizer_300e.yml',
- '../ppyoloe/_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
- '../ppyoloe/_base_/ppyoloe_plus_reader_320.yml',
-]
-
-log_iter: 100
-snapshot_epoch: 4
-weights: output/ppyoloe_plus_crn_t_p2_auxhead_320_60e_pphuman/model_final
-
-pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams # 320*320 COCO mAP 36.3
-depth_mult: 0.33
-width_mult: 0.375
-
-
-num_classes: 1
-TrainDataset:
- !COCODataSet
- image_dir: ""
- anno_path: annotations/train.json
- dataset_dir: dataset/pphuman
- data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
-
-EvalDataset:
- !COCODataSet
- image_dir: ""
- anno_path: annotations/val.json
- dataset_dir: dataset/pphuman
-
-TestDataset:
- !ImageFolder
- anno_path: annotations/val.json
- dataset_dir: dataset/pphuman
-
-
-TrainReader:
- batch_size: 8
-
-
-epoch: 60
-LearningRate:
- base_lr: 0.001
- schedulers:
- - !CosineDecay
- max_epochs: 72
- - !LinearWarmup
- start_factor: 0.
- epochs: 1
-
-
-architecture: PPYOLOEWithAuxHead
-PPYOLOEWithAuxHead:
- backbone: CSPResNet
- neck: CustomCSPPAN
- yolo_head: PPYOLOEHead
- aux_head: SimpleConvHead
- post_process: ~
-
-CSPResNet:
- return_idx: [0, 1, 2, 3] # index 0 stands for P2
-
-CustomCSPPAN:
- out_channels: [384, 384, 384, 384]
-
-SimpleConvHead:
- fpn_strides: [32, 16, 8, 4]
-
-PPYOLOEHead:
- fpn_strides: [32, 16, 8, 4]
- static_assigner_epoch: -1
- nms:
- name: MultiClassNMS
- nms_top_k: 1000
- keep_top_k: 300
- score_threshold: 0.01
- nms_threshold: 0.7
diff --git a/configs/ppvehicle/README.md b/configs/ppvehicle/README.md
index 0c09ce7bbbe..71a21e15e3c 100644
--- a/configs/ppvehicle/README.md
+++ b/configs/ppvehicle/README.md
@@ -9,6 +9,7 @@ PaddleDetection团队提供了针对自动驾驶场景的基于PP-YOLOE的检测
- PPVehicle9cls数据集整合了BDD100K-MOT和UA-DETRAC,具体类别为9类,包括`pedestrian(1), rider(2), car(3), truck(4), bus(5), van(6), motorcycle(7), bicycle(8), others(9)`。
- PPVehicle数据集整合了BDD100K-MOT和UA-DETRAC,是将BDD100K-MOT中的`car, truck, bus, van`和UA-DETRAC中的`car, bus, van`都合并为1类`vehicle(1)`后的数据集。
+相关模型的部署模型均在[PP-Vehicle](../../deploy/pipeline/)项目中使用。
| 模型 | 数据集 | 类别数 | mAPval
0.5:0.95 | 下载链接 | 配置文件 |
|:---------|:---------------:|:------:|:-----------------------:|:---------:| :-----: |
@@ -19,8 +20,7 @@ PaddleDetection团队提供了针对自动驾驶场景的基于PP-YOLOE的检测
|PP-YOLOE-s| PPVehicle9cls | 9 | 35.3 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle9cls.pdparams) | [配置文件](./mot_ppyoloe_s_36e_ppvehicle9cls.yml) |
|PP-YOLOE-l| PPVehicle | 1 | 63.9 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_l_36e_ppvehicle.pdparams) | [配置文件](./mot_ppyoloe_l_36e_ppvehicle.yml) |
|PP-YOLOE-s| PPVehicle | 1 | 61.3 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle.pdparams) | [配置文件](./mot_ppyoloe_s_36e_ppvehicle.yml) |
-|PP-YOLOE+_t-P2-aux(320)| PPVehicle | 1 | 58.2 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.zip) | [配置文件](./ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml) |
-|PP-YOLOE+_t-aux(320)| PPVehicle | 1 | 53.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.zip) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml) |
+|PP-YOLOE+_t-aux(320)| PPVehicle | 1 | 53.5 | [下载链接](https://paddledet.bj.bcebos.com/models/pipeline/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.pdparams) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml) |
**注意:**
diff --git a/configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml b/configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml
deleted file mode 100644
index 7fbf9bcdd0a..00000000000
--- a/configs/ppvehicle/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle.yml
+++ /dev/null
@@ -1,79 +0,0 @@
-_BASE_: [
- '../datasets/coco_detection.yml',
- '../runtime.yml',
- '../ppyoloe/_base_/optimizer_300e.yml',
- '../ppyoloe/_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
- '../ppyoloe/_base_/ppyoloe_plus_reader_320.yml',
-]
-
-log_iter: 100
-snapshot_epoch: 4
-weights: output/ppyoloe_plus_crn_t_p2_auxhead_320_60e_ppvehicle/model_final
-
-pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams # 320*320 COCO mAP 36.3
-depth_mult: 0.33
-width_mult: 0.375
-
-
-num_classes: 1
-TrainDataset:
- !COCODataSet
- image_dir: ""
- anno_path: annotations/train_all.json
- dataset_dir: dataset/ppvehicle
- data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
- allow_empty: true
-
-EvalDataset:
- !COCODataSet
- image_dir: ""
- anno_path: annotations/val_all.json
- dataset_dir: dataset/ppvehicle
-
-TestDataset:
- !ImageFolder
- anno_path: annotations/val_all.json
- dataset_dir: dataset/ppvehicle
-
-
-TrainReader:
- batch_size: 8
-
-
-epoch: 60
-LearningRate:
- base_lr: 0.001
- schedulers:
- - !CosineDecay
- max_epochs: 72
- - !LinearWarmup
- start_factor: 0.
- epochs: 1
-
-
-architecture: PPYOLOEWithAuxHead
-PPYOLOEWithAuxHead:
- backbone: CSPResNet
- neck: CustomCSPPAN
- yolo_head: PPYOLOEHead
- aux_head: SimpleConvHead
- post_process: ~
-
-CSPResNet:
- return_idx: [0, 1, 2, 3] # index 0 stands for P2
-
-CustomCSPPAN:
- out_channels: [384, 384, 384, 384]
-
-SimpleConvHead:
- fpn_strides: [32, 16, 8, 4]
-
-PPYOLOEHead:
- fpn_strides: [32, 16, 8, 4]
- static_assigner_epoch: -1
- nms:
- name: MultiClassNMS
- nms_top_k: 1000
- keep_top_k: 300
- score_threshold: 0.01
- nms_threshold: 0.7
diff --git a/configs/ppyoloe/README_cn.md b/configs/ppyoloe/README_cn.md
index 43abf385182..87caab958bc 100644
--- a/configs/ppyoloe/README_cn.md
+++ b/configs/ppyoloe/README_cn.md
@@ -48,9 +48,7 @@ PP-YOLOE由以下方法组成
| 模型 | Epoch | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
|:---------------:|:-----:|:---------:|:--------:|:----------:|:----------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | - | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
-| PP-YOLOE-t-P2(320) | 300 | 8 | 8 | cspresnet-t | 320 | 34.7 | 50.0 | 6.82 | 4.78 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_t_p2_320_300e_coco.pdparams) | [config](./ppyoloe_crn_t_p2_320_300e_coco.yml) |
-| PP-YOLOE+_t-P2-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 36.3 | 51.7 | 6.00 | 15.46 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml) |
-
+| PP-YOLOE+_t-aux(640)-relu | 300 | 8 | 8 | cspresnet-t | 640 | 36.5 | 53.1 | 3.60 | 12.17 | - | 476.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml) |
### 综合指标
| 模型 | Epoch | AP0.5:0.95 | AP0.5 | AP0.75 | APsmall | APmedium | APlarge | ARsmall | ARmedium | ARlarge |
diff --git a/configs/ppyoloe/ppyoloe_crn_t_p2_320_300e_coco.yml b/configs/ppyoloe/ppyoloe_crn_t_p2_320_300e_coco.yml
deleted file mode 100644
index 37db828e07f..00000000000
--- a/configs/ppyoloe/ppyoloe_crn_t_p2_320_300e_coco.yml
+++ /dev/null
@@ -1,81 +0,0 @@
-_BASE_: [
- '../datasets/coco_detection.yml',
- '../runtime.yml',
- './_base_/optimizer_300e.yml',
- './_base_/ppyoloe_crn.yml',
- './_base_/ppyoloe_plus_reader_320.yml',
-]
-
-log_iter: 100
-snapshot_epoch: 10
-weights: output/ppyoloe_crn_t_p2_320_300e_coco/model_final
-
-pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
-depth_mult: 0.33
-width_mult: 0.375
-
-
-CSPResNet:
- return_idx: [0, 1, 2, 3]
-
-CustomCSPPAN:
- out_channels: [768, 384, 192, 96]
-
-PPYOLOEHead:
- fpn_strides: [32, 16, 8, 4]
- attn_conv: 'repvgg' #
- assigner:
- name: TaskAlignedAssigner
- topk: 13
- alpha: 1.0
- beta: 6.0
- is_close_gt: True #
- nms:
- name: MultiClassNMS
- nms_top_k: 1000
- keep_top_k: 300
- score_threshold: 0.01
- nms_threshold: 0.7
-
-
-worker_num: 4
-eval_height: &eval_height 320
-eval_width: &eval_width 320
-eval_size: &eval_size [*eval_height, *eval_width]
-
-TrainReader:
- sample_transforms:
- - Decode: {}
- - RandomDistort: {}
- - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- - RandomCrop: {}
- - RandomFlip: {}
- batch_transforms:
- - BatchRandomResize: {target_size: [224, 256, 288, 320, 352, 384, 416, 448, 480, 512, 544], random_size: True, random_interp: True, keep_ratio: False}
- - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- - Permute: {}
- - PadGT: {}
- batch_size: 8
- shuffle: true
- drop_last: true
- use_shared_memory: true
- collate_batch: true
-
-EvalReader:
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
- - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- - Permute: {}
- batch_size: 2
-
-TestReader:
- inputs_def:
- image_shape: [3, *eval_height, *eval_width]
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
- - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- - Permute: {}
- batch_size: 1
- fuse_normalize: True
diff --git a/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_300e_coco.yml b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_300e_coco.yml
index e7477787d12..5884cc0f7af 100644
--- a/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_300e_coco.yml
+++ b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_300e_coco.yml
@@ -3,7 +3,7 @@ _BASE_: [
'../runtime.yml',
'./_base_/optimizer_300e.yml',
'./_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
- './_base_/ppyoloe_plus_reader.yml', # 640*640
+ './_base_/ppyoloe_plus_reader.yml',
]
log_iter: 100
@@ -13,27 +13,3 @@ weights: output/ppyoloe_plus_crn_t_auxhead_300e_coco/model_final
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
depth_mult: 0.33
width_mult: 0.375
-
-
-worker_num: 4
-eval_height: &eval_height 640
-eval_width: &eval_width 640
-eval_size: &eval_size [*eval_height, *eval_width]
-
-EvalReader:
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
- - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- - Permute: {}
- batch_size: 2
-
-TestReader:
- inputs_def:
- image_shape: [3, *eval_height, *eval_width]
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
- - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- - Permute: {}
- batch_size: 1
diff --git a/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml
new file mode 100644
index 00000000000..6822f188685
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/optimizer_300e.yml',
+ './_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
+ './_base_/ppyoloe_plus_reader.yml',
+]
+
+log_iter: 100
+snapshot_epoch: 10
+weights: output/ppyoloe_plus_crn_t_auxhead_relu_300e_coco/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
+depth_mult: 0.33
+width_mult: 0.375
+
+
+CSPResNet:
+ act: 'relu'
+
+CustomCSPPAN:
+ act: 'relu'
+
+PPYOLOEHead:
+ act: 'relu'
+ attn_conv: None
diff --git a/configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml b/configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml
deleted file mode 100644
index 0aef4bcfebf..00000000000
--- a/configs/ppyoloe/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-_BASE_: [
- '../datasets/coco_detection.yml',
- '../runtime.yml',
- './_base_/optimizer_300e.yml',
- './_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
- './_base_/ppyoloe_plus_reader_320.yml',
-]
-
-log_iter: 100
-snapshot_epoch: 10
-weights: output/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco/model_final
-
-pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
-depth_mult: 0.33
-width_mult: 0.375
-
-
-architecture: PPYOLOEWithAuxHead
-PPYOLOEWithAuxHead:
- backbone: CSPResNet
- neck: CustomCSPPAN
- yolo_head: PPYOLOEHead
- aux_head: SimpleConvHead
- post_process: ~
-
-CSPResNet:
- return_idx: [0, 1, 2, 3] # index 0 stands for P2
-
-CustomCSPPAN:
- out_channels: [384, 384, 384, 384]
-
-SimpleConvHead:
- fpn_strides: [32, 16, 8, 4]
-
-PPYOLOEHead:
- fpn_strides: [32, 16, 8, 4]
-
-
-worker_num: 4
-eval_height: &eval_height 320
-eval_width: &eval_width 320
-eval_size: &eval_size [*eval_height, *eval_width]
-
-EvalReader:
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
- - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- - Permute: {}
- batch_size: 2
-
-TestReader:
- inputs_def:
- image_shape: [3, *eval_height, *eval_width]
- sample_transforms:
- - Decode: {}
- - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
- - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- - Permute: {}
- batch_size: 1
From 26cce017d313a4c1a1b023cafccc3af33f6a3979 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Mon, 13 Feb 2023 19:12:43 +0800
Subject: [PATCH 005/116] [Pipeline] fix import Sequence for python 3.10
(#7739)
---
deploy/pipeline/docs/tutorials/ppvehicle_retrograde.md | 5 +++--
deploy/pipeline/docs/tutorials/ppvehicle_retrograde_en.md | 5 +++--
deploy/pipeline/pipeline.py | 6 +++++-
deploy/pipeline/pphuman/action_infer.py | 5 ++++-
deploy/pipeline/pphuman/video_action_infer.py | 5 ++++-
deploy/pipeline/pphuman/video_action_preprocess.py | 5 ++++-
deploy/pipeline/ppvehicle/vehicle_attr.py | 5 ++++-
7 files changed, 27 insertions(+), 9 deletions(-)
diff --git a/deploy/pipeline/docs/tutorials/ppvehicle_retrograde.md b/deploy/pipeline/docs/tutorials/ppvehicle_retrograde.md
index 57ba922ff35..8f229d16ac7 100644
--- a/deploy/pipeline/docs/tutorials/ppvehicle_retrograde.md
+++ b/deploy/pipeline/docs/tutorials/ppvehicle_retrograde.md
@@ -72,7 +72,7 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_ppv
#预测包含一个或多个视频的文件夹
python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_ppvehicle.yml \
-o VEHICLE_RETROGRADE.enable=true \
- --video_dir=test_video.mp4\
+ --video_dir=test_video \
--device=gpu
```
@@ -97,7 +97,8 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_ppv
**注意:**
- 车道线中间线自动判断条件:在采样的视频段内同时有两个相反方向的车辆,且判断一次后固定,不再更新;
- - 因摄像头角度以及2d视角问题,车道线中间线判断存在不准确情况,可在配置文件手动输入中间线坐标
+ - 因摄像头角度以及2d视角问题,车道线中间线判断存在不准确情况;
+ - 可在配置文件手动输入中间线坐标,参考[车辆违章配置文件](../../config/examples/infer_cfg_vehicle_violation.yml)
## 方案说明
diff --git a/deploy/pipeline/docs/tutorials/ppvehicle_retrograde_en.md b/deploy/pipeline/docs/tutorials/ppvehicle_retrograde_en.md
index 457f84bfa20..650efe199a1 100644
--- a/deploy/pipeline/docs/tutorials/ppvehicle_retrograde_en.md
+++ b/deploy/pipeline/docs/tutorials/ppvehicle_retrograde_en.md
@@ -70,7 +70,7 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_ppv
#For folder contains one or multiple videos
python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_ppvehicle.yml \
-o VEHICLE_RETROGRADE.enable=true \
- --video_dir=test_video.mp4\
+ --video_dir=test_video \
--device=gpu
```
@@ -95,7 +95,8 @@ The result is shown as follow:
**Note:**
- Automatic judgment condition of lane line middle line: there are two vehicles in opposite directions in the sampled video segment, and the judgment is fixed after one time and will not be updated;
- - Due to camera angle and 2d visual angle problems, the judgment of lane line middle line is inaccurate. You can manually enter the middle line coordinates in the configuration file
+ - Due to camera angle and 2d visual angle problems, the judgment of lane line middle line is inaccurate.
+ - You can manually enter the middle line coordinates in the configuration file. See [infer_cfg_vehicle_violation.yml](../../config/examples/infer_cfg_vehicle_violation.yml) for an example.
## Features to the Solution
diff --git a/deploy/pipeline/pipeline.py b/deploy/pipeline/pipeline.py
index 04fcfc28be2..3407f479e82 100644
--- a/deploy/pipeline/pipeline.py
+++ b/deploy/pipeline/pipeline.py
@@ -24,8 +24,12 @@
import threading
import queue
import time
-from collections import Sequence, defaultdict
+from collections import defaultdict
from datacollector import DataCollector, Result
+try:
+ from collections.abc import Sequence
+except Exception:
+ from collections import Sequence
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
diff --git a/deploy/pipeline/pphuman/action_infer.py b/deploy/pipeline/pphuman/action_infer.py
index b04bd3f881b..45c04ad5198 100644
--- a/deploy/pipeline/pphuman/action_infer.py
+++ b/deploy/pipeline/pphuman/action_infer.py
@@ -21,7 +21,10 @@
import math
import paddle
import sys
-from collections import Sequence
+try:
+ from collections.abc import Sequence
+except Exception:
+ from collections import Sequence
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
diff --git a/deploy/pipeline/pphuman/video_action_infer.py b/deploy/pipeline/pphuman/video_action_infer.py
index e5e6c10eccd..6a10355f385 100644
--- a/deploy/pipeline/pphuman/video_action_infer.py
+++ b/deploy/pipeline/pphuman/video_action_infer.py
@@ -21,8 +21,11 @@
import math
import paddle
import sys
-from collections import Sequence
import paddle.nn.functional as F
+try:
+ from collections.abc import Sequence
+except Exception:
+ from collections import Sequence
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
diff --git a/deploy/pipeline/pphuman/video_action_preprocess.py b/deploy/pipeline/pphuman/video_action_preprocess.py
index f6f9f11f7ae..eccec048dbe 100644
--- a/deploy/pipeline/pphuman/video_action_preprocess.py
+++ b/deploy/pipeline/pphuman/video_action_preprocess.py
@@ -14,9 +14,12 @@
import cv2
import numpy as np
-from collections.abc import Sequence
from PIL import Image
import paddle
+try:
+ from collections.abc import Sequence
+except Exception:
+ from collections import Sequence
class Sampler(object):
diff --git a/deploy/pipeline/ppvehicle/vehicle_attr.py b/deploy/pipeline/ppvehicle/vehicle_attr.py
index 4f7923f61e3..eb1b9423b64 100644
--- a/deploy/pipeline/ppvehicle/vehicle_attr.py
+++ b/deploy/pipeline/ppvehicle/vehicle_attr.py
@@ -21,7 +21,10 @@
import math
import paddle
import sys
-from collections import Sequence
+try:
+ from collections.abc import Sequence
+except Exception:
+ from collections import Sequence
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 3)))
From 523941e5d66a7f3b5b0005c81a1dd0f0b4a820d2 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Mon, 13 Feb 2023 19:51:23 +0800
Subject: [PATCH 006/116] fix distill codes and docs (#7741)
* fix distill codes and docs
* fix slim distill docs
* fix docs
* add details of docs, test=document_fix
---
configs/ppyoloe/distill/README.md | 2 +-
configs/slim/distill/README.md | 153 +++++++++----
ppdet/modeling/heads/__init__.py | 2 -
ppdet/modeling/heads/gfl_head.py | 299 ++++++++++++++++++++++++-
ppdet/modeling/heads/ld_gfl_head.py | 330 ----------------------------
ppdet/slim/distill_loss.py | 14 +-
ppdet/slim/distill_model.py | 23 +-
7 files changed, 432 insertions(+), 391 deletions(-)
delete mode 100644 ppdet/modeling/heads/ld_gfl_head.py
diff --git a/configs/ppyoloe/distill/README.md b/configs/ppyoloe/distill/README.md
index 9f8761d83bd..868d70b8880 100644
--- a/configs/ppyoloe/distill/README.md
+++ b/configs/ppyoloe/distill/README.md
@@ -1,6 +1,6 @@
# PPYOLOE+ Distillation(PPYOLOE+ 蒸馏)
-PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logits蒸馏和feature蒸馏。
+PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logits蒸馏和feature蒸馏。更多蒸馏方案可以查看[slim/distill](../../slim/distill/)。
## 模型库
diff --git a/configs/slim/distill/README.md b/configs/slim/distill/README.md
index 6ffdf50dad2..97c93fcc42d 100644
--- a/configs/slim/distill/README.md
+++ b/configs/slim/distill/README.md
@@ -1,5 +1,13 @@
# Distillation(蒸馏)
+## 内容
+- [YOLOv3模型蒸馏](#YOLOv3模型蒸馏)
+- [FGD模型蒸馏](#FGD模型蒸馏)
+- [CWD模型蒸馏](#CWD模型蒸馏)
+- [LD模型蒸馏](#LD模型蒸馏)
+- [PPYOLOE模型蒸馏](#PPYOLOE模型蒸馏)
+- [引用](#引用)
+
## YOLOv3模型蒸馏
以YOLOv3-MobileNetV1为例,使用YOLOv3-ResNet34作为蒸馏训练的teacher网络, 对YOLOv3-MobileNetV1结构的student网络进行蒸馏。
@@ -12,6 +20,25 @@ COCO数据集作为目标检测任务的训练目标难度更大,意味着teac
| YOLOv3-MobileNetV1 | student | 608 | 270e | 29.4 | [config](../../yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) |
| YOLOv3-MobileNetV1 | distill | 608 | 270e | 31.0(+1.6) | [config](../../yolov3/yolov3_mobilenet_v1_270e_coco.yml),[slim_config](./yolov3_mobilenet_v1_coco_distill.yml) | [download](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams) |
+
+ 快速开始
+
+```shell
+# 单卡训练(不推荐)
+python tools/train.py -c configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml --slim_config configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml
+# 多卡训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml --slim_config configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml
+# 评估
+python tools/eval.py -c configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams
+# 预测
+python tools/infer.py -c configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams --infer_img=demo/000000014439_640x640.jpg
+```
+
+- `-c`: 指定模型配置文件,也是student配置文件。
+- `--slim_config`: 指定压缩策略配置文件,也是teacher配置文件。
+
+
+
## FGD模型蒸馏
@@ -24,16 +51,24 @@ FGD全称为[Focal and Global Knowledge Distillation for Detectors](https://arxi
| RetinaNet-ResNet50 | student | 1333x800 | 2x | 39.1 | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco.pdparams) |
| RetinaNet-ResNet50 | FGD | 1333x800 | 2x | 40.8(+1.7) | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml),[slim_config](./retinanet_resnet101_coco_distill.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r101_distill_r50_2x_coco.pdparams) |
+
+ 快速开始
-## LD模型蒸馏
+```shell
+# 单卡训练(不推荐)
+python tools/train.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml --slim_config configs/slim/distill/retinanet_resnet101_coco_distill.yml
+# 多卡训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml --slim_config configs/slim/distill/retinanet_resnet101_coco_distill.yml
+# 评估
+python tools/eval.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/retinanet_r101_distill_r50_2x_coco.pdparams
+# 预测
+python tools/infer.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/retinanet_r101_distill_r50_2x_coco.pdparams --infer_img=demo/000000014439_640x640.jpg
+```
-LD全称为[Localization Distillation for Dense Object Detection](https://arxiv.org/abs/2102.12252),将回归框表示为概率分布,把分类任务的KD用在定位任务上,并且使用因地制宜、分而治之的策略,在不同的区域分别学习分类知识与定位知识。在PaddleDetection中,我们实现了LD算法,并基于GFL模型进行验证,实验结果如下:
+- `-c`: 指定模型配置文件,也是student配置文件。
+- `--slim_config`: 指定压缩策略配置文件,也是teacher配置文件。
-| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 |
-| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: |
-| GFL_ResNet101-vd| teacher | 1333x800 | 2x | 46.8 | [config](../../gfl/gfl_r101vd_fpn_mstrain_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_2x_coco.pdparams) |
-| GFL_ResNet18-vd | student | 1333x800 | 1x | 36.6 | [config](../../gfl/gfl_r18vd_1x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r18vd_1x_coco.pdparams) |
-| GFL_ResNet18-vd | LD | 1333x800 | 1x | 38.2(+1.6) | [config](../../gfl/gfl_slim_ld_r18vd_1x_coco.yml),[slim_config](./gfl_ld_distill.yml) | [download](https://bj.bcebos.com/v1/paddledet/models/gfl_slim_ld_r18vd_1x_coco.pdparams) |
+
## CWD模型蒸馏
@@ -44,60 +79,104 @@ CWD全称为[Channel-wise Knowledge Distillation for Dense Prediction*](https://
| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: |
| RetinaNet-ResNet101| teacher | 1333x800 | 2x | 40.6 | [config](../../retinanet/retinanet_r101_fpn_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_coco.pdparams) |
| RetinaNet-ResNet50 | student | 1333x800 | 2x | 39.1 | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco.pdparams) |
-| RetinaNet-ResNet50 | CWD | 1333x800 | 2x | 40.5(+1.4) | [config](../../retinanet/retinanet_r50_fpn_2x_coco_cwd.yml),[slim_config](./retinanet_resnet101_coco_distill_cwd.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_cwd.pdparams) |
+| RetinaNet-ResNet50 | CWD | 1333x800 | 2x | 40.5(+1.4) | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml),[slim_config](./retinanet_resnet101_coco_distill_cwd.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_cwd.pdparams) |
| GFL_ResNet101-vd| teacher | 1333x800 | 2x | 46.8 | [config](../../gfl/gfl_r101vd_fpn_mstrain_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_2x_coco.pdparams) |
| GFL_ResNet50 | student | 1333x800 | 1x | 41.0 | [config](../../gfl/gfl_r50_fpn_1x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_1x_coco.pdparams) |
-| GFL_ResNet50 | LD | 1333x800 | 2x | 44.0(+3.0) | [config](../../gfl/gfl_r50_fpn_2x_coco_cwd.yml),[slim_config](./gfl_r101vd_fpn_coco_distill_cwd.yml) | [download](https://bj.bcebos.com/v1/paddledet/models/gfl_r50_fpn_2x_coco_cwd.pdparams) |
+| GFL_ResNet50 | CWD | 1333x800 | 2x | 44.0(+3.0) | [config](../../gfl/gfl_r50_fpn_1x_coco.yml),[slim_config](./gfl_r101vd_fpn_coco_distill_cwd.yml) | [download](https://bj.bcebos.com/v1/paddledet/models/gfl_r50_fpn_2x_coco_cwd.pdparams) |
+
+
+ 快速开始
+
+```shell
+# 单卡训练(不推荐)
+python tools/train.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml --slim_config configs/slim/distill/retinanet_resnet101_coco_distill_cwd.yml
+# 多卡训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml --slim_config configs/slim/distill/retinanet_resnet101_coco_distill_cwd.yml
+# 评估
+python tools/eval.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_cwd.pdparams
+# 预测
+python tools/infer.py -c configs/retinanet/retinanet_r50_fpn_2x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_cwd.pdparams --infer_img=demo/000000014439_640x640.jpg
+
+# 单卡训练(不推荐)
+python tools/train.py -c configs/gfl/gfl_r50_fpn_1x_coco.yml --slim_config configs/slim/distill/gfl_r101vd_fpn_coco_distill_cwd.yml
+# 多卡训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/gfl/gfl_r50_fpn_1x_coco.yml --slim_config configs/slim/distill/gfl_r101vd_fpn_coco_distill_cwd.yml
+# 评估
+python tools/eval.py -c configs/gfl/gfl_r50_fpn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_2x_coco_cwd.pdparams
+# 预测
+python tools/infer.py -c configs/gfl/gfl_r50_fpn_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_2x_coco_cwd.pdparams --infer_img=demo/000000014439_640x640.jpg
+```
+- `-c`: 指定模型配置文件,也是student配置文件。
+- `--slim_config`: 指定压缩策略配置文件,也是teacher配置文件。
-## PPYOLOE+ 模型蒸馏
+
-PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logits蒸馏和feature蒸馏。
+
+## LD模型蒸馏
+
+LD全称为[Localization Distillation for Dense Object Detection](https://arxiv.org/abs/2102.12252),将回归框表示为概率分布,把分类任务的KD用在定位任务上,并且使用因地制宜、分而治之的策略,在不同的区域分别学习分类知识与定位知识。在PaddleDetection中,我们实现了LD算法,并基于GFL模型进行验证,实验结果如下:
| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 |
| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: |
-| PP-YOLOE+_x | teacher | 640 | 80e | 54.7 | [config](../../ppyoloe/ppyoloe_plus_crn_x_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_x_80e_coco.pdparams) |
-| PP-YOLOE+_l | student | 640 | 80e | 52.9 | [config](../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) |
-| PP-YOLOE+_l | distill | 640 | 80e | 53.9(+1.0) | [config](../../ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml),[slim_config](./ppyoloe_plus_distill_x_distill_l.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) |
-| PP-YOLOE+_l | teacher | 640 | 80e | 52.9 | [config](../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) |
-| PP-YOLOE+_m | student | 640 | 80e | 49.8 | [config](../../ppyoloe/ppyoloe_plus_crn_m_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco.pdparams) |
-| PP-YOLOE+_m | distill | 640 | 80e | 50.7(+0.9) | [config](../../ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml),[slim_config](./ppyoloe_plus_distill_l_distill_m.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) |
-
+| GFL_ResNet101-vd| teacher | 1333x800 | 2x | 46.8 | [config](../../gfl/gfl_r101vd_fpn_mstrain_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_2x_coco.pdparams) |
+| GFL_ResNet18-vd | student | 1333x800 | 1x | 36.6 | [config](../../gfl/gfl_r18vd_1x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r18vd_1x_coco.pdparams) |
+| GFL_ResNet18-vd | LD | 1333x800 | 1x | 38.2(+1.6) | [config](../../gfl/gfl_slim_ld_r18vd_1x_coco.yml),[slim_config](./gfl_ld_distill.yml) | [download](https://bj.bcebos.com/v1/paddledet/models/gfl_slim_ld_r18vd_1x_coco.pdparams) |
-## 快速开始
+
+ 快速开始
-### 训练
```shell
-# 单卡
-python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml
-# 多卡
-python -m paddle.distributed.launch --log_dir=ppyoloe_plus_distill_x_distill_l/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml
+# 单卡训练(不推荐)
+python tools/train.py -c configs/gfl/gfl_slim_ld_r18vd_1x_coco.yml --slim_config configs/slim/distill/gfl_ld_distill.yml
+# 多卡训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/gfl/gfl_slim_ld_r18vd_1x_coco.yml --slim_config configs/slim/distill/gfl_ld_distill.yml
+# 评估
+python tools/eval.py -c configs/gfl/gfl_slim_ld_r18vd_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/gfl_slim_ld_r18vd_1x_coco.pdparams
+# 预测
+python tools/infer.py -c configs/gfl/gfl_slim_ld_r18vd_1x_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/gfl_slim_ld_r18vd_1x_coco.pdparams --infer_img=demo/000000014439_640x640.jpg
```
- `-c`: 指定模型配置文件,也是student配置文件。
- `--slim_config`: 指定压缩策略配置文件,也是teacher配置文件。
-### 评估
+
+
+
+## PPYOLOE模型蒸馏
+
+PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logits蒸馏和feature蒸馏。
+
+| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 |
+| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: |
+| PP-YOLOE+_x | teacher | 640 | 80e | 54.7 | [config](../../ppyoloe/ppyoloe_plus_crn_x_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_x_80e_coco.pdparams) |
+| PP-YOLOE+_l | student | 640 | 80e | 52.9 | [config](../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) |
+| PP-YOLOE+_l | distill | 640 | 80e | **54.0(+1.1)** | [config](../../ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml),[slim_config](./ppyoloe_plus_distill_x_distill_l.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) |
+| PP-YOLOE+_l | teacher | 640 | 80e | 52.9 | [config](../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) |
+| PP-YOLOE+_m | student | 640 | 80e | 49.8 | [config](../../ppyoloe/ppyoloe_plus_crn_m_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco.pdparams) |
+| PP-YOLOE+_m | distill | 640 | 80e | **51.0(+1.2)** | [config](../../ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml),[slim_config](./ppyoloe_plus_distill_l_distill_m.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) |
+
+
+ 快速开始
+
```shell
-python tools/eval.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=output/ppyoloe_plus_crn_l_80e_coco_distill/model_final.pdparams
+# 单卡训练(不推荐)
+python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml
+# 多卡训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml
+# 评估
+python tools/eval.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams
+# 预测
+python tools/infer.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams --infer_img=demo/000000014439_640x640.jpg
```
- `-c`: 指定模型配置文件,也是student配置文件。
- `--slim_config`: 指定压缩策略配置文件,也是teacher配置文件。
-- `-o weights`: 指定压缩算法训好的模型路径。
-
-### 测试
-```shell
-python tools/infer.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=output/ppyoloe_plus_crn_l_80e_coco_distill/model_final.pdparams --infer_img=demo/000000014439_640x640.jpg
-```
-- `-c`: 指定模型配置文件。
-- `--slim_config`: 指定压缩策略配置文件。
-- `-o weights`: 指定压缩算法训好的模型路径。
-- `--infer_img`: 指定测试图像路径。
+
-## Citations
+## 引用
```
@article{mehta2018object,
title={Object detection at 200 Frames Per Second},
diff --git a/ppdet/modeling/heads/__init__.py b/ppdet/modeling/heads/__init__.py
index ecd15b2f139..9cceb268a7d 100644
--- a/ppdet/modeling/heads/__init__.py
+++ b/ppdet/modeling/heads/__init__.py
@@ -35,7 +35,6 @@
from . import ppyoloe_head
from . import fcosr_head
from . import ppyoloe_r_head
-from . import ld_gfl_head
from . import yolof_head
from . import ppyoloe_contrast_head
from . import centertrack_head
@@ -63,7 +62,6 @@
from .retina_head import *
from .ppyoloe_head import *
from .fcosr_head import *
-from .ld_gfl_head import *
from .ppyoloe_r_head import *
from .yolof_head import *
from .ppyoloe_contrast_head import *
diff --git a/ppdet/modeling/heads/gfl_head.py b/ppdet/modeling/heads/gfl_head.py
index a1f518da5cf..040a3f7090d 100644
--- a/ppdet/modeling/heads/gfl_head.py
+++ b/ppdet/modeling/heads/gfl_head.py
@@ -28,10 +28,11 @@
from paddle.nn.initializer import Normal, Constant
from ppdet.core.workspace import register
-from ppdet.modeling.layers import ConvNormLayer
from ppdet.modeling.bbox_utils import distance2bbox, bbox2distance, batch_distance2bbox
from ppdet.data.transform.atss_assigner import bbox_overlaps
+__all__ = ['GFLHead', 'LDGFLHead']
+
class ScaleReg(nn.Layer):
"""
@@ -437,3 +438,299 @@ def post_process(self, gfl_head_outs, im_shape, scale_factor):
mlvl_scores = mlvl_scores.transpose([0, 2, 1])
bbox_pred, bbox_num, _ = self.nms(bboxes, mlvl_scores)
return bbox_pred, bbox_num
+
+
+@register
+class LDGFLHead(GFLHead):
+ """
+ GFLHead for LD distill
+ Args:
+ conv_feat (object): Instance of 'FCOSFeat'
+ num_classes (int): Number of classes
+ fpn_stride (list): The stride of each FPN Layer
+ prior_prob (float): Used to set the bias init for the class prediction layer
+ loss_class (object): Instance of QualityFocalLoss.
+ loss_dfl (object): Instance of DistributionFocalLoss.
+ loss_bbox (object): Instance of bbox loss.
+ reg_max: Max value of integral set :math: `{0, ..., reg_max}`
+ n QFL setting. Default: 16.
+ """
+ __inject__ = [
+ 'conv_feat', 'dgqp_module', 'loss_class', 'loss_dfl', 'loss_bbox',
+ 'loss_ld', 'loss_ld_vlr', 'loss_kd', 'nms'
+ ]
+ __shared__ = ['num_classes']
+
+ def __init__(self,
+ conv_feat='FCOSFeat',
+ dgqp_module=None,
+ num_classes=80,
+ fpn_stride=[8, 16, 32, 64, 128],
+ prior_prob=0.01,
+ loss_class='QualityFocalLoss',
+ loss_dfl='DistributionFocalLoss',
+ loss_bbox='GIoULoss',
+ loss_ld='KnowledgeDistillationKLDivLoss',
+ loss_ld_vlr='KnowledgeDistillationKLDivLoss',
+ loss_kd='KnowledgeDistillationKLDivLoss',
+ reg_max=16,
+ feat_in_chan=256,
+ nms=None,
+ nms_pre=1000,
+ cell_offset=0):
+
+ super(LDGFLHead, self).__init__(
+ conv_feat=conv_feat,
+ dgqp_module=dgqp_module,
+ num_classes=num_classes,
+ fpn_stride=fpn_stride,
+ prior_prob=prior_prob,
+ loss_class=loss_class,
+ loss_dfl=loss_dfl,
+ loss_bbox=loss_bbox,
+ reg_max=reg_max,
+ feat_in_chan=feat_in_chan,
+ nms=nms,
+ nms_pre=nms_pre,
+ cell_offset=cell_offset)
+ self.loss_ld = loss_ld
+ self.loss_kd = loss_kd
+ self.loss_ld_vlr = loss_ld_vlr
+
+ def forward(self, fpn_feats):
+ assert len(fpn_feats) == len(
+ self.fpn_stride
+ ), "The size of fpn_feats is not equal to size of fpn_stride"
+ cls_logits_list = []
+ bboxes_reg_list = []
+ for stride, scale_reg, fpn_feat in zip(self.fpn_stride,
+ self.scales_regs, fpn_feats):
+ conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat)
+ cls_score = self.gfl_head_cls(conv_cls_feat)
+ bbox_pred = scale_reg(self.gfl_head_reg(conv_reg_feat))
+
+ if self.dgqp_module:
+ quality_score = self.dgqp_module(bbox_pred)
+ cls_score = F.sigmoid(cls_score) * quality_score
+ if not self.training:
+ cls_score = F.sigmoid(cls_score.transpose([0, 2, 3, 1]))
+ bbox_pred = bbox_pred.transpose([0, 2, 3, 1])
+ b, cell_h, cell_w, _ = paddle.shape(cls_score)
+ y, x = self.get_single_level_center_point(
+ [cell_h, cell_w], stride, cell_offset=self.cell_offset)
+ center_points = paddle.stack([x, y], axis=-1)
+ cls_score = cls_score.reshape([b, -1, self.cls_out_channels])
+ bbox_pred = self.distribution_project(bbox_pred) * stride
+ bbox_pred = bbox_pred.reshape([b, cell_h * cell_w, 4])
+
+ # NOTE: If keep_ratio=False and image shape value that
+ # multiples of 32, distance2bbox not set max_shapes parameter
+ # to speed up model prediction. If need to set max_shapes,
+ # please use inputs['im_shape'].
+ bbox_pred = batch_distance2bbox(
+ center_points, bbox_pred, max_shapes=None)
+
+ cls_logits_list.append(cls_score)
+ bboxes_reg_list.append(bbox_pred)
+
+ return (cls_logits_list, bboxes_reg_list)
+
+ def get_loss(self, gfl_head_outs, gt_meta, soft_label_list,
+ soft_targets_list):
+ cls_logits, bboxes_reg = gfl_head_outs
+
+ num_level_anchors = [
+ featmap.shape[-2] * featmap.shape[-1] for featmap in cls_logits
+ ]
+
+ grid_cells_list = self._images_to_levels(gt_meta['grid_cells'],
+ num_level_anchors)
+
+ labels_list = self._images_to_levels(gt_meta['labels'],
+ num_level_anchors)
+
+ label_weights_list = self._images_to_levels(gt_meta['label_weights'],
+ num_level_anchors)
+ bbox_targets_list = self._images_to_levels(gt_meta['bbox_targets'],
+ num_level_anchors)
+ # vlr regions
+ vlr_regions_list = self._images_to_levels(gt_meta['vlr_regions'],
+ num_level_anchors)
+
+ num_total_pos = sum(gt_meta['pos_num'])
+ try:
+ paddle.distributed.all_reduce(num_total_pos)
+ num_total_pos = paddle.clip(
+ num_total_pos / paddle.distributed.get_world_size(), min=1.)
+ except:
+ num_total_pos = max(num_total_pos, 1)
+
+ loss_bbox_list, loss_dfl_list, loss_qfl_list, loss_ld_list, avg_factor = [], [], [], [], []
+ loss_ld_vlr_list, loss_kd_list = [], []
+
+ for cls_score, bbox_pred, grid_cells, labels, label_weights, bbox_targets, stride, soft_targets,\
+ soft_label, vlr_region in zip(
+ cls_logits, bboxes_reg, grid_cells_list, labels_list,
+ label_weights_list, bbox_targets_list, self.fpn_stride, soft_targets_list,
+ soft_label_list, vlr_regions_list):
+
+ grid_cells = grid_cells.reshape([-1, 4])
+ cls_score = cls_score.transpose([0, 2, 3, 1]).reshape(
+ [-1, self.cls_out_channels])
+ bbox_pred = bbox_pred.transpose([0, 2, 3, 1]).reshape(
+ [-1, 4 * (self.reg_max + 1)])
+
+ soft_targets = soft_targets.transpose([0, 2, 3, 1]).reshape(
+ [-1, 4 * (self.reg_max + 1)])
+
+ soft_label = soft_label.transpose([0, 2, 3, 1]).reshape(
+ [-1, self.cls_out_channels])
+
+ # feture im
+ # teacher_x = teacher_x.transpose([0, 2, 3, 1]).reshape([-1, 256])
+ # x = x.transpose([0, 2, 3, 1]).reshape([-1, 256])
+
+ bbox_targets = bbox_targets.reshape([-1, 4])
+ labels = labels.reshape([-1])
+ label_weights = label_weights.reshape([-1])
+
+ vlr_region = vlr_region.reshape([-1])
+
+ bg_class_ind = self.num_classes
+ pos_inds = paddle.nonzero(
+ paddle.logical_and((labels >= 0), (labels < bg_class_ind)),
+ as_tuple=False).squeeze(1)
+ score = np.zeros(labels.shape)
+
+ remain_inds = (vlr_region > 0).nonzero()
+
+ if len(pos_inds) > 0:
+ pos_bbox_targets = paddle.gather(bbox_targets, pos_inds, axis=0)
+ pos_bbox_pred = paddle.gather(bbox_pred, pos_inds, axis=0)
+ pos_grid_cells = paddle.gather(grid_cells, pos_inds, axis=0)
+
+ pos_grid_cell_centers = self._grid_cells_to_center(
+ pos_grid_cells) / stride
+
+ weight_targets = F.sigmoid(cls_score.detach())
+ weight_targets = paddle.gather(
+ weight_targets.max(axis=1, keepdim=True), pos_inds, axis=0)
+ pos_bbox_pred_corners = self.distribution_project(pos_bbox_pred)
+ pos_decode_bbox_pred = distance2bbox(pos_grid_cell_centers,
+ pos_bbox_pred_corners)
+ pos_decode_bbox_targets = pos_bbox_targets / stride
+ bbox_iou = bbox_overlaps(
+ pos_decode_bbox_pred.detach().numpy(),
+ pos_decode_bbox_targets.detach().numpy(),
+ is_aligned=True)
+ score[pos_inds.numpy()] = bbox_iou
+ pred_corners = pos_bbox_pred.reshape([-1, self.reg_max + 1])
+
+ pos_soft_targets = paddle.gather(soft_targets, pos_inds, axis=0)
+ soft_corners = pos_soft_targets.reshape([-1, self.reg_max + 1])
+
+ target_corners = bbox2distance(pos_grid_cell_centers,
+ pos_decode_bbox_targets,
+ self.reg_max).reshape([-1])
+ # regression loss
+ loss_bbox = paddle.sum(
+ self.loss_bbox(pos_decode_bbox_pred,
+ pos_decode_bbox_targets) * weight_targets)
+
+ # dfl loss
+ loss_dfl = self.loss_dfl(
+ pred_corners,
+ target_corners,
+ weight=weight_targets.expand([-1, 4]).reshape([-1]),
+ avg_factor=4.0)
+
+ # ld loss
+ loss_ld = self.loss_ld(
+ pred_corners,
+ soft_corners,
+ weight=weight_targets.expand([-1, 4]).reshape([-1]),
+ avg_factor=4.0)
+
+ loss_kd = self.loss_kd(
+ paddle.gather(
+ cls_score, pos_inds, axis=0),
+ paddle.gather(
+ soft_label, pos_inds, axis=0),
+ weight=paddle.gather(
+ label_weights, pos_inds, axis=0),
+ avg_factor=pos_inds.shape[0])
+
+ else:
+ loss_bbox = bbox_pred.sum() * 0
+ loss_dfl = bbox_pred.sum() * 0
+ loss_ld = bbox_pred.sum() * 0
+ loss_kd = bbox_pred.sum() * 0
+ weight_targets = paddle.to_tensor([0], dtype='float32')
+
+ if len(remain_inds) > 0:
+ neg_pred_corners = bbox_pred[remain_inds].reshape(
+ [-1, self.reg_max + 1])
+ neg_soft_corners = soft_targets[remain_inds].reshape(
+ [-1, self.reg_max + 1])
+
+ remain_targets = vlr_region[remain_inds]
+
+ loss_ld_vlr = self.loss_ld_vlr(
+ neg_pred_corners,
+ neg_soft_corners,
+ weight=remain_targets.expand([-1, 4]).reshape([-1]),
+ avg_factor=16.0)
+ else:
+ loss_ld_vlr = bbox_pred.sum() * 0
+
+ # qfl loss
+ score = paddle.to_tensor(score)
+ loss_qfl = self.loss_qfl(
+ cls_score, (labels, score),
+ weight=label_weights,
+ avg_factor=num_total_pos)
+
+ loss_bbox_list.append(loss_bbox)
+ loss_dfl_list.append(loss_dfl)
+ loss_qfl_list.append(loss_qfl)
+ loss_ld_list.append(loss_ld)
+ loss_ld_vlr_list.append(loss_ld_vlr)
+ loss_kd_list.append(loss_kd)
+ avg_factor.append(weight_targets.sum())
+
+ avg_factor = sum(avg_factor) # + 1e-6
+ try:
+ paddle.distributed.all_reduce(avg_factor)
+ avg_factor = paddle.clip(
+ avg_factor / paddle.distributed.get_world_size(), min=1)
+ except:
+ avg_factor = max(avg_factor.item(), 1)
+
+ if avg_factor <= 0:
+ loss_qfl = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
+ loss_bbox = paddle.to_tensor(
+ 0, dtype='float32', stop_gradient=False)
+ loss_dfl = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
+ loss_ld = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
+ loss_ld_vlr = paddle.to_tensor(
+ 0, dtype='float32', stop_gradient=False)
+ loss_kd = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
+ else:
+ losses_bbox = list(map(lambda x: x / avg_factor, loss_bbox_list))
+ losses_dfl = list(map(lambda x: x / avg_factor, loss_dfl_list))
+ loss_qfl = sum(loss_qfl_list)
+ loss_bbox = sum(losses_bbox)
+ loss_dfl = sum(losses_dfl)
+ loss_ld = sum(loss_ld_list)
+ loss_ld_vlr = sum(loss_ld_vlr_list)
+ loss_kd = sum(loss_kd_list)
+
+ loss_states = dict(
+ loss_qfl=loss_qfl,
+ loss_bbox=loss_bbox,
+ loss_dfl=loss_dfl,
+ loss_ld=loss_ld,
+ loss_ld_vlr=loss_ld_vlr,
+ loss_kd=loss_kd)
+
+ return loss_states
diff --git a/ppdet/modeling/heads/ld_gfl_head.py b/ppdet/modeling/heads/ld_gfl_head.py
deleted file mode 100644
index dbff7ecbab0..00000000000
--- a/ppdet/modeling/heads/ld_gfl_head.py
+++ /dev/null
@@ -1,330 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# The code is based on:
-# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/dense_heads/ld_head.py
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import Normal, Constant
-
-from ppdet.core.workspace import register, serializable
-from ppdet.modeling.layers import ConvNormLayer
-from ppdet.modeling.bbox_utils import distance2bbox, bbox2distance, batch_distance2bbox
-from ppdet.data.transform.atss_assigner import bbox_overlaps
-from .gfl_head import GFLHead
-
-
-@register
-class LDGFLHead(GFLHead):
- """
- GFLHead for LD distill
- Args:
- conv_feat (object): Instance of 'FCOSFeat'
- num_classes (int): Number of classes
- fpn_stride (list): The stride of each FPN Layer
- prior_prob (float): Used to set the bias init for the class prediction layer
- loss_class (object): Instance of QualityFocalLoss.
- loss_dfl (object): Instance of DistributionFocalLoss.
- loss_bbox (object): Instance of bbox loss.
- reg_max: Max value of integral set :math: `{0, ..., reg_max}`
- n QFL setting. Default: 16.
- """
- __inject__ = [
- 'conv_feat', 'dgqp_module', 'loss_class', 'loss_dfl', 'loss_bbox',
- 'loss_ld', 'loss_ld_vlr', 'loss_kd', 'nms'
- ]
- __shared__ = ['num_classes']
-
- def __init__(self,
- conv_feat='FCOSFeat',
- dgqp_module=None,
- num_classes=80,
- fpn_stride=[8, 16, 32, 64, 128],
- prior_prob=0.01,
- loss_class='QualityFocalLoss',
- loss_dfl='DistributionFocalLoss',
- loss_bbox='GIoULoss',
- loss_ld='KnowledgeDistillationKLDivLoss',
- loss_ld_vlr='KnowledgeDistillationKLDivLoss',
- loss_kd='KnowledgeDistillationKLDivLoss',
- reg_max=16,
- feat_in_chan=256,
- nms=None,
- nms_pre=1000,
- cell_offset=0):
-
- super(LDGFLHead, self).__init__(
- conv_feat=conv_feat,
- dgqp_module=dgqp_module,
- num_classes=num_classes,
- fpn_stride=fpn_stride,
- prior_prob=prior_prob,
- loss_class=loss_class,
- loss_dfl=loss_dfl,
- loss_bbox=loss_bbox,
- reg_max=reg_max,
- feat_in_chan=feat_in_chan,
- nms=nms,
- nms_pre=nms_pre,
- cell_offset=cell_offset)
- self.loss_ld = loss_ld
- self.loss_kd = loss_kd
- self.loss_ld_vlr = loss_ld_vlr
-
- def forward(self, fpn_feats):
- assert len(fpn_feats) == len(
- self.fpn_stride
- ), "The size of fpn_feats is not equal to size of fpn_stride"
- cls_logits_list = []
- bboxes_reg_list = []
- for stride, scale_reg, fpn_feat in zip(self.fpn_stride,
- self.scales_regs, fpn_feats):
- conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat)
- cls_score = self.gfl_head_cls(conv_cls_feat)
- bbox_pred = scale_reg(self.gfl_head_reg(conv_reg_feat))
-
- if self.dgqp_module:
- quality_score = self.dgqp_module(bbox_pred)
- cls_score = F.sigmoid(cls_score) * quality_score
- if not self.training:
- cls_score = F.sigmoid(cls_score.transpose([0, 2, 3, 1]))
- bbox_pred = bbox_pred.transpose([0, 2, 3, 1])
- b, cell_h, cell_w, _ = paddle.shape(cls_score)
- y, x = self.get_single_level_center_point(
- [cell_h, cell_w], stride, cell_offset=self.cell_offset)
- center_points = paddle.stack([x, y], axis=-1)
- cls_score = cls_score.reshape([b, -1, self.cls_out_channels])
- bbox_pred = self.distribution_project(bbox_pred) * stride
- bbox_pred = bbox_pred.reshape([b, cell_h * cell_w, 4])
-
- # NOTE: If keep_ratio=False and image shape value that
- # multiples of 32, distance2bbox not set max_shapes parameter
- # to speed up model prediction. If need to set max_shapes,
- # please use inputs['im_shape'].
- bbox_pred = batch_distance2bbox(
- center_points, bbox_pred, max_shapes=None)
-
- cls_logits_list.append(cls_score)
- bboxes_reg_list.append(bbox_pred)
-
- return (cls_logits_list, bboxes_reg_list)
-
- def get_loss(self, gfl_head_outs, gt_meta, soft_label_list,
- soft_targets_list):
- cls_logits, bboxes_reg = gfl_head_outs
-
- num_level_anchors = [
- featmap.shape[-2] * featmap.shape[-1] for featmap in cls_logits
- ]
-
- grid_cells_list = self._images_to_levels(gt_meta['grid_cells'],
- num_level_anchors)
-
- labels_list = self._images_to_levels(gt_meta['labels'],
- num_level_anchors)
-
- label_weights_list = self._images_to_levels(gt_meta['label_weights'],
- num_level_anchors)
- bbox_targets_list = self._images_to_levels(gt_meta['bbox_targets'],
- num_level_anchors)
- # vlr regions
- vlr_regions_list = self._images_to_levels(gt_meta['vlr_regions'],
- num_level_anchors)
-
- num_total_pos = sum(gt_meta['pos_num'])
- try:
- paddle.distributed.all_reduce(num_total_pos)
- num_total_pos = paddle.clip(
- num_total_pos / paddle.distributed.get_world_size(), min=1.)
- except:
- num_total_pos = max(num_total_pos, 1)
-
- loss_bbox_list, loss_dfl_list, loss_qfl_list, loss_ld_list, avg_factor = [], [], [], [], []
- loss_ld_vlr_list, loss_kd_list = [], []
-
- for cls_score, bbox_pred, grid_cells, labels, label_weights, bbox_targets, stride, soft_targets,\
- soft_label, vlr_region in zip(
- cls_logits, bboxes_reg, grid_cells_list, labels_list,
- label_weights_list, bbox_targets_list, self.fpn_stride, soft_targets_list,
- soft_label_list, vlr_regions_list):
-
- grid_cells = grid_cells.reshape([-1, 4])
- cls_score = cls_score.transpose([0, 2, 3, 1]).reshape(
- [-1, self.cls_out_channels])
- bbox_pred = bbox_pred.transpose([0, 2, 3, 1]).reshape(
- [-1, 4 * (self.reg_max + 1)])
-
- soft_targets = soft_targets.transpose([0, 2, 3, 1]).reshape(
- [-1, 4 * (self.reg_max + 1)])
-
- soft_label = soft_label.transpose([0, 2, 3, 1]).reshape(
- [-1, self.cls_out_channels])
-
- # feture im
- # teacher_x = teacher_x.transpose([0, 2, 3, 1]).reshape([-1, 256])
- # x = x.transpose([0, 2, 3, 1]).reshape([-1, 256])
-
- bbox_targets = bbox_targets.reshape([-1, 4])
- labels = labels.reshape([-1])
- label_weights = label_weights.reshape([-1])
-
- vlr_region = vlr_region.reshape([-1])
-
- bg_class_ind = self.num_classes
- pos_inds = paddle.nonzero(
- paddle.logical_and((labels >= 0), (labels < bg_class_ind)),
- as_tuple=False).squeeze(1)
- score = np.zeros(labels.shape)
-
- remain_inds = (vlr_region > 0).nonzero()
-
- if len(pos_inds) > 0:
- pos_bbox_targets = paddle.gather(bbox_targets, pos_inds, axis=0)
- pos_bbox_pred = paddle.gather(bbox_pred, pos_inds, axis=0)
- pos_grid_cells = paddle.gather(grid_cells, pos_inds, axis=0)
-
- pos_grid_cell_centers = self._grid_cells_to_center(
- pos_grid_cells) / stride
-
- weight_targets = F.sigmoid(cls_score.detach())
- weight_targets = paddle.gather(
- weight_targets.max(axis=1, keepdim=True), pos_inds, axis=0)
- pos_bbox_pred_corners = self.distribution_project(pos_bbox_pred)
- pos_decode_bbox_pred = distance2bbox(pos_grid_cell_centers,
- pos_bbox_pred_corners)
- pos_decode_bbox_targets = pos_bbox_targets / stride
- bbox_iou = bbox_overlaps(
- pos_decode_bbox_pred.detach().numpy(),
- pos_decode_bbox_targets.detach().numpy(),
- is_aligned=True)
- score[pos_inds.numpy()] = bbox_iou
- pred_corners = pos_bbox_pred.reshape([-1, self.reg_max + 1])
-
- pos_soft_targets = paddle.gather(soft_targets, pos_inds, axis=0)
- soft_corners = pos_soft_targets.reshape([-1, self.reg_max + 1])
-
- target_corners = bbox2distance(pos_grid_cell_centers,
- pos_decode_bbox_targets,
- self.reg_max).reshape([-1])
- # regression loss
- loss_bbox = paddle.sum(
- self.loss_bbox(pos_decode_bbox_pred,
- pos_decode_bbox_targets) * weight_targets)
-
- # dfl loss
- loss_dfl = self.loss_dfl(
- pred_corners,
- target_corners,
- weight=weight_targets.expand([-1, 4]).reshape([-1]),
- avg_factor=4.0)
-
- # ld loss
- loss_ld = self.loss_ld(
- pred_corners,
- soft_corners,
- weight=weight_targets.expand([-1, 4]).reshape([-1]),
- avg_factor=4.0)
-
- loss_kd = self.loss_kd(
- paddle.gather(
- cls_score, pos_inds, axis=0),
- paddle.gather(
- soft_label, pos_inds, axis=0),
- weight=paddle.gather(
- label_weights, pos_inds, axis=0),
- avg_factor=pos_inds.shape[0])
-
- else:
- loss_bbox = bbox_pred.sum() * 0
- loss_dfl = bbox_pred.sum() * 0
- loss_ld = bbox_pred.sum() * 0
- loss_kd = bbox_pred.sum() * 0
- weight_targets = paddle.to_tensor([0], dtype='float32')
-
- if len(remain_inds) > 0:
- neg_pred_corners = bbox_pred[remain_inds].reshape(
- [-1, self.reg_max + 1])
- neg_soft_corners = soft_targets[remain_inds].reshape(
- [-1, self.reg_max + 1])
-
- remain_targets = vlr_region[remain_inds]
-
- loss_ld_vlr = self.loss_ld_vlr(
- neg_pred_corners,
- neg_soft_corners,
- weight=remain_targets.expand([-1, 4]).reshape([-1]),
- avg_factor=16.0)
- else:
- loss_ld_vlr = bbox_pred.sum() * 0
-
- # qfl loss
- score = paddle.to_tensor(score)
- loss_qfl = self.loss_qfl(
- cls_score, (labels, score),
- weight=label_weights,
- avg_factor=num_total_pos)
-
- loss_bbox_list.append(loss_bbox)
- loss_dfl_list.append(loss_dfl)
- loss_qfl_list.append(loss_qfl)
- loss_ld_list.append(loss_ld)
- loss_ld_vlr_list.append(loss_ld_vlr)
- loss_kd_list.append(loss_kd)
- avg_factor.append(weight_targets.sum())
-
- avg_factor = sum(avg_factor) # + 1e-6
- try:
- paddle.distributed.all_reduce(avg_factor)
- avg_factor = paddle.clip(
- avg_factor / paddle.distributed.get_world_size(), min=1)
- except:
- avg_factor = max(avg_factor.item(), 1)
-
- if avg_factor <= 0:
- loss_qfl = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
- loss_bbox = paddle.to_tensor(
- 0, dtype='float32', stop_gradient=False)
- loss_dfl = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
- loss_ld = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
- loss_ld_vlr = paddle.to_tensor(
- 0, dtype='float32', stop_gradient=False)
- loss_kd = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
- else:
- losses_bbox = list(map(lambda x: x / avg_factor, loss_bbox_list))
- losses_dfl = list(map(lambda x: x / avg_factor, loss_dfl_list))
- loss_qfl = sum(loss_qfl_list)
- loss_bbox = sum(losses_bbox)
- loss_dfl = sum(losses_dfl)
- loss_ld = sum(loss_ld_list)
- loss_ld_vlr = sum(loss_ld_vlr_list)
- loss_kd = sum(loss_kd_list)
-
- loss_states = dict(
- loss_qfl=loss_qfl,
- loss_bbox=loss_bbox,
- loss_dfl=loss_dfl,
- loss_ld=loss_ld,
- loss_ld_vlr=loss_ld_vlr,
- loss_kd=loss_kd)
-
- return loss_states
diff --git a/ppdet/slim/distill_loss.py b/ppdet/slim/distill_loss.py
index 6e94fd8410e..d325a5b2ac9 100644
--- a/ppdet/slim/distill_loss.py
+++ b/ppdet/slim/distill_loss.py
@@ -17,14 +17,12 @@
from __future__ import print_function
import math
-import numpy as np
-
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
-from ppdet.core.workspace import register, create
+from ppdet.core.workspace import register
from ppdet.modeling import ops
from ppdet.modeling.losses.iou_loss import GIoULoss
from ppdet.utils.logger import setup_logger
@@ -456,7 +454,7 @@ def distill_softmax(self, x, tau):
x /= tau
return F.softmax(x, axis=1)
- def forward(self, preds_s, preds_t, inputs):
+ def forward(self, preds_s, preds_t, inputs=None):
assert preds_s.shape[-2:] == preds_t.shape[-2:]
N, C, H, W = preds_s.shape
eps = 1e-5
@@ -676,7 +674,7 @@ def forward(self, stu_feature, tea_feature, inputs):
wmin, wmax, hmin, hmax = [], [], [], []
- if gt_bboxes.shape[1] == 0:
+ if len(gt_bboxes) == 0:
loss = self.relation_loss(stu_feature, tea_feature)
return self.lambda_fgd * loss
@@ -750,7 +748,7 @@ def __init__(self,
self.loss_weight = loss_weight
self.resize_stu = resize_stu
- def forward(self, stu_feature, tea_feature, inputs):
+ def forward(self, stu_feature, tea_feature, inputs=None):
size_s, size_t = stu_feature.shape[2:], tea_feature.shape[2:]
if size_s[0] != size_t[0]:
if self.resize_stu:
@@ -791,7 +789,7 @@ def __init__(self,
else:
self.align = None
- def forward(self, stu_feature, tea_feature, inputs):
+ def forward(self, stu_feature, tea_feature, inputs=None):
if self.align is not None:
stu_feature = self.align(stu_feature)
@@ -839,7 +837,7 @@ def __init__(self,
nn.Conv2D(
teacher_channels, teacher_channels, kernel_size=3, padding=1))
- def forward(self, stu_feature, tea_feature, inputs):
+ def forward(self, stu_feature, tea_feature, inputs=None):
N = stu_feature.shape[0]
if self.align is not None:
stu_feature = self.align(stu_feature)
diff --git a/ppdet/slim/distill_model.py b/ppdet/slim/distill_model.py
index c06f92f08e3..96e13663813 100644
--- a/ppdet/slim/distill_model.py
+++ b/ppdet/slim/distill_model.py
@@ -18,8 +18,6 @@
import paddle
import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
from ppdet.core.workspace import register, create, load_config
from ppdet.utils.checkpoint import load_pretrain_weight
@@ -206,13 +204,13 @@ def build_loss(self, distill_cfg):
def get_loss_retinanet(self, stu_fea_list, tea_fea_list, inputs):
loss = self.student_model.head(stu_fea_list, inputs)
- distill_loss = {}
- for idx, k in enumerate(self.loss_dic):
- distill_loss[k] = self.loss_dic[k](stu_fea_list[idx],
- tea_fea_list[idx])
+ loss_dict = {}
+ for idx, k in enumerate(self.distill_loss):
+ loss_dict[k] = self.distill_loss[k](stu_fea_list[idx],
+ tea_fea_list[idx])
- loss['loss'] += distill_loss[k]
- loss[k] = distill_loss[k]
+ loss['loss'] += loss_dict[k]
+ loss[k] = loss_dict[k]
return loss
def get_loss_gfl(self, stu_fea_list, tea_fea_list, inputs):
@@ -234,10 +232,11 @@ def get_loss_gfl(self, stu_fea_list, tea_fea_list, inputs):
s_cls_feat.append(cls_score)
t_cls_feat.append(t_cls_score)
- for idx, k in enumerate(self.loss_dic):
- loss_dict[k] = self.loss_dic[k](s_cls_feat[idx], t_cls_feat[idx])
- feat_loss[f"neck_f_{idx}"] = self.loss_dic[k](stu_fea_list[idx],
- tea_fea_list[idx])
+ for idx, k in enumerate(self.distill_loss):
+ loss_dict[k] = self.distill_loss[k](s_cls_feat[idx],
+ t_cls_feat[idx])
+ feat_loss[f"neck_f_{idx}"] = self.distill_loss[k](stu_fea_list[idx],
+ tea_fea_list[idx])
for k in feat_loss:
loss['loss'] += feat_loss[k]
From 8bf79888afb9183128495c6276f679611ccd8dbe Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Tue, 14 Feb 2023 10:07:40 +0800
Subject: [PATCH 007/116] config use_shared_memory for fcos_reader to speed up
data processing (#7746)
---
configs/fcos/_base_/fcos_reader.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/fcos/_base_/fcos_reader.yml b/configs/fcos/_base_/fcos_reader.yml
index 8f0016125eb..4f4d4feffbf 100644
--- a/configs/fcos/_base_/fcos_reader.yml
+++ b/configs/fcos/_base_/fcos_reader.yml
@@ -16,6 +16,7 @@ TrainReader:
batch_size: 2
shuffle: True
drop_last: True
+ use_shared_memory: True
EvalReader:
From ee0d9a6b503ab7ff3c484a4328a121de18504bb9 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Tue, 14 Feb 2023 10:33:33 +0800
Subject: [PATCH 008/116] fix post quant (#7750)
---
ppdet/engine/trainer.py | 2 ++
ppdet/slim/__init__.py | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 98da6c47772..ae0e21d8ea4 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -72,6 +72,8 @@ def __init__(self, cfg, mode='train'):
self.amp_level = self.cfg.get('amp_level', 'O1')
self.custom_white_list = self.cfg.get('custom_white_list', None)
self.custom_black_list = self.cfg.get('custom_black_list', None)
+ if 'slim' in cfg and cfg['slim_type'] == 'PTQ':
+ self.cfg['TestDataset'] = create('TestDataset')()
# build data loader
capital_mode = self.mode.capitalize()
diff --git a/ppdet/slim/__init__.py b/ppdet/slim/__init__.py
index 7d75082b2b3..712919002ff 100644
--- a/ppdet/slim/__init__.py
+++ b/ppdet/slim/__init__.py
@@ -83,9 +83,9 @@ def build_slim_model(cfg, slim_cfg, mode='train'):
load_config(slim_cfg)
load_pretrain_weight(model, cfg.weights)
slim = create(cfg.slim)
+ cfg['slim_type'] = cfg.slim
cfg['slim'] = slim
cfg['model'] = slim(model)
- cfg['slim_type'] = cfg.slim
elif slim_load_cfg['slim'] == 'UnstructuredPruner':
load_config(slim_cfg)
slim = create(cfg.slim)
From 355c9bc7d5ee2930aff40a9dd7d6bad7f52e88f6 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Tue, 14 Feb 2023 14:00:47 +0800
Subject: [PATCH 009/116] [TIPC] disable ppyoloe_vit ppyoloe+ ppyoloe_r sod
train benchmark (#7745)
---
.../ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt | 2 +-
.../ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt | 2 +-
.../ppyoloe_plus_sod_crn_l_80e_coco_train_infer_python.txt | 2 +-
.../ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt b/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt
index be02ebb9789..62ee907afce 100644
--- a/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt
+++ b/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt
@@ -49,7 +49,7 @@ inference:./deploy/python/infer.py
--save_log_path:null
--run_benchmark:False
--trt_max_shape:1600
-===========================train_benchmark_params==========================
+===========================disable_train_benchmark==========================
batch_size:8
fp_items:fp32|fp16
epoch:1
diff --git a/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt b/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt
index 13d5cc45689..92d2d76f89d 100644
--- a/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt
+++ b/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt
@@ -49,7 +49,7 @@ inference:./deploy/python/infer.py
--save_log_path:null
--run_benchmark:False
null:null
-===========================train_benchmark_params==========================
+===========================disable_train_benchmark==========================
batch_size:2
fp_items:fp32
epoch:5
diff --git a/test_tipc/configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco_train_infer_python.txt b/test_tipc/configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco_train_infer_python.txt
index d0ea33a3989..ae1081f1193 100644
--- a/test_tipc/configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco_train_infer_python.txt
+++ b/test_tipc/configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco_train_infer_python.txt
@@ -49,7 +49,7 @@ inference:./deploy/python/infer.py
--save_log_path:null
--run_benchmark:False
--trt_max_shape:1600
-===========================train_benchmark_params==========================
+===========================disable_train_benchmark==========================
batch_size:8
fp_items:fp32|fp16
epoch:1
diff --git a/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt b/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt
index c9e962d9e5f..f6882611741 100644
--- a/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt
+++ b/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt
@@ -49,7 +49,7 @@ inference:./deploy/python/infer.py
--save_log_path:null
--run_benchmark:False
--trt_max_shape:1600
-===========================train_benchmark_params==========================
+===========================disable_train_benchmark==========================
batch_size:2
fp_items:fp32|fp16
epoch:1
From 166ebba458b271041b17732a1f0a12e833c4c190 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Tue, 14 Feb 2023 15:56:19 +0800
Subject: [PATCH 010/116] update release 2.6 doc, test=document_fix (#7754)
---
docs/CHANGELOG.md | 32 ++++++++++++++++++++++++++++++
docs/CHANGELOG_en.md | 38 ++++++++++++++++++++++++++++++++++++
docs/MODEL_ZOO_cn.md | 8 ++++++++
docs/MODEL_ZOO_en.md | 8 ++++++++
docs/tutorials/INSTALL.md | 9 +++++----
docs/tutorials/INSTALL_cn.md | 11 ++++++-----
6 files changed, 97 insertions(+), 9 deletions(-)
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 86b1bf0f746..0253bdd7c3d 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -4,6 +4,38 @@
## 最新版本信息
+### 2.6(02.15/2023)
+
+- 特色模型
+ - 发布旋转框检测模型PP-YOLOE-R:Anchor-free旋转框检测SOTA模型,精度速度双高、云边一体,s/m/l/x四个模型适配不用算力硬件、部署友好,避免使用特殊算子,能够轻松使用TensorRT加速;
+ - 发布小目标检测模型PP-YOLOE-SOD:基于切图的端到端检测方案、基于原图的检测模型,精度达VisDrone开源最优;
+ - 发布密集检测模型:基于PP-YOLOE+的密集检测算法,SKU数据集检测精度60.3,达到开源最优
+- 前沿算法
+ - YOLO家族新增前沿算法YOLOv8,更新YOLOv6-v3.0
+ - 新增目标检测算法DINO,YOLOF
+ - 新增ViTDet系列检测模型,PP-YOLOE+ViT_base, Mask RCNN + ViT_base, Mask RCNN + ViT_large
+ - 新增多目标跟踪算法CenterTrack
+ - 新增旋转框检测算法FCOSR
+ - 新增实例分割算法QueryInst
+ - 新增3D关键点检测算法Metro3d
+ - 新增模型蒸馏算法FGD,LD,CWD,新增PP-YOLOE+模型蒸馏,精度提升1.1 mAP
+ - 新增半监督检测算法 DenseTeacher,并适配PP-YOLOE+
+ - 新增少样本迁移学习方案,包含Co-tuning,Contrastive learning两类算法
+- 场景能力
+ - PP-Human v2开源边缘端实时检测模型,精度45.7,Jetson AGX速度80FPS
+ - PP-Vehicle开源边缘端实时检测模型,精度53.5,Jetson AGX速度80FPS
+ - PP-Human v2,PP-Vehicle支持多路视频流部署能力,实现Jetson AGX 4路视频流端到端20FPS实时部署
+ - PP-Vehicle新增车辆压线检测和车辆逆行检测能力
+- 框架能力
+ - 功能新增
+ - 新增检测热力图可视化能力,适配FasterRCNN/MaskRCNN系列, PP-YOLOE系列, BlazeFace, SSD, RetinaNet
+ - 功能完善/Bug修复
+ - 支持python3.10版本
+ - EMA支持过滤不更新参数
+ - 简化PP-YOLOE architecture架构代码
+ - AdamW适配paddle2.4.1版本
+
+
### 2.5(08.26/2022)
- 特色模型
diff --git a/docs/CHANGELOG_en.md b/docs/CHANGELOG_en.md
index 15b8321e941..ac374b5d619 100644
--- a/docs/CHANGELOG_en.md
+++ b/docs/CHANGELOG_en.md
@@ -4,6 +4,44 @@ English | [简体中文](./CHANGELOG.md)
## Last Version Information
+### 2.6(02.15/2023)
+
+- Featured model
+
+  - Release rotated object detector PP-YOLOE-R:SOTA Anchor-free rotated object detection model with high accuracy and efficiency. It has a series of models, named s/m/l/x, for cloud and edge devices, and avoids using special operators, making it deployment-friendly and easy to accelerate with TensorRT.
+ - Release small object detector PP-YOLOE-SOD: End-to-end detection pipeline based on sliced images and SOTA model on VisDrone based on original images.
+ - Release crowded object detector: Crowded object detection model with top accuracy on SKU dataset.
+
+- Functions in different scenarios
+
+ - Release real-time object detection model on edge device in PP-Human v2. The model reaches 45.7mAP and 80FPS on Jetson AGX
+ - Release real-time object detection model on edge device in PP-Vehicle. The model reaches 53.5mAP and 80FPS on Jetson AGX
+ - Support multi-stream deployment in PP-Human v2 and PP-Vehicle. Achieved 20FPS in 4-stream deployment on Jetson AGX
+ - Support retrograde and press line detection in PP-Vehicle
+
+- Cutting-edge algorithms
+
+ - Release YOLOv8 and YOLOv6 3.0 in YOLO Family
+ - Release object detection algorithm DINO, YOLOF
+ - Rich ViTDet series including PP-YOLOE+ViT_base, Mask RCNN + ViT_base, Mask RCNN + ViT_large
+ - Release MOT algorithm CenterTrack
+ - Release oriented object detection algorithm FCOSR
+ - Release instance segmentation algorithm QueryInst
+ - Release 3D keypoint detection algorithm Metro3d
+ - Release distillation algorithm FGD,LD,CWD and PP-YOLOE+ distillation with improvement of 1.1+ mAP
+ - Release SSOD algorithm DenseTeacher and adapt for PP-YOLOE+
+ - Release few shot finetuning algorithm, including Co-tuning and Contrastive learning
+
+- Framework capabilities
+
+ - New functions
+ - Release Grad-CAM for heatmap visualization. Support Faster RCNN, Mask RCNN, PP-YOLOE, BlazeFace, SSD, RetinaNet.
+ - Improvement and fixes
+ - Support python 3.10
+ - Fix EMA for no-grad parameters
+ - Simplify PP-YOLOE architecture
+ - Support AdamW for Paddle 2.4.1
+
### 2.5(08.26/2022)
- Featured model
diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md
index 24973af0c42..9cf3712e251 100644
--- a/docs/MODEL_ZOO_cn.md
+++ b/docs/MODEL_ZOO_cn.md
@@ -139,6 +139,10 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
请参考[Vision Transformer](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/vitdet)
+### DINO
+
+请参考[DINO](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dino)
+
### YOLOX
请参考[YOLOX](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolox)
@@ -162,6 +166,10 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
请参考[SOLOv2](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/)
+### QueryInst
+
+请参考[QueryInst](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/queryinst)
+
## [PaddleYOLO](https://github.com/PaddlePaddle/PaddleYOLO)
diff --git a/docs/MODEL_ZOO_en.md b/docs/MODEL_ZOO_en.md
index 80638d76c8b..0ced3c57e9e 100644
--- a/docs/MODEL_ZOO_en.md
+++ b/docs/MODEL_ZOO_en.md
@@ -138,6 +138,10 @@ Please refer to [Sparse R-CNN](https://github.com/PaddlePaddle/PaddleDetection/t
Please refer to [Vision Transformer](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/vitdet)
+### DINO
+
+Please refer to [DINO](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dino)
+
### YOLOX
Please refer to [YOLOX](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolox)
@@ -161,6 +165,10 @@ Please refer to [Cascade R-CNN](https://github.com/PaddlePaddle/PaddleDetection/
Please refer to [SOLOv2](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/)
+### QueryInst
+
+Please refer to [QueryInst](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/queryinst)
+
## [PaddleYOLO](https://github.com/PaddlePaddle/PaddleYOLO)
diff --git a/docs/tutorials/INSTALL.md b/docs/tutorials/INSTALL.md
index c5af2092d84..7f067b98b95 100644
--- a/docs/tutorials/INSTALL.md
+++ b/docs/tutorials/INSTALL.md
@@ -12,7 +12,7 @@ For general information about PaddleDetection, please see [README.md](https://gi
- PaddlePaddle 2.2
- OS 64 bit
-- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64 bit
+- Python 3(3.5.1+/3.6/3.7/3.8/3.9/3.10),64 bit
- pip/pip3(9.0.1+), 64 bit
- CUDA >= 10.2
- cuDNN >= 7.6
@@ -22,7 +22,8 @@ Dependency of PaddleDetection and PaddlePaddle:
| PaddleDetection version | PaddlePaddle version | tips |
| :----------------: | :---------------: | :-------: |
-| develop | develop | Dygraph mode is set as default |
+| develop | >= 2.3.2 | Dygraph mode is set as default |
+| release/2.6 | >= 2.3.2 | Dygraph mode is set as default |
| release/2.5 | >= 2.2.2 | Dygraph mode is set as default |
| release/2.4 | >= 2.2.2 | Dygraph mode is set as default |
| release/2.3 | >= 2.2.0rc | Dygraph mode is set as default |
@@ -42,10 +43,10 @@ Dependency of PaddleDetection and PaddlePaddle:
```
# CUDA10.2
-python -m pip install paddlepaddle-gpu==2.2.2 -i https://mirror.baidu.com/pypi/simple
+python -m pip install paddlepaddle-gpu==2.3.2 -i https://mirror.baidu.com/pypi/simple
# CPU
-python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
+python -m pip install paddlepaddle==2.3.2 -i https://mirror.baidu.com/pypi/simple
```
- For more CUDA version or environment to quick install, please refer to the [PaddlePaddle Quick Installation document](https://www.paddlepaddle.org.cn/install/quick)
diff --git a/docs/tutorials/INSTALL_cn.md b/docs/tutorials/INSTALL_cn.md
index 970b5393318..9b0313c5254 100644
--- a/docs/tutorials/INSTALL_cn.md
+++ b/docs/tutorials/INSTALL_cn.md
@@ -7,9 +7,9 @@
## 环境要求
-- PaddlePaddle 2.2
+- PaddlePaddle 2.3.2
- OS 64位操作系统
-- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64位版本
+- Python 3(3.5.1+/3.6/3.7/3.8/3.9/3.10),64位版本
- pip/pip3(9.0.1+),64位版本
- CUDA >= 10.2
- cuDNN >= 7.6
@@ -18,7 +18,8 @@ PaddleDetection 依赖 PaddlePaddle 版本关系:
| PaddleDetection版本 | PaddlePaddle版本 | 备注 |
| :------------------: | :---------------: | :-------: |
-| develop | develop | 默认使用动态图模式 |
+| develop | >=2.3.2 | 默认使用动态图模式 |
+| release/2.6 | >=2.3.2 | 默认使用动态图模式 |
| release/2.5 | >= 2.2.2 | 默认使用动态图模式 |
| release/2.4 | >= 2.2.2 | 默认使用动态图模式 |
| release/2.3 | >= 2.2.0rc | 默认使用动态图模式 |
@@ -36,10 +37,10 @@ PaddleDetection 依赖 PaddlePaddle 版本关系:
```
# CUDA10.2
-python -m pip install paddlepaddle-gpu==2.2.2 -i https://mirror.baidu.com/pypi/simple
+python -m pip install paddlepaddle-gpu==2.3.2 -i https://mirror.baidu.com/pypi/simple
# CPU
-python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
+python -m pip install paddlepaddle==2.3.2 -i https://mirror.baidu.com/pypi/simple
```
- 更多CUDA版本或环境快速安装,请参考[PaddlePaddle快速安装文档](https://www.paddlepaddle.org.cn/install/quick)
- 更多安装方式例如conda或源码编译安装方法,请参考[PaddlePaddle安装文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/index_cn.html)
From 1af1ae6fa82ce161cd438db2f94a9d181d2e6b78 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Tue, 14 Feb 2023 18:05:53 +0800
Subject: [PATCH 011/116] fix tipc benchmark (#7761)
---
test_tipc/benchmark_train.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index 96692c0e81b..bb2324f00c5 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -189,8 +189,8 @@ if [[ ${model_name} =~ "higherhrnet" ]] || [[ ${model_name} =~ "hrnet" ]] || [[
else
epoch=1
repeat=$(set_dynamic_epoch $device_num $repeat)
- eval "sed -i '10c\ repeat: ${repeat}' configs/datasets/coco_detection.yml"
- eval "sed -i '10c\ repeat: ${repeat}' configs/datasets/coco_instance.yml"
+ eval "sed -i '10c\ repeat: ${repeat}' configs/datasets/coco_detection.yml"
+ eval "sed -i '10c\ repeat: ${repeat}' configs/datasets/coco_instance.yml"
eval "sed -i '10c\ repeat: ${repeat}' configs/datasets/mot.yml"
fi
From 1d83897892285e6d3e8ea6ed2e6bcff7c0bef6bc Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Wed, 15 Feb 2023 18:30:56 +0800
Subject: [PATCH 012/116] fix some configs (#7769)
---
configs/fcos/fcos_r50_fpn_iou_1x_coco.yml | 3 ++-
configs/fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml | 1 +
configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml | 1 +
configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml | 2 ++
configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml | 2 ++
5 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/configs/fcos/fcos_r50_fpn_iou_1x_coco.yml b/configs/fcos/fcos_r50_fpn_iou_1x_coco.yml
index 943c5bc04de..18c33cf8e22 100644
--- a/configs/fcos/fcos_r50_fpn_iou_1x_coco.yml
+++ b/configs/fcos/fcos_r50_fpn_iou_1x_coco.yml
@@ -12,7 +12,7 @@ weights: output/fcos_r50_fpn_iou_1x_coco/model_final
TrainReader:
sample_transforms:
- Decode: {}
- - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 1}
+ - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- RandomFlip: {}
batch_transforms:
@@ -26,6 +26,7 @@ TrainReader:
batch_size: 2
shuffle: True
drop_last: True
+ use_shared_memory: True
EvalReader:
diff --git a/configs/fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml b/configs/fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml
index 3f6a327db26..d53ea17f57b 100644
--- a/configs/fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml
+++ b/configs/fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml
@@ -25,6 +25,7 @@ TrainReader:
batch_size: 2
shuffle: True
drop_last: True
+ use_shared_memory: True
EvalReader:
diff --git a/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml b/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml
index 85a9781359d..0afdbbc5be6 100644
--- a/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml
+++ b/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml
@@ -25,6 +25,7 @@ TrainReader:
batch_size: 2
shuffle: True
drop_last: True
+ use_shared_memory: True
epoch: 24
diff --git a/configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml b/configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml
index dbef4902e90..0a5bfcd29cc 100644
--- a/configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml
+++ b/configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml
@@ -43,6 +43,8 @@ DistillPPYOLOELoss: # L -> M
loss_weight: {'logits': 4.0, 'feat': 1.0}
logits_distill: True
logits_loss_weight: {'class': 1.0, 'iou': 2.5, 'dfl': 0.5}
+ logits_ld_distill: True
+ logits_ld_params: {'weight': 20000, 'T': 10}
feat_distill: True
feat_distiller: 'fgd' # ['cwd', 'fgd', 'pkd', 'mgd', 'mimic']
feat_distill_place: 'neck_feats'
diff --git a/configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml b/configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml
index 3f78deaefe7..55d3c4c9f08 100644
--- a/configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml
+++ b/configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml
@@ -43,6 +43,8 @@ DistillPPYOLOELoss: # X -> L
loss_weight: {'logits': 4.0, 'feat': 1.0}
logits_distill: True
logits_loss_weight: {'class': 1.0, 'iou': 2.5, 'dfl': 0.5}
+ logits_ld_distill: True
+ logits_ld_params: {'weight': 20000, 'T': 10}
feat_distill: True
feat_distiller: 'fgd' # ['cwd', 'fgd', 'pkd', 'mgd', 'mimic']
feat_distill_place: 'neck_feats'
From e16ecd71559cfca6ef52916d2c9049f41fbdfc00 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Wed, 15 Feb 2023 19:20:07 +0800
Subject: [PATCH 013/116] Add swin backbone dino configs and codes (#7764)
* add swin large dino, refine dino codes
* fix dino export
---
configs/dino/_base_/dino_reader.yml | 50 ++++++------
configs/dino/_base_/dino_swin.yml | 46 +++++++++++
configs/dino/_base_/optimizer_3x.yml | 16 ++++
.../_base_/faster_rcnn_swin_tiny_fpn.yml | 10 +--
ppdet/modeling/architectures/detr.py | 30 ++++---
ppdet/modeling/backbones/swin_transformer.py | 78 +++++++++++++++++--
.../modeling/transformers/dino_transformer.py | 5 +-
7 files changed, 176 insertions(+), 59 deletions(-)
create mode 100644 configs/dino/_base_/dino_swin.yml
create mode 100644 configs/dino/_base_/optimizer_3x.yml
diff --git a/configs/dino/_base_/dino_reader.yml b/configs/dino/_base_/dino_reader.yml
index c62a8054cf3..0fdf7a8985b 100644
--- a/configs/dino/_base_/dino_reader.yml
+++ b/configs/dino/_base_/dino_reader.yml
@@ -1,20 +1,20 @@
worker_num: 4
TrainReader:
sample_transforms:
- - Decode: {}
- - RandomFlip: {prob: 0.5}
- - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
- transforms2: [
- RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
- RandomSizeCrop: { min_size: 384, max_size: 600 },
- RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
- }
- - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- - NormalizeBox: {}
- - BboxXYXY2XYWH: {}
- - Permute: {}
+ - Decode: {}
+ - RandomFlip: {prob: 0.5}
+ - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+ transforms2: [
+ RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+ RandomSizeCrop: { min_size: 384, max_size: 600 },
+ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+ }
+ - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+ - NormalizeBox: {}
+ - BboxXYXY2XYWH: {}
+ - Permute: {}
batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 4
shuffle: true
drop_last: true
@@ -24,25 +24,21 @@ TrainReader:
EvalReader:
sample_transforms:
- - Decode: {}
- - Resize: {target_size: [800, 1333], keep_ratio: True}
- - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- - Permute: {}
+ - Decode: {}
+ - Resize: {target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+ - Permute: {}
batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
- shuffle: false
- drop_last: false
TestReader:
sample_transforms:
- - Decode: {}
- - Resize: {target_size: [800, 1333], keep_ratio: True}
- - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- - Permute: {}
+ - Decode: {}
+ - Resize: {target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+ - Permute: {}
batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
- shuffle: false
- drop_last: false
diff --git a/configs/dino/_base_/dino_swin.yml b/configs/dino/_base_/dino_swin.yml
new file mode 100644
index 00000000000..b2edb961ce5
--- /dev/null
+++ b/configs/dino/_base_/dino_swin.yml
@@ -0,0 +1,46 @@
+architecture: DETR
+# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
+hidden_dim: 256
+use_focal_loss: True
+
+DETR:
+ backbone: SwinTransformer
+ transformer: DINOTransformer
+ detr_head: DINOHead
+ post_process: DETRBBoxPostProcess
+
+SwinTransformer:
+ arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
+ ape: false
+ drop_path_rate: 0.2
+ patch_norm: true
+ out_indices: [1, 2, 3]
+
+DINOTransformer:
+ num_queries: 900
+ position_embed_type: sine
+ num_levels: 4
+ nhead: 8
+ num_encoder_layers: 6
+ num_decoder_layers: 6
+ dim_feedforward: 2048
+ dropout: 0.0
+ activation: relu
+ pe_temperature: 10000
+ pe_offset: -0.5
+ num_denoising: 100
+ label_noise_ratio: 0.5
+ box_noise_scale: 1.0
+ learnt_init_query: True
+
+DINOHead:
+ loss:
+ name: DINOLoss
+ loss_coeff: {class: 1, bbox: 5, giou: 2}
+ aux_loss: True
+ matcher:
+ name: HungarianMatcher
+ matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRBBoxPostProcess:
+ num_top_queries: 300
diff --git a/configs/dino/_base_/optimizer_3x.yml b/configs/dino/_base_/optimizer_3x.yml
new file mode 100644
index 00000000000..3747999ae0b
--- /dev/null
+++ b/configs/dino/_base_/optimizer_3x.yml
@@ -0,0 +1,16 @@
+epoch: 36
+
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [33]
+ use_warmup: false
+
+OptimizerBuilder:
+ clip_grad_by_norm: 0.1
+ regularizer: false
+ optimizer:
+ type: AdamW
+ weight_decay: 0.0001
diff --git a/configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml b/configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml
index 6208600e324..c8aadef7234 100644
--- a/configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml
+++ b/configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml
@@ -1,4 +1,5 @@
architecture: FasterRCNN
+# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
FasterRCNN:
backbone: SwinTransformer
@@ -8,15 +9,12 @@ FasterRCNN:
bbox_post_process: BBoxPostProcess
SwinTransformer:
- embed_dim: 96
- depths: [2, 2, 6, 2]
- num_heads: [3, 6, 12, 24]
- window_size: 7
+ arch: 'swin_T_224'
ape: false
drop_path_rate: 0.1
patch_norm: true
- out_indices: [0,1,2,3]
- pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224.pdparams
+ out_indices: [0, 1, 2, 3]
+ pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
FPN:
out_channel: 256
diff --git a/ppdet/modeling/architectures/detr.py b/ppdet/modeling/architectures/detr.py
index 21379185b02..953c8f627a9 100644
--- a/ppdet/modeling/architectures/detr.py
+++ b/ppdet/modeling/architectures/detr.py
@@ -21,6 +21,7 @@
from ppdet.core.workspace import register, create
__all__ = ['DETR']
+# Deformable DETR, DINO use the same architecture as DETR
@register
@@ -31,8 +32,8 @@ class DETR(BaseArch):
def __init__(self,
backbone,
- transformer,
- detr_head,
+ transformer='DETRTransformer',
+ detr_head='DETRHead',
post_process='DETRBBoxPostProcess',
exclude_post_process=False):
super(DETR, self).__init__()
@@ -73,7 +74,13 @@ def _forward(self):
# DETR Head
if self.training:
- return self.detr_head(out_transformer, body_feats, self.inputs)
+ detr_losses = self.detr_head(out_transformer, body_feats,
+ self.inputs)
+ detr_losses.update({
+ 'loss': paddle.add_n(
+ [v for k, v in detr_losses.items() if 'log' not in k])
+ })
+ return detr_losses
else:
preds = self.detr_head(out_transformer, body_feats)
if self.exclude_post_process:
@@ -82,20 +89,11 @@ def _forward(self):
else:
bbox, bbox_num = self.post_process(
preds, self.inputs['im_shape'], self.inputs['scale_factor'])
- return bbox, bbox_num
+ output = {'bbox': bbox, 'bbox_num': bbox_num}
+ return output
def get_loss(self):
- losses = self._forward()
- losses.update({
- 'loss':
- paddle.add_n([v for k, v in losses.items() if 'log' not in k])
- })
- return losses
+ return self._forward()
def get_pred(self):
- bbox_pred, bbox_num = self._forward()
- output = {
- "bbox": bbox_pred,
- "bbox_num": bbox_num,
- }
- return output
+ return self._forward()
diff --git a/ppdet/modeling/backbones/swin_transformer.py b/ppdet/modeling/backbones/swin_transformer.py
index aa4311ff812..8a581b763d6 100644
--- a/ppdet/modeling/backbones/swin_transformer.py
+++ b/ppdet/modeling/backbones/swin_transformer.py
@@ -16,18 +16,70 @@
Ths copyright of microsoft/Swin-Transformer is as follows:
MIT License [see LICENSE for details]
"""
-
+import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.modeling.shape_spec import ShapeSpec
from ppdet.core.workspace import register, serializable
-import numpy as np
-
from .transformer_utils import DropPath, Identity
from .transformer_utils import add_parameter, to_2tuple
from .transformer_utils import ones_, zeros_, trunc_normal_
+__all__ = ['SwinTransformer']
+
+MODEL_cfg = {
+ # use 22kto1k finetune weights as default pretrained, can set by SwinTransformer.pretrained in config
+ 'swin_T_224': dict(
+ pretrain_img_size=224,
+ embed_dim=96,
+ depths=[2, 2, 6, 2],
+ num_heads=[3, 6, 12, 24],
+ window_size=7,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams',
+ ),
+ 'swin_S_224': dict(
+ pretrain_img_size=224,
+ embed_dim=96,
+ depths=[2, 2, 18, 2],
+ num_heads=[3, 6, 12, 24],
+ window_size=7,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/swin_small_patch4_window7_224_22kto1k_pretrained.pdparams',
+ ),
+ 'swin_B_224': dict(
+ pretrain_img_size=224,
+ embed_dim=128,
+ depths=[2, 2, 18, 2],
+ num_heads=[4, 8, 16, 32],
+ window_size=7,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/swin_base_patch4_window7_224_22kto1k_pretrained.pdparams',
+ ),
+ 'swin_L_224': dict(
+ pretrain_img_size=224,
+ embed_dim=192,
+ depths=[2, 2, 18, 2],
+ num_heads=[6, 12, 24, 48],
+ window_size=7,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/swin_large_patch4_window7_224_22kto1k_pretrained.pdparams',
+ ),
+ 'swin_B_384': dict(
+ pretrain_img_size=384,
+ embed_dim=128,
+ depths=[2, 2, 18, 2],
+ num_heads=[4, 8, 16, 32],
+ window_size=12,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/swin_base_patch4_window12_384_22kto1k_pretrained.pdparams',
+ ),
+ 'swin_L_384': dict(
+ pretrain_img_size=384,
+ embed_dim=192,
+ depths=[2, 2, 18, 2],
+ num_heads=[6, 12, 24, 48],
+ window_size=12,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/swin_large_patch4_window12_384_22kto1k_pretrained.pdparams',
+ ),
+}
+
class Mlp(nn.Layer):
def __init__(self,
@@ -273,7 +325,8 @@ def forward(self, x, mask_matrix):
pad_l = pad_t = 0
pad_r = (self.window_size - W % self.window_size) % self.window_size
pad_b = (self.window_size - H % self.window_size) % self.window_size
- x = F.pad(x, [0, pad_l, 0, pad_b, 0, pad_r, 0, pad_t])
+ x = F.pad(x, [0, pad_l, 0, pad_b, 0, pad_r, 0, pad_t],
+ data_format='NHWC')
_, Hp, Wp, _ = x.shape
# cyclic shift
@@ -350,7 +403,10 @@ def forward(self, x, H, W):
# padding
pad_input = (H % 2 == 1) or (W % 2 == 1)
if pad_input:
- x = F.pad(x, [0, 0, 0, W % 2, 0, H % 2])
+ # paddle F.pad default data_format is 'NCHW'
+ x = F.pad(x, [0, 0, 0, H % 2, 0, W % 2, 0, 0], data_format='NHWC')
+ H += H % 2
+ W += W % 2
x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C
x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C
@@ -495,6 +551,7 @@ def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
self.norm = None
def forward(self, x):
+ # TODO # export dynamic shape
B, C, H, W = x.shape
# assert [H, W] == self.img_size[:2], "Input image size ({H}*{W}) doesn't match model ({}*{}).".format(H, W, self.img_size[0], self.img_size[1])
if W % self.patch_size[1] != 0:
@@ -540,6 +597,7 @@ class SwinTransformer(nn.Layer):
"""
def __init__(self,
+ arch='swin_T_224',
pretrain_img_size=224,
patch_size=4,
in_chans=3,
@@ -560,10 +618,16 @@ def __init__(self,
frozen_stages=-1,
pretrained=None):
super(SwinTransformer, self).__init__()
+ assert arch in MODEL_cfg.keys(), "Unsupported arch: {}".format(arch)
+ pretrain_img_size = MODEL_cfg[arch]['pretrain_img_size']
+ embed_dim = MODEL_cfg[arch]['embed_dim']
+ depths = MODEL_cfg[arch]['depths']
+ num_heads = MODEL_cfg[arch]['num_heads']
+ window_size = MODEL_cfg[arch]['window_size']
+ if pretrained is None:
+ pretrained = MODEL_cfg[arch]['pretrained']
- self.pretrain_img_size = pretrain_img_size
self.num_layers = len(depths)
- self.embed_dim = embed_dim
self.ape = ape
self.patch_norm = patch_norm
self.out_indices = out_indices
diff --git a/ppdet/modeling/transformers/dino_transformer.py b/ppdet/modeling/transformers/dino_transformer.py
index 040e1807a5d..8050ef94430 100644
--- a/ppdet/modeling/transformers/dino_transformer.py
+++ b/ppdet/modeling/transformers/dino_transformer.py
@@ -21,7 +21,6 @@
from __future__ import division
from __future__ import print_function
-import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
@@ -37,7 +36,7 @@
bias_init_with_prob)
from .utils import (_get_clones, get_valid_ratio,
get_contrastive_denoising_training_group,
- get_sine_pos_embed, inverse_sigmoid)
+ get_sine_pos_embed)
__all__ = ['DINOTransformer']
@@ -502,7 +501,7 @@ def _get_encoder_input(self, feats, pad_mask=None):
# [num_levels, 2]
spatial_shapes = paddle.to_tensor(
paddle.stack(spatial_shapes).astype('int64'))
- # [l], 每一个level的起始index
+ # [l] start index of each level
level_start_index = paddle.concat([
paddle.zeros(
[1], dtype='int64'), spatial_shapes.prod(1).cumsum(0)[:-1]
From 0ba00ddbd9300b359d062ccad1f55a74098d1ca4 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Wed, 15 Feb 2023 19:33:29 +0800
Subject: [PATCH 014/116] update pipeline docs; vehicle illegal;rtsp;jetson
(#7760)
* update pipeline docs; vehicle illegal;rtsp;jetson
* update docs
---
deploy/pipeline/README.md | 16 ++++++-
deploy/pipeline/README_en.md | 19 ++++++--
.../docs/tutorials/PPHuman_QUICK_STARTED.md | 30 +++++++++---
.../tutorials/PPHuman_QUICK_STARTED_en.md | 38 +++++++++++----
.../docs/tutorials/PPVehicle_QUICK_STARTED.md | 36 +++++++++++---
.../tutorials/PPVehicle_QUICK_STARTED_en.md | 48 ++++++++++++++-----
.../docs/tutorials/ppvehicle_mot_en.md | 2 +-
7 files changed, 148 insertions(+), 41 deletions(-)
diff --git a/deploy/pipeline/README.md b/deploy/pipeline/README.md
index 48ab1c11cc9..db82080fac5 100644
--- a/deploy/pipeline/README.md
+++ b/deploy/pipeline/README.md
@@ -14,7 +14,8 @@
## 📣 近期更新
-- 🔥🔥🔥 **2022.8.20:PP-Vehicle首发,提供车牌识别、车辆属性分析(颜色、车型)、车流量统计以及违章检测四大功能,完善的文档教程支持高效完成二次开发与模型优化**
+- 🔥🔥🔥 2023.02.15: Jetson部署专用小模型PP-YOLOE-PLUS-Tiny发布,可在AGX平台实现4路视频流实时预测;PP-Vehicle发布违法分析功能车辆逆行和压车道线。
+- **2022.8.20:PP-Vehicle首发,提供车牌识别、车辆属性分析(颜色、车型)、车流量统计以及违章检测四大功能,完善的文档教程支持高效完成二次开发与模型优化**
- **2022.7.13:PP-Human v2发布,新增打架、打电话、抽烟、闯入四大行为识别,底层算法性能升级,覆盖行人检测、跟踪、属性三类核心算法能力,提供保姆级全流程开发及模型优化策略**
- 2022.4.18:新增PP-Human全流程实战教程, 覆盖训练、部署、动作类型扩展等内容,AIStudio项目请见[链接](https://aistudio.baidu.com/aistudio/projectdetail/3842982)
- 2022.4.10:新增PP-Human范例,赋能社区智能精细化管理, AIStudio快速上手教程[链接](https://aistudio.baidu.com/aistudio/projectdetail/3679564)
@@ -28,7 +29,7 @@
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- |
| **跨镜跟踪(ReID)** | 超强性能:针对目标遮挡、完整度、模糊度等难点特殊优化,实现mAP 98.8、1.5ms/人 |
|
| **属性分析** | 兼容多种数据格式:支持图片、视频、在线视频流输入
高性能:融合开源数据集与企业真实数据进行训练,实现mAP 95.4、2ms/人
支持26种属性:性别、年龄、眼镜、上衣、鞋子、帽子、背包等26种高频属性 |
|
-| **行为识别** | 功能丰富:支持摔倒、打架、抽烟、打电话、人员闯入五种高频异常行为识别
鲁棒性强:对光照、视角、背景环境无限制
性能高:与视频识别技术相比,模型计算量大幅降低,支持本地化与服务化快速部署
训练速度快:仅需15分钟即可产出高精度行为识别模型 |
|
+| **行为识别(包含摔倒、打架、抽烟、打电话、人员闯入)** | 功能丰富:支持摔倒、打架、抽烟、打电话、人员闯入五种高频异常行为识别
鲁棒性强:对光照、视角、背景环境无限制
性能高:与视频识别技术相比,模型计算量大幅降低,支持本地化与服务化快速部署
训练速度快:仅需15分钟即可产出高精度行为识别模型 |
|
| **人流量计数**
**轨迹记录** | 简洁易用:单个参数即可开启人流量计数与轨迹记录功能 |
|
### PP-Vehicle
@@ -39,6 +40,8 @@
| **车辆属性分析** | 支持多种车型、颜色类别识别
使用更强力的Backbone模型PP-HGNet、PP-LCNet,精度高、速度快。识别精度: 90.81 |
|
| **违章检测** | 简单易用:一行命令即可实现违停检测,自定义设置区域
检测、跟踪效果好,可实现违停车辆车牌识别 |
|
| **车流量计数** | 简单易用:一行命令即可开启功能,自定义出入位置
可提供目标跟踪轨迹显示,统计准确度高 |
|
+| **违法分析-车辆逆行** | 简单易用:一行命令即可开启功能
车道线分割使用高精度模型PP-LIteSeg |
|
+| **违法分析-压车道线** | 简单易用:一行命令即可开启功能
车道线分割使用高精度模型PP-LIteSeg |
|
## 🗳 模型库
@@ -51,8 +54,10 @@
|:---------:|:---------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------:|
| 行人检测(高精度) | 25.1ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| 行人检测(轻量级) | 16.2ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| 行人检测(超轻量级) | 10ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| 行人跟踪(高精度) | 31.8ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| 行人跟踪(轻量级) | 21.0ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| 行人跟踪(超轻量级) | 13.2ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| 跨镜跟踪(REID) | 单人1.5ms | [REID](https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip) | REID:92M |
| 属性识别(高精度) | 单人8.5ms | [目标检测](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[属性识别](https://bj.bcebos.com/v1/paddledet/models/pipeline/strongbaseline_r50_30e_pa100k.zip) | 目标检测:182M
属性识别:86M |
| 属性识别(轻量级) | 单人7.1ms | [目标检测](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[属性识别](https://bj.bcebos.com/v1/paddledet/models/pipeline/strongbaseline_r50_30e_pa100k.zip) | 目标检测:182M
属性识别:86M |
@@ -76,10 +81,13 @@
| :---------: | :-------: | :------: |:------: |
| 车辆检测(高精度) | 25.7ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
| 车辆检测(轻量级) | 13.2ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| 车辆检测(超轻量级) | 10ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| 车辆跟踪(高精度) | 40ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
| 车辆跟踪(轻量级) | 25ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| 车辆跟踪(超轻量级) | 13.2ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| 车牌识别 | 4.68ms | [车牌检测](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_det_infer.tar.gz)
[车牌字符识别](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_rec_infer.tar.gz) | 车牌检测:3.9M
车牌字符识别: 12M |
| 车辆属性 | 7.31ms | [车辆属性](https://bj.bcebos.com/v1/paddledet/models/pipeline/vehicle_attribute_model.zip) | 7.2M |
+| 车道线检测 | 47ms | [车道线模型](https://bj.bcebos.com/v1/paddledet/models/pipeline/pp_lite_stdc2_bdd100k.zip) | 47M |
点击模型方案中的模型即可下载指定模型,下载后解压存放至`./output_inference`目录中
@@ -147,6 +155,10 @@
- [快速开始](docs/tutorials/ppvehicle_press.md)
+- [二次开发教程]
+
#### 车辆逆行
- [快速开始](docs/tutorials/ppvehicle_retrograde.md)
+
+- [二次开发教程]
diff --git a/deploy/pipeline/README_en.md b/deploy/pipeline/README_en.md
index 02a8f7ae447..3fdf086fd89 100644
--- a/deploy/pipeline/README_en.md
+++ b/deploy/pipeline/README_en.md
@@ -12,7 +12,8 @@
## 📣 Updates
-- 🔥🔥🔥 **2022.8.20:PP-Vehicle was first launched with four major toolbox for vehicle analysis,and it also provide detailed documentation for user to train with their own datas and model optimize.**
+- 🔥🔥🔥 2023.02.15: PP-YOLOE-PLUS-Tiny was launched for Jetson deployment, achieving 20 FPS while four RTSP streams run at the same time; PP-Vehicle was launched with retrograde and lane-line-pressing detection.
+- 🔥 **2022.8.20:PP-Vehicle was first launched with four major toolboxes for vehicle analysis, and it also provides detailed documentation for users to train with their own data and optimize the model.**
- 🔥 2022.7.13:PP-Human v2 launched with a full upgrade of four industrial features: behavior analysis, attributes recognition, visitor traffic statistics and ReID. It provides a strong core algorithm for pedestrian detection, tracking and attribute analysis with a simple and detailed development process and model optimization strategy.
- 2022.4.18: Add PP-Human practical tutorials, including training, deployment, and action expansion. Details for AIStudio project please see [Link](https://aistudio.baidu.com/aistudio/projectdetail/3842982)
@@ -41,7 +42,8 @@
| **Vehicle Attributes** | Identify 10 vehicle colors and 9 models
More powerfull backbone: PP-HGNet/PP-LCNet, with higher accuracy and faster speed
accuracy of model: 90.81
|
|
| **Illegal Parking** | Easy to use with one line command, and define the illegal area by yourself
Get the license of illegal car
|
|
| **in-out counting** | Easy to use with one line command, and define the in-out line by yourself
Target route visualize with high tracking performance |
|
-
+| **vehicle retrograde** | Easy to use with one line command
High precision Segmentation model PP-LiteSeg |
|
+| **vehicle press line** | Easy to use with one line command
High precision Segmentation model PP-LiteSeg |
|
## 🗳 Model Zoo
@@ -52,8 +54,10 @@
|:--------------------------------------:|:--------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------:|
| Pedestrian detection (high precision) | 25.1ms | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| Pedestrian detection (lightweight) | 16.2ms | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| Pedestrian detection (super lightweight) | 10ms(Jetson AGX) | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| Pedestrian tracking (high precision) | 31.8ms | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| Pedestrian tracking (lightweight) | 21.0ms | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| Pedestrian tracking(super lightweight) | 13.2ms(Jetson AGX) | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| MTMCT(REID) | Single Person 1.5ms | [REID](https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip) | REID:92M |
| Attribute recognition (high precision) | Single person8.5ms | [Object detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[Attribute recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/strongbaseline_r50_30e_pa100k.zip) | Object detection:182M
Attribute recognition:86M |
| Attribute recognition (lightweight) | Single person 7.1ms | [Object detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[Attribute recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/strongbaseline_r50_30e_pa100k.zip) | Object detection:182M
Attribute recognition:86M |
@@ -71,11 +75,14 @@
| Task | End-to-End Speed(ms) | Model | Size |
|:--------------------------------------:|:--------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------:|
| Vehicle detection (high precision) | 25.7ms | [object detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
-| Vehicle detection (lightweight) | 13.2ms | [object detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| Vehicle detection (lightweight) | 13.2ms | [object detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| Vehicle detection (super lightweight) | 10ms(Jetson AGX) | [object detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| Vehicle tracking (high precision) | 40ms | [multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
| Vehicle tracking (lightweight) | 25ms | [multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| Vehicle tracking (super lightweight) | 13.2ms(Jetson AGX) | [multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| Plate Recognition | 4.68ms | [plate detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_det_infer.tar.gz)
[plate recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_rec_infer.tar.gz) | Plate detection:3.9M
Plate recognition:12M |
-| Vehicle attribute | 7.31ms | [attribute recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/vehicle_attribute_model.zip) | 7.2M |
+| Vehicle attribute | 7.31ms | [attribute recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/vehicle_attribute_model.zip) | 7.2M |
+| Lane line Segmentation | 47ms | [Lane line Segmentation](https://bj.bcebos.com/v1/paddledet/models/pipeline/pp_lite_stdc2_bdd100k.zip) | 47M |
@@ -145,6 +152,10 @@ Click to download the model, then unzip and save it in the `. /output_inference`
- [A quick start](docs/tutorials/ppvehicle_press_en.md)
+- [Customized development tutorials]
+
#### Vehicle Retrograde
- [A quick start](docs/tutorials/ppvehicle_retrograde_en.md)
+
+- [Customized development tutorials]
diff --git a/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED.md b/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED.md
index 5628029869f..236287ddb51 100644
--- a/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED.md
+++ b/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED.md
@@ -8,6 +8,8 @@
- [模型下载](#模型下载)
- [配置文件说明](#配置文件说明)
- [预测部署](#预测部署)
+ - [在线视频流](#在线视频流)
+ - [Jetson部署说明](#Jetson部署说明)
- [参数说明](#参数说明)
- [方案介绍](#方案介绍)
- [行人检测](#行人检测)
@@ -49,8 +51,10 @@ PP-Human提供了目标检测、属性识别、行为识别、ReID预训练模
| :---------: | :-------: | :------: |:------: |
| 行人检测(高精度) | 25.1ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| 行人检测(轻量级) | 16.2ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| 行人检测(超轻量级) | 10ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| 行人跟踪(高精度) | 31.8ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| 行人跟踪(轻量级) | 21.0ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| 行人跟踪(超轻量级) | 13.2ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| 跨镜跟踪(REID) | 单人1.5ms | [REID](https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip) | REID:92M |
| 属性识别(高精度) | 单人8.5ms | [目标检测](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[属性识别](https://bj.bcebos.com/v1/paddledet/models/pipeline/PPHGNet_small_person_attribute_954_infer.zip) | 目标检测:182M
属性识别:86M |
| 属性识别(轻量级) | 单人7.1ms | [目标检测](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[属性识别](https://bj.bcebos.com/v1/paddledet/models/pipeline/PPLCNet_x1_0_person_attribute_945_infer.zip) | 目标检测:182M
属性识别:86M |
@@ -126,7 +130,10 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_pph
python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_pphuman.yml -o SKELETON_ACTION.enbale=True --video_file=test_video.mp4 --device=gpu
```
-3. rtsp推拉流
+### 在线视频流
+
+在线视频流解码功能基于opencv的capture函数,支持rtsp、rtmp格式。
+
- rtsp拉流预测
对rtsp拉流的支持,使用--rtsp RTSP [RTSP ...]参数指定一路或者多路rtsp视频流,如果是多路地址中间用空格隔开。(或者video_file后面的视频地址直接更换为rtsp流地址),示例如下:
@@ -147,19 +154,30 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/examples/infe
```
注:
1. rtsp推流服务基于 [rtsp-simple-server](https://github.com/aler9/rtsp-simple-server), 如使用推流功能请先开启该服务.
-2. rtsp推流如果模型处理速度跟不上会出现很明显的卡顿现象,建议跟踪模型使用ppyoloe_s版本,即修改配置中跟踪模型mot_ppyoloe_l_36e_pipeline.zip替换为mot_ppyoloe_s_36e_pipeline.zip。
+使用方法很简单,以linux平台为例:1)下载对应平台release包;2)解压后在命令行执行命令 `./rtsp-simple-server`即可,成功后进入服务开启状态就可以接收视频流了。
+2. rtsp推流如果模型处理速度跟不上会出现很明显的卡顿现象,建议跟踪模型使用ppyoloe_s或ppyoloe-plus-tiny版本,方式为修改配置中跟踪模型mot_ppyoloe_l_36e_pipeline.zip替换为mot_ppyoloe_s_36e_pipeline.zip。
### Jetson部署说明
由于Jetson平台算力相比服务器有较大差距,有如下使用建议:
-1. 模型选择轻量级版本,特别是跟踪模型,推荐使用`ppyoloe_s: https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip`
-2. 开启跟踪跳帧功能,推荐使用2或者3: `skip_frame_num: 3`
+1. 模型选择轻量级版本,我们最新提供了轻量级[PP-YOLOE-Plus Tiny模型](../../../../configs/pphuman/README.md),该模型在Jetson AGX上可以实现4路视频流20fps实时跟踪。
+2. 如果需进一步提升速度,建议开启跟踪跳帧功能,推荐使用2或者3: `skip_frame_num: 3`,该功能当前默认关闭。
+
+上述修改可以直接修改配置文件(推荐),也可以在命令行中修改(字段较长,不推荐)。
+
+PP-YOLOE-Plus Tiny模型在AGX平台不同功能开启时的速度如下:(跟踪人数为3人情况下,以属性为例,总耗时为跟踪13.3+5.2*3≈29ms)
-使用该推荐配置,在TX2平台上可以达到较高速率,经测试属性案例达到20fps。
+| 功能 | 平均每帧耗时(ms) | 运行帧率(fps) |
+|:----------|:----------|:----------|
+| 跟踪 | 13 | 77 |
+| 属性识别 | 29 | 34 |
+| 摔倒识别 | 64.5 | 15.5 |
+| 抽烟识别 | 68.8 | 14.5 |
+| 打电话识别 | 22.5 | 44.5 |
+| 打架识别 | 3.98 | 251 |
-可以直接修改配置文件(推荐),也可以在命令行中修改(字段较长,不推荐)。
### 参数说明
diff --git a/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED_en.md b/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED_en.md
index 8250f2341e0..cd717327105 100644
--- a/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED_en.md
+++ b/deploy/pipeline/docs/tutorials/PPHuman_QUICK_STARTED_en.md
@@ -8,6 +8,8 @@ English | [简体中文](PPHuman_QUICK_STARTED.md)
- [Model Download](#Model-Download)
- [Configuration](#Configuration)
- [Inference Deployment](#Inference-Deployment)
+ - [rtsp_stream](#rtsp_stream)
+ - [Nvidia_Jetson](#Nvidia_Jetson)
- [Parameters](#Parameters)
- [Solutions](#Solutions)
- [Pedestrian Detection](#edestrian-Detection)
@@ -49,8 +51,10 @@ PP-Human provides object detection, attribute recognition, behaviour recognition
|:--------------------------------------:|:--------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------:|
| Pedestrian Detection (high precision) | 25.1ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| Pedestrian Detection (Lightweight) | 16.2ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| Pedestrian detection (super lightweight) | 10ms(Jetson AGX) | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| Pedestrian Tracking (high precision) | 31.8ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip) | 182M |
| Pedestrian Tracking (Lightweight) | 21.0ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip) | 27M |
+| Pedestrian tracking(super lightweight) | 13.2ms(Jetson AGX) | [Multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/pphuman/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.tar.gz) | 17M |
| MTMCT(REID) | Single Person 1.5ms | [REID](https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip) | REID:92M |
| Attribute Recognition (high precision) | Single Person 8.5ms | [Object Detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[Attribute Recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/PPHGNet_small_person_attribute_954_infer.zip) | Object Detection:182M
Attribute Recogniton:86M |
| Attribute Recognition (Lightweight) | Single Person 7.1ms | [Object Detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip)
[Attribute Recogniton](https://bj.bcebos.com/v1/paddledet/models/pipeline/PPLCNet_x1_0_person_attribute_945_infer.zip) | Object Detection:182M
Attribute Recogniton:86M |
@@ -126,7 +130,10 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_pph
python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_pphuman.yml -o SKELETON_ACTION.enbale=True --video_file=test_video.mp4 --device=gpu
```
-3. rtsp push/pull stream
+### rtsp_stream
+
+The online video stream decoding is based on the OpenCV capture function, and normally supports rtsp and rtmp.
+
- rtsp pull stream
For rtsp pull stream, use `--rtsp RTSP [RTSP ...]` parameter to specify one or more rtsp streams. Separate the multiple addresses with a space, or replace the video address directly after the video_file with the rtsp stream address), examples as follows
@@ -148,17 +155,29 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/examples/infe
```
Note:
1. rtsp push stream is based on [rtsp-simple-server](https://github.com/aler9/rtsp-simple-server), please enable this serving first.
-2. the output visualize will be frozen frequently if the model cost too much time, we suggest to use faster model like ppyoloe_s in tracking, this is simply replace mot_ppyoloe_l_36e_pipeline.zip with mot_ppyoloe_s_36e_pipeline.zip in model config yaml file.
+It's very easy to use: 1) download the [release package](https://github.com/aler9/rtsp-simple-server/releases) which is compatible with your workspace. 2) run the command `./rtsp-simple-server`, which works as an rtsp server.
+2. the output visualize will be frozen frequently if the model cost too much time, we suggest to use faster model like ppyoloe_s or ppyoloe_plus_tiny in tracking, this is simply replace mot_ppyoloe_l_36e_pipeline.zip with mot_ppyoloe_s_36e_pipeline.zip in model config yaml file.
-### Jetson Deployment
+### Nvidia_Jetson
Due to the large gap in computing power of the Jetson platform compared to the server, we suggest:
-1. choose a lightweight model, especially for tracking model, `ppyoloe_s: https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip` is recommended
-2. For frame skipping of tracking; we recommend 2 or 3: `skip_frame_num: 3`
+1. choose a lightweight model, we provide a new model named [PP-YOLOE-Plus Tiny](../../../../configs/pphuman/README.md), which achieves 20fps with four rtsp streams working together on Jetson AGX.
+2. For further speedup, you can set frame skipping of tracking; we recommend 2 or 3: `skip_frame_num: 3`
+
+PP-YOLOE-Plus Tiny module speed test data on AGX:(three people in video, for example of attribute,the whole time cost per frame is 13.3+5.2*3≈29ms)
+
+| module | time cost per frame(ms) | speed(fps) |
+|:----------|:----------|:----------|
+| tracking | 13 | 77 |
+| Attribute | 29 | 34 |
+| falldown | 64.5 | 15.5 |
+| smoking | 68.8 | 14.5 |
+| calling | 22.5 | 44.5 |
+| fighting | 3.98 | 251 |
+
-With this recommended configuration, it is possible to achieve higher speeds on the TX2 platform. It has been tested with attribute case, with speeds up to 20fps. The configuration file can be modified directly (recommended) or from the command line (not recommended due to its long fields).
### Parameters
@@ -172,8 +191,7 @@ With this recommended configuration, it is possible to achieve higher speeds on
| --rtsp | Option | rtsp video stream address, supports one or more simultaneous streams input |
| --camera_id | Option | The camera ID for prediction, default is -1 ( for no camera prediction, can be set to 0 - (number of cameras - 1) ), press `q` in the visualization interface during the prediction process to output the prediction result to: output/output.mp4 |
| --device | Option | Running device, options include `CPU/GPU/XPU`, and the default is `CPU`. |
-| --pushurl | Option | push the output video to rtsp stream, normaly start with `rtsp://`; this has higher priority than local video save, while this is set, pipeline will not save local visualize video, the default is "", means this will not work now.
- |
+| --pushurl | Option | push the output video to rtsp stream, normally start with `rtsp://`; this has higher priority than local video save, while this is set, pipeline will not save local visualize video, the default is "", means this will not work now.|
| --output_dir | Option | The root directory for the visualization results, and the default is output/ |
| --run_mode | Option | For GPU, the default is paddle, with (paddle/trt_fp32/trt_fp16/trt_int8) as optional |
| --enable_mkldnn | Option | Whether to enable MKLDNN acceleration in CPU prediction, the default is False |
@@ -192,14 +210,14 @@ The overall solution for PP-Human v2 is shown in the graph below:
### Pedestrian detection
- Take PP-YOLOE L as the object detection model
-- For detailed documentation, please refer to [PP-YOLOE](... /... /... /... /configs/ppyoloe/) and [Multiple-Object-Tracking](pphuman_mot_en.md)
+- For detailed documentation, please refer to [PP-YOLOE](../../../../configs/ppyoloe/) and [Multiple-Object-Tracking](pphuman_mot_en.md)
### Pedestrian tracking
- Vehicle tracking by SDE solution
- Adopt PP-YOLOE L (high precision) and S (lightweight) for detection models
- Adopt the OC-SORT solution for tracking module
-- Refer to [OC-SORT](... /... /... /... /configs/mot/ocsort) and [Multi-Object Tracking](pphuman_mot_en.md) for details
+- Refer to [OC-SORT](../../../../configs/mot/ocsort) and [Multi-Object Tracking](pphuman_mot_en.md) for details
### Multi-camera & multi-pedestrain tracking
diff --git a/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED.md b/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED.md
index 8b140d4729d..b131dfd09a0 100644
--- a/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED.md
+++ b/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED.md
@@ -8,6 +8,8 @@
- [模型下载](#模型下载)
- [配置文件说明](#配置文件说明)
- [预测部署](#预测部署)
+ - [在线视频流](#在线视频流)
+ - [Jetson部署说明](#Jetson部署说明)
- [参数说明](#参数说明)
- [方案介绍](#方案介绍)
- [车辆检测](#车辆检测)
@@ -50,11 +52,13 @@ PP-Vehicle提供了目标检测、属性识别、行为识别、ReID预训练模
| :---------: | :-------: | :------: |:------: |
| 车辆检测(高精度) | 25.7ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
| 车辆检测(轻量级) | 13.2ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| 车辆检测(超轻量级) | 10ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| 车辆跟踪(高精度) | 40ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
| 车辆跟踪(轻量级) | 25ms | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| 车辆跟踪(超轻量级) | 13.2ms(Jetson AGX) | [多目标跟踪](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| 车牌识别 | 4.68ms | [车牌检测](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_det_infer.tar.gz)
[车牌字符识别](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_rec_infer.tar.gz) | 车牌检测:3.9M
车牌字符识别: 12M |
| 车辆属性 | 7.31ms | [车辆属性](https://bj.bcebos.com/v1/paddledet/models/pipeline/vehicle_attribute_model.zip) | 7.2M |
-
+| 车道线检测 | 47ms | [车道线模型](https://bj.bcebos.com/v1/paddledet/models/pipeline/pp_lite_stdc2_bdd100k.zip) | 47M |
下载模型后,解压至`./output_inference`文件夹。
@@ -131,7 +135,10 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/examples/infe
```
-3. rtsp推拉流
+### 在线视频流
+
+在线视频流解码功能基于opencv的capture函数,支持rtsp、rtmp格式。
+
- rtsp拉流预测
对rtsp拉流的支持,使用--rtsp RTSP [RTSP ...]参数指定一路或者多路rtsp视频流,如果是多路地址中间用空格隔开。(或者video_file后面的视频地址直接更换为rtsp流地址),示例如下:
@@ -152,18 +159,25 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/examples/infe
```
注:
1. rtsp推流服务基于 [rtsp-simple-server](https://github.com/aler9/rtsp-simple-server), 如使用推流功能请先开启该服务.
+使用方法很简单,以linux平台为例:1)下载对应平台release包;2)解压后在命令行执行命令 `./rtsp-simple-server`即可,成功后进入服务开启状态就可以接收视频流了。
2. rtsp推流如果模型处理速度跟不上会出现很明显的卡顿现象,建议跟踪模型使用ppyoloe_s版本,即修改配置中跟踪模型mot_ppyoloe_l_36e_pipeline.zip替换为mot_ppyoloe_s_36e_pipeline.zip。
### Jetson部署说明
由于Jetson平台算力相比服务器有较大差距,有如下使用建议:
-1. 模型选择轻量级版本,特别是跟踪模型,推荐使用`ppyoloe_s: https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip`
-2. 开启跟踪跳帧功能,推荐使用2或者3. `skip_frame_num: 3`
+1. 模型选择轻量级版本,我们最新提供了轻量级[PP-YOLOE-Plus Tiny模型](../../../../configs/ppvehicle/README.md),该模型在Jetson AGX上可以实现4路视频流20fps实时跟踪。
+2. 如果需进一步提升速度,建议开启跟踪跳帧功能,推荐使用2或者3: `skip_frame_num: 3`,该功能当前默认关闭。
-使用该推荐配置,在TX2平台上可以达到较高速率,经测试属性案例达到20fps。
+上述修改可以直接修改配置文件(推荐),也可以在命令行中修改(字段较长,不推荐)。
-可以直接修改配置文件(推荐),也可以在命令行中修改(字段较长,不推荐)。
+PP-YOLOE-Plus Tiny模型在AGX平台不同功能开启时的速度如下:(测试视频跟踪车辆为1个)
+
+| 功能 | 平均每帧耗时(ms) | 运行帧率(fps) |
+|:----------|:----------|:----------|
+| 跟踪 | 13 | 77 |
+| 属性识别 | 20.2 | 49.4 |
+| 车牌识别 | - | - |
### 参数说明
@@ -195,7 +209,7 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/examples/infe
PP-Vehicle 整体方案如下图所示:
-

+
@@ -220,3 +234,11 @@ PP-Vehicle 整体方案如下图所示:
### 违章停车识别
- 车辆跟踪模型使用高精度模型PP-YOLOE L,根据车辆的跟踪轨迹以及指定的违停区域判断是否违章停车,如果存在则展示违章停车车牌号。
- 详细文档参考[违章停车识别](ppvehicle_illegal_parking.md)
+
+### 违法分析-逆行
+- 违法分析-逆行,通过使用高精度分割模型PP-Seg,对车道线进行分割拟合,然后与车辆轨迹组合判断车辆行驶方向是否与道路方向一致。
+- 详细文档参考[违法分析-逆行](ppvehicle_retrograde.md)
+
+### 违法分析-压线
+- 违法分析-压线,通过使用高精度分割模型PP-Seg,对车道线进行分割拟合,然后判断车辆区域是否覆盖实线区域,进行压线判断。
+- 详细文档参考[违法分析-压线](ppvehicle_press.md)
diff --git a/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED_en.md b/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED_en.md
index ec720212b30..1abfd1f379a 100644
--- a/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED_en.md
+++ b/deploy/pipeline/docs/tutorials/PPVehicle_QUICK_STARTED_en.md
@@ -8,6 +8,8 @@ English | [简体中文](PPVehicle_QUICK_STARTED.md)
- [Model Download](#Model-Download)
- [Configuration](#Configuration)
- [Inference Deployment](#Inference-Deployment)
+ - [rtsp_stream](#rtsp_stream)
+ - [Nvidia_Jetson](#Nvidia_Jetson)
- [Parameters](#Parameters)
- [Solutions](#Solutions)
- [Vehicle Detection](#Vehicle-Detection)
@@ -49,10 +51,13 @@ PP-Vehicle provides object detection, attribute recognition, behaviour recogniti
|:---------------------------------:|:----------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------:|
| Vehicle Detection(high precision) | 25.7ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
| Vehicle Detection(Lightweight) | 13.2ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| Vehicle detection (super lightweight) | 10ms(Jetson AGX) | [object detection](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| Vehicle Tracking(high precision) | 40ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) | 182M |
| Vehicle Tracking(Lightweight) | 25ms | [Multi-Object Tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) | 27M |
+| Vehicle tracking (super lightweight) | 13.2ms(Jetson AGX) | [multi-object tracking](https://bj.bcebos.com/v1/paddledet/models/pipeline/ppvehicle/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.tar.gz) | 17M |
| License plate recognition | 4.68ms | [License plate recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_det_infer.tar.gz)
[License plate character recognition](https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_rec_infer.tar.gz) | Vehicle Detection:3.9M
License plate character recognition: 12M |
| Vehicle Attribute Recognition | 7.31ms | [Vehicle Attribute](https://bj.bcebos.com/v1/paddledet/models/pipeline/vehicle_attribute_model.zip) | 7.2M |
+| Lane line Segmentation | 47ms | [Lane line Segmentation](https://bj.bcebos.com/v1/paddledet/models/pipeline/pp_lite_stdc2_bdd100k.zip) | 47M |
Download the model and unzip it into the `. /output_inference` folder.
@@ -60,7 +65,7 @@ In the configuration file, the model path defaults to the download path of the m
**Notes:**
-- The accuracy of detection tracking model is obtained from the joint dataset PPVehicle (integration of the public dataset BDD100K-MOT and UA-DETRAC). For more details, please refer to [PP-Vehicle](... /... /... /... /configs/ppvehicle)
+- The accuracy of detection tracking model is obtained from the joint dataset PPVehicle (integration of the public dataset BDD100K-MOT and UA-DETRAC). For more details, please refer to [PP-Vehicle](../../../../configs/ppvehicle)
- Inference speed is obtained at T4 with TensorRT FP16 enabled, which includes data pre-processing, model inference and post-processing.
## Configuration
@@ -129,7 +134,10 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/examples/infe
--region_polygon 600 300 1300 300 1300 800 600 800
```
-3. rtsp push/pull stream
+### rtsp_stream
+
+The online video stream decoding is based on the OpenCV capture function, and normally supports rtsp and rtmp.
+
- rtsp pull stream
For rtsp pull stream, use --rtsp RTSP [RTSP ...] parameter to specify one or more rtsp streams. Separate the multiple addresses with a space, or replace the video address directly after the video_file with the rtsp stream address), examples as follows
@@ -151,16 +159,24 @@ python deploy/pipeline/pipeline.py --config deploy/pipeline/config/examples/infe
```
Note:
1. rtsp push stream is based on [rtsp-simple-server](https://github.com/aler9/rtsp-simple-server), please enable this serving first.
+It's very easy to use: 1) download the [release package](https://github.com/aler9/rtsp-simple-server/releases) which is compatible with your workspace. 2) run the command `./rtsp-simple-server`, which works as an rtsp server.
2. the output visualize will be frozen frequently if the model cost too much time, we suggest to use faster model like ppyoloe_s in tracking, this is simply replace mot_ppyoloe_l_36e_pipeline.zip with mot_ppyoloe_s_36e_pipeline.zip in model config yaml file.
-### Jetson Deployment
+### Nvidia_Jetson
Due to the large gap in computing power of the Jetson platform compared to the server, we suggest:
-1. choose a lightweight model, especially for tracking model, `ppyoloe_s: https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_pipeline.zip` is recommended
-2. For frame skipping of tracking; we recommend 2 or 3: `skip_frame_num: 3`
+1. choose a lightweight model, we provide a new model named [PP-YOLOE-Plus Tiny](../../../../configs/ppvehicle/README.md), which achieves 20fps with four rtsp streams working together on Jetson AGX.
+2. For further speedup, you can set frame skipping of tracking; we recommend 2 or 3: `skip_frame_num: 3`
+
+PP-YOLOE-Plus Tiny module speed test data on AGX:(a single car in the test video)
+
+| module | time cost per frame(ms) | speed(fps) |
+|:----------|:----------|:----------|
+| tracking | 13 | 77 |
+| Attribute | 20.2 | 49.4 |
+| Plate | - | - |
-With this recommended configuration, it is possible to achieve higher speeds on the TX2 platform. It has been tested with attribute case, with speeds up to 20fps. The configuration file can be modified directly (recommended) or from the command line (not recommended due to its long fields).
### Parameters
@@ -176,8 +192,7 @@ With this recommended configuration, it is possible to achieve higher speeds on
| --rtsp | Option | rtsp video stream address, supports one or more simultaneous streams input |
| --camera_id | Option | The camera ID for prediction, default is -1 ( for no camera prediction, can be set to 0 - (number of cameras - 1) ), press `q` in the visualization interface during the prediction process to output the prediction result to: output/output.mp4 |
| --device | Option | Running device, options include `CPU/GPU/XPU`, and the default is `CPU`. |
-| --pushurl | Option | push the output video to rtsp stream, normaly start with `rtsp://`; this has higher priority than local video save, while this is set, pipeline will not save local visualize video, the default is "", means this will not work now.
- |
+| --pushurl | Option | push the output video to rtsp stream, normally start with `rtsp://`; this has higher priority than local video save, while this is set, pipeline will not save local visualize video, the default is "", means this will not work now.|
| --output_dir | Option | The root directory for the visualization results, and the default is output/ |
| --run_mode | Option | For GPU, the default is paddle, with (paddle/trt_fp32/trt_fp16/trt_int8) as optional |
| --enable_mkldnn | Option | Whether to enable MKLDNN acceleration in CPU prediction, the default is False |
@@ -194,7 +209,7 @@ With this recommended configuration, it is possible to achieve higher speeds on
The overall solution for PP-Vehicle v2 is shown in the graph below:
-

+
###
@@ -202,14 +217,14 @@ The overall solution for PP-Vehicle v2 is shown in the graph below:
### Vehicle detection
- Take PP-YOLOE L as the object detection model
-- For detailed documentation, please refer to [PP-YOLOE](... /... /... /... /configs/ppyoloe/) and [Multiple-Object-Tracking](ppvehicle_mot_en.md)
+- For detailed documentation, please refer to [PP-YOLOE](../../../../configs/ppyoloe/) and [Multiple-Object-Tracking](ppvehicle_mot_en.md)
### Vehicle tracking
- Vehicle tracking by SDE solution
- Adopt PP-YOLOE L (high precision) and S (lightweight) for detection models
- Adopt the OC-SORT solution for tracking module
-- Refer to [OC-SORT](... /... /... /... /configs/mot/ocsort) and [Multi-Object Tracking](ppvehicle_mot_en.md) for details
+- Refer to [OC-SORT](../../../../configs/mot/ocsort) and [Multi-Object Tracking](ppvehicle_mot_en.md) for details
### Attribute Recognition
@@ -226,3 +241,14 @@ The overall solution for PP-Vehicle v2 is shown in the graph below:
- Use vehicle tracking model (high precision) PP-YOLOE L to determine whether the parking is illegal based on the vehicle's trajectory and the designated illegal parking area. If it is illegal parking, display the illegal parking plate number.
- For details, please refer to [Illegal Parking Detection](ppvehicle_illegal_parking_en.md)
+
+#### Vehicle Press Line
+
+- Use segmentation model PP-LiteSeg to get the lane line in frame, combine it with the vehicle detection box to judge whether the car is pressing on lines.
+- For details, please refer to [Vehicle Press Line](ppvehicle_press_en.md)
+
+#### Vehicle Retrograde
+
+- Use segmentation model PP-LiteSeg to get the lane line in frame, combine it with the vehicle route to find out vehicles driving against traffic.
+- For details, please refer to [Vehicle Retrograde](ppvehicle_retrograde_en.md)
+
diff --git a/deploy/pipeline/docs/tutorials/ppvehicle_mot_en.md b/deploy/pipeline/docs/tutorials/ppvehicle_mot_en.md
index 9bebc6c05c9..d63c3ed0182 100644
--- a/deploy/pipeline/docs/tutorials/ppvehicle_mot_en.md
+++ b/deploy/pipeline/docs/tutorials/ppvehicle_mot_en.md
@@ -13,7 +13,7 @@ Vehicle detection and tracking are widely used in traffic monitoring and autonom
| Vehicle Detection/Tracking | PP-YOLOE-l | mAP: 63.9
MOTA: 50.1 | Detection: 25.1ms
Tracking:31.8ms | [Link](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip) |
| Vehicle Detection/Tracking | PP-YOLOE-s | mAP: 61.3
MOTA: 46.8 | Detection: 16.2ms
Tracking:21.0ms | [Link](https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip) |
-1. The detection/tracking model uses the PPVehicle dataset ( which integrates BDD100K-MOT and UA-DETRAC). The dataset merged car, truck, bus, van from BDD100K-MOT and car, bus, van from UA-DETRAC all into 1 class vehicle(1). The detection accuracy mAP was tested on the test set of PPVehicle, and the tracking accuracy MOTA was obtained on the test set of BDD100K-MOT (`car, truck, bus, van` were combined into 1 class `vehicle`). For more details about the training procedure, please refer to [ppvehicle](... /... /... /... /configs/ppvehicle).
+1. The detection/tracking model uses the PPVehicle dataset ( which integrates BDD100K-MOT and UA-DETRAC). The dataset merged car, truck, bus, van from BDD100K-MOT and car, bus, van from UA-DETRAC all into 1 class vehicle(1). The detection accuracy mAP was tested on the test set of PPVehicle, and the tracking accuracy MOTA was obtained on the test set of BDD100K-MOT (`car, truck, bus, van` were combined into 1 class `vehicle`). For more details about the training procedure, please refer to [ppvehicle](../../../../configs/ppvehicle).
2. Inference speed is obtained at T4 with TensorRT FP16 enabled, which includes data pre-processing, model inference and post-processing.
## How To Use
From 62c7480f6df9dbf7cb3d0ba4324e748b9cecc384 Mon Sep 17 00:00:00 2001
From: Zhang Yulong <35552275+ZhangYulongg@users.noreply.github.com>
Date: Thu, 16 Feb 2023 14:46:39 +0800
Subject: [PATCH 015/116] fix tipc inference benchmark params (#7759)
---
.../faster_rcnn_swin_tiny_fpn_1x_coco_train_infer_python.txt | 2 +-
test_tipc/configs/mot/ocsort_ppyoloe_train_infer_python.txt | 2 +-
.../ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt | 2 +-
.../ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt | 2 +-
.../rotate/fcosr/fcosr_x50_3x_spine_coco_train_infer_python.txt | 2 +-
.../ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt | 2 +-
.../ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt | 2 +-
7 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/test_tipc/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_1x_coco_train_infer_python.txt b/test_tipc/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_1x_coco_train_infer_python.txt
index 07f799c0b18..b72cdc49a46 100644
--- a/test_tipc/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_1x_coco_train_infer_python.txt
@@ -50,4 +50,4 @@ inference:./deploy/python/infer.py
--run_benchmark:False
--trt_max_shape:1600
===========================infer_benchmark_params===========================
-numpy_infer_input:3x800x1344.npy
\ No newline at end of file
+numpy_infer_input:3x640x640.npy
\ No newline at end of file
diff --git a/test_tipc/configs/mot/ocsort_ppyoloe_train_infer_python.txt b/test_tipc/configs/mot/ocsort_ppyoloe_train_infer_python.txt
index 0053fe17b7f..8c7be8214fa 100644
--- a/test_tipc/configs/mot/ocsort_ppyoloe_train_infer_python.txt
+++ b/test_tipc/configs/mot/ocsort_ppyoloe_train_infer_python.txt
@@ -57,4 +57,4 @@ repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x640x640.npy
\ No newline at end of file
+numpy_infer_input:3x640x640_2.npy
\ No newline at end of file
diff --git a/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt b/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt
index 62ee907afce..e4b0823f82f 100644
--- a/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt
+++ b/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_s_80e_coco_train_infer_python.txt
@@ -57,4 +57,4 @@ repeat:12
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x640x640.npy
\ No newline at end of file
+numpy_infer_input:3x640x640_2.npy
\ No newline at end of file
diff --git a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
index 1d9f5569c7e..0694895ea60 100644
--- a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
+++ b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
@@ -57,4 +57,4 @@ repeat:12
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x640x640.npy
\ No newline at end of file
+numpy_infer_input:3x640x640_2.npy
\ No newline at end of file
diff --git a/test_tipc/configs/rotate/fcosr/fcosr_x50_3x_spine_coco_train_infer_python.txt b/test_tipc/configs/rotate/fcosr/fcosr_x50_3x_spine_coco_train_infer_python.txt
index 30e08fce218..a9f4b4c7e84 100644
--- a/test_tipc/configs/rotate/fcosr/fcosr_x50_3x_spine_coco_train_infer_python.txt
+++ b/test_tipc/configs/rotate/fcosr/fcosr_x50_3x_spine_coco_train_infer_python.txt
@@ -50,4 +50,4 @@ inference:./deploy/python/infer.py
--run_benchmark:False
null:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x1024x1024.npy
\ No newline at end of file
+numpy_infer_input:3x1024x1024_2.npy
\ No newline at end of file
diff --git a/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt b/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt
index 92d2d76f89d..29c80c7b227 100644
--- a/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt
+++ b/test_tipc/configs/rotate/ppyoloe_r/ppyoloe_r_crn_s_3x_spine_coco_train_infer_python.txt
@@ -57,4 +57,4 @@ repeat:12
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x1024x1024.npy
\ No newline at end of file
+numpy_infer_input:3x1024x1024_2.npy
\ No newline at end of file
diff --git a/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt b/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt
index f6882611741..37855d105e2 100644
--- a/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt
+++ b/test_tipc/configs/vitdet/ppyoloe_vit_base_csppan_cae_36e_coco_train_infer_python.txt
@@ -57,4 +57,4 @@ repeat:2
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x640x640.npy
\ No newline at end of file
+numpy_infer_input:3x640x640_2.npy
\ No newline at end of file
From 15614f13a806bf52779772eb8fe1b6e5ebbebd19 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Thu, 16 Feb 2023 15:24:14 +0800
Subject: [PATCH 016/116] [doc] fix smalldet doc and readme (#7775)
* fix smalldet docs, test=document_fix
* fix smalldet configs and readme, test=document_fix
---
configs/pphuman/README.md | 2 +-
configs/ppvehicle/README.md | 2 +-
configs/smalldet/DataDownload.md | 99 +++++
configs/smalldet/README.md | 344 +++++++++---------
...oloe_crn_l_80e_sliced_visdrone_640_025.yml | 6 +
configs/smalldet/visdrone/README.md | 64 +++-
...g => visdrone_0000315_01601_d_0000509.jpg} | Bin
docs/tutorials/data/PrepareDetDataSet.md | 12 +-
8 files changed, 336 insertions(+), 193 deletions(-)
create mode 100644 configs/smalldet/DataDownload.md
rename demo/{0000315_01601_d_0000509.jpg => visdrone_0000315_01601_d_0000509.jpg} (100%)
diff --git a/configs/pphuman/README.md b/configs/pphuman/README.md
index e7bf9538c62..a568f120d45 100644
--- a/configs/pphuman/README.md
+++ b/configs/pphuman/README.md
@@ -13,7 +13,7 @@ PaddleDetection团队提供了针对行人的基于PP-YOLOE的检测模型,用
|PP-YOLOE-l| CrowdHuman | 48.0 | 81.9 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_36e_crowdhuman.pdparams) | [配置文件](./ppyoloe_crn_l_36e_crowdhuman.yml) |
|PP-YOLOE-s| 业务数据集 | 53.2 | - | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_36e_pphuman.pdparams) | [配置文件](./ppyoloe_crn_s_36e_pphuman.yml) |
|PP-YOLOE-l| 业务数据集 | 57.8 | - | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_36e_pphuman.pdparams) | [配置文件](./ppyoloe_crn_l_36e_pphuman.yml) |
-|PP-YOLOE+_t-aux(320)| 业务数据集 | 45.7 | 81.2 | [下载链接](https://paddledet.bj.bcebos.com/models/pyoloe_plus_crn_t_auxhead_320_60e_pphuman.pdparams) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml) |
+|PP-YOLOE+_t-aux(320)| 业务数据集 | 45.7 | 81.2 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.pdparams) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_pphuman.yml) |
**注意:**
diff --git a/configs/ppvehicle/README.md b/configs/ppvehicle/README.md
index 71a21e15e3c..de4b783799e 100644
--- a/configs/ppvehicle/README.md
+++ b/configs/ppvehicle/README.md
@@ -20,7 +20,7 @@ PaddleDetection团队提供了针对自动驾驶场景的基于PP-YOLOE的检测
|PP-YOLOE-s| PPVehicle9cls | 9 | 35.3 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle9cls.pdparams) | [配置文件](./mot_ppyoloe_s_36e_ppvehicle9cls.yml) |
|PP-YOLOE-l| PPVehicle | 1 | 63.9 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_l_36e_ppvehicle.pdparams) | [配置文件](./mot_ppyoloe_l_36e_ppvehicle.yml) |
|PP-YOLOE-s| PPVehicle | 1 | 61.3 | [下载链接](https://paddledet.bj.bcebos.com/models/mot_ppyoloe_s_36e_ppvehicle.pdparams) | [配置文件](./mot_ppyoloe_s_36e_ppvehicle.yml) |
-|PP-YOLOE+_t-aux(320)| PPVehicle | 1 | 53.5 | [下载链接](https://paddledet.bj.bcebos.com/models/pipeline/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.pdparams) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml) |
+|PP-YOLOE+_t-aux(320)| PPVehicle | 1 | 53.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.pdparams) | [配置文件](./ppyoloe_plus_crn_t_auxhead_320_60e_ppvehicle.yml) |
**注意:**
diff --git a/configs/smalldet/DataDownload.md b/configs/smalldet/DataDownload.md
new file mode 100644
index 00000000000..73189056ea1
--- /dev/null
+++ b/configs/smalldet/DataDownload.md
@@ -0,0 +1,99 @@
+# 小目标数据集下载汇总
+
+## 目录
+- [数据集准备](#数据集准备)
+ - [VisDrone-DET](#VisDrone-DET)
+ - [DOTA水平框](#DOTA水平框)
+ - [Xview](#Xview)
+ - [用户自定义数据集](#用户自定义数据集)
+
+## 数据集准备
+
+### VisDrone-DET
+
+VisDrone-DET是一个无人机航拍场景的小目标数据集,整理后的COCO格式VisDrone-DET数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/visdrone.zip),切图后的COCO格式数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/visdrone_sliced.zip),检测其中的**10类**,包括 `pedestrian(1), people(2), bicycle(3), car(4), van(5), truck(6), tricycle(7), awning-tricycle(8), bus(9), motor(10)`,原始数据集[下载链接](https://github.com/VisDrone/VisDrone-Dataset)。
+具体使用和下载请参考[visdrone](../visdrone)。
+
+### DOTA水平框
+
+DOTA是一个大型的遥感影像公开数据集,这里使用**DOTA-v1.0**水平框数据集,切图后整理的COCO格式的DOTA水平框数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/dota_sliced.zip),检测其中的**15类**,
+包括 `plane(0), baseball-diamond(1), bridge(2), ground-track-field(3), small-vehicle(4), large-vehicle(5), ship(6), tennis-court(7),basketball-court(8), storage-tank(9), soccer-ball-field(10), roundabout(11), harbor(12), swimming-pool(13), helicopter(14)`,
+图片及原始数据集[下载链接](https://captain-whu.github.io/DOAI2019/dataset.html)。
+
+### Xview
+
+Xview是一个大型的航拍遥感检测数据集,目标极小极多,切图后整理的COCO格式数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/xview_sliced.zip),检测其中的**60类**,
+具体类别为:
+
+
+
+`Fixed-wing Aircraft(0),
+Small Aircraft(1),
+Cargo Plane(2),
+Helicopter(3),
+Passenger Vehicle(4),
+Small Car(5),
+Bus(6),
+Pickup Truck(7),
+Utility Truck(8),
+Truck(9),
+Cargo Truck(10),
+Truck w/Box(11),
+Truck Tractor(12),
+Trailer(13),
+Truck w/Flatbed(14),
+Truck w/Liquid(15),
+Crane Truck(16),
+Railway Vehicle(17),
+Passenger Car(18),
+Cargo Car(19),
+Flat Car(20),
+Tank car(21),
+Locomotive(22),
+Maritime Vessel(23),
+Motorboat(24),
+Sailboat(25),
+Tugboat(26),
+Barge(27),
+Fishing Vessel(28),
+Ferry(29),
+Yacht(30),
+Container Ship(31),
+Oil Tanker(32),
+Engineering Vehicle(33),
+Tower crane(34),
+Container Crane(35),
+Reach Stacker(36),
+Straddle Carrier(37),
+Mobile Crane(38),
+Dump Truck(39),
+Haul Truck(40),
+Scraper/Tractor(41),
+Front loader/Bulldozer(42),
+Excavator(43),
+Cement Mixer(44),
+Ground Grader(45),
+Hut/Tent(46),
+Shed(47),
+Building(48),
+Aircraft Hangar(49),
+Damaged Building(50),
+Facility(51),
+Construction Site(52),
+Vehicle Lot(53),
+Helipad(54),
+Storage Tank(55),
+Shipping container lot(56),
+Shipping Container(57),
+Pylon(58),
+Tower(59)
+`
+
+
+
+,原始数据集[下载链接](https://challenge.xviewdataset.org/)。
+
+
+### 用户自定义数据集
+
+用户自定义数据集准备请参考[DET数据集标注工具](../../docs/tutorials/data/DetAnnoTools.md)和[DET数据集准备教程](../../docs/tutorials/data/PrepareDetDataSet.md)去准备。
diff --git a/configs/smalldet/README.md b/configs/smalldet/README.md
index be0b0e34f3e..db9c6366c21 100644
--- a/configs/smalldet/README.md
+++ b/configs/smalldet/README.md
@@ -4,31 +4,103 @@
## 内容
- [简介](#简介)
+- [切图使用说明](#切图使用说明)
+ - [小目标数据集下载](#小目标数据集下载)
+ - [统计数据集分布](#统计数据集分布)
+ - [SAHI切图](#SAHI切图)
- [模型库](#模型库)
- [VisDrone模型](#VisDrone模型)
- [COCO模型](#COCO模型)
- [切图模型](#切图模型)
- [拼图模型](#拼图模型)
-- [数据集准备](#数据集准备)
+ - [注意事项](#注意事项)
- [模型库使用说明](#模型库使用说明)
- [训练](#训练)
- [评估](#评估)
- [预测](#预测)
- [部署](#部署)
-- [切图使用说明](#切图使用说明)
- - [统计数据集分布](#统计数据集分布)
- - [SAHI切图](#SAHI切图)
- [引用](#引用)
+
## 简介
-PaddleDetection团队提供了针对VisDrone-DET、DOTA水平框、Xview等小目标场景数据集的基于PP-YOLOE的检测模型,以及提供了一套使用[SAHI](https://github.com/obss/sahi)(Slicing Aided Hyper Inference)工具切图和拼图的方案,用户可以下载模型进行使用。
+PaddleDetection团队提供了针对VisDrone-DET、DOTA水平框、Xview等小目标场景数据集的基于PP-YOLOE改进的检测模型 PP-YOLOE-SOD,以及提供了一套使用[SAHI](https://github.com/obss/sahi)(Slicing Aided Hyper Inference)工具的切图和拼图的方案。
+
+ - PP-YOLOE-SOD 是PaddleDetection团队自研的小目标检测特色模型,使用**数据集分布相关的基于向量的DFL算法** 和 **针对小目标优化的中心先验优化策略**,并且**在模型的Neck(FPN)结构中加入Transformer模块**,以及结合增加P2层、使用large size等策略,最终在多个小目标数据集上达到极高的精度。
+
+ - 切图拼图方案**适用于任何检测模型**,建议**使用 PP-YOLOE-SOD 结合切图拼图方案**一起使用以达到最佳的效果。
+
+ - 官方 AI Studio 教程案例请参考 [基于PP-YOLOE-SOD的无人机航拍图像检测案例全流程实操](https://aistudio.baidu.com/aistudio/projectdetail/5036782),欢迎一起动手实践学习。
+
+ - 第三方 AI Studio 教程案例可参考 [PPYOLOE:遥感场景下的小目标检测与部署(切图版)](https://aistudio.baidu.com/aistudio/projectdetail/4493701) 和 [涨分神器!基于PPYOLOE的切图和拼图解决方案](https://aistudio.baidu.com/aistudio/projectdetail/4438275),欢迎一起动手实践学习。
+
+**注意:**
+ - **不通过切图拼图而直接使用原图或子图**去训练评估预测,推荐使用 PP-YOLOE-SOD 模型,更多细节和消融实验可参照[COCO模型](#COCO模型)和[VisDrone模型](./visdrone)。
+ - 是否需要切图然后使用子图去**训练**,建议首先参照[切图使用说明](#切图使用说明)中的[统计数据集分布](#统计数据集分布)分析一下数据集再确定,一般数据集中**所有的目标均极小**的情况下推荐切图去训练。
+ - 是否需要切图然后使用子图去**预测**,建议在切图训练的情况下,配合着**同样操作的切图策略和参数**去预测(inference)效果更佳。但其实即便不切图训练,也可进行切图预测(inference),只需**在常规的预测命令最后加上`--slice_infer`以及相关子图参数**即可。
+ - 是否需要切图然后使用子图去**评估**,建议首先确保制作生成了合适的子图验证集,以及确保对应的标注框制作无误,并需要参照[模型库使用说明-评估](#评估)去**改动配置文件中的验证集(EvalDataset)的相关配置**,然后**在常规的评估命令最后加上`--slice_infer`以及相关子图参数**即可。
+ - `--slice_infer`的操作在PaddleDetection中默认**子图预测框会自动组合并拼回原图**,默认返回的是原图上的预测框,此方法也**适用于任何训好的检测模型**,无论是否切图训练。
+
+
+## 切图使用说明
+
+### 小目标数据集下载
+PaddleDetection团队整理提供的VisDrone-DET、DOTA水平框、Xview等小目标场景数据集的下载链接可以参照 [DataDownload.md](./DataDownload.md)。
+
+### 统计数据集分布
+
+对于待训的数据集(默认已处理为COCO格式,参照 [COCO格式数据集准备](../../docs/tutorials/data/PrepareDetDataSet.md#用户数据转成COCO数据)),首先统计**标注框的平均宽高占图片真实宽高的比例**分布:
+
+以DOTA水平框数据集的train数据集为例:
+
+```bash
+python tools/box_distribution.py --json_path dataset/DOTA/annotations/train.json --out_img box_distribution.jpg --eval_size 640 --small_stride 8
+```
+ - `--json_path` :待统计数据集 COCO 格式 annotation 的json标注文件路径
+ - `--out_img` :输出的统计分布图的路径
+ - `--eval_size` :推理尺度(默认640)
+ - `--small_stride` :模型最小步长(默认8)
+
+统计结果打印如下:
+```bash
+Suggested reg_range[1] is 13 # DFL算法中推荐值,在 PP-YOLOE-SOD 模型的配置文件的head中设置为此值,效果最佳
+Mean of all img_w is 2304.3981547196595 # 原图宽的平均值
+Mean of all img_h is 2180.9354151880766 # 原图高的平均值
+Median of ratio_w is 0.03799439775910364 # 标注框的宽与原图宽的比例的中位数
+Median of ratio_h is 0.04074914637387802 # 标注框的高与原图高的比例的中位数
+all_img with box: 1409 # 数据集图片总数(排除无框或空标注的图片)
+all_ann: 98905 # 数据集标注框总数
+Distribution saved as box_distribution.jpg
+```
+
+**注意:**
+- 一般情况下,在原始数据集全部有标注框的图片中,**原图宽高的平均值大于1500像素,且有1/2以上的图片标注框的平均宽高与原图宽高比例小于0.04时(通过打印中位数得到该值)**,建议进行切图训练。
+- `Suggested reg_range[1]` 为数据集在优化后DFL算法中推荐的`reg_range`上限,即`reg_max + 1`,在 PP-YOLOE-SOD 模型的配置文件的head中设置这个值。
+
+
+### SAHI切图
-AI Studio 官方教程案例请参考[基于PP-YOLOE-SOD的无人机航拍图像检测案例全流程实操](https://aistudio.baidu.com/aistudio/projectdetail/5036782),欢迎一起动手实践学习。
+针对需要切图的数据集,使用[SAHI](https://github.com/obss/sahi)库进行切图:
+
+#### 安装SAHI库:
+
+参考[SAHI installation](https://github.com/obss/sahi/blob/main/README.md#installation)进行安装:`pip install sahi`。
+
+#### 基于SAHI切图
+
+以DOTA水平框数据集的train数据集为例,切分后的**子图文件夹**与**子图json标注文件**共同保存在`dota_sliced`文件夹下,分别命名为`train_images_500_025`、`train_500_025.json`:
+
+```bash
+python tools/slice_image.py --image_dir dataset/DOTA/train/ --json_path dataset/DOTA/annotations/train.json --output_dir dataset/dota_sliced --slice_size 500 --overlap_ratio 0.25
+```
+ - `--image_dir`:原始数据集图片文件夹的路径
+ - `--json_path`:原始数据集COCO格式的json标注文件的路径
+ - `--output_dir`:切分后的子图及其json标注文件保存的路径
+ - `--slice_size`:切分以后子图的边长尺度大小(默认切图后为正方形)
+ - `--overlap_ratio`:切分时的子图之间的重叠率
**注意:**
- - **是否需要切图**,建议参照[切图使用说明](#切图使用说明)中的[统计数据集分布](#统计数据集分布)先分析一下数据集再确定,一般数据集中**所有目标均极小**的情况下推荐切图训练和切图预测。
- - 不通过切图拼图而**直接使用原图**的方案也可以参照[visdrone](./visdrone)。
- - 第三方AI Studio教程案例可参考 [PPYOLOE:遥感场景下的小目标检测与部署(切图版)](https://aistudio.baidu.com/aistudio/projectdetail/4493701) 和 [涨分神器!基于PPYOLOE的切图和拼图解决方案](https://aistudio.baidu.com/aistudio/projectdetail/4438275)。
+- 如果切图然后使用子图去**训练**,则只能**离线切图**,即切完图后保存成子图,存放在磁盘空间中。
+- 如果切图然后使用子图去**评估或预测**,则既可以**离线切图**,也可以**在线切图**,PaddleDetection中支持切图并自动拼图组合结果到原图上。
## 模型库
@@ -56,6 +128,21 @@ AI Studio 官方教程案例请参考[基于PP-YOLOE-SOD的无人机航拍图像
- **P2**表示增加P2层(1/4下采样层)的特征,共输出4个PPYOLOEHead。
- **Alpha**表示对CSPResNet骨干网络增加一个可学习的权重参数Alpha参与训练。
- **largesize**表示使用**以1600尺度为基础的多尺度训练**和**1920尺度预测**,相应的训练batch_size也减小,以速度来换取高精度。
+ - MatlabAPI测试是使用官网评测工具[VisDrone2018-DET-toolkit](https://github.com/VisDrone/VisDrone2018-DET-toolkit)。
+
+
+ 快速开始
+
+```shell
+# 训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l_80e_visdrone.yml --amp --eval
+# 评估
+python tools/eval.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l_80e_visdrone.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_visdrone.pdparams
+# 预测
+python tools/infer.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l_80e_visdrone.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_visdrone.pdparams --infer_img=demo/visdrone_0000315_01601_d_0000509.jpg --draw_threshold=0.25
+```
+
+
### COCO模型
@@ -69,6 +156,20 @@ AI Studio 官方教程案例请参考[基于PP-YOLOE-SOD的无人机航拍图像
- 上表中的模型均为**使用原图训练**,也**原图评估预测**,网络输入尺度为640x640,训练集为COCO的train2017,验证集为val2017,均为8卡总batch_size为64训练80 epoch。
- **SOD**表示使用**基于向量的DFL算法**和针对小目标的**中心先验优化策略**,并**在模型的Neck结构中加入transformer**,可在 APsmall 上提升1.9。
+
+ 快速开始
+
+```shell
+# 训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco.yml --amp --eval
+# 评估
+python tools/eval.py -c configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_coco.pdparams
+# 预测
+python tools/infer.py -c configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_coco.pdparams --infer_img=demo/000000014439_640x640.jpg --draw_threshold=0.25
+```
+
+
+
### 切图模型
@@ -83,18 +184,55 @@ AI Studio 官方教程案例请参考[基于PP-YOLOE-SOD的无人机航拍图像
- **SLICE_SIZE**表示使用SAHI工具切图后子图的边长大小,**OVERLAP_RATIO**表示切图的子图之间的重叠率。
- VisDrone-DET的模型与[拼图模型](#拼图模型)表格中的VisDrone-DET是**同一个模型权重**,但此处AP精度是在**切图后的子图验证集**上评估的结果。
+
+ 快速开始
+
+```shell
+# 训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml --amp --eval
+# 子图直接评估
+python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams
+# 子图直接预测
+python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_img=demo/visdrone_0000315_01601_d_0000509.jpg --draw_threshold=0.25
+```
+
+
+
### 拼图模型
| 模型 | 数据集 | SLICE_SIZE | OVERLAP_RATIO | 类别数 | mAPval
0.5:0.95 | APval
0.5 | 下载链接 | 配置文件 |
|:---------|:---------------:|:---------------:|:---------------:|:------:|:-----------------------:|:-------------------:|:---------:| :-----: |
-|PP-YOLOE-l (原图评估)| VisDrone-DET| 640 | 0.25 | 10 | 29.7 | 48.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](./ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml) |
-|PP-YOLOE-l (拼图评估)| VisDrone-DET| 640 | 0.25 | 10 | 37.2 | 59.4 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](./ppyoloe_crn_l_80e_sliced_visdrone_640_025_slice_infer.yml) |
+|PP-YOLOE-l (原图直接评估)| VisDrone-DET| 640 | 0.25 | 10 | 29.7 | 48.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](./ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml) |
+|PP-YOLOE-l (切图拼图评估)| VisDrone-DET| 640 | 0.25 | 10 | 37.3 | 59.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](./ppyoloe_crn_l_80e_sliced_visdrone_640_025_slice_infer.yml) |
**注意:**
- 上表中的模型均为使用**切图后的子图**训练,评估预测时分为两种,**直接使用原图**评估预测,和**使用子图自动拼成原图**评估预测,AP精度均为**原图验证集**上评估的结果。
- **SLICE_SIZE**表示使用SAHI工具切图后子图的边长大小,**OVERLAP_RATIO**表示切图的子图之间的重叠率。
- - VisDrone-DET的模型与[切图模型](#切图模型)表格中的VisDrone-DET是**同一个模型权重**,但此处AP精度是在**原图验证集**上评估的结果。
+ - VisDrone-DET的模型与[切图模型](#切图模型)表格中的VisDrone-DET是**同一个模型权重**,但此处AP精度是在**原图验证集**上评估的结果,需要提前修改`ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml`里的`EvalDataset`的默认的子图验证集路径为以下**原图验证集路径**:
+ ```
+ EvalDataset:
+ !COCODataSet
+ image_dir: VisDrone2019-DET-val
+ anno_path: val.json
+ dataset_dir: dataset/visdrone
+ ```
+
+
+ 快速开始
+
+```shell
+# 训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml --amp --eval
+# 原图直接评估,注意需要提前修改此yml中的 `EvalDataset` 的默认的子图验证集路径 为 原图验证集路径:
+python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams
+# 切图拼图评估,加上 --slice_infer,注意是使用的带 _slice_infer 后缀的yml配置文件
+python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025_slice_infer.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --slice_infer
+# 切图拼图预测,加上 --slice_infer
+python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_img=demo/visdrone_0000315_01601_d_0000509.jpg --draw_threshold=0.25 --slice_infer
+```
+
+
### 注意事项
@@ -103,124 +241,42 @@ AI Studio 官方教程案例请参考[基于PP-YOLOE-SOD的无人机航拍图像
- DOTA水平框和Xview数据集均是**切图后训练**,AP指标为**切图后的子图val上的指标**。
- VisDrone-DET数据集请参照[visdrone](./visdrone),**可使用原图训练,也可使用切图后训练**,这上面表格中的指标均是使用VisDrone-DET的val子集做验证而未使用test_dev子集。
- PP-YOLOE模型训练过程中使用8 GPUs进行混合精度训练,如果**GPU卡数**或者**batch size**发生了改变,你需要按照公式 **lrnew = lrdefault * (batch_sizenew * GPU_numbernew) / (batch_sizedefault * GPU_numberdefault)** 调整学习率。
-- 常用训练验证部署等步骤请参考[ppyoloe](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/ppyoloe#getting-start)。
+- 常用训练验证部署等步骤请参考[ppyoloe](../ppyoloe#getting-start)。
- 自动切图和拼图的推理预测需添加设置`--slice_infer`,具体见下文[模型库使用说明](#模型库使用说明)中的[预测](#预测)和[部署](#部署)。
- 自动切图和拼图过程,参照[2.3 子图拼图评估](#评估)。
-## 数据集准备
-
-### VisDrone-DET
-
-VisDrone-DET是一个无人机航拍场景的小目标数据集,整理后的COCO格式VisDrone-DET数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/visdrone.zip),切图后的COCO格式数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/visdrone_sliced.zip),检测其中的**10类**,包括 `pedestrian(1), people(2), bicycle(3), car(4), van(5), truck(6), tricycle(7), awning-tricycle(8), bus(9), motor(10)`,原始数据集[下载链接](https://github.com/VisDrone/VisDrone-Dataset)。
-具体使用和下载请参考[visdrone](../visdrone)。
-
-### DOTA水平框
-
-DOTA是一个大型的遥感影像公开数据集,这里使用**DOTA-v1.0**水平框数据集,切图后整理的COCO格式的DOTA水平框数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/dota_sliced.zip),检测其中的**15类**,
-包括 `plane(0), baseball-diamond(1), bridge(2), ground-track-field(3), small-vehicle(4), large-vehicle(5), ship(6), tennis-court(7),basketball-court(8), storage-tank(9), soccer-ball-field(10), roundabout(11), harbor(12), swimming-pool(13), helicopter(14)`,
-图片及原始数据集[下载链接](https://captain-whu.github.io/DOAI2019/dataset.html)。
-
-### Xview
-
-Xview是一个大型的航拍遥感检测数据集,目标极小极多,切图后整理的COCO格式数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/xview_sliced.zip),检测其中的**60类**,
-具体类别为:
-
-
-
-`Fixed-wing Aircraft(0),
-Small Aircraft(1),
-Cargo Plane(2),
-Helicopter(3),
-Passenger Vehicle(4),
-Small Car(5),
-Bus(6),
-Pickup Truck(7),
-Utility Truck(8),
-Truck(9),
-Cargo Truck(10),
-Truck w/Box(11),
-Truck Tractor(12),
-Trailer(13),
-Truck w/Flatbed(14),
-Truck w/Liquid(15),
-Crane Truck(16),
-Railway Vehicle(17),
-Passenger Car(18),
-Cargo Car(19),
-Flat Car(20),
-Tank car(21),
-Locomotive(22),
-Maritime Vessel(23),
-Motorboat(24),
-Sailboat(25),
-Tugboat(26),
-Barge(27),
-Fishing Vessel(28),
-Ferry(29),
-Yacht(30),
-Container Ship(31),
-Oil Tanker(32),
-Engineering Vehicle(33),
-Tower crane(34),
-Container Crane(35),
-Reach Stacker(36),
-Straddle Carrier(37),
-Mobile Crane(38),
-Dump Truck(39),
-Haul Truck(40),
-Scraper/Tractor(41),
-Front loader/Bulldozer(42),
-Excavator(43),
-Cement Mixer(44),
-Ground Grader(45),
-Hut/Tent(46),
-Shed(47),
-Building(48),
-Aircraft Hangar(49),
-Damaged Building(50),
-Facility(51),
-Construction Site(52),
-Vehicle Lot(53),
-Helipad(54),
-Storage Tank(55),
-Shipping container lot(56),
-Shipping Container(57),
-Pylon(58),
-Tower(59)
-`
-
-
-
-,原始数据集[下载链接](https://challenge.xviewdataset.org/)。
-
+## 模型库使用说明
-### 用户自定义数据集准备
+### 训练
-用户自定义数据集准备请参考[DET数据集标注工具](../../docs/tutorials/data/DetAnnoTools.md)和[DET数据集准备教程](../../docs/tutorials/data/PrepareDetDataSet.md)去准备。
+#### 1.1 原图训练
+首先将待训数据集制作成COCO数据集格式,然后按照PaddleDetection的模型的常规训练流程训练即可。
+执行以下指令使用混合精度训练COCO数据集:
-## 模型库使用说明
+```bash
+python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/ppyoloe_plus_sod_crn_l_80e_coco.yml --amp --eval
+```
-### 训练
+**注意:**
+- 使用默认配置训练需要设置`--amp`以避免显存溢出,`--eval`表示边训边验证,会自动保存最佳精度的模型权重。
-首先将你的数据集为COCO数据集格式,然后使用SAHI切图工具进行离线切图,对保存的子图按常规检测模型的训练流程走即可。
+#### 1.2 切图训练
+首先将待训数据集制作成COCO数据集格式,然后使用SAHI切图工具进行**离线切图**,对保存的子图按**常规检测模型的训练流程**走即可。
也可直接下载PaddleDetection团队提供的切图后的VisDrone-DET、DOTA水平框、Xview数据集。
-执行以下指令使用混合精度训练PP-YOLOE
+执行以下指令使用混合精度训练VisDrone切图数据集:
```bash
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml --amp --eval
```
-**注意:**
-- 使用默认配置训练需要设置`--amp`以避免显存溢出。
### 评估
#### 2.1 子图评估
-
-默认评估方式是子图评估,子图数据集的验证集设置为:
+**默认评估方式是子图评估**,子图数据集的验证集设置为:
```
EvalDataset:
!COCODataSet
@@ -234,7 +290,7 @@ CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80
```
#### 2.2 原图评估
-修改验证集的标注文件路径为原图标注文件:
+修改验证集的标注文件路径为**原图标注文件**:
```
EvalDataset:
!COCODataSet
@@ -248,7 +304,7 @@ CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80
```
#### 2.3 子图拼图评估
-修改验证集的标注文件路径为原图标注文件:
+修改验证集的标注文件路径为**原图标注文件**:
```
# very slow, preferly eval with a determined weights(xx.pdparams)
# if you want to eval during training, change SlicedCOCODataSet to COCODataSet and delete sliced_size and overlap_ratio
@@ -287,14 +343,14 @@ EvalDataset:
#### 3.1 子图或原图直接预测
与评估流程基本相同,可以在提前切好并存下来的子图上预测,也可以对原图预测,如:
```bash
-CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_img=demo/0000315_01601_d_0000509.jpg --draw_threshold=0.25
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_img=demo/visdrone_0000315_01601_d_0000509.jpg --draw_threshold=0.25
```
#### 3.2 原图自动切图并拼图预测
也可以对原图进行自动切图并拼图重组来预测原图,如:
```bash
# 单张图
-CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_img=demo/0000315_01601_d_0000509.jpg --draw_threshold=0.25 --slice_infer --slice_size 640 640 --overlap_ratio 0.25 0.25 --combine_method=nms --match_threshold=0.6 --match_metric=ios --save_results=True
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_img=demo/visdrone_0000315_01601_d_0000509.jpg --draw_threshold=0.25 --slice_infer --slice_size 640 640 --overlap_ratio 0.25 0.25 --combine_method=nms --match_threshold=0.6 --match_metric=ios --save_results=True
# 或图片文件夹
CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_dir=demo/ --draw_threshold=0.25 --slice_infer --slice_size 640 640 --overlap_ratio 0.25 0.25 --combine_method=nms --match_threshold=0.6 --match_metric=ios
```
@@ -317,14 +373,14 @@ CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/smalldet/ppyoloe_
#### 4.2 使用原图或子图直接推理
```bash
# deploy infer
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_crn_l_80e_sliced_visdrone_640_025 --image_file=demo/0000315_01601_d_0000509.jpg --device=GPU --save_images --threshold=0.25
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_crn_l_80e_sliced_visdrone_640_025 --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=GPU --save_images --threshold=0.25
```
#### 4.3 使用原图自动切图并拼图重组结果来推理
```bash
# deploy slice infer
# 单张图
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_crn_l_80e_sliced_visdrone_640_025 --image_file=demo/0000315_01601_d_0000509.jpg --device=GPU --save_images --threshold=0.25 --slice_infer --slice_size 640 640 --overlap_ratio 0.25 0.25 --combine_method=nms --match_threshold=0.6 --match_metric=ios --save_results=True
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_crn_l_80e_sliced_visdrone_640_025 --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=GPU --save_images --threshold=0.25 --slice_infer --slice_size 640 640 --overlap_ratio 0.25 0.25 --combine_method=nms --match_threshold=0.6 --match_metric=ios --save_results=True
# 或图片文件夹
CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_crn_l_80e_sliced_visdrone_640_025 --image_dir=demo/ --device=GPU --save_images --threshold=0.25 --slice_infer --slice_size 640 640 --overlap_ratio 0.25 0.25 --combine_method=nms --match_threshold=0.6 --match_metric=ios
```
@@ -336,64 +392,6 @@ CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inferenc
- 设置`--save_results`表示保存图片结果为json文件,一般只单张图预测时使用;
-
-## 切图使用说明
-
-### 统计数据集分布
-
-首先统计所用数据集标注框的平均宽高占图片真实宽高的比例分布:
-
-```bash
-python tools/box_distribution.py --json_path ../../dataset/DOTA/annotations/train.json --out_img box_distribution.jpg
-```
-- `--json_path` :待统计数据集COCO 格式 annotation 的json文件路径
-- `--eval_size` :推理尺度(默认640)
-- `--small_sride` :模型最小步长(默认8)
-- `--out_img` :输出的统计分布图路径
-
-以DOTA数据集的train数据集为例,统计结果打印如下:
-```bash
-Suggested reg_range[1] is 13
-Mean of all img_w is 2304.3981547196595
-Mean of all img_h is 2180.9354151880766
-Median of ratio_w is 0.03799439775910364
-Median of ratio_h is 0.04074914637387802
-all_img with box: 1409
-all_ann: 98905
-Distribution saved as box_distribution.jpg
-```
-
-**注意:**
-- `Suggested reg_range[1]` 为数据集在优化后DFL算法中推荐的`reg_range`上限,即` reg_max+1`。
-- 当原始数据集全部有标注框的图片中,**原图宽高均值大于1500且有1/2以上的图片标注框的平均宽高与原图宽高比例小于0.04时**,建议进行切图训练。
-
-### SAHI切图
-
-针对需要切图的数据集,使用[SAHI](https://github.com/obss/sahi)库进行切分:
-
-#### 安装SAHI库:
-
-参考[SAHI installation](https://github.com/obss/sahi/blob/main/README.md#installation)进行安装
-
-```bash
-pip install sahi
-```
-
-#### 基于SAHI切图
-
-```bash
-python tools/slice_image.py --image_dir ../../dataset/DOTA/train/ --json_path ../../dataset/DOTA/annotations/train.json --output_dir ../../dataset/dota_sliced --slice_size 500 --overlap_ratio 0.25
-```
-
-- `--image_dir`:原始数据集图片文件夹的路径
-- `--json_path`:原始数据集COCO格式的json标注文件的路径
-- `--output_dir`:切分后的子图及其json标注文件保存的路径
-- `--slice_size`:切分以后子图的边长尺度大小(默认切图后为正方形)
-- `--overlap_ratio`:切分时的子图之间的重叠率
-- 以上述代码为例,切分后的子图文件夹与json标注文件共同保存在`dota_sliced`文件夹下,分别命名为`train_images_500_025`、`train_500_025.json`。
-
-
-
## 引用
```
@article{akyon2022sahi,
diff --git a/configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml b/configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml
index 9df37919bf1..26275899ff3 100644
--- a/configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml
+++ b/configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml
@@ -50,3 +50,9 @@ EvalDataset:
image_dir: val_images_640_025
anno_path: val_640_025.json
dataset_dir: dataset/visdrone_sliced
+
+# EvalDataset:
+# !COCODataSet
+# image_dir: VisDrone2019-DET-val
+# anno_path: val.json
+# dataset_dir: dataset/visdrone
diff --git a/configs/smalldet/visdrone/README.md b/configs/smalldet/visdrone/README.md
index 93367b19dce..fbe4ad82224 100644
--- a/configs/smalldet/visdrone/README.md
+++ b/configs/smalldet/visdrone/README.md
@@ -1,6 +1,6 @@
# VisDrone-DET 小目标检测模型
-PaddleDetection团队提供了针对VisDrone-DET小目标数航拍场景的基于PP-YOLOE的检测模型,用户可以下载模型进行使用。整理后的COCO格式VisDrone-DET数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/visdrone.zip),检测其中的10类,包括 `pedestrian(1), people(2), bicycle(3), car(4), van(5), truck(6), tricycle(7), awning-tricycle(8), bus(9), motor(10)`,原始数据集[下载链接](https://github.com/VisDrone/VisDrone-Dataset)。
+PaddleDetection团队提供了针对VisDrone-DET小目标航拍场景的基于PP-YOLOE的检测模型,用户可以下载模型进行使用。整理后的COCO格式VisDrone-DET数据集[下载链接](https://bj.bcebos.com/v1/paddledet/data/smalldet/visdrone.zip),检测其中的10类,包括 `pedestrian(1), people(2), bicycle(3), car(4), van(5), truck(6), tricycle(7), awning-tricycle(8), bus(9), motor(10)`,原始数据集[下载链接](https://github.com/VisDrone/VisDrone-Dataset)。其他相关小目标数据集可参照 [DataDownload.md](../DataDownload.md)。
**注意:**
- VisDrone-DET数据集包括**train集6471张,val集548张,test_dev集1610张**,test-challenge集1580张(未开放检测框标注),前三者均有开放检测框标注。
@@ -30,26 +30,66 @@ PaddleDetection团队提供了针对VisDrone-DET小目标数航拍场景的基
- **P2**表示增加P2层(1/4下采样层)的特征,共输出4个PPYOLOEHead。
- **Alpha**表示对CSPResNet骨干网络增加可一个学习权重参数Alpha参与训练。
- **largesize**表示使用**以1600尺度为基础的多尺度训练**和**1920尺度预测**,相应的训练batch_size也减小,以速度来换取高精度。
+ - MatlabAPI测试是使用官网评测工具[VisDrone2018-DET-toolkit](https://github.com/VisDrone/VisDrone2018-DET-toolkit)。
+
+
+ 快速开始
+
+```shell
+# 训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l_80e_visdrone.yml --amp --eval
+# 评估
+python tools/eval.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l_80e_visdrone.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_visdrone.pdparams
+# 预测
+python tools/infer.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l_80e_visdrone.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_visdrone.pdparams --infer_img=demo/visdrone_0000315_01601_d_0000509.jpg --draw_threshold=0.25
+```
+
+
## 子图训练,原图评估和拼图评估:
| 模型 | 数据集 | SLICE_SIZE | OVERLAP_RATIO | 类别数 | mAPval
0.5:0.95 | APval
0.5 | 下载链接 | 配置文件 |
|:---------|:---------------:|:---------------:|:---------------:|:------:|:-----------------------:|:-------------------:|:---------:| :-----: |
-|PP-YOLOE-l(原图评估)| VisDrone-DET| 640 | 0.25 | 10 | 29.7 | 48.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](../ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml) |
-|PP-YOLOE-l (拼图评估)| VisDrone-DET| 640 | 0.25 | 10 | 37.2 | 59.4 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](../ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml) |
+|PP-YOLOE-l(子图直接评估)| VisDrone-DET| 640 | 0.25 | 10 | 38.5(子图val) | 60.2 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](../ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml) |
+|PP-YOLOE-l(原图直接评估)| VisDrone-DET| 640 | 0.25 | 10 | 29.7(原图val) | 48.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](../ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml) |
+|PP-YOLOE-l (切图拼图评估)| VisDrone-DET| 640 | 0.25 | 10 | 37.3(原图val) | 59.5 | [下载链接](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams) | [配置文件](../ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml) |
**注意:**
- 上表中的模型均为使用**切图后的子图**训练,评估预测时分为两种,**直接使用原图**评估预测,和**使用子图自动拼成原图**评估预测,AP精度均为**原图验证集**上评估的结果。。
- **SLICE_SIZE**表示使用SAHI工具切图后子图的边长大小,**OVERLAP_RATIO**表示切图的子图之间的重叠率。
- - VisDrone-DET的模型与[切图模型](../README.md#切图模型)表格中的VisDrone-DET是**同一个模型权重**,但此处AP精度是在**原图验证集**上评估的结果。
+ - VisDrone-DET的模型与[切图模型](../README.md#切图模型)表格中的VisDrone-DET是**同一个模型权重**,但此处AP精度是在**原图验证集**上评估的结果,需要提前修改`ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml`里的`EvalDataset`的默认的子图验证集路径为以下**原图验证集路径**:
+ ```
+ EvalDataset:
+ !COCODataSet
+ image_dir: VisDrone2019-DET-val
+ anno_path: val.json
+ dataset_dir: dataset/visdrone
+ ```
+
+
+ 快速开始
+
+```shell
+# 训练
+python -m paddle.distributed.launch --log_dir=logs/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml --amp --eval
+# 子图直接评估
+python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams
+# 原图直接评估,注意需要提前修改此yml中的 `EvalDataset` 的默认的子图验证集路径 为 原图验证集路径:
+python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams
+# 切图拼图评估,加上 --slice_infer,注意是使用的带 _slice_infer 后缀的yml配置文件
+python tools/eval.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025_slice_infer.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --slice_infer
+# 切图拼图预测,加上 --slice_infer
+python tools/infer.py -c configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_80e_sliced_visdrone_640_025.pdparams --infer_img=demo/visdrone_0000315_01601_d_0000509.jpg --draw_threshold=0.25 --slice_infer
+```
+
+
## 注意事项:
- PP-YOLOE模型训练过程中使用8 GPUs进行混合精度训练,如果**GPU卡数**或者**batch size**发生了改变,你需要按照公式 **lrnew = lrdefault * (batch_sizenew * GPU_numbernew) / (batch_sizedefault * GPU_numberdefault)** 调整学习率。
- - 具体使用教程请参考[ppyoloe](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/ppyoloe#getting-start)。
+ - 具体使用教程请参考[ppyoloe](../../ppyoloe#getting-start)。
- MatlabAPI测试是使用官网评测工具[VisDrone2018-DET-toolkit](https://github.com/VisDrone/VisDrone2018-DET-toolkit)。
- - 切图训练模型的配置文件及训练相关流程请参照[README](../README.cn)。
## PP-YOLOE+_SOD 部署模型
@@ -89,16 +129,16 @@ python tools/export_model.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l
paddle2onnx --model_dir output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 12 --save_file ppyoloe_plus_sod_crn_l_largesize_80e_visdrone.onnx
# 推理单张图片
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/0000315_01601_d_0000509.jpg --device=gpu --run_mode=trt_fp16
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=gpu --run_mode=trt_fp16
# 推理文件夹下的所有图片
CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_dir=demo/ --device=gpu --run_mode=trt_fp16
# 单张图片普通测速
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True
# 单张图片TensorRT FP16测速
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp16
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp16
```
3.运行以下命令导出**不带NMS的模型和ONNX**,并使用TensorRT FP16进行推理和测速,以及**ONNX下FP16测速**
@@ -111,16 +151,16 @@ python tools/export_model.py -c configs/smalldet/visdrone/ppyoloe_plus_sod_crn_l
paddle2onnx --model_dir output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 12 --save_file ppyoloe_plus_sod_crn_l_largesize_80e_visdrone.onnx
# 推理单张图片
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/0000315_01601_d_0000509.jpg --device=gpu --run_mode=trt_fp16
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=gpu --run_mode=trt_fp16
# 推理文件夹下的所有图片
CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_dir=demo/ --device=gpu --run_mode=trt_fp16
# 单张图片普通测速
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True
# 单张图片TensorRT FP16测速
-CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp16
+CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/ppyoloe_plus_sod_crn_l_largesize_80e_visdrone --image_file=demo/visdrone_0000315_01601_d_0000509.jpg --device=gpu --run_benchmark=True --run_mode=trt_fp16
# 单张图片ONNX TensorRT FP16测速
/usr/local/TensorRT-8.0.3.4/bin/trtexec --onnx=ppyoloe_plus_sod_crn_l_largesize_80e_visdrone.onnx --workspace=4096 --avgRuns=10 --shapes=input:1x3x1920x1920 --fp16
diff --git a/demo/0000315_01601_d_0000509.jpg b/demo/visdrone_0000315_01601_d_0000509.jpg
similarity index 100%
rename from demo/0000315_01601_d_0000509.jpg
rename to demo/visdrone_0000315_01601_d_0000509.jpg
diff --git a/docs/tutorials/data/PrepareDetDataSet.md b/docs/tutorials/data/PrepareDetDataSet.md
index f956c170bd4..a282d4220f0 100644
--- a/docs/tutorials/data/PrepareDetDataSet.md
+++ b/docs/tutorials/data/PrepareDetDataSet.md
@@ -2,10 +2,10 @@
## 目录
- [目标检测数据说明](#目标检测数据说明)
- [准备训练数据](#准备训练数据)
- - [VOC数据数据](#VOC数据数据)
+ - [VOC数据](#VOC数据)
- [VOC数据集下载](#VOC数据集下载)
- [VOC数据标注文件介绍](#VOC数据标注文件介绍)
- - [COCO数据数据](#COCO数据数据)
+ - [COCO数据](#COCO数据)
- [COCO数据集下载](#COCO数据下载)
- [COCO数据标注文件介绍](#COCO数据标注文件介绍)
- [用户数据准备](#用户数据准备)
@@ -36,8 +36,8 @@
PaddleDetection默认支持[COCO](http://cocodataset.org)和[Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) 和[WIDER-FACE](http://shuoyang1213.me/WIDERFACE/) 数据源。
同时还支持自定义数据源,包括:
-(1) 自定义数据数据转换成VOC数据;
-(2) 自定义数据数据转换成COCO数据;
+(1) 自定义数据转换成VOC数据;
+(2) 自定义数据转换成COCO数据;
(3) 自定义新的数据源,增加自定义的reader。
@@ -47,7 +47,7 @@ cd PaddleDetection/
ppdet_root=$(pwd)
```
-#### VOC数据数据
+#### VOC数据
VOC数据是[Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) 比赛使用的数据。Pascal VOC比赛不仅包含图像分类分类任务,还包含图像目标检测、图像分割等任务,其标注文件中包含多个任务的标注内容。
VOC数据集指的是Pascal VOC比赛使用的数据。用户自定义的VOC数据,xml文件中的非必须字段,请根据实际情况选择是否标注或是否使用默认值。
@@ -148,7 +148,7 @@ xml文件中包含以下字段:
| bndbox子标签 | (xmin,ymin) 左上角坐标,(xmax,ymax) 右下角坐标, |
-#### COCO数据
+#### COCO数据
COCO数据是[COCO](http://cocodataset.org) 比赛使用的数据。同样的,COCO比赛数也包含多个比赛任务,其标注文件中包含多个任务的标注内容。
COCO数据集指的是COCO比赛使用的数据。用户自定义的COCO数据,json文件中的一些字段,请根据实际情况选择是否标注或是否使用默认值。
From d0b5487de4fc1e259045396d6d382adaffa150f9 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Thu, 16 Feb 2023 20:20:12 +0800
Subject: [PATCH 017/116] 3d pose readme (#7768)
* pose3d docs
* traindata download
---
configs/pose3d/README.md | 156 ++++++++++++++++++
.../tinypose3d_medical_multi_frames.yml | 0
.../tinypose3d_multi_frames_heatmap.yml | 0
3 files changed, 156 insertions(+)
create mode 100644 configs/pose3d/README.md
rename configs/{keypoint/tiny_pose => pose3d}/tinypose3d_medical_multi_frames.yml (100%)
rename configs/{keypoint/tiny_pose => pose3d}/tinypose3d_multi_frames_heatmap.yml (100%)
diff --git a/configs/pose3d/README.md b/configs/pose3d/README.md
new file mode 100644
index 00000000000..b5da08f1cf8
--- /dev/null
+++ b/configs/pose3d/README.md
@@ -0,0 +1,156 @@
+简体中文
+
+
+

+
+
+# 3D Pose系列模型
+
+## 目录
+
+- [简介](#简介)
+- [模型推荐](#模型推荐)
+- [快速开始](#快速开始)
+ - [环境安装](#1环境安装)
+ - [数据准备](#2数据准备)
+ - [训练与测试](#3训练与测试)
+ - [单卡训练](#单卡训练)
+ - [多卡训练](#多卡训练)
+ - [模型评估](#模型评估)
+ - [模型预测](#模型预测)
+ - [使用说明](#4使用说明)
+
+## 简介
+
+PaddleDetection 中提供了两种3D Pose算法(稀疏关键点),分别是适用于服务器端的大模型Metro3D和移动端的TinyPose3D。其中Metro3D基于[End-to-End Human Pose and Mesh Reconstruction with Transformers](https://arxiv.org/abs/2012.09760)进行了稀疏化改造,TinyPose3D是在TinyPose基础上修改输出3D关键点。
+
+## 模型推荐(待补充)
+
+|模型|适用场景|human3.6m精度|模型下载|
+|:--:|:--:|:--:|:--:|
+|Metro3D|服务器端|-|-|
+|TinyPose3D|移动端|-|-|
+
+注:
+1. 训练数据基于 [MeshTransformer](https://github.com/microsoft/MeshTransformer) 中的训练数据。
+2. 测试精度同 MeshTransformer 采用 14 关键点测试。
+
+## 快速开始
+
+### 1、环境安装
+
+ 请参考PaddleDetection [安装文档](../../docs/tutorials/INSTALL_cn.md)正确安装PaddlePaddle和PaddleDetection即可。
+
+### 2、数据准备
+
+ 我们的训练数据由coco、human3.6m、hr-lspet、posetrack3d、mpii组成。
+
+ 2.1 我们的训练数据下载地址为:
+
+ [coco](https://bj.bcebos.com/v1/paddledet/data/coco.tar)
+
+ [human3.6m](https://bj.bcebos.com/v1/paddledet/data/pose3d/human3.6m.tar.gz)
+
+ [lspet+posetrack+mpii](https://bj.bcebos.com/v1/paddledet/data/pose3d/pose3d_others.tar.gz)
+
+ [标注文件下载](https://bj.bcebos.com/v1/paddledet/data/pose3d/pose3d.tar.gz)
+
+ 2.2 数据下载后按如下结构放在repo目录下
+
+```
+${REPO_DIR}
+|-- dataset
+| |-- traindata
+| |-- coco
+| |-- hr-lspet
+| |-- human3.6m
+| |-- mpii
+| |-- posetrack3d
+| \-- pose3d
+| |-- COCO2014-All-ver01.json
+| |-- COCO2014-Part-ver01.json
+| |-- COCO2014-Val-ver10.json
+| |-- Human3.6m_train.json
+| |-- Human3.6m_valid.json
+| |-- LSPet_train_ver10.json
+| |-- LSPet_test_ver10.json
+| |-- MPII_ver01.json
+| |-- PoseTrack_ver01.json
+|-- ppdet
+|-- deploy
+|-- demo
+|-- README_cn.md
+|-- README_en.md
+|-- ...
+```
+
+
+### 3、训练与测试
+
+#### 单卡训练
+
+```shell
+#单卡训练
+CUDA_VISIBLE_DEVICES=0 python3 tools/train.py -c configs/pose3d/metro3d_24kpts.yml
+
+#多卡训练
+CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m paddle.distributed.launch tools/train.py -c configs/pose3d/metro3d_24kpts.yml
+```
+
+#### 模型评估
+
+```shell
+#单卡评估
+CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/pose3d/metro3d_24kpts.yml -o weights=output/metro3d_24kpts/best_model.pdparams
+
+#当只需要保存评估预测的结果时,可以通过设置save_prediction_only参数实现,评估预测结果默认保存在output/keypoints_results.json文件中
+CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py -c configs/pose3d/metro3d_24kpts.yml -o weights=output/metro3d_24kpts/best_model.pdparams --save_prediction_only
+
+#多卡评估
+CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m paddle.distributed.launch tools/eval.py -c configs/pose3d/metro3d_24kpts.yml -o weights=output/metro3d_24kpts/best_model.pdparams
+```
+
+#### 模型预测
+
+```shell
+#图片生成3视角图
+CUDA_VISIBLE_DEVICES=0 python3 tools/infer.py -c configs/pose3d/metro3d_24kpts.yml -o weights=./output/metro3d_24kpts/best_model.pdparams --infer_img=./demo/hrnet_demo.jpg --draw_threshold=0.5
+```
+
+### 4、使用说明
+
+ 3D Pose在使用中相比2D Pose有更多的困难,该困难主要是由于以下两个原因导致的。
+
+ - 1)训练数据标注成本高;
+
+ - 2)图像在深度信息上的模糊性;
+
+ 由于(1)的原因训练数据往往只能覆盖少量动作,导致模型泛化性困难。由于(2)的原因图像在预测3D Pose坐标时深度z轴上误差通常大于x、y方向,容易导致时序间的较大抖动,且数据标注误差越大该问题表现的更加明显。
+
+ 要解决上述两个问题,就造成了两个矛盾的需求:1)提高泛化性需要更多的标注数据;2)降低预测误差需要高精度的数据标注。而3D Pose本身数据标注的困难导致越高精度的标注成本越高,标注数量则会相应降低。
+
+ 因此,我们提供的解决方案是:
+
+ - 1)使用自动拟合标注方法自动产生大量低精度的数据。训练第一版模型,使其具有较普遍的泛化性。
+
+ - 2)标注少量目标动作的高精度数据,基于第一版模型finetune,得到目标动作上的高精度模型,且一定程度上继承了第一版模型的泛化性。
+
+ 我们的训练数据提供了大量的低精度自动生成式的数据,用户可以在此数据训练的基础上,标注自己高精度的目标动作数据进行finetune,即可得到相对稳定较好的模型。
+
+ 我们在医疗康复高精度数据上的训练效果展示如下
+
+
+

+
+
+
+## 引用
+
+```
+@inproceedings{lin2021end-to-end,
+author = {Lin, Kevin and Wang, Lijuan and Liu, Zicheng},
+title = {End-to-End Human Pose and Mesh Reconstruction with Transformers},
+booktitle = {CVPR},
+year = {2021},
+}
+```
diff --git a/configs/keypoint/tiny_pose/tinypose3d_medical_multi_frames.yml b/configs/pose3d/tinypose3d_medical_multi_frames.yml
similarity index 100%
rename from configs/keypoint/tiny_pose/tinypose3d_medical_multi_frames.yml
rename to configs/pose3d/tinypose3d_medical_multi_frames.yml
diff --git a/configs/keypoint/tiny_pose/tinypose3d_multi_frames_heatmap.yml b/configs/pose3d/tinypose3d_multi_frames_heatmap.yml
similarity index 100%
rename from configs/keypoint/tiny_pose/tinypose3d_multi_frames_heatmap.yml
rename to configs/pose3d/tinypose3d_multi_frames_heatmap.yml
From f0834c2df8577f99337be0b95eabd8bcc9ed271d Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Mon, 20 Feb 2023 15:51:58 +0800
Subject: [PATCH 018/116] add ppyoloe tiny 320 models (#7786)
* add ppyoloe tiny 320 models, test=document_fix
* add ppyoloe tiny 320 speed, test=document_fix
---
configs/ppyoloe/README.md | 11 ++++----
configs/ppyoloe/README_cn.md | 11 +++++---
...yoloe_plus_crn_t_auxhead_320_300e_coco.yml | 15 +++++++++++
..._plus_crn_t_auxhead_relu_320_300e_coco.yml | 26 +++++++++++++++++++
4 files changed, 54 insertions(+), 9 deletions(-)
create mode 100644 configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml
create mode 100644 configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml
diff --git a/configs/ppyoloe/README.md b/configs/ppyoloe/README.md
index f7d876f6766..1c90e8ad691 100644
--- a/configs/ppyoloe/README.md
+++ b/configs/ppyoloe/README.md
@@ -46,11 +46,12 @@ PP-YOLOE is composed of following methods:
#### Tiny model
-| Model | Epoch | GPU number | images/GPU | backbone | input shape | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config |
-|:--------------:|:-----:|:-------:|:----------:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
-| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | - | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
-| PP-YOLOE-t-P2(320) | 300 | 8 | 8 | cspresnet-t | 320 | 34.7 | 50.0 | 6.82 | 4.78 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_crn_t_p2_320_300e_coco.pdparams) | [config](./ppyoloe_crn_t_p2_320_300e_coco.yml) |
-| PP-YOLOE+_t-P2-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 36.3 | 51.7 | 6.00 | 15.46 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_p2_auxhead_320_300e_coco.yml) |
+| Model | Epoch | GPU number | images/GPU | backbone | input shape | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | download | config |
+|:--------:|:-----:|:----------:|:----------:|:----------:|:-----------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------------:| :------: |:--------:|
+| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
+| PP-YOLOE+_t-aux(640)-relu | 300 | 8 | 8 | cspresnet-t | 640 | 36.4 | 53.0 | 3.60 | 12.17 | 476.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml) |
+| PP-YOLOE+_t-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 33.3 | 48.5 | 4.85 | 4.80 | 729.9 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml) |
+| PP-YOLOE+_t-aux(320)-relu | 300 | 8 | 8 | cspresnet-t | 320 | 29.5 | 43.7 | 3.60 | 3.04 | 984.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml) |
### Comprehensive Metrics
diff --git a/configs/ppyoloe/README_cn.md b/configs/ppyoloe/README_cn.md
index 87caab958bc..6f0288d126d 100644
--- a/configs/ppyoloe/README_cn.md
+++ b/configs/ppyoloe/README_cn.md
@@ -45,10 +45,13 @@ PP-YOLOE由以下方法组成
#### Tiny模型
-| 模型 | Epoch | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
-|:---------------:|:-----:|:---------:|:--------:|:----------:|:----------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
-| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | - | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
-| PP-YOLOE+_t-aux(640)-relu | 300 | 8 | 8 | cspresnet-t | 640 | 36.5 | 53.1 | 3.60 | 12.17 | - | 476.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml) |
+| 模型 | Epoch | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
+|:----------:|:-----:|:--------:|:-----------:|:---------:|:--------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------------:| :------: |:--------:|
+| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
+| PP-YOLOE+_t-aux(640)-relu | 300 | 8 | 8 | cspresnet-t | 640 | 36.4 | 53.0 | 3.60 | 12.17 | 476.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml) |
+| PP-YOLOE+_t-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 33.3 | 48.5 | 4.85 | 4.80 | 729.9 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml) |
+| PP-YOLOE+_t-aux(320)-relu | 300 | 8 | 8 | cspresnet-t | 320 | 29.5 | 43.7 | 3.60 | 3.04 | 984.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml) |
+
### 综合指标
| 模型 | Epoch | AP0.5:0.95 | AP0.5 | AP0.75 | APsmall | APmedium | APlarge | ARsmall | ARmedium | ARlarge |
diff --git a/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml
new file mode 100644
index 00000000000..010a4f610c8
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/optimizer_300e.yml',
+ './_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
+ './_base_/ppyoloe_plus_reader_320.yml',
+]
+
+log_iter: 100
+snapshot_epoch: 10
+weights: output/ppyoloe_plus_crn_t_auxhead_320_300e_coco/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
+depth_mult: 0.33
+width_mult: 0.375
diff --git a/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml
new file mode 100644
index 00000000000..ad7642881ae
--- /dev/null
+++ b/configs/ppyoloe/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/optimizer_300e.yml',
+ './_base_/ppyoloe_plus_crn_tiny_auxhead.yml',
+ './_base_/ppyoloe_plus_reader_320.yml',
+]
+
+log_iter: 100
+snapshot_epoch: 10
+weights: output/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco/model_final
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_t_pretrained.pdparams
+depth_mult: 0.33
+width_mult: 0.375
+
+
+CSPResNet:
+ act: 'relu'
+
+CustomCSPPAN:
+ act: 'relu'
+
+PPYOLOEHead:
+ act: 'relu'
+ attn_conv: None
From ecccce23e893d266ee8272e0910723a394551503 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Mon, 20 Feb 2023 18:50:15 +0800
Subject: [PATCH 019/116] ppvehicle violation customization (#7779)
* ppvehicle add customized docs, test=document_fix
* fix pipeline readme.md test=document_fix
* fix pipeline readme.md link test=document_fix
* add licensed to lane_to_mask.py
* ppvehicle_violation.md add args list test=document_fix
---
deploy/pipeline/README.md | 4 +-
deploy/pipeline/README_en.md | 4 +-
deploy/pipeline/tools/create_dataset_list.py | 147 +++++
deploy/pipeline/tools/lane_to_mask.py | 508 ++++++++++++++++++
.../customization/ppvehicle_violation.md | 235 ++++++++
.../customization/ppvehicle_violation_en.md | 240 +++++++++
6 files changed, 1134 insertions(+), 4 deletions(-)
create mode 100644 deploy/pipeline/tools/create_dataset_list.py
create mode 100644 deploy/pipeline/tools/lane_to_mask.py
create mode 100644 docs/advanced_tutorials/customization/ppvehicle_violation.md
create mode 100644 docs/advanced_tutorials/customization/ppvehicle_violation_en.md
diff --git a/deploy/pipeline/README.md b/deploy/pipeline/README.md
index db82080fac5..c6762c96777 100644
--- a/deploy/pipeline/README.md
+++ b/deploy/pipeline/README.md
@@ -155,10 +155,10 @@
- [快速开始](docs/tutorials/ppvehicle_press.md)
-- [二次开发教程]
+- [二次开发教程](../../docs/advanced_tutorials/customization/ppvehicle_violation.md)
#### 车辆逆行
- [快速开始](docs/tutorials/ppvehicle_retrograde.md)
-- [二次开发教程]
+- [二次开发教程](../../docs/advanced_tutorials/customization/ppvehicle_violation.md)
diff --git a/deploy/pipeline/README_en.md b/deploy/pipeline/README_en.md
index 3fdf086fd89..d2667a7aa2a 100644
--- a/deploy/pipeline/README_en.md
+++ b/deploy/pipeline/README_en.md
@@ -152,10 +152,10 @@ Click to download the model, then unzip and save it in the `. /output_inference`
- [A quick start](docs/tutorials/ppvehicle_press_en.md)
-- [Customized development tutorials]
+- [Customized development tutorials](../../docs/advanced_tutorials/customization/ppvehicle_violation_en.md)
#### Vehicle Retrograde
- [A quick start](docs/tutorials/ppvehicle_retrograde_en.md)
-- [Customized development tutorials]
+- [Customized development tutorials](../../docs/advanced_tutorials/customization/ppvehicle_violation_en.md)
diff --git a/deploy/pipeline/tools/create_dataset_list.py b/deploy/pipeline/tools/create_dataset_list.py
new file mode 100644
index 00000000000..261e15e8f25
--- /dev/null
+++ b/deploy/pipeline/tools/create_dataset_list.py
@@ -0,0 +1,147 @@
+# coding: utf8
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import glob
+import os.path
+import argparse
+import warnings
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='PaddleSeg generate file list on cityscapes or your customized dataset.'
+ )
+ parser.add_argument('dataset_root', help='dataset root directory', type=str)
+ parser.add_argument(
+ '--type',
+ help='dataset type: \n'
+ '- cityscapes \n'
+ '- custom(default)',
+ default="custom",
+ type=str)
+ parser.add_argument(
+ '--separator',
+ dest='separator',
+ help='file list separator',
+ default=" ",
+ type=str)
+ parser.add_argument(
+ '--folder',
+ help='the folder names of images and labels',
+ type=str,
+ nargs=2,
+ default=['images', 'labels'])
+ parser.add_argument(
+ '--second_folder',
+ help='the second-level folder names of train set, validation set, test set',
+ type=str,
+ nargs='*',
+ default=['train', 'val', 'test'])
+ parser.add_argument(
+ '--format',
+ help='data format of images and labels, e.g. jpg or png.',
+ type=str,
+ nargs=2,
+ default=['jpg', 'png'])
+ parser.add_argument(
+ '--postfix',
+ help='postfix of images or labels',
+ type=str,
+ nargs=2,
+ default=['', ''])
+
+ return parser.parse_args()
+
+
+def get_files(image_or_label, dataset_split, args):
+ dataset_root = args.dataset_root
+ postfix = args.postfix
+ format = args.format
+ folder = args.folder
+
+ pattern = '*%s.%s' % (postfix[image_or_label], format[image_or_label])
+
+ search_files = os.path.join(dataset_root, folder[image_or_label],
+ dataset_split, pattern)
+ search_files2 = os.path.join(dataset_root, folder[image_or_label],
+ dataset_split, "*", pattern) # 包含子目录
+ search_files3 = os.path.join(dataset_root, folder[image_or_label],
+ dataset_split, "*", "*", pattern) # 包含三级目录
+ search_files4 = os.path.join(dataset_root, folder[image_or_label],
+ dataset_split, "*", "*", "*",
+ pattern) # 包含四级目录
+ search_files5 = os.path.join(dataset_root, folder[image_or_label],
+ dataset_split, "*", "*", "*", "*",
+ pattern) # 包含五级目录
+
+ filenames = glob.glob(search_files)
+ filenames2 = glob.glob(search_files2)
+ filenames3 = glob.glob(search_files3)
+ filenames4 = glob.glob(search_files4)
+ filenames5 = glob.glob(search_files5)
+
+ filenames = filenames + filenames2 + filenames3 + filenames4 + filenames5
+
+ return sorted(filenames)
+
+
+def generate_list(args):
+ dataset_root = args.dataset_root
+ separator = args.separator
+
+ for dataset_split in args.second_folder:
+ print("Creating {}.txt...".format(dataset_split))
+ image_files = get_files(0, dataset_split, args)
+ label_files = get_files(1, dataset_split, args)
+ if not image_files:
+ img_dir = os.path.join(dataset_root, args.folder[0], dataset_split)
+ warnings.warn("No images in {} !!!".format(img_dir))
+ num_images = len(image_files)
+
+ if not label_files:
+ label_dir = os.path.join(dataset_root, args.folder[1],
+ dataset_split)
+ warnings.warn("No labels in {} !!!".format(label_dir))
+ num_label = len(label_files)
+
+ if num_images != num_label and num_label > 0:
+ raise Exception(
+ "Number of images = {} number of labels = {} \n"
+ "Either number of images is equal to number of labels, "
+ "or number of labels is equal to 0.\n"
+ "Please check your dataset!".format(num_images, num_label))
+
+ file_list = os.path.join(dataset_root, dataset_split + '.txt')
+ with open(file_list, "w") as f:
+ for item in range(num_images):
+ left = image_files[item].replace(dataset_root, '', 1)
+ if left[0] == os.path.sep:
+ left = left.lstrip(os.path.sep)
+
+ try:
+ right = label_files[item].replace(dataset_root, '', 1)
+ if right[0] == os.path.sep:
+ right = right.lstrip(os.path.sep)
+ line = left + separator + right + '\n'
+ except:
+ line = left + '\n'
+
+ f.write(line)
+ print(line)
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ generate_list(args)
diff --git a/deploy/pipeline/tools/lane_to_mask.py b/deploy/pipeline/tools/lane_to_mask.py
new file mode 100644
index 00000000000..ece2efb87d0
--- /dev/null
+++ b/deploy/pipeline/tools/lane_to_mask.py
@@ -0,0 +1,508 @@
+# coding: utf8
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Convert poly2d to mask/bitmask."""
+
+import os
+from functools import partial
+from multiprocessing import Pool
+from typing import Callable, Dict, List
+
+import matplotlib # type: ignore
+import matplotlib.pyplot as plt # type: ignore
+import numpy as np
+from PIL import Image
+from scalabel.common.parallel import NPROC
+from scalabel.common.typing import NDArrayU8
+from scalabel.label.io import group_and_sort, load
+from scalabel.label.transforms import poly_to_patch
+from scalabel.label.typing import Config, Frame, ImageSize, Label, Poly2D
+from scalabel.label.utils import (
+ check_crowd,
+ check_ignored,
+ get_leaf_categories, )
+from tqdm import tqdm
+
+from bdd100k.common.logger import logger
+from bdd100k.common.typing import BDD100KConfig
+from bdd100k.common.utils import get_bdd100k_instance_id, load_bdd100k_config
+from bdd100k.label.label import drivables, labels, lane_categories
+from bdd100k.label.to_coco import parse_args
+from bdd100k.label.to_scalabel import bdd100k_to_scalabel
+
IGNORE_LABEL = 255  # pixel value reserved for "ignore"/background in masks
STUFF_NUM = 30  # panoptic: category ids <= STUFF_NUM are treated as stuff
LANE_DIRECTION_MAP = {"parallel": 0, "vertical": 1}  # laneDirection attribute
LANE_STYLE_MAP = {"solid": 0, "dashed": 1}  # laneStyle attribute
+
+
def frame_to_mask(
        out_path: str,
        shape: ImageSize,
        colors: List[NDArrayU8],
        poly2ds: List[List[Poly2D]],
        with_instances: bool=True,
        back_color: int=0,
        closed: bool=True, ) -> None:
    """Converting a frame of poly2ds to mask/bitmask.

    Renders the polygons off-screen with matplotlib: each patch encodes
    its (1-based) polygon index in its RGB color, the canvas is decoded
    back to indices, and every index is replaced by the corresponding
    entry of ``colors`` before the PNG is written to ``out_path``.

    Args:
        out_path: destination PNG path.
        shape: image size (height/width) of the rendered mask.
        colors: per-polygon color vectors (4 channels for instance
            bitmasks, 1 channel for semantic masks).
        poly2ds: per-polygon list of Poly2D segments.
        with_instances: render a 4-channel bitmask instead of 1-channel.
        back_color: value used to initialize the background pixels.
        closed: close the polygon paths when drawing.
    """
    assert len(colors) == len(poly2ds)
    height, width = shape.height, shape.width

    assert back_color >= 0
    if with_instances:
        img: NDArrayU8 = (
            np.ones(
                [height, width, 4], dtype=np.uint8) * back_color  # type: ignore
        )
    else:
        img = (
            np.ones(
                [height, width, 1], dtype=np.uint8) * back_color  # type: ignore
        )

    if len(colors) == 0:
        # NOTE(review): there is no ``return`` here, so this early save is
        # later overwritten by the final save below (whose pixels have been
        # remapped 255 -> 0).  Presumably intended so empty frames also get
        # the remapped background — confirm, otherwise add a return.
        pil_img = Image.fromarray(img.squeeze())
        pil_img.save(out_path)

    matplotlib.use("Agg")
    fig = plt.figure(facecolor="0")
    fig.set_size_inches((width / fig.get_dpi()), height / fig.get_dpi())
    ax = fig.add_axes([0, 0, 1, 1])
    ax.axis("off")
    ax.set_xlim(0, width)
    ax.set_ylim(0, height)
    ax.set_facecolor((0, 0, 0, 0))
    # Image coordinates grow downwards, matplotlib's y-axis grows upwards.
    ax.invert_yaxis()

    for i, poly2d in enumerate(poly2ds):
        for poly in poly2d:
            ax.add_patch(
                poly_to_patch(
                    poly.vertices,
                    poly.types,
                    # (0, 0, 0) for the background; index i+1 is split over
                    # the R (high byte) and G (low byte) channels.
                    # NOTE(review): "% 255" (not "& 255") makes index 255
                    # collide with the background encoding — verify for
                    # frames with >= 255 polygons.
                    color=(
                        ((i + 1) >> 8) / 255.0,
                        ((i + 1) % 255) / 255.0,
                        0.0, ),
                    closed=closed, ))

    fig.canvas.draw()
    out: NDArrayU8 = np.frombuffer(fig.canvas.tostring_rgb(), np.uint8)
    out = out.reshape((height, width, -1)).astype(np.int32)
    # Decode the polygon index back from the first two RGB channels.
    out = (out[..., 0] << 8) + out[..., 1]
    plt.close()

    for i, color in enumerate(colors):
        # 0 is for the background
        img[out == i + 1] = color

    # Remap the ignore value 255 to 0 in the written mask.
    img[img == 255] = 0

    pil_img = Image.fromarray(img.squeeze())
    pil_img.save(out_path)
+
+
def set_instance_color(label: Label, category_id: int,
                       ann_id: int) -> NDArrayU8:
    """Encode one instance as a 4-channel bitmask color.

    Channel layout: [category id, attribute flags, ann_id high byte,
    ann_id low byte].  The flag byte packs truncated/occluded/crowd/
    ignored into bits 3..0.
    """
    attributes = label.attributes
    if attributes is None:
        # No attributes recorded: all flags cleared.
        flag_byte = 0
    else:
        truncated = int(attributes.get("truncated", False))
        occluded = int(attributes.get("occluded", False))
        crowd = int(check_crowd(label))
        ignored = int(check_ignored(label))
        flag_byte = (truncated << 3) | (occluded << 2) | (crowd << 1) | ignored

    channels = [category_id & 255, flag_byte, ann_id >> 8, ann_id & 255]
    color: NDArrayU8 = np.array(channels, dtype=np.uint8)
    return color
+
+
def set_lane_color(label: Label, category_id: int) -> NDArrayU8:
    """Set the color for the lane given its attributes and category.

    Maps a lane label to the 4-class scheme used by the PP-Vehicle lane
    model: 0 background, 1 double yellow, 2 solid, 3 dashed.
    """
    attributes = label.attributes
    if attributes is None:
        # Defaults match "parallel" direction and "solid" style.
        lane_direction, lane_style = 0, 0
    else:
        lane_direction = LANE_DIRECTION_MAP[str(
            attributes.get("laneDirection", "parallel"))]
        lane_style = LANE_STYLE_MAP[str(attributes.get("laneStyle", "solid"))]

    #value = category_id + (lane_direction << 5) + (lane_style << 4)
    value = category_id
    # NOTE(review): the ``value = 1`` assignment below is dead code — the
    # following if/else unconditionally overwrites ``value`` with 2 or 3,
    # so class 1 (double yellow) is never produced, and ``category_id`` /
    # ``lane_direction`` end up unused.  The second ``if`` was probably
    # meant to be ``elif``; confirm against the intended 4-class mapping
    # before changing, since the shipped model was trained on these masks.
    if lane_style == 0 and (category_id == 3 or category_id == 2):
        value = 1
    if lane_style == 0:
        value = 2
    else:
        value = 3

    color: NDArrayU8 = np.array([value], dtype=np.uint8)
    return color
+
+
def frames_to_masks(
        nproc: int,
        out_paths: List[str],
        shapes: List[ImageSize],
        colors_list: List[List[NDArrayU8]],
        poly2ds_list: List[List[List[Poly2D]]],
        with_instances: bool=True,
        back_color: int=0,
        closed: bool=True, ) -> None:
    """Execute the mask conversion in parallel.

    Fans ``frame_to_mask`` out over ``nproc`` worker processes — one task
    per output path — with a tqdm progress bar over the task stream.
    """
    # Bind the shared keyword options once; positional arguments come
    # from the zipped per-frame sequences.
    worker = partial(
        frame_to_mask,
        with_instances=with_instances,
        back_color=back_color,
        closed=closed, )
    tasks = zip(out_paths, shapes, colors_list, poly2ds_list)
    progress = tqdm(tasks, total=len(out_paths))
    with Pool(nproc) as pool:
        pool.starmap(worker, progress)
+
+
def seg_to_masks(
        frames: List[Frame],
        out_base: str,
        config: Config,
        nproc: int=NPROC,
        mode: str="sem_seg",
        back_color: int=IGNORE_LABEL,
        closed: bool=True, ) -> None:
    """Converting segmentation poly2d to 1-channel masks.

    Shared driver for semantic segmentation, drivable area and lane
    marking conversion; ``mode`` selects the category table and how the
    per-label color value is computed.

    Args:
        frames: input frames with poly2d labels.
        out_base: output directory for the mask PNGs.
        config: dataset config providing the image size.
        nproc: number of worker processes.
        mode: one of "sem_seg", "drivable", "lane_mark".
        back_color: background pixel value of the generated masks.
        closed: whether polygons are closed (False for lane polylines).
    """
    os.makedirs(out_base, exist_ok=True)
    img_shape = config.imageSize

    out_paths: List[str] = []
    shapes: List[ImageSize] = []
    colors_list: List[List[NDArrayU8]] = []
    poly2ds_list: List[List[List[Poly2D]]] = []

    # Pick the category table matching the conversion mode; ignore-only
    # categories are dropped from the name -> trainId mapping.
    categories = dict(
        sem_seg=labels, drivable=drivables, lane_mark=lane_categories)[mode]
    cat_name2id = {
        cat.name: cat.trainId
        for cat in categories if cat.trainId != IGNORE_LABEL
    }

    logger.info("Preparing annotations for Semseg to Bitmasks")

    for image_anns in tqdm(frames):
        # Mask in .png format
        image_name = image_anns.name.replace(".jpg", ".png")
        image_name = os.path.split(image_name)[-1]
        out_path = os.path.join(out_base, image_name)
        out_paths.append(out_path)

        # NOTE(review): once resolved from the first sized frame,
        # ``img_shape`` is reused for all following frames — this assumes
        # every frame shares one resolution; confirm for mixed-size data.
        if img_shape is None:
            if image_anns.size is not None:
                img_shape = image_anns.size
            else:
                raise ValueError("Image shape not defined!")
        shapes.append(img_shape)

        # Per-frame lists are appended first and filled below; the
        # *_list containers alias them.
        colors: List[NDArrayU8] = []
        poly2ds: List[List[Poly2D]] = []
        colors_list.append(colors)
        poly2ds_list.append(poly2ds)

        if image_anns.labels is None:
            continue

        for label in image_anns.labels:
            if label.category not in cat_name2id:
                continue
            if label.poly2d is None:
                continue

            category_id = cat_name2id[label.category]
            if mode in ["sem_seg", "drivable"]:
                color: NDArrayU8 = np.array([category_id], dtype=np.uint8)
            else:
                # Lane marks derive the class value from style attributes.
                color = set_lane_color(label, category_id)

            colors.append(color)
            poly2ds.append(label.poly2d)

    logger.info("Start Conversion for Seg to Masks")
    frames_to_masks(
        nproc,
        out_paths,
        shapes,
        colors_list,
        poly2ds_list,
        with_instances=False,
        back_color=back_color,
        closed=closed, )
+
+
# Common call signature shared by every conversion entry point below.
ToMasksFunc = Callable[[List[Frame], str, Config, int], None]
# Semantic segmentation: 1-channel masks, background/ignore = 255.
semseg_to_masks: ToMasksFunc = partial(
    seg_to_masks, mode="sem_seg", back_color=IGNORE_LABEL, closed=True)
# Drivable area: background is the last drivable class id.
drivable_to_masks: ToMasksFunc = partial(
    seg_to_masks,
    mode="drivable",
    back_color=len(drivables) - 1,
    closed=True, )
# Lane marking: open polylines, background/ignore = 255.
lanemark_to_masks: ToMasksFunc = partial(
    seg_to_masks, mode="lane_mark", back_color=IGNORE_LABEL, closed=False)
+
+
def insseg_to_bitmasks(frames: List[Frame],
                       out_base: str,
                       config: Config,
                       nproc: int=NPROC) -> None:
    """Convert instance segmentation poly2d annotations to bitmasks.

    One 4-channel bitmask PNG is written per frame under ``out_base``;
    instance ids are assigned per frame in (score-sorted) label order.
    """
    os.makedirs(out_base, exist_ok=True)
    img_shape = config.imageSize

    out_paths: List[str] = []
    shapes: List[ImageSize] = []
    colors_list: List[List[NDArrayU8]] = []
    poly2ds_list: List[List[List[Poly2D]]] = []

    leaf_cats = get_leaf_categories(config.categories)
    # Category ids start at 1; 0 is reserved for the background.
    cat_name2id = {cat.name: idx + 1 for idx, cat in enumerate(leaf_cats)}

    logger.info("Preparing annotations for InsSeg to Bitmasks")

    for frame in tqdm(frames):
        ann_id = 0

        # Bitmask stored as .png next to the original image name.
        png_name = os.path.split(frame.name.replace(".jpg", ".png"))[-1]
        out_paths.append(os.path.join(out_base, png_name))

        # Resolve the image shape once and reuse it afterwards.
        if img_shape is None:
            if frame.size is None:
                raise ValueError("Image shape not defined!")
            img_shape = frame.size
        shapes.append(img_shape)

        frame_colors: List[NDArrayU8] = []
        frame_polys: List[List[Poly2D]] = []
        colors_list.append(frame_colors)
        poly2ds_list.append(frame_polys)

        frame_labels = frame.labels
        if not frame_labels:
            continue

        # Higher scores are rendered later (drawn on top).
        if frame_labels[0].score is not None:
            frame_labels = sorted(
                frame_labels, key=lambda label: float(label.score))

        for label in frame_labels:
            if label.poly2d is None or label.category not in cat_name2id:
                continue

            ann_id += 1
            frame_colors.append(
                set_instance_color(label, cat_name2id[label.category],
                                   ann_id))
            frame_polys.append(label.poly2d)

    logger.info("Start conversion for InsSeg to Bitmasks")
    frames_to_masks(nproc, out_paths, shapes, colors_list, poly2ds_list)
+
+
def panseg_to_bitmasks(frames: List[Frame],
                       out_base: str,
                       config: Config,
                       nproc: int=NPROC) -> None:
    """Converting panoptic segmentation poly2d to bitmasks.

    Stuff categories (id <= STUFF_NUM) reuse their category id as the
    annotation id, while thing instances receive fresh per-frame ids
    counted upwards from STUFF_NUM.

    Args:
        frames: input frames with poly2d labels.
        out_base: output directory for the bitmask PNGs.
        config: dataset config providing the image size.
        nproc: number of worker processes.
    """
    os.makedirs(out_base, exist_ok=True)
    img_shape = config.imageSize

    out_paths: List[str] = []
    shapes: List[ImageSize] = []
    colors_list: List[List[NDArrayU8]] = []
    poly2ds_list: List[List[List[Poly2D]]] = []
    cat_name2id = {cat.name: cat.id for cat in labels}

    # Fix: the original message said "InsSeg", copied from
    # insseg_to_bitmasks.
    logger.info("Preparing annotations for PanSeg to Bitmasks")

    for image_anns in tqdm(frames):
        cur_ann_id = STUFF_NUM

        # Bitmask in .png format
        image_name = image_anns.name.replace(".jpg", ".png")
        image_name = os.path.split(image_name)[-1]
        out_path = os.path.join(out_base, image_name)
        out_paths.append(out_path)

        if img_shape is None:
            if image_anns.size is not None:
                img_shape = image_anns.size
            else:
                raise ValueError("Image shape not defined!")
        shapes.append(img_shape)

        colors: List[NDArrayU8] = []
        poly2ds: List[List[Poly2D]] = []
        colors_list.append(colors)
        poly2ds_list.append(poly2ds)

        labels_ = image_anns.labels
        if labels_ is None or len(labels_) == 0:
            continue

        # Scores higher, rendering later
        if labels_[0].score is not None:
            labels_ = sorted(labels_, key=lambda label: float(label.score))

        for label in labels_:
            if label.poly2d is None:
                continue
            if label.category not in cat_name2id:
                continue

            category_id = cat_name2id[label.category]
            if category_id == 0:
                continue
            if category_id <= STUFF_NUM:
                # Stuff: one mask per category, id == category id.
                ann_id = category_id
            else:
                # Thing: allocate the next per-frame instance id.
                cur_ann_id += 1
                ann_id = cur_ann_id

            color = set_instance_color(label, category_id, ann_id)
            colors.append(color)
            poly2ds.append(label.poly2d)

    logger.info("Start conversion for PanSeg to Bitmasks")
    frames_to_masks(nproc, out_paths, shapes, colors_list, poly2ds_list)
+
+
def segtrack_to_bitmasks(frames: List[Frame],
                         out_base: str,
                         config: Config,
                         nproc: int=NPROC) -> None:
    """Converting segmentation tracking poly2d to bitmasks.

    Frames are grouped per video; instance ids are kept consistent
    across the frames of one video via ``instance_id_maps`` so the same
    tracked object keeps the same id.

    Args:
        frames: input frames with poly2d labels and video names.
        out_base: output root; one sub-directory is created per video.
        config: dataset config providing the image size and categories.
        nproc: number of worker processes.
    """
    frames_list = group_and_sort(frames)
    img_shape = config.imageSize

    out_paths: List[str] = []
    shapes: List[ImageSize] = []
    colors_list: List[List[NDArrayU8]] = []
    poly2ds_list: List[List[List[Poly2D]]] = []

    categories = get_leaf_categories(config.categories)
    # Category ids start at 1; 0 is reserved for the background.
    cat_name2id = {cat.name: i + 1 for i, cat in enumerate(categories)}

    logger.info("Preparing annotations for SegTrack to Bitmasks")

    for video_anns in tqdm(frames_list):
        # Instance ids are unique within one video, not globally.
        global_instance_id: int = 1
        instance_id_maps: Dict[str, int] = {}

        video_name = video_anns[0].videoName
        out_dir = os.path.join(out_base, video_name)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        for image_anns in video_anns:
            # Bitmask in .png format
            image_name = image_anns.name.replace(".jpg", ".png")
            image_name = os.path.split(image_name)[-1]
            out_path = os.path.join(out_dir, image_name)
            out_paths.append(out_path)

            if img_shape is None:
                if image_anns.size is not None:
                    img_shape = image_anns.size
                else:
                    raise ValueError("Image shape not defined!")
            shapes.append(img_shape)

            colors: List[NDArrayU8] = []
            poly2ds: List[List[Poly2D]] = []
            colors_list.append(colors)
            poly2ds_list.append(poly2ds)

            labels_ = image_anns.labels
            if labels_ is None or len(labels_) == 0:
                continue

            # Scores higher, rendering later
            if labels_[0].score is not None:
                labels_ = sorted(labels_, key=lambda label: float(label.score))

            for label in labels_:
                if label.poly2d is None:
                    continue
                if label.category not in cat_name2id:
                    continue

                # Reuse the id of an already-seen track, or allocate one.
                instance_id, global_instance_id = get_bdd100k_instance_id(
                    instance_id_maps, global_instance_id, label.id)
                category_id = cat_name2id[label.category]
                color = set_instance_color(label, category_id, instance_id)
                colors.append(color)
                poly2ds.append(label.poly2d)

    logger.info("Start Conversion for SegTrack to Bitmasks")
    frames_to_masks(nproc, out_paths, shapes, colors_list, poly2ds_list)
+
+
def main() -> None:
    """Entry point: convert BDD100K annotations to segmentation masks."""
    args = parse_args()
    # This tool is dedicated to lane masks: the mode from the shared
    # bdd100k parser is overridden unconditionally.
    args.mode = "lane_mark"

    os.environ["QT_QPA_PLATFORM"] = "offscreen" # matplotlib offscreen render

    # Dispatch table: annotation mode -> conversion function.
    convert_funcs: Dict[str, ToMasksFunc] = dict(
        sem_seg=semseg_to_masks,
        drivable=drivable_to_masks,
        lane_mark=lanemark_to_masks,
        pan_seg=panseg_to_bitmasks,
        ins_seg=insseg_to_bitmasks,
        seg_track=segtrack_to_bitmasks, )

    dataset = load(args.input, args.nproc)
    # Resolve the BDD100K config: explicit --config wins, then the config
    # embedded in the dataset, then the default config for the mode.
    if args.config is not None:
        bdd100k_config = load_bdd100k_config(args.config)
    elif dataset.config is not None:
        bdd100k_config = BDD100KConfig(config=dataset.config)
    else:
        bdd100k_config = load_bdd100k_config(args.mode)

    # Only instance-level modes need the BDD100K -> Scalabel remapping.
    if args.mode in ["ins_seg", "seg_track"]:
        frames = bdd100k_to_scalabel(dataset.frames, bdd100k_config)
    else:
        frames = dataset.frames

    convert_funcs[args.mode](frames, args.output, bdd100k_config.scalabel,
                             args.nproc)

    logger.info("Finished!")


if __name__ == "__main__":
    main()
diff --git a/docs/advanced_tutorials/customization/ppvehicle_violation.md b/docs/advanced_tutorials/customization/ppvehicle_violation.md
new file mode 100644
index 00000000000..b82fe97d333
--- /dev/null
+++ b/docs/advanced_tutorials/customization/ppvehicle_violation.md
@@ -0,0 +1,235 @@
+简体中文 | [English](./ppvehicle_violation_en.md)
+
+# 车辆违章任务二次开发
+
+车辆违章任务的二次开发,主要集中于车道线分割模型任务。采用PP-LiteSeg模型在车道线数据集bdd100k上进行fine-tune得到,过程参考[PP-LiteSeg](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.7/configs/pp_liteseg/README.md)。
+
+## 数据准备
+
+ppvehicle违法分析将车道线类别分为4类
+```
+0 背景
+1 双黄线
+2 实线
+3 虚线
+
+```
+
+1. 对于bdd100k数据集,可以结合我们的提供的处理脚本[lane_to_mask.py](../../../deploy/pipeline/tools/lane_to_mask.py)和bdd100k官方[repo](https://github.com/bdd100k/bdd100k)将数据处理成分割需要的数据格式.
+
+```
+#首先执行以下命令clone bdd100k库:
+git clone https://github.com/bdd100k/bdd100k.git
+
+#拷贝lane_to_mask.py到bdd100k目录
+cp PaddleDetection/deploy/pipeline/tools/lane_to_mask.py bdd100k/
+
+#准备bdd100k环境
+cd bdd100k && pip install -r requirements.txt
+
+#数据转换
+python lane_to_mask.py -i dataset/labels/lane/polygons/lane_train.json -o /output_path
+
+# -i bdd100k数据集label的json路径,
+# -o 生成的mask图像路径
+
+```
+
+2. 整理数据,按如下格式存放数据
+```
+dataset_root
+ |
+ |--images
+ | |--train
+ | |--image1.jpg
+ | |--image2.jpg
+ | |--...
+ | |--val
+ | |--image3.jpg
+ | |--image4.jpg
+ | |--...
+ | |--test
+ | |--image5.jpg
+ | |--image6.jpg
+ | |--...
+ |
+ |--labels
+ | |--train
+ | |--label1.jpg
+ | |--label2.jpg
+ | |--...
+ | |--val
+ | |--label3.jpg
+ | |--label4.jpg
+ | |--...
+ | |--test
+ | |--label5.jpg
+ | |--label6.jpg
+ | |--...
+ |
+```
+运行[create_dataset_list.py](../../../deploy/pipeline/tools/create_dataset_list.py)生成txt文件
+```
+python create_dataset_list.py #数据根目录
+ --type custom #数据类型,支持cityscapes、custom
+
+
+```
+其他数据以及数据标注,可参考PaddleSeg[准备自定义数据集](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.7/docs/data/marker/marker_cn.md)
+
+
+## 模型训练
+
+首先执行以下命令clone PaddleSeg库代码到训练机器:
+```
+git clone https://github.com/PaddlePaddle/PaddleSeg.git
+```
+
+安装相关依赖环境:
+```
+cd PaddleSeg
+pip install -r requirements.txt
+```
+
+### 准备配置文件
+详细可参考PaddleSeg[准备配置文件](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.7/docs/config/pre_config_cn.md).
+本例用pp_liteseg_stdc2_bdd100k_1024x512.yml示例
+
+```
+batch_size: 16
+iters: 50000
+
+train_dataset:
+ type: Dataset
+ dataset_root: data/bdd100k #数据集路径
+ train_path: data/bdd100k/train.txt #数据集训练txt文件
+ num_classes: 4 #ppvehicle将道路分为4类
+ mode: train
+ transforms:
+ - type: ResizeStepScaling
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_step_size: 0.25
+ - type: RandomPaddingCrop
+ crop_size: [512, 1024]
+ - type: RandomHorizontalFlip
+ - type: RandomAffine
+ - type: RandomDistort
+ brightness_range: 0.5
+ contrast_range: 0.5
+ saturation_range: 0.5
+ - type: Normalize
+
+val_dataset:
+ type: Dataset
+ dataset_root: data/bdd100k #数据集路径
+ val_path: data/bdd100k/val.txt #数据集验证集txt文件
+ num_classes: 4
+ mode: val
+ transforms:
+ - type: Normalize
+
+optimizer:
+ type: sgd
+ momentum: 0.9
+ weight_decay: 4.0e-5
+
+lr_scheduler:
+ type: PolynomialDecay
+ learning_rate: 0.01 #0.01
+ end_lr: 0
+ power: 0.9
+
+loss:
+ types:
+ - type: MixedLoss
+ losses:
+ - type: CrossEntropyLoss
+ - type: LovaszSoftmaxLoss
+ coef: [0.6, 0.4]
+ - type: MixedLoss
+ losses:
+ - type: CrossEntropyLoss
+ - type: LovaszSoftmaxLoss
+ coef: [0.6, 0.4]
+ - type: MixedLoss
+ losses:
+ - type: CrossEntropyLoss
+ - type: LovaszSoftmaxLoss
+ coef: [0.6, 0.4]
+ coef: [1, 1,1]
+
+
+model:
+ type: PPLiteSeg
+ backbone:
+ type: STDC2
+ pretrained: https://bj.bcebos.com/paddleseg/dygraph/PP_STDCNet2.tar.gz #预训练模型
+```
+
+### 执行训练
+
+```
+#单卡训练
+export CUDA_VISIBLE_DEVICES=0 # Linux上设置1张可用的卡
+# set CUDA_VISIBLE_DEVICES=0 # Windows上设置1张可用的卡
+
+python train.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --do_eval \
+ --use_vdl \
+ --save_interval 500 \
+ --save_dir output
+
+```
+### 训练参数解释
+```
+--do_eval 是否在保存模型时启动评估, 启动时将会根据mIoU保存最佳模型至best_model
+--use_vdl 是否开启visualdl记录训练数据
+--save_interval 500 模型保存的间隔步数
+--save_dir output 模型输出路径
+```
+
+## 2、多卡训练
+如果想要使用多卡训练的话,需要将环境变量CUDA_VISIBLE_DEVICES指定为多卡(不指定时默认使用所有的gpu),并使用paddle.distributed.launch启动训练脚本(windows下由于不支持nccl,无法使用多卡训练):
+
+```
+export CUDA_VISIBLE_DEVICES=0,1,2,3 # 设置4张可用的卡
+python -m paddle.distributed.launch train.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --do_eval \
+ --use_vdl \
+ --save_interval 500 \
+ --save_dir output
+```
+
+
+训练完成后可以执行以下命令进行性能评估:
+```
+#单卡评估
+python val.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --model_path output/iter_1000/model.pdparams
+```
+
+
+### 模型导出
+
+使用下述命令将训练好的模型导出为预测部署模型。
+
+```
+python export.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --model_path output/iter_1000/model.pdparams \
+ --save_dir output/inference_model
+```
+
+
+使用时在PP-Vehicle中的配置文件`./deploy/pipeline/config/infer_cfg_ppvehicle.yml`中修改`LANE_SEG`模块中的`model_dir`项.
+```
+LANE_SEG:
+ lane_seg_config: deploy/pipeline/config/lane_seg_config.yml
+ model_dir: output/inference_model
+```
+
+至此即完成更新车道线分割模型任务。
diff --git a/docs/advanced_tutorials/customization/ppvehicle_violation_en.md b/docs/advanced_tutorials/customization/ppvehicle_violation_en.md
new file mode 100644
index 00000000000..9b96e8a60ea
--- /dev/null
+++ b/docs/advanced_tutorials/customization/ppvehicle_violation_en.md
@@ -0,0 +1,240 @@
+English | [简体中文](./ppvehicle_violation.md)
+
+# Customized Vehicle Violation
+
+The secondary development of the vehicle violation task mainly focuses on the lane line segmentation model. The PP-LiteSeg model is fine-tuned on the lane line dataset bdd100k; the process is described in [PP-LiteSeg](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.7/configs/pp_liteseg/README.md).
+
+## Data preparation
+
+ppvehicle violation analysis divides the lane line into 4 categories
+```
+0 Background
+
+1 double yellow line
+
+2 Solid line
+
+3 Dashed line
+
+```
+
+1. For the bdd100k data set, we can combine the processing script provided by [lane_to_mask.py](../../../deploy/pipeline/tools/lane_to_mask.py) and bdd100k [repo](https://github.com/bdd100k/bdd100k) to process the data into the data format required for segmentation.
+
+
+```
+# clone bdd100k:
+git clone https://github.com/bdd100k/bdd100k.git
+
+# copy lane_to_mask.py to bdd100k/
+cp PaddleDetection/deploy/pipeline/tools/lane_to_mask.py bdd100k/
+
+# preparation bdd100k env
+cd bdd100k && pip install -r requirements.txt
+
+#bdd100k to mask
+python lane_to_mask.py -i dataset/labels/lane/polygons/lane_train.json -o /output_path
+
+# -i means input path for bdd100k dataset label json,
+# -o means output path
+
+```
+
+2. Organize data and store data in the following format:
+```
+dataset_root
+ |
+ |--images
+ | |--train
+ | |--image1.jpg
+ | |--image2.jpg
+ | |--...
+ | |--val
+ | |--image3.jpg
+ | |--image4.jpg
+ | |--...
+ | |--test
+ | |--image5.jpg
+ | |--image6.jpg
+ | |--...
+ |
+ |--labels
+ | |--train
+ | |--label1.jpg
+ | |--label2.jpg
+ | |--...
+ | |--val
+ | |--label3.jpg
+ | |--label4.jpg
+ | |--...
+ | |--test
+ | |--label5.jpg
+ | |--label6.jpg
+ | |--...
+ |
+```
+
+run [create_dataset_list.py](../../../deploy/pipeline/tools/create_dataset_list.py) create txt file
+
+```
+python create_dataset_list.py #dataset path
+ --type custom #dataset type, supports cityscapes and custom
+
+```
+
+For other data and data annotation, please refer to PaddleSeg [Prepare Custom Datasets](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.7/docs/data/marker/marker_cn.md)
+
+
+## model training
+
+clone PaddleSeg:
+```
+git clone https://github.com/PaddlePaddle/PaddleSeg.git
+```
+
+prepare the environment:
+```
+cd PaddleSeg
+pip install -r requirements.txt
+```
+
+### Prepare configuration file
+For details, please refer to PaddleSeg [prepare configuration file](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.7/docs/config/pre_config_cn.md).
+
+exp: pp_liteseg_stdc2_bdd100k_1024x512.yml
+
+```
+batch_size: 16
+iters: 50000
+
+train_dataset:
+ type: Dataset
+ dataset_root: data/bdd100k #dataset path
+ train_path: data/bdd100k/train.txt #dataset train txt
+ num_classes: 4 #lane classes
+ mode: train
+ transforms:
+ - type: ResizeStepScaling
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_step_size: 0.25
+ - type: RandomPaddingCrop
+ crop_size: [512, 1024]
+ - type: RandomHorizontalFlip
+ - type: RandomAffine
+ - type: RandomDistort
+ brightness_range: 0.5
+ contrast_range: 0.5
+ saturation_range: 0.5
+ - type: Normalize
+
+val_dataset:
+ type: Dataset
+ dataset_root: data/bdd100k #dataset path
+ val_path: data/bdd100k/val.txt #dataset val txt
+ num_classes: 4
+ mode: val
+ transforms:
+ - type: Normalize
+
+optimizer:
+ type: sgd
+ momentum: 0.9
+ weight_decay: 4.0e-5
+
+lr_scheduler:
+ type: PolynomialDecay
+ learning_rate: 0.01 #0.01
+ end_lr: 0
+ power: 0.9
+
+loss:
+ types:
+ - type: MixedLoss
+ losses:
+ - type: CrossEntropyLoss
+ - type: LovaszSoftmaxLoss
+ coef: [0.6, 0.4]
+ - type: MixedLoss
+ losses:
+ - type: CrossEntropyLoss
+ - type: LovaszSoftmaxLoss
+ coef: [0.6, 0.4]
+ - type: MixedLoss
+ losses:
+ - type: CrossEntropyLoss
+ - type: LovaszSoftmaxLoss
+ coef: [0.6, 0.4]
+ coef: [1, 1,1]
+
+
+model:
+ type: PPLiteSeg
+ backbone:
+ type: STDC2
+ pretrained: https://bj.bcebos.com/paddleseg/dygraph/PP_STDCNet2.tar.gz #Pre-training model
+```
+
+### training model
+
+```
+#Single GPU training
+export CUDA_VISIBLE_DEVICES=0 # Linux
+# set CUDA_VISIBLE_DEVICES=0 # Windows
+python train.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --do_eval \
+ --use_vdl \
+ --save_interval 500 \
+ --save_dir output
+
+```
+### Explanation of training parameters
+```
+--do_eval Whether to run evaluation when saving the model. When enabled, the best model according to mIoU will be saved to best_model
+--use_vdl Whether to enable visualdl to record training data
+--save_interval 500 Number of steps between model saving
+--save_dir output Model output path
+```
+
+## 2、Multiple GPUs training
+if you want to use multiple gpus training, you need to set the environment variable CUDA_VISIBLE_DEVICES is specified as multiple gpus (if not specified, all gpus will be used by default), and the training script will be started using paddle.distributed.launch (because nccl is not supported under windows, multi-card training cannot be used):
+
+```
+export CUDA_VISIBLE_DEVICES=0,1,2,3 # 4 gpus
+python -m paddle.distributed.launch train.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --do_eval \
+ --use_vdl \
+ --save_interval 500 \
+ --save_dir output
+```
+
+
+After training, you can execute the following commands for performance evaluation:
+```
+python val.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --model_path output/iter_1000/model.pdparams
+```
+
+
+### Model export
+
+Use the following command to export the trained model as a prediction deployment model.
+
+```
+python export.py \
+ --config configs/pp_liteseg/pp_liteseg_stdc2_bdd100k_1024x512.yml \
+ --model_path output/iter_1000/model.pdparams \
+ --save_dir output/inference_model
+```
+
+
+When using PP-Vehicle, set `model_dir` under `LANE_SEG` in the config file `./deploy/pipeline/config/infer_cfg_ppvehicle.yml`.
+```
+LANE_SEG:
+ lane_seg_config: deploy/pipeline/config/lane_seg_config.yml
+ model_dir: output/inference_model
+```
+
+This completes the task of updating the lane line segmentation model.
From 613b640de221bc32e999a8cd3d6fc1df865da5fc Mon Sep 17 00:00:00 2001
From: chenxujun
Date: Mon, 20 Feb 2023 18:50:55 +0800
Subject: [PATCH 020/116] Update cam_ppdet.py (#7789)
---
tools/cam_ppdet.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/cam_ppdet.py b/tools/cam_ppdet.py
index c8922b9353f..f65b55987e6 100644
--- a/tools/cam_ppdet.py
+++ b/tools/cam_ppdet.py
@@ -4,7 +4,7 @@
import os
import sys
-# add python path of PadleDetection to sys.path
+# add python path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
sys.path.insert(0, parent_path)
From 66f391a8da6bacbdf3da04c6ddf21e113ac8e812 Mon Sep 17 00:00:00 2001
From: thinkthinking <61035602+thinkthinking@users.noreply.github.com>
Date: Tue, 21 Feb 2023 21:28:47 +0800
Subject: [PATCH 021/116] [Docs]update v2.6 readme.md (#7804)
---
README_cn.md | 43 +++++++++----------------------------------
1 file changed, 9 insertions(+), 34 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index db1fe7f4430..15b0896cbb7 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -64,32 +64,7 @@ PaddleDetection整理工业、农业、林业、交通、医疗、金融、能
## 📣最新进展
-**💎稳定版本**
-
-位于[`release/2.5`](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5)分支,最新的[**v2.5**](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5)版本已经在 2022.09.13 发布,版本发新详细内容请参考[v2.5.0更新日志](https://github.com/PaddlePaddle/PaddleDetection/releases/tag/v2.5.0),重点更新:
-- [🎗️产业特色模型|产业工具](#️产业特色模型产业工具-1):
- - 发布[PP-YOLOE+](configs/ppyoloe),最高精度提升2.4% mAP,达到54.9% mAP,模型训练收敛速度提升3.75倍,端到端预测速度最高提升2.3倍;多个下游任务泛化性提升
- - 发布[PicoDet-NPU](configs/picodet)模型,支持模型全量化部署;新增[PicoDet](configs/picodet)版面分析模型
- - 发布[PP-TinyPose升级版](./configs/keypoint/tiny_pose/)增强版,在健身、舞蹈等场景精度提升9.1% AP,支持侧身、卧躺、跳跃、高抬腿等非常规动作
- - 发布行人分析工具[PP-Human v2](./deploy/pipeline),新增打架、打电话、抽烟、闯入四大行为识别,底层算法性能升级,覆盖行人检测、跟踪、属性三类核心算法能力,提供保姆级全流程开发及模型优化策略,支持在线视频流输入
- - 首次发布[PP-Vehicle](./deploy/pipeline),提供车牌识别、车辆属性分析(颜色、车型)、车流量统计以及违章检测四大功能,兼容图片、在线视频流、视频输入,提供完善的二次开发文档教程
-- [📱模型库](#模型库):
- - 全面覆盖的[YOLO家族](https://github.com/PaddlePaddle/PaddleYOLO/tree/develop/docs/MODEL_ZOO_cn.md)经典与最新算法模型的代码库[PaddleYOLO](https://github.com/PaddlePaddle/PaddleYOLO): 包括YOLOv3,百度飞桨自研的实时高精度目标检测模型PP-YOLOE,以及前沿检测算法YOLOv4、YOLOv5、YOLOX,YOLOv6、YOLOv7以及YOLOv8
- - 新增基于[ViT](configs/vitdet)骨干网络高精度检测模型,COCO数据集精度达到55.7% mAP;新增[OC-SORT](configs/mot/ocsort)多目标跟踪模型;新增[ConvNeXt](configs/convnext)骨干网络
-- [💡产业实践范例](#产业实践范例):
- - 新增[智能健身](https://aistudio.baidu.com/aistudio/projectdetail/4385813)、[打架识别](https://aistudio.baidu.com/aistudio/projectdetail/4086987?channelType=0&channel=0)、[来客分析](https://aistudio.baidu.com/aistudio/projectdetail/4230123?channelType=0&channel=0)
-
-**🧬预览版本**
-
-位于[`develop`](https://github.com/PaddlePaddle/PaddleDetection/tree/develop)分支,体验最新功能请切换到[该分支](https://github.com/PaddlePaddle/PaddleDetection/tree/develop),最近更新:
-- [📱模型库](#模型库):
- - 新增[少样本迁移学习](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/few-shot);
- - 新增[半监督检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/semi_det);
- - 新增[YOLOv8](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.5/configs/yolov8);
- - 更新[YOLOv6-v3.0](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.5/configs/yolov6);
-- [🎗️产业特色模型|产业工具](#️产业特色模型产业工具-1):
- - 发布**旋转框检测模型**[PP-YOLOE-R](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r):Anchor-free旋转框检测SOTA模型,精度速度双高、云边一体,s/m/l/x四个模型适配不用算力硬件、部署友好,避免使用特殊算子,能够轻松使用TensorRT加速;
- - 发布**小目标检测模型**[PP-YOLOE-SOD](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/smalldet):基于切图的端到端检测方案、基于原图的检测模型,精度达VisDrone开源最优;
+PaddleDetection 2.6版本发布! [点击查看版本更新介绍](https://github.com/PaddlePaddle/PaddleDetection/releases/tag/v2.6.0)
## 👫开源社区
@@ -106,7 +81,7 @@ PaddleDetection整理工业、农业、林业、交通、医疗、金融、能
- 10+工业安防交通全流程项目实操(含源码)
-

+
PaddleDetection官方交流群二维码
@@ -374,8 +349,8 @@ PaddleDetection整理工业、农业、林业、交通、医疗、金融、能
PP-YOLOv2
PP-YOLOE
PP-YOLOE+
- PP-YOLOE-SOD
- PP-YOLOE-R
+ PP-YOLOE-SOD
+ PP-YOLOE-R
SSD
CenterNet
FCOS
@@ -427,7 +402,7 @@ PaddleDetection整理工业、农业、林业、交通、医疗、金融、能
Semi-Supervised Detection
3D Detection
@@ -548,7 +523,7 @@ PP-YOLOE是基于PP-YOLOv2的卓越的单阶段Anchor-free模型,超越了多
PP-YOLOE-R是一个高效的单阶段Anchor-free旋转框检测模型,基于PP-YOLOE+引入了一系列改进策略来提升检测精度。根据不同的硬件对精度和速度的要求,PP-YOLOE-R包含s/m/l/x四个尺寸的模型。在DOTA 1.0数据集上,PP-YOLOE-R-l和PP-YOLOE-R-x在单尺度训练和测试的情况下分别达到了78.14mAP和78.28 mAP,这在单尺度评估下超越了几乎所有的旋转框检测模型。通过多尺度训练和测试,PP-YOLOE-R-l和PP-YOLOE-R-x的检测精度进一步提升至80.02mAP和80.73 mAP,超越了所有的Anchor-free方法并且和最先进的Anchor-based的两阶段模型精度几乎相当。在保持高精度的同时,PP-YOLOE-R避免使用特殊的算子,例如Deformable Convolution或Rotated RoI Align,使其能轻松地部署在多种多样的硬件上。
-`传送门`:[PP-YOLOE-R说明](https://github.com/thinkthinking/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r)。
+`传送门`:[PP-YOLOE-R说明](configs/rotate/ppyoloe_r)。
`传送门`:[arXiv论文](https://arxiv.org/abs/2211.02386)。
@@ -561,7 +536,7 @@ PP-YOLOE-R是一个高效的单阶段Anchor-free旋转框检测模型,基于PP
| :----------: | :------: | :---: | :-----------------: | :------------------------: | :--------: | :-------: | :--------: | :------: | :------: | :-----: | :-----------: | :---------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------: |
| PP-YOLOE-R-l | CRN-l | 80.02 | 69.7 | 48.3 | 53.29 | 281.65 | 3x | oc | MS+RR | 4 | 2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_r_crn_l_3x_dota_ms.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r/ppyoloe_r_crn_l_3x_dota_ms.yml) |
-`传送门`:[全部预训练模型](https://github.com/thinkthinking/PaddleDetection/tree/develop/configs/rotate/ppyoloe_r)。
+`传送门`:[全部预训练模型](configs/rotate/ppyoloe_r)。
@@ -579,7 +554,7 @@ PP-YOLOE-R是一个高效的单阶段Anchor-free旋转框检测模型,基于PP
PP-YOLOE-SOD(Small Object Detection)是PaddleDetection团队针对小目标检测提出的检测方案,在VisDrone-DET数据集上单模型精度达到38.5mAP,达到了SOTA性能。其分别基于切图拼图流程优化的小目标检测方案以及基于原图模型算法优化的小目标检测方案。同时提供了数据集自动分析脚本,只需输入数据集标注文件,便可得到数据集统计结果,辅助判断数据集是否是小目标数据集以及是否需要采用切图策略,同时给出网络超参数参考值。
-`传送门`:[PP-YOLOE-SOD 小目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/smalldet)。
+`传送门`:[PP-YOLOE-SOD 小目标检测模型](configs/smalldet)。
@@ -591,7 +566,7 @@ PP-YOLOE-SOD(Small Object Detection)是PaddleDetection团队针对小目标检
| :------------------ | :-----------------------------: | :------------------------: | :----------------------------------: | :-----------------------------: | :------------------------------------: | :-------------------------------: | :---------------------------------------------------------------------------------------------: | :----------------------------------------------------------: |
| **PP-YOLOE+_SOD-l** | **31.9** | **52.1** | **25.6** | **43.5** | **30.25** | **51.18** | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_sod_crn_l_80e_visdrone.pdparams) | [配置文件](visdrone/ppyoloe_plus_sod_crn_l_80e_visdrone.yml) |
-`传送门`:[全部预训练模型](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/smalldet)。
+`传送门`:[全部预训练模型](configs/smalldet)。
From 2685fb569bc40f8ef80002ab919ac4eac9f6a096 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Wed, 22 Feb 2023 16:00:18 +0800
Subject: [PATCH 022/116] fix unittest (#7806)
---
configs/datasets/voc.yml | 24 ++++++++++++------------
configs/runtime.yml | 1 +
ppdet/core/workspace.py | 11 ++++++++++-
ppdet/engine/trainer.py | 8 ++++----
4 files changed, 27 insertions(+), 17 deletions(-)
diff --git a/configs/datasets/voc.yml b/configs/datasets/voc.yml
index 9fb492f03d1..72182bed9d1 100644
--- a/configs/datasets/voc.yml
+++ b/configs/datasets/voc.yml
@@ -3,19 +3,19 @@ map_type: 11point
num_classes: 20
TrainDataset:
- !VOCDataSet
- dataset_dir: dataset/voc
- anno_path: trainval.txt
- label_list: label_list.txt
- data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+ name: VOCDataSet
+ dataset_dir: dataset/voc
+ anno_path: trainval.txt
+ label_list: label_list.txt
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
EvalDataset:
- !VOCDataSet
- dataset_dir: dataset/voc
- anno_path: test.txt
- label_list: label_list.txt
- data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+ name: VOCDataSet
+ dataset_dir: dataset/voc
+ anno_path: test.txt
+ label_list: label_list.txt
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
TestDataset:
- !ImageFolder
- anno_path: dataset/voc/label_list.txt
+ name: ImageFolder
+ anno_path: dataset/voc/label_list.txt
diff --git a/configs/runtime.yml b/configs/runtime.yml
index c0920da5564..a58b171ce77 100644
--- a/configs/runtime.yml
+++ b/configs/runtime.yml
@@ -1,6 +1,7 @@
use_gpu: true
use_xpu: false
use_mlu: false
+use_npu: false
log_iter: 20
save_dir: output
snapshot_epoch: 1
diff --git a/ppdet/core/workspace.py b/ppdet/core/workspace.py
index b3c932c0a82..6735bcfc26d 100644
--- a/ppdet/core/workspace.py
+++ b/ppdet/core/workspace.py
@@ -67,6 +67,15 @@ def __getattr__(self, key):
return self[key]
raise AttributeError("object has no attribute '{}'".format(key))
+ def __setattr__(self, key, value):
+ self[key] = value
+
+ def copy(self):
+ new_dict = AttrDict()
+ for k, v in self.items():
+ new_dict.update({k: v})
+ return new_dict
+
global_config = AttrDict()
@@ -280,4 +289,4 @@ def create(cls_or_name, **kwargs):
# prevent modification of global config values of reference types
# (e.g., list, dict) from within the created module instances
#kwargs = copy.deepcopy(kwargs)
- return cls(**cls_kwargs)
\ No newline at end of file
+ return cls(**cls_kwargs)
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index ae0e21d8ea4..0378e00ecb5 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -62,7 +62,7 @@
class Trainer(object):
def __init__(self, cfg, mode='train'):
- self.cfg = cfg
+ self.cfg = cfg.copy()
assert mode.lower() in ['train', 'eval', 'test'], \
"mode should be 'train', 'eval' or 'test'"
self.mode = mode.lower()
@@ -99,12 +99,12 @@ def __init__(self, cfg, mode='train'):
self.dataset, cfg.worker_num)
if cfg.architecture == 'JDE' and self.mode == 'train':
- cfg['JDEEmbeddingHead'][
+ self.cfg['JDEEmbeddingHead'][
'num_identities'] = self.dataset.num_identities_dict[0]
# JDE only support single class MOT now.
if cfg.architecture == 'FairMOT' and self.mode == 'train':
- cfg['FairMOTEmbeddingHead'][
+ self.cfg['FairMOTEmbeddingHead'][
'num_identities_dict'] = self.dataset.num_identities_dict
# FairMOT support single class and multi-class MOT now.
@@ -149,7 +149,7 @@ def __init__(self, cfg, mode='train'):
reader_name = '{}Reader'.format(self.mode.capitalize())
# If metric is VOC, need to be set collate_batch=False.
if cfg.metric == 'VOC':
- cfg[reader_name]['collate_batch'] = False
+ self.cfg[reader_name]['collate_batch'] = False
self.loader = create(reader_name)(self.dataset, cfg.worker_num,
self._eval_batch_sampler)
# TestDataset build after user set images, skip loader creation here
From 4c04ce743f861c4245c5d4446e5ef376eb75e165 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Wed, 22 Feb 2023 19:07:24 +0800
Subject: [PATCH 023/116] keypoint petr (#7774)
* petr train ok
train ok
refix augsize
affine size fix
update msdeformable
fix flip/affine
fix clip
add resize area
add distortion
debug mode
fix pos_inds
update edge joints
fix wording mistakes
* delete extra code; adapt to transformer modifications; update code format
* revert old transformer modifications
* integrate datasets
---
.gitignore | 6 +-
configs/keypoint/README.md | 2 +
configs/keypoint/README_en.md | 1 +
.../keypoint/petr/petr_resnet50_16x2_coco.yml | 255 ++++
docs/tutorials/data/PrepareKeypointDataSet.md | 2 +-
.../data/PrepareKeypointDataSet_en.md | 2 +-
ppdet/data/source/keypoint_coco.py | 95 +-
ppdet/data/transform/batch_operators.py | 115 +-
ppdet/data/transform/keypoint_operators.py | 733 +++++++++-
ppdet/data/transform/operators.py | 318 ++++-
ppdet/modeling/architectures/__init__.py | 1 +
ppdet/modeling/architectures/keypoint_petr.py | 217 +++
ppdet/modeling/assigners/__init__.py | 2 +
.../modeling/assigners/hungarian_assigner.py | 316 +++++
ppdet/modeling/assigners/pose_utils.py | 275 ++++
ppdet/modeling/backbones/resnet.py | 60 +-
.../modeling/backbones/vision_transformer.py | 4 +-
ppdet/modeling/heads/__init__.py | 1 +
ppdet/modeling/heads/petr_head.py | 1161 ++++++++++++++++
ppdet/modeling/layers.py | 2 +-
ppdet/modeling/losses/focal_loss.py | 79 +-
ppdet/modeling/losses/keypoint_loss.py | 406 +++++-
ppdet/modeling/necks/__init__.py | 1 +
ppdet/modeling/necks/channel_mapper.py | 122 ++
ppdet/modeling/transformers/__init__.py | 1 +
.../modeling/transformers/petr_transformer.py | 1198 +++++++++++++++++
ppdet/utils/visualizer.py | 2 +-
27 files changed, 5215 insertions(+), 162 deletions(-)
create mode 100644 configs/keypoint/petr/petr_resnet50_16x2_coco.yml
create mode 100644 ppdet/modeling/architectures/keypoint_petr.py
create mode 100644 ppdet/modeling/assigners/hungarian_assigner.py
create mode 100644 ppdet/modeling/assigners/pose_utils.py
create mode 100644 ppdet/modeling/heads/petr_head.py
create mode 100644 ppdet/modeling/necks/channel_mapper.py
create mode 100644 ppdet/modeling/transformers/petr_transformer.py
diff --git a/.gitignore b/.gitignore
index 6a98a38b72e..4b6a6e82463 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,9 +18,9 @@ __pycache__/
# Distribution / packaging
/bin/
-/build/
+*build/
/develop-eggs/
-/dist/
+*dist/
/eggs/
/lib/
/lib64/
@@ -30,7 +30,7 @@ __pycache__/
/parts/
/sdist/
/var/
-/*.egg-info/
+*.egg-info/
/.installed.cfg
/*.egg
/.eggs
diff --git a/configs/keypoint/README.md b/configs/keypoint/README.md
index 74f10404a07..c93932d7360 100644
--- a/configs/keypoint/README.md
+++ b/configs/keypoint/README.md
@@ -56,8 +56,10 @@ PaddleDetection 中的关键点检测部分紧跟最先进的算法,包括 Top
## 模型库
COCO数据集
+
| 模型 | 方案 |输入尺寸 | AP(coco val) | 模型下载 | 配置文件 |
| :---------------- | -------- | :----------: | :----------------------------------------------------------: | ----------------------------------------------------| ------- |
+| PETR_Res50 |One-Stage| 512 | 65.5 | [petr_res50.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/petr_resnet50_16x2_coco.pdparams) | [config](./petr/petr_resnet50_16x2_coco.yml) |
| HigherHRNet-w32 |Bottom-Up| 512 | 67.1 | [higherhrnet_hrnet_w32_512.pdparams](https://paddledet.bj.bcebos.com/models/keypoint/higherhrnet_hrnet_w32_512.pdparams) | [config](./higherhrnet/higherhrnet_hrnet_w32_512.yml) |
| HigherHRNet-w32 | Bottom-Up| 640 | 68.3 | [higherhrnet_hrnet_w32_640.pdparams](https://paddledet.bj.bcebos.com/models/keypoint/higherhrnet_hrnet_w32_640.pdparams) | [config](./higherhrnet/higherhrnet_hrnet_w32_640.yml) |
| HigherHRNet-w32+SWAHR |Bottom-Up| 512 | 68.9 | [higherhrnet_hrnet_w32_512_swahr.pdparams](https://paddledet.bj.bcebos.com/models/keypoint/higherhrnet_hrnet_w32_512_swahr.pdparams) | [config](./higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml) |
diff --git a/configs/keypoint/README_en.md b/configs/keypoint/README_en.md
index cfb2e7c8f9b..15f659645c3 100644
--- a/configs/keypoint/README_en.md
+++ b/configs/keypoint/README_en.md
@@ -62,6 +62,7 @@ At the same time, PaddleDetection provides a self-developed real-time keypoint d
COCO Dataset
| Model | Input Size | AP(coco val) | Model Download | Config File |
| :---------------- | -------- | :----------: | :----------------------------------------------------------: | ----------------------------------------------------------- |
+| PETR_Res50 |One-Stage| 512 | 65.5 | [petr_res50.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/petr_resnet50_16x2_coco.pdparams) | [config](./petr/petr_resnet50_16x2_coco.yml) |
| HigherHRNet-w32 | 512 | 67.1 | [higherhrnet_hrnet_w32_512.pdparams](https://paddledet.bj.bcebos.com/models/keypoint/higherhrnet_hrnet_w32_512.pdparams) | [config](./higherhrnet/higherhrnet_hrnet_w32_512.yml) |
| HigherHRNet-w32 | 640 | 68.3 | [higherhrnet_hrnet_w32_640.pdparams](https://paddledet.bj.bcebos.com/models/keypoint/higherhrnet_hrnet_w32_640.pdparams) | [config](./higherhrnet/higherhrnet_hrnet_w32_640.yml) |
| HigherHRNet-w32+SWAHR | 512 | 68.9 | [higherhrnet_hrnet_w32_512_swahr.pdparams](https://paddledet.bj.bcebos.com/models/keypoint/higherhrnet_hrnet_w32_512_swahr.pdparams) | [config](./higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml) |
diff --git a/configs/keypoint/petr/petr_resnet50_16x2_coco.yml b/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
new file mode 100644
index 00000000000..d6415ad3b8b
--- /dev/null
+++ b/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
@@ -0,0 +1,255 @@
+use_gpu: true
+log_iter: 50
+save_dir: output
+snapshot_epoch: 1
+weights: output/petr_resnet50_16x2_coco/model_final
+epoch: 100
+num_joints: &num_joints 17
+pixel_std: &pixel_std 200
+metric: COCO
+num_classes: 1
+trainsize: &trainsize 512
+flip_perm: &flip_perm [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+find_unused_parameters: False
+
+#####model
+architecture: PETR
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PETR_pretrained.pdparams
+
+PETR:
+ backbone:
+ name: ResNet
+ depth: 50
+ variant: b
+ norm_type: bn
+ freeze_norm: True
+ freeze_at: 0
+ return_idx: [1,2,3]
+ num_stages: 4
+ lr_mult_list: [0.1, 0.1, 0.1, 0.1]
+ neck:
+ name: ChannelMapper
+ in_channels: [512, 1024, 2048]
+ kernel_size: 1
+ out_channels: 256
+ norm_type: "gn"
+ norm_groups: 32
+ act: None
+ num_outs: 4
+ bbox_head:
+ name: PETRHead
+ num_query: 300
+ num_classes: 1 # only person
+ in_channels: 2048
+ sync_cls_avg_factor: true
+ with_kpt_refine: true
+ transformer:
+ name: PETRTransformer
+ as_two_stage: true
+ encoder:
+ name: TransformerEncoder
+ encoder_layer:
+ name: TransformerEncoderLayer
+ d_model: 256
+ attn:
+ name: MSDeformableAttention
+ embed_dim: 256
+ num_heads: 8
+ num_levels: 4
+ num_points: 4
+ dim_feedforward: 1024
+ dropout: 0.1
+ num_layers: 6
+ decoder:
+ name: PETR_TransformerDecoder
+ num_layers: 3
+ return_intermediate: true
+ decoder_layer:
+ name: PETR_TransformerDecoderLayer
+ d_model: 256
+ dim_feedforward: 1024
+ dropout: 0.1
+ self_attn:
+ name: MultiHeadAttention
+ embed_dim: 256
+ num_heads: 8
+ dropout: 0.1
+ cross_attn:
+ name: MultiScaleDeformablePoseAttention
+ embed_dims: 256
+ num_heads: 8
+ num_levels: 4
+ num_points: 17
+ hm_encoder:
+ name: TransformerEncoder
+ encoder_layer:
+ name: TransformerEncoderLayer
+ d_model: 256
+ attn:
+ name: MSDeformableAttention
+ embed_dim: 256
+ num_heads: 8
+ num_levels: 1
+ num_points: 4
+ dim_feedforward: 1024
+ dropout: 0.1
+ num_layers: 1
+ refine_decoder:
+ name: PETR_DeformableDetrTransformerDecoder
+ num_layers: 2
+ return_intermediate: true
+ decoder_layer:
+ name: PETR_TransformerDecoderLayer
+ d_model: 256
+ dim_feedforward: 1024
+ dropout: 0.1
+ self_attn:
+ name: MultiHeadAttention
+ embed_dim: 256
+ num_heads: 8
+ dropout: 0.1
+ cross_attn:
+ name: MSDeformableAttention
+ embed_dim: 256
+ num_levels: 4
+ positional_encoding:
+ name: PositionEmbedding
+ num_pos_feats: 128
+ normalize: true
+ offset: -0.5
+ loss_cls:
+ name: Weighted_FocalLoss
+ use_sigmoid: true
+ gamma: 2.0
+ alpha: 0.25
+ loss_weight: 2.0
+ reduction: "mean"
+ loss_kpt:
+ name: L1Loss
+ loss_weight: 70.0
+ loss_kpt_rpn:
+ name: L1Loss
+ loss_weight: 70.0
+ loss_oks:
+ name: OKSLoss
+ loss_weight: 2.0
+ loss_hm:
+ name: CenterFocalLoss
+ loss_weight: 4.0
+ loss_kpt_refine:
+ name: L1Loss
+ loss_weight: 80.0
+ loss_oks_refine:
+ name: OKSLoss
+ loss_weight: 3.0
+ assigner:
+ name: PoseHungarianAssigner
+ cls_cost:
+ name: FocalLossCost
+ weight: 2.0
+ kpt_cost:
+ name: KptL1Cost
+ weight: 70.0
+ oks_cost:
+ name: OksCost
+ weight: 7.0
+
+#####optimizer
+LearningRate:
+ base_lr: 0.0002
+ schedulers:
+ - !PiecewiseDecay
+ milestones: [80]
+ gamma: 0.1
+ use_warmup: false
+ # - !LinearWarmup
+ # start_factor: 0.001
+ # steps: 1000
+
+OptimizerBuilder:
+ clip_grad_by_norm: 0.1
+ optimizer:
+ type: AdamW
+ regularizer:
+ factor: 0.0001
+ type: L2
+
+
+#####data
+TrainDataset:
+ !KeypointBottomUpCocoDataset
+ image_dir: train2017
+ anno_path: annotations/person_keypoints_train2017.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ return_mask: false
+
+EvalDataset:
+ !KeypointBottomUpCocoDataset
+ image_dir: val2017
+ anno_path: annotations/person_keypoints_val2017.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ test_mode: true
+ return_mask: false
+
+TestDataset:
+ !ImageFolder
+ anno_path: dataset/coco/keypoint_imagelist.txt
+
+worker_num: 2
+global_mean: &global_mean [0.485, 0.456, 0.406]
+global_std: &global_std [0.229, 0.224, 0.225]
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - PhotoMetricDistortion:
+ brightness_delta: 32
+ contrast_range: [0.5, 1.5]
+ saturation_range: [0.5, 1.5]
+ hue_delta: 18
+ - KeyPointFlip:
+ flip_prob: 0.5
+ flip_permutation: *flip_perm
+ - RandomAffine:
+ max_degree: 30
+ scale: [1.0, 1.0]
+ max_shift: 0.
+ trainsize: -1
+ - RandomSelect: { transforms1: [ RandomShortSideRangeResize: { scales: [[400, 1400], [1400, 1400]]} ],
+ transforms2: [
+ RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+ RandomSizeCrop: { min_size: 384, max_size: 600},
+ RandomShortSideRangeResize: { scales: [[400, 1400], [1400, 1400]]} ]}
+ batch_transforms:
+ - NormalizeImage: {mean: *global_mean, std: *global_std, is_scale: True}
+ - PadGT: {pad_img: True, minimum_gtnum: 1}
+ - Permute: {}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+ use_shared_memory: true
+ collate_batch: true
+
+EvalReader:
+ sample_transforms:
+ - PETR_Resize: {img_scale: [[800, 1333]], keep_ratio: True}
+ # - MultiscaleTestResize: {origin_target_size: [[800, 1333]], use_flip: false}
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 1
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - EvalAffine:
+ size: *trainsize
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 1
diff --git a/docs/tutorials/data/PrepareKeypointDataSet.md b/docs/tutorials/data/PrepareKeypointDataSet.md
index 4efa90b8d2b..27d844c0348 100644
--- a/docs/tutorials/data/PrepareKeypointDataSet.md
+++ b/docs/tutorials/data/PrepareKeypointDataSet.md
@@ -82,7 +82,7 @@ MPII keypoint indexes:
```
{
'joints_vis': [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
- 'joints': [
+ 'gt_joints': [
[-1.0, -1.0],
[-1.0, -1.0],
[-1.0, -1.0],
diff --git a/docs/tutorials/data/PrepareKeypointDataSet_en.md b/docs/tutorials/data/PrepareKeypointDataSet_en.md
index 80272910cee..6ed566d171a 100644
--- a/docs/tutorials/data/PrepareKeypointDataSet_en.md
+++ b/docs/tutorials/data/PrepareKeypointDataSet_en.md
@@ -82,7 +82,7 @@ The following example takes a parsed annotation information to illustrate the co
```
{
'joints_vis': [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
- 'joints': [
+ 'gt_joints': [
[-1.0, -1.0],
[-1.0, -1.0],
[-1.0, -1.0],
diff --git a/ppdet/data/source/keypoint_coco.py b/ppdet/data/source/keypoint_coco.py
index 45eb9a91d73..11ecea53840 100644
--- a/ppdet/data/source/keypoint_coco.py
+++ b/ppdet/data/source/keypoint_coco.py
@@ -80,7 +80,8 @@ def __getitem__(self, idx):
records = copy.deepcopy(self._get_imganno(idx))
records['image'] = cv2.imread(records['image_file'])
records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
- records['mask'] = (records['mask'] + 0).astype('uint8')
+ if 'mask' in records:
+ records['mask'] = (records['mask'] + 0).astype('uint8')
records = self.transform(records)
return records
@@ -135,24 +136,37 @@ def __init__(self,
num_joints,
transform=[],
shard=[0, 1],
- test_mode=False):
+ test_mode=False,
+ return_mask=True,
+ return_bbox=True,
+ return_area=True,
+ return_class=True):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform, shard, test_mode)
self.ann_file = os.path.join(dataset_dir, anno_path)
self.shard = shard
self.test_mode = test_mode
+ self.return_mask = return_mask
+ self.return_bbox = return_bbox
+ self.return_area = return_area
+ self.return_class = return_class
def parse_dataset(self):
self.coco = COCO(self.ann_file)
self.img_ids = self.coco.getImgIds()
if not self.test_mode:
- self.img_ids = [
- img_id for img_id in self.img_ids
- if len(self.coco.getAnnIds(
- imgIds=img_id, iscrowd=None)) > 0
- ]
+ self.img_ids_tmp = []
+ for img_id in self.img_ids:
+ ann_ids = self.coco.getAnnIds(imgIds=img_id)
+ anno = self.coco.loadAnns(ann_ids)
+ anno = [obj for obj in anno if obj['iscrowd'] == 0]
+ if len(anno) == 0:
+ continue
+ self.img_ids_tmp.append(img_id)
+ self.img_ids = self.img_ids_tmp
+
blocknum = int(len(self.img_ids) / self.shard[1])
self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
self.shard[0] + 1))]
@@ -199,21 +213,31 @@ def _get_imganno(self, idx):
ann_ids = coco.getAnnIds(imgIds=img_id)
anno = coco.loadAnns(ann_ids)
- mask = self._get_mask(anno, idx)
anno = [
obj for obj in anno
- if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
+ if obj['iscrowd'] == 0 and obj['num_keypoints'] > 0
]
+ db_rec = {}
joints, orgsize = self._get_joints(anno, idx)
+ db_rec['gt_joints'] = joints
+ db_rec['im_shape'] = orgsize
+
+ if self.return_bbox:
+ db_rec['gt_bbox'] = self._get_bboxs(anno, idx)
+
+ if self.return_class:
+ db_rec['gt_class'] = self._get_labels(anno, idx)
+
+ if self.return_area:
+ db_rec['gt_areas'] = self._get_areas(anno, idx)
+
+ if self.return_mask:
+ db_rec['mask'] = self._get_mask(anno, idx)
- db_rec = {}
db_rec['im_id'] = img_id
db_rec['image_file'] = os.path.join(self.img_prefix,
self.id2name[img_id])
- db_rec['mask'] = mask
- db_rec['joints'] = joints
- db_rec['im_shape'] = orgsize
return db_rec
@@ -229,12 +253,41 @@ def _get_joints(self, anno, idx):
np.array(obj['keypoints']).reshape([-1, 3])
img_info = self.coco.loadImgs(self.img_ids[idx])[0]
- joints[..., 0] /= img_info['width']
- joints[..., 1] /= img_info['height']
- orgsize = np.array([img_info['height'], img_info['width']])
+ orgsize = np.array([img_info['height'], img_info['width'], 1])
return joints, orgsize
+ def _get_bboxs(self, anno, idx):
+ num_people = len(anno)
+ gt_bboxes = np.zeros((num_people, 4), dtype=np.float32)
+
+ for idx, obj in enumerate(anno):
+ if 'bbox' in obj:
+ gt_bboxes[idx, :] = obj['bbox']
+
+ gt_bboxes[:, 2] += gt_bboxes[:, 0]
+ gt_bboxes[:, 3] += gt_bboxes[:, 1]
+ return gt_bboxes
+
+ def _get_labels(self, anno, idx):
+ num_people = len(anno)
+ gt_labels = np.zeros((num_people, 1), dtype=np.float32)
+
+ for idx, obj in enumerate(anno):
+ if 'category_id' in obj:
+ catid = obj['category_id']
+ gt_labels[idx, 0] = self.catid2clsid[catid]
+ return gt_labels
+
+ def _get_areas(self, anno, idx):
+ num_people = len(anno)
+ gt_areas = np.zeros((num_people, ), dtype=np.float32)
+
+ for idx, obj in enumerate(anno):
+ if 'area' in obj:
+ gt_areas[idx, ] = obj['area']
+ return gt_areas
+
def _get_mask(self, anno, idx):
"""Get ignore masks to mask out losses."""
coco = self.coco
@@ -506,7 +559,7 @@ def _load_coco_keypoint_annotations(self):
'image_file': os.path.join(self.img_prefix, file_name),
'center': center,
'scale': scale,
- 'joints': joints,
+ 'gt_joints': joints,
'joints_vis': joints_vis,
'im_id': im_id,
})
@@ -570,7 +623,7 @@ def _load_coco_person_detection_results(self):
'center': center,
'scale': scale,
'score': score,
- 'joints': joints,
+ 'gt_joints': joints,
'joints_vis': joints_vis,
})
@@ -647,8 +700,8 @@ def parse_dataset(self):
(self.ann_info['num_joints'], 3), dtype=np.float32)
joints_vis = np.zeros(
(self.ann_info['num_joints'], 3), dtype=np.float32)
- if 'joints' in a:
- joints_ = np.array(a['joints'])
+ if 'gt_joints' in a:
+ joints_ = np.array(a['gt_joints'])
joints_[:, 0:2] = joints_[:, 0:2] - 1
joints_vis_ = np.array(a['joints_vis'])
assert len(joints_) == self.ann_info[
@@ -664,7 +717,7 @@ def parse_dataset(self):
'im_id': im_id,
'center': c,
'scale': s,
- 'joints': joints,
+ 'gt_joints': joints,
'joints_vis': joints_vis
})
print("number length: {}".format(len(gt_db)))
diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py
index 92c211ee415..2637db43d21 100644
--- a/ppdet/data/transform/batch_operators.py
+++ b/ppdet/data/transform/batch_operators.py
@@ -1102,13 +1102,115 @@ class PadGT(BaseOperator):
1 means bbox, 0 means no bbox.
"""
- def __init__(self, return_gt_mask=True):
+ def __init__(self, return_gt_mask=True, pad_img=False, minimum_gtnum=0):
super(PadGT, self).__init__()
self.return_gt_mask = return_gt_mask
+ self.pad_img = pad_img
+ self.minimum_gtnum = minimum_gtnum
+
+ def _impad(self, img: np.ndarray,
+ *,
+ shape = None,
+ padding = None,
+ pad_val = 0,
+ padding_mode = 'constant') -> np.ndarray:
+ """Pad the given image to a certain shape or pad on all sides with
+ specified padding mode and padding value.
+
+ Args:
+ img (ndarray): Image to be padded.
+ shape (tuple[int]): Expected padding shape (h, w). Default: None.
+ padding (int or tuple[int]): Padding on each border. If a single int is
+ provided this is used to pad all borders. If tuple of length 2 is
+ provided this is the padding on left/right and top/bottom
+ respectively. If a tuple of length 4 is provided this is the
+ padding for the left, top, right and bottom borders respectively.
+ Default: None. Note that `shape` and `padding` can not be both
+ set.
+ pad_val (Number | Sequence[Number]): Values to be filled in padding
+ areas when padding_mode is 'constant'. Default: 0.
+ padding_mode (str): Type of padding. Should be: constant, edge,
+ reflect or symmetric. Default: constant.
+ - constant: pads with a constant value, this value is specified
+ with pad_val.
+ - edge: pads with the last value at the edge of the image.
+ - reflect: pads with reflection of image without repeating the last
+ value on the edge. For example, padding [1, 2, 3, 4] with 2
+ elements on both sides in reflect mode will result in
+ [3, 2, 1, 2, 3, 4, 3, 2].
+ - symmetric: pads with reflection of image repeating the last value
+ on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
+ both sides in symmetric mode will result in
+ [2, 1, 1, 2, 3, 4, 4, 3]
+
+ Returns:
+ ndarray: The padded image.
+ """
+
+ assert (shape is not None) ^ (padding is not None)
+ if shape is not None:
+ width = max(shape[1] - img.shape[1], 0)
+ height = max(shape[0] - img.shape[0], 0)
+ padding = (0, 0, int(width), int(height))
+
+ # check pad_val
+ import numbers
+ if isinstance(pad_val, tuple):
+ assert len(pad_val) == img.shape[-1]
+ elif not isinstance(pad_val, numbers.Number):
+ raise TypeError('pad_val must be a int or a tuple. '
+ f'But received {type(pad_val)}')
+
+ # check padding
+ if isinstance(padding, tuple) and len(padding) in [2, 4]:
+ if len(padding) == 2:
+ padding = (padding[0], padding[1], padding[0], padding[1])
+ elif isinstance(padding, numbers.Number):
+ padding = (padding, padding, padding, padding)
+ else:
+ raise ValueError('Padding must be a int or a 2, or 4 element tuple.'
+ f'But received {padding}')
+
+ # check padding mode
+ assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
+
+ border_type = {
+ 'constant': cv2.BORDER_CONSTANT,
+ 'edge': cv2.BORDER_REPLICATE,
+ 'reflect': cv2.BORDER_REFLECT_101,
+ 'symmetric': cv2.BORDER_REFLECT
+ }
+ img = cv2.copyMakeBorder(
+ img,
+ padding[1],
+ padding[3],
+ padding[0],
+ padding[2],
+ border_type[padding_mode],
+ value=pad_val)
+
+ return img
+
+ def checkmaxshape(self, samples):
+ maxh, maxw = 0, 0
+ for sample in samples:
+ h,w = sample['im_shape']
+ if h>maxh:
+ maxh = h
+ if w>maxw:
+ maxw = w
+ return (maxh, maxw)
def __call__(self, samples, context=None):
num_max_boxes = max([len(s['gt_bbox']) for s in samples])
+ num_max_boxes = max(self.minimum_gtnum, num_max_boxes)
+ if self.pad_img:
+ maxshape = self.checkmaxshape(samples)
for sample in samples:
+ if self.pad_img:
+ img = sample['image']
+ padimg = self._impad(img, shape=maxshape)
+ sample['image'] = padimg
if self.return_gt_mask:
sample['pad_gt_mask'] = np.zeros(
(num_max_boxes, 1), dtype=np.float32)
@@ -1142,6 +1244,17 @@ def __call__(self, samples, context=None):
if num_gt > 0:
pad_diff[:num_gt] = sample['difficult']
sample['difficult'] = pad_diff
+ if 'gt_joints' in sample:
+ num_joints = sample['gt_joints'].shape[1]
+ pad_gt_joints = np.zeros((num_max_boxes, num_joints, 3), dtype=np.float32)
+ if num_gt > 0:
+ pad_gt_joints[:num_gt] = sample['gt_joints']
+ sample['gt_joints'] = pad_gt_joints
+ if 'gt_areas' in sample:
+ pad_gt_areas = np.zeros((num_max_boxes, 1), dtype=np.float32)
+ if num_gt > 0:
+ pad_gt_areas[:num_gt, 0] = sample['gt_areas']
+ sample['gt_areas'] = pad_gt_areas
return samples
diff --git a/ppdet/data/transform/keypoint_operators.py b/ppdet/data/transform/keypoint_operators.py
index 9c7db162fc5..24cf63b8860 100644
--- a/ppdet/data/transform/keypoint_operators.py
+++ b/ppdet/data/transform/keypoint_operators.py
@@ -41,7 +41,7 @@
'TopDownAffine', 'ToHeatmapsTopDown', 'ToHeatmapsTopDown_DARK',
'ToHeatmapsTopDown_UDP', 'TopDownEvalAffine',
'AugmentationbyInformantionDropping', 'SinglePoseAffine', 'NoiseJitter',
- 'FlipPose'
+ 'FlipPose', 'PETR_Resize'
]
@@ -65,38 +65,77 @@ class KeyPointFlip(object):
"""
- def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
+ def __init__(self, flip_permutation, hmsize=None, flip_prob=0.5):
super(KeyPointFlip, self).__init__()
assert isinstance(flip_permutation, Sequence)
self.flip_permutation = flip_permutation
self.flip_prob = flip_prob
self.hmsize = hmsize
- def __call__(self, records):
- image = records['image']
- kpts_lst = records['joints']
- mask_lst = records['mask']
- flip = np.random.random() < self.flip_prob
- if flip:
- image = image[:, ::-1]
- for idx, hmsize in enumerate(self.hmsize):
- if len(mask_lst) > idx:
- mask_lst[idx] = mask_lst[idx][:, ::-1]
+ def _flipjoints(self, records, sizelst):
+ '''
+ records['gt_joints'] is Sequence in higherhrnet
+ '''
+ if not ('gt_joints' in records and records['gt_joints'].size > 0):
+ return records
+
+ kpts_lst = records['gt_joints']
+ if isinstance(kpts_lst, Sequence):
+ for idx, hmsize in enumerate(sizelst):
if kpts_lst[idx].ndim == 3:
kpts_lst[idx] = kpts_lst[idx][:, self.flip_permutation]
else:
kpts_lst[idx] = kpts_lst[idx][self.flip_permutation]
kpts_lst[idx][..., 0] = hmsize - kpts_lst[idx][..., 0]
- kpts_lst[idx] = kpts_lst[idx].astype(np.int64)
- kpts_lst[idx][kpts_lst[idx][..., 0] >= hmsize, 2] = 0
- kpts_lst[idx][kpts_lst[idx][..., 1] >= hmsize, 2] = 0
- kpts_lst[idx][kpts_lst[idx][..., 0] < 0, 2] = 0
- kpts_lst[idx][kpts_lst[idx][..., 1] < 0, 2] = 0
- records['image'] = image
- records['joints'] = kpts_lst
+ else:
+ hmsize = sizelst[0]
+ if kpts_lst.ndim == 3:
+ kpts_lst = kpts_lst[:, self.flip_permutation]
+ else:
+ kpts_lst = kpts_lst[self.flip_permutation]
+ kpts_lst[..., 0] = hmsize - kpts_lst[..., 0]
+
+ records['gt_joints'] = kpts_lst
+ return records
+
+ def _flipmask(self, records, sizelst):
+ if not 'mask' in records:
+ return records
+
+ mask_lst = records['mask']
+ for idx, hmsize in enumerate(sizelst):
+ if len(mask_lst) > idx:
+ mask_lst[idx] = mask_lst[idx][:, ::-1]
records['mask'] = mask_lst
return records
+ def _flipbbox(self, records, sizelst):
+ if not 'gt_bbox' in records:
+ return records
+
+ bboxes = records['gt_bbox']
+ hmsize = sizelst[0]
+ bboxes[:, 0::2] = hmsize - bboxes[:, 0::2][:, ::-1]
+ bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, hmsize)
+ records['gt_bbox'] = bboxes
+ return records
+
+ def __call__(self, records):
+ flip = np.random.random() < self.flip_prob
+ if flip:
+ image = records['image']
+ image = image[:, ::-1]
+ records['image'] = image
+ if self.hmsize is None:
+ sizelst = [image.shape[1]]
+ else:
+ sizelst = self.hmsize
+ self._flipjoints(records, sizelst)
+ self._flipmask(records, sizelst)
+ self._flipbbox(records, sizelst)
+
+ return records
+
@register_keypointop
class RandomAffine(object):
@@ -121,9 +160,10 @@ def __init__(self,
max_degree=30,
scale=[0.75, 1.5],
max_shift=0.2,
- hmsize=[128, 256],
+ hmsize=None,
trainsize=512,
- scale_type='short'):
+ scale_type='short',
+ boldervalue=[114, 114, 114]):
super(RandomAffine, self).__init__()
self.max_degree = max_degree
self.min_scale = scale[0]
@@ -132,8 +172,9 @@ def __init__(self,
self.hmsize = hmsize
self.trainsize = trainsize
self.scale_type = scale_type
+ self.boldervalue = boldervalue
- def _get_affine_matrix(self, center, scale, res, rot=0):
+ def _get_affine_matrix_old(self, center, scale, res, rot=0):
"""Generate transformation matrix."""
h = scale
t = np.zeros((3, 3), dtype=np.float32)
@@ -159,21 +200,94 @@ def _get_affine_matrix(self, center, scale, res, rot=0):
t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
return t
+ def _get_affine_matrix(self, center, scale, res, rot=0):
+ """Generate transformation matrix."""
+ w, h = scale
+ t = np.zeros((3, 3), dtype=np.float32)
+ t[0, 0] = float(res[0]) / w
+ t[1, 1] = float(res[1]) / h
+ t[0, 2] = res[0] * (-float(center[0]) / w + .5)
+ t[1, 2] = res[1] * (-float(center[1]) / h + .5)
+ t[2, 2] = 1
+ if rot != 0:
+ rot = -rot # To match direction of rotation from cropping
+ rot_mat = np.zeros((3, 3), dtype=np.float32)
+ rot_rad = rot * np.pi / 180
+ sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+ rot_mat[0, :2] = [cs, -sn]
+ rot_mat[1, :2] = [sn, cs]
+ rot_mat[2, 2] = 1
+ # Need to rotate around center
+ t_mat = np.eye(3)
+ t_mat[0, 2] = -res[0] / 2
+ t_mat[1, 2] = -res[1] / 2
+ t_inv = t_mat.copy()
+ t_inv[:2, 2] *= -1
+ t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
+ return t
+
+ def _affine_joints_mask(self,
+ degree,
+ center,
+ roi_size,
+ dsize,
+ keypoints=None,
+ heatmap_mask=None,
+ gt_bbox=None):
+ kpts = None
+ mask = None
+ bbox = None
+ mask_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
+ degree)[:2]
+ if heatmap_mask is not None:
+ mask = cv2.warpAffine(heatmap_mask, mask_affine_mat, dsize)
+ mask = ((mask / 255) > 0.5).astype(np.float32)
+ if keypoints is not None:
+ kpts = copy.deepcopy(keypoints)
+ kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
+ mask_affine_mat)
+ kpts[(kpts[..., 0]) > dsize[0], :] = 0
+ kpts[(kpts[..., 1]) > dsize[1], :] = 0
+ kpts[(kpts[..., 0]) < 0, :] = 0
+ kpts[(kpts[..., 1]) < 0, :] = 0
+ if gt_bbox is not None:
+ temp_bbox = gt_bbox[:, [0, 3, 2, 1]]
+ cat_bbox = np.concatenate((gt_bbox, temp_bbox), axis=-1)
+ gt_bbox_warped = warp_affine_joints(cat_bbox, mask_affine_mat)
+ bbox = np.zeros_like(gt_bbox)
+ bbox[:, 0] = gt_bbox_warped[:, 0::2].min(1).clip(0, dsize[0])
+ bbox[:, 2] = gt_bbox_warped[:, 0::2].max(1).clip(0, dsize[0])
+ bbox[:, 1] = gt_bbox_warped[:, 1::2].min(1).clip(0, dsize[1])
+ bbox[:, 3] = gt_bbox_warped[:, 1::2].max(1).clip(0, dsize[1])
+ return kpts, mask, bbox
+
def __call__(self, records):
image = records['image']
- keypoints = records['joints']
- heatmap_mask = records['mask']
+ shape = np.array(image.shape[:2][::-1])
+ keypoints = None
+ heatmap_mask = None
+ gt_bbox = None
+ if 'gt_joints' in records:
+ keypoints = records['gt_joints']
+
+ if 'mask' in records:
+ heatmap_mask = records['mask']
+ heatmap_mask *= 255
+
+ if 'gt_bbox' in records:
+ gt_bbox = records['gt_bbox']
degree = (np.random.random() * 2 - 1) * self.max_degree
- shape = np.array(image.shape[:2][::-1])
center = center = np.array((np.array(shape) / 2))
aug_scale = np.random.random() * (self.max_scale - self.min_scale
) + self.min_scale
if self.scale_type == 'long':
- scale = max(shape[0], shape[1]) / 1.0
+ scale = np.array([max(shape[0], shape[1]) / 1.0] * 2)
elif self.scale_type == 'short':
- scale = min(shape[0], shape[1]) / 1.0
+ scale = np.array([min(shape[0], shape[1]) / 1.0] * 2)
+ elif self.scale_type == 'wh':
+ scale = shape
else:
raise ValueError('Unknown scale type: {}'.format(self.scale_type))
roi_size = aug_scale * scale
@@ -181,44 +295,55 @@ def __call__(self, records):
dy = int(0)
if self.max_shift > 0:
- dx = np.random.randint(-self.max_shift * roi_size,
- self.max_shift * roi_size)
- dy = np.random.randint(-self.max_shift * roi_size,
- self.max_shift * roi_size)
+ dx = np.random.randint(-self.max_shift * roi_size[0],
+ self.max_shift * roi_size[0])
+ dy = np.random.randint(-self.max_shift * roi_size[0],
+ self.max_shift * roi_size[1])
center += np.array([dx, dy])
input_size = 2 * center
+ if self.trainsize != -1:
+ dsize = self.trainsize
+ imgshape = (dsize, dsize)
+ else:
+ dsize = scale
+ imgshape = (shape.tolist())
- keypoints[..., :2] *= shape
- heatmap_mask *= 255
- kpts_lst = []
- mask_lst = []
-
- image_affine_mat = self._get_affine_matrix(
- center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
+ image_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
+ degree)[:2]
image = cv2.warpAffine(
image,
- image_affine_mat, (self.trainsize, self.trainsize),
- flags=cv2.INTER_LINEAR)
+ image_affine_mat,
+ imgshape,
+ flags=cv2.INTER_LINEAR,
+ borderValue=self.boldervalue)
+
+ if self.hmsize is None:
+ kpts, mask, gt_bbox = self._affine_joints_mask(
+ degree, center, roi_size, dsize, keypoints, heatmap_mask,
+ gt_bbox)
+ records['image'] = image
+ if kpts is not None: records['gt_joints'] = kpts
+ if mask is not None: records['mask'] = mask
+ if gt_bbox is not None: records['gt_bbox'] = gt_bbox
+ return records
+
+ kpts_lst = []
+ mask_lst = []
for hmsize in self.hmsize:
- kpts = copy.deepcopy(keypoints)
- mask_affine_mat = self._get_affine_matrix(
- center, roi_size, (hmsize, hmsize), degree)[:2]
- if heatmap_mask is not None:
- mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
- (hmsize, hmsize))
- mask = ((mask / 255) > 0.5).astype(np.float32)
- kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
- mask_affine_mat)
- kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
- kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
- kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
- kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
+ kpts, mask, gt_bbox = self._affine_joints_mask(
+ degree, center, roi_size, [hmsize, hmsize], keypoints,
+ heatmap_mask, gt_bbox)
kpts_lst.append(kpts)
mask_lst.append(mask)
records['image'] = image
- records['joints'] = kpts_lst
- records['mask'] = mask_lst
+
+ if 'gt_joints' in records:
+ records['gt_joints'] = kpts_lst
+ if 'mask' in records:
+ records['mask'] = mask_lst
+ if 'gt_bbox' in records:
+ records['gt_bbox'] = gt_bbox
return records
@@ -251,8 +376,8 @@ def __call__(self, records):
if mask is not None:
mask = cv2.warpAffine(mask, trans, size_resized)
records['mask'] = mask
- if 'joints' in records:
- del records['joints']
+ if 'gt_joints' in records:
+ del records['gt_joints']
records['image'] = image_resized
return records
@@ -303,7 +428,7 @@ def __init__(self, num_joints, max_people=30):
self.num_joints = num_joints
def __call__(self, records):
- kpts_lst = records['joints']
+ kpts_lst = records['gt_joints']
kpts = kpts_lst[0]
tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
inds = np.where(kpts[..., 2] > 0)
@@ -315,7 +440,7 @@ def __call__(self, records):
tagmap[p, j, 2] = visible[..., 0] # x
tagmap[p, j, 3] = 1
records['tagmap'] = tagmap
- del records['joints']
+ del records['gt_joints']
return records
@@ -349,7 +474,7 @@ def __init__(self, num_joints, hmsize, sigma=None):
self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
def __call__(self, records):
- kpts_lst = records['joints']
+ kpts_lst = records['gt_joints']
mask_lst = records['mask']
for idx, hmsize in enumerate(self.hmsize):
mask = mask_lst[idx]
@@ -470,7 +595,7 @@ def flip_joints(self, joints, joints_vis, width, matched_parts):
def __call__(self, records):
image = records['image']
- joints = records['joints']
+ joints = records['gt_joints']
joints_vis = records['joints_vis']
c = records['center']
s = records['scale']
@@ -493,7 +618,7 @@ def __call__(self, records):
joints, joints_vis, image.shape[1], self.flip_pairs)
c[0] = image.shape[1] - c[0] - 1
records['image'] = image
- records['joints'] = joints
+ records['gt_joints'] = joints
records['joints_vis'] = joints_vis
records['center'] = c
records['scale'] = s
@@ -553,7 +678,7 @@ def _cutout(self, img, joints, joints_vis):
def __call__(self, records):
img = records['image']
- joints = records['joints']
+ joints = records['gt_joints']
joints_vis = records['joints_vis']
if np.random.rand() < self.prob_cutout:
img = self._cutout(img, joints, joints_vis)
@@ -581,7 +706,7 @@ def __init__(self, trainsize, use_udp=False):
def __call__(self, records):
image = records['image']
- joints = records['joints']
+ joints = records['gt_joints']
joints_vis = records['joints_vis']
rot = records['rotate'] if "rotate" in records else 0
if self.use_udp:
@@ -606,7 +731,7 @@ def __call__(self, records):
joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
records['image'] = image
- records['joints'] = joints
+ records['gt_joints'] = joints
return records
@@ -842,7 +967,7 @@ def __call__(self, records):
https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
Copyright (c) Microsoft, under the MIT License.
"""
- joints = records['joints']
+ joints = records['gt_joints']
joints_vis = records['joints_vis']
num_joints = joints.shape[0]
image_size = np.array(
@@ -885,7 +1010,7 @@ def __call__(self, records):
0]:g_y[1], g_x[0]:g_x[1]]
records['target'] = target
records['target_weight'] = target_weight
- del records['joints'], records['joints_vis']
+ del records['gt_joints'], records['joints_vis']
return records
@@ -910,7 +1035,7 @@ def __init__(self, hmsize, sigma):
self.sigma = sigma
def __call__(self, records):
- joints = records['joints']
+ joints = records['gt_joints']
joints_vis = records['joints_vis']
num_joints = joints.shape[0]
image_size = np.array(
@@ -943,7 +1068,7 @@ def __call__(self, records):
(x - mu_x)**2 + (y - mu_y)**2) / (2 * self.sigma**2))
records['target'] = target
records['target_weight'] = target_weight
- del records['joints'], records['joints_vis']
+ del records['gt_joints'], records['joints_vis']
return records
@@ -972,7 +1097,7 @@ def __init__(self, hmsize, sigma):
self.sigma = sigma
def __call__(self, records):
- joints = records['joints']
+ joints = records['gt_joints']
joints_vis = records['joints_vis']
num_joints = joints.shape[0]
image_size = np.array(
@@ -1017,6 +1142,472 @@ def __call__(self, records):
0]:g_y[1], g_x[0]:g_x[1]]
records['target'] = target
records['target_weight'] = target_weight
- del records['joints'], records['joints_vis']
+ del records['gt_joints'], records['joints_vis']
return records
+
+
+from typing import Optional, Tuple, Union, List
+import numbers
+
+
+def _scale_size(
+ size: Tuple[int, int],
+ scale: Union[float, int, tuple], ) -> Tuple[int, int]:
+ """Rescale a size by a ratio.
+
+ Args:
+ size (tuple[int]): (w, h).
+ scale (float | tuple(float)): Scaling factor.
+
+ Returns:
+ tuple[int]: scaled size.
+ """
+ if isinstance(scale, (float, int)):
+ scale = (scale, scale)
+ w, h = size
+ return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)
+
+
def rescale_size(old_size: tuple,
                 scale: Union[float, int, tuple],
                 return_scale: bool=False) -> tuple:
    """Calculate the new size to be rescaled to.

    Args:
        old_size (tuple[int]): The old size (w, h) of image.
        scale (float | tuple[int] | list[int]): The scaling factor or
            maximum size. If it is a number, the image will be rescaled
            by this factor; if it is a tuple/list of 2 integers, the
            image will be rescaled as large as possible within that
            (long-edge, short-edge) bound.
        return_scale (bool): Whether to return the scaling factor besides
            the rescaled image size.

    Returns:
        tuple[int]: The new rescaled image size, plus the scale factor
        when ``return_scale`` is True.

    Raises:
        ValueError: If a numeric ``scale`` is not positive.
        TypeError: If ``scale`` is neither a number nor a tuple/list.
    """
    w, h = old_size
    if isinstance(scale, (float, int)):
        if scale <= 0:
            raise ValueError(f'Invalid scale {scale}, must be positive.')
        scale_factor = scale
    # Bugfix: the docstring promises tuple support and callers pass
    # tuples (e.g. results['scale'] built by PETR_Resize.__call__), but
    # the original only accepted lists and raised TypeError on tuples.
    elif isinstance(scale, (list, tuple)):
        max_long_edge = max(scale)
        max_short_edge = min(scale)
        # Largest factor keeping both edges inside the bound.
        scale_factor = min(max_long_edge / max(h, w),
                           max_short_edge / min(h, w))
    else:
        raise TypeError(
            f'Scale must be a number or tuple of int, but got {type(scale)}')

    new_size = _scale_size((w, h), scale_factor)

    if return_scale:
        return new_size, scale_factor
    else:
        return new_size
+
+
def imrescale(img: np.ndarray,
              scale: Union[float, Tuple[int, int]],
              return_scale: bool=False,
              interpolation: str='bilinear',
              backend: Optional[str]=None) -> Union[np.ndarray, Tuple[
                  np.ndarray, float]]:
    """Resize image while keeping the aspect ratio.

    Args:
        img (ndarray): The input image.
        scale (float | tuple[int]): The scaling factor or maximum size.
            A float rescales by that factor; a pair of ints bounds the
            rescaled image's long and short edges.
        return_scale (bool): Whether to also return the scaling factor.
        interpolation (str): Same as :func:`resize`.
        backend (str | None): Same as :func:`resize`.

    Returns:
        ndarray: The rescaled image (with the factor when requested).
    """
    height, width = img.shape[:2]
    # Resolve the target (w, h) and the single aspect-preserving factor.
    new_size, factor = rescale_size((width, height), scale, return_scale=True)
    rescaled = imresize(
        img, new_size, interpolation=interpolation, backend=backend)
    return (rescaled, factor) if return_scale else rescaled
+
+
def imresize(
        img: np.ndarray,
        size: Tuple[int, int],
        return_scale: bool=False,
        interpolation: str='bilinear',
        out: Optional[np.ndarray]=None,
        backend: Optional[str]=None,
        interp=cv2.INTER_LINEAR, ) -> Union[Tuple[np.ndarray, float, float],
                                            np.ndarray]:
    """Resize image to a given size.

    Args:
        img (ndarray): The input image.
        size (tuple[int]): Target size (w, h).
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
            NOTE(review): on the 'cv2' path this argument is ignored and
            the raw ``interp`` flag below is used instead — confirm that
            is intended.
        out (ndarray): The output destination.
        backend (str | None): The image resize backend type. Options are `cv2`,
            `pillow`, `None`. If backend is None, the global imread_backend
            specified by ``mmcv.use_backend()`` will be used. Default: None.
        interp (int): OpenCV interpolation flag used by the 'cv2' backend.

    Returns:
        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.
    """
    h, w = img.shape[:2]
    if backend is None:
        # NOTE(review): `imread_backend` is not defined in this chunk —
        # presumably a module-level global ported from mmcv; verify it
        # (and `Image` / `pillow_interp_codes` below) exist in this file.
        backend = imread_backend
    if backend not in ['cv2', 'pillow']:
        raise ValueError(f'backend: {backend} is not supported for resize.'
                         f"Supported backends are 'cv2', 'pillow'")

    if backend == 'pillow':
        assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
        pil_image = Image.fromarray(img)
        pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
        resized_img = np.array(pil_image)
    else:
        resized_img = cv2.resize(img, size, dst=out, interpolation=interp)
    if not return_scale:
        return resized_img
    else:
        # Scales are relative to the ORIGINAL image size.
        w_scale = size[0] / w
        h_scale = size[1] / h
        return resized_img, w_scale, h_scale
+
+
class PETR_Resize:
    """Resize images & bbox & mask & keypoints & areas.

    This transform resizes the input image to some scale. Bboxes, masks,
    keypoints and areas are then resized with the same scale factor. If the
    input dict contains the key "scale", then the scale in the input dict is
    used, otherwise the specified scale in the init method is used. If the
    input dict contains the key "scale_factor" (if MultiScaleFlipAug does not
    give img_scale but scale_factor), the actual scale will be computed by
    image shape and scale_factor.

    `img_scale` can either be a tuple (single-scale) or a list of tuple
    (multi-scale). There are 3 multiscale modes:

    - ``ratio_range is not None``: randomly sample a ratio from the ratio \
      range and multiply it with the image scale.
    - ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \
      sample a scale from the multiscale range.
    - ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \
      sample a scale from multiple scales.

    Args:
        img_scale (tuple or list[tuple]): Images scales for resizing.
        multiscale_mode (str): Either "range" or "value".
        ratio_range (tuple[float]): (min_ratio, max_ratio)
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image.
        bbox_clip_border (bool, optional): Whether to clip the objects outside
            the border of the image. In some dataset like MOT17, the gt bboxes
            are allowed to cross the border of images. Therefore, we don't
            need to clip the gt bboxes in these cases. Defaults to True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            These two backends generates slightly different results. Defaults
            to 'cv2'.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
        override (bool, optional): Whether to override `scale` and
            `scale_factor` so as to call resize twice. Default False. If True,
            after the first resizing, the existed `scale` and `scale_factor`
            will be ignored so the second resizing can be allowed.
            This option is a work-around for multiple times of resize in DETR.
            Defaults to False.
        keypoint_clip_border (bool, optional): Whether to clip keypoints
            outside the border of the image. Defaults to True.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=True,
                 bbox_clip_border=True,
                 backend='cv2',
                 interpolation='bilinear',
                 override=False,
                 keypoint_clip_border=True):
        if img_scale is None:
            self.img_scale = None
        else:
            # Normalize to a list of scale candidates.
            if isinstance(img_scale, list):
                self.img_scale = img_scale
            else:
                self.img_scale = [img_scale]
            assert isinstance(self.img_scale, list)

        if ratio_range is not None:
            # mode 1: given a scale and a range of image ratio
            assert len(self.img_scale) == 1
        else:
            # mode 2: given multiple scales or a range of scales
            assert multiscale_mode in ['value', 'range']

        self.backend = backend
        self.multiscale_mode = multiscale_mode
        self.ratio_range = ratio_range
        self.keep_ratio = keep_ratio
        # TODO: refactor the override option in Resize
        self.interpolation = interpolation
        self.override = override
        self.bbox_clip_border = bbox_clip_border
        self.keypoint_clip_border = keypoint_clip_border

    @staticmethod
    def random_select(img_scales):
        """Randomly select an img_scale from given candidates.

        Args:
            img_scales (list[tuple]): Images scales for selection.

        Returns:
            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, \
                where ``img_scale`` is the selected image scale and \
                ``scale_idx`` is the selected index in the given candidates.
        """

        assert isinstance(img_scales, list)
        scale_idx = np.random.randint(len(img_scales))
        img_scale = img_scales[scale_idx]
        return img_scale, scale_idx

    @staticmethod
    def random_sample(img_scales):
        """Randomly sample an img_scale when ``multiscale_mode=='range'``.

        Args:
            img_scales (list[tuple]): Images scale range for sampling.
                There must be two tuples in img_scales, which specify the lower
                and upper bound of image scales.

        Returns:
            (tuple, None): Returns a tuple ``(img_scale, None)``, where \
                ``img_scale`` is sampled scale and None is just a placeholder \
                to be consistent with :func:`random_select`.
        """

        assert isinstance(img_scales, list) and len(img_scales) == 2
        img_scale_long = [max(s) for s in img_scales]
        img_scale_short = [min(s) for s in img_scales]
        long_edge = np.random.randint(
            min(img_scale_long), max(img_scale_long) + 1)
        short_edge = np.random.randint(
            min(img_scale_short), max(img_scale_short) + 1)
        img_scale = (long_edge, short_edge)
        return img_scale, None

    @staticmethod
    def random_sample_ratio(img_scale, ratio_range):
        """Randomly sample an img_scale when ``ratio_range`` is specified.

        A ratio will be randomly sampled from the range specified by
        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
        generate sampled scale.

        Args:
            img_scale (tuple | list): Images scale base to multiply with ratio.
            ratio_range (tuple[float]): The minimum and maximum ratio to scale
                the ``img_scale``.

        Returns:
            (tuple, None): Returns a tuple ``(scale, None)``, where \
                ``scale`` is sampled ratio multiplied with ``img_scale`` and \
                None is just a placeholder to be consistent with \
                :func:`random_select`.
        """

        # Bugfix: accept tuples as well as lists — `_random_scale` passes
        # ``self.img_scale[0]``, which is usually a tuple.
        assert isinstance(img_scale, (list, tuple)) and len(img_scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
        return scale, None

    def _random_scale(self, results):
        """Randomly sample an img_scale according to ``ratio_range`` and
        ``multiscale_mode``.

        If ``ratio_range`` is specified, a ratio will be sampled and be
        multiplied with ``img_scale``.
        If multiple scales are specified by ``img_scale``, a scale will be
        sampled according to ``multiscale_mode``.
        Otherwise, single scale will be used.

        Args:
            results (dict): Result dict from :obj:`dataset`.

        Returns:
            dict: Two new keys 'scale` and 'scale_idx` are added into \
                ``results``, which would be used by subsequent pipelines.
        """

        if self.ratio_range is not None:
            scale, scale_idx = self.random_sample_ratio(self.img_scale[0],
                                                        self.ratio_range)
        elif len(self.img_scale) == 1:
            scale, scale_idx = self.img_scale[0], 0
        elif self.multiscale_mode == 'range':
            scale, scale_idx = self.random_sample(self.img_scale)
        elif self.multiscale_mode == 'value':
            scale, scale_idx = self.random_select(self.img_scale)
        else:
            raise NotImplementedError
        results['scale'] = scale
        results['scale_idx'] = scale_idx

    def _resize_img(self, results):
        """Resize images with ``results['scale']``."""
        for key in ['image'] if 'image' in results else []:
            if self.keep_ratio:
                img, scale_factor = imrescale(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    interpolation=self.interpolation,
                    backend=self.backend)
                # the w_scale and h_scale has minor difference
                # a real fix should be done in the imrescale in the future
                new_h, new_w = img.shape[:2]
                h, w = results[key].shape[:2]
                w_scale = new_w / w
                h_scale = new_h / h
            else:
                img, w_scale, h_scale = imresize(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    interpolation=self.interpolation,
                    backend=self.backend)

            scale_factor = np.array(
                [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
            results['im_shape'] = np.array(img.shape)
            # in case that there is no padding
            results['pad_shape'] = img.shape
            results['scale_factor'] = scale_factor
            results['keep_ratio'] = self.keep_ratio
            results[key] = img

    def _resize_bboxes(self, results):
        """Resize bounding boxes with ``results['scale_factor']``."""
        for key in ['gt_bbox'] if 'gt_bbox' in results else []:
            bboxes = results[key] * results['scale_factor']
            if self.bbox_clip_border:
                img_shape = results['im_shape']
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
            results[key] = bboxes

    def _resize_masks(self, results):
        """Resize masks with ``results['scale']``"""
        for key in ['mask'] if 'mask' in results else []:
            if results[key] is None:
                continue
            if self.keep_ratio:
                results[key] = results[key].rescale(results['scale'])
            else:
                results[key] = results[key].resize(results['im_shape'][:2])

    def _resize_seg(self, results):
        """Resize semantic segmentation map with ``results['scale']``."""
        for key in ['seg'] if 'seg' in results else []:
            if self.keep_ratio:
                gt_seg = imrescale(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            else:
                gt_seg = imresize(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            results[key] = gt_seg

    def _resize_keypoints(self, results):
        """Resize keypoints with ``results['scale_factor']``."""
        for key in ['gt_joints'] if 'gt_joints' in results else []:
            keypoints = results[key].copy()
            keypoints[..., 0] = keypoints[..., 0] * results['scale_factor'][0]
            keypoints[..., 1] = keypoints[..., 1] * results['scale_factor'][1]
            if self.keypoint_clip_border:
                img_shape = results['im_shape']
                keypoints[..., 0] = np.clip(keypoints[..., 0], 0, img_shape[1])
                keypoints[..., 1] = np.clip(keypoints[..., 1], 0, img_shape[0])
            results[key] = keypoints

    def _resize_areas(self, results):
        """Resize mask areas with ``results['scale_factor']``."""
        for key in ['gt_areas'] if 'gt_areas' in results else []:
            areas = results[key].copy()
            # area scales by the product of the two axis factors
            areas = areas * results['scale_factor'][0] * results[
                'scale_factor'][1]
            results[key] = areas

    def __call__(self, results):
        """Call function to resize images, bounding boxes, masks, semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, 'im_shape', 'pad_shape', 'scale_factor', \
                'keep_ratio' keys are added into result dict.
        """

        if 'scale' not in results:
            if 'scale_factor' in results:
                img_shape = results['image'].shape[:2]
                scale_factor = results['scale_factor']
                assert isinstance(scale_factor, float)
                results['scale'] = tuple(
                    [int(x * scale_factor) for x in img_shape][::-1])
            else:
                self._random_scale(results)
        else:
            if not self.override:
                assert 'scale_factor' not in results, (
                    'scale and scale_factor cannot be both set.')
            else:
                results.pop('scale')
                if 'scale_factor' in results:
                    results.pop('scale_factor')
                self._random_scale(results)

        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        self._resize_seg(results)
        self._resize_keypoints(results)
        self._resize_areas(results)
        return results

    def __repr__(self):
        # Bugfix: the original closed the paren after bbox_clip_border and
        # then appended keypoint_clip_border with a second stray ')'.
        repr_str = self.__class__.__name__
        repr_str += f'(img_scale={self.img_scale}, '
        repr_str += f'multiscale_mode={self.multiscale_mode}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
        repr_str += f'keypoint_clip_border={self.keypoint_clip_border})'
        return repr_str
diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py
index 2f57cdfe33f..61a4aacba02 100644
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -594,6 +594,108 @@ def apply(self, sample, context=None):
return sample
@register_op
class PhotoMetricDistortion(BaseOperator):
    """Apply photometric distortion to image sequentially, every transformation
    is applied with a probability of 0.5. The position of random contrast is in
    second or second to last.

    1. random brightness
    2. random contrast (mode 0)
    3. convert color from BGR to HSV
    4. random saturation
    5. random hue
    6. convert color from HSV to BGR
    7. random contrast (mode 1)
    8. randomly swap channels

    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
    """

    def __init__(self,
                 brightness_delta=32,
                 contrast_range=(0.5, 1.5),
                 saturation_range=(0.5, 1.5),
                 hue_delta=18):
        super(PhotoMetricDistortion, self).__init__()
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta

    def apply(self, results, context=None):
        """Call function to perform photometric distortion on images.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Result dict with images distorted.
        """

        img = results['image']
        # Work in float32 so additive/multiplicative jitter is not clamped.
        img = img.astype(np.float32)
        # random brightness
        if np.random.randint(2):
            delta = np.random.uniform(-self.brightness_delta,
                                      self.brightness_delta)
            img += delta

        # mode == 0 --> do random contrast first
        # mode == 1 --> do random contrast last
        mode = np.random.randint(2)
        if mode == 1:
            if np.random.randint(2):
                alpha = np.random.uniform(self.contrast_lower,
                                          self.contrast_upper)
                img *= alpha

        # convert color from BGR to HSV
        img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # random saturation
        if np.random.randint(2):
            img[..., 1] *= np.random.uniform(self.saturation_lower,
                                             self.saturation_upper)

        # random hue
        # NOTE(review): the wrap bounds assume float32 HSV hue in [0, 360);
        # the `> 360` test (not `>=`) leaves an exact 360 unwrapped —
        # presumably harmless, verify against cv2's float HSV range.
        if np.random.randint(2):
            img[..., 0] += np.random.uniform(-self.hue_delta, self.hue_delta)
            img[..., 0][img[..., 0] > 360] -= 360
            img[..., 0][img[..., 0] < 0] += 360

        # convert color from HSV to BGR
        img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)

        # random contrast
        if mode == 0:
            if np.random.randint(2):
                alpha = np.random.uniform(self.contrast_lower,
                                          self.contrast_upper)
                img *= alpha

        # randomly swap channels
        if np.random.randint(2):
            img = img[..., np.random.permutation(3)]

        # NOTE(review): the image stays float32 and is NOT clipped back to
        # [0, 255] here — presumably later normalization handles that.
        results['image'] = img
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(\nbrightness_delta={self.brightness_delta},\n'
        repr_str += 'contrast_range='
        repr_str += f'{(self.contrast_lower, self.contrast_upper)},\n'
        repr_str += 'saturation_range='
        repr_str += f'{(self.saturation_lower, self.saturation_upper)},\n'
        repr_str += f'hue_delta={self.hue_delta})'
        return repr_str
+
+
@register_op
class AutoAugment(BaseOperator):
def __init__(self, autoaug_type="v1"):
@@ -771,6 +873,19 @@ def apply_bbox(self, bbox, scale, size):
bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
return bbox
def apply_area(self, area, scale):
    """Scale ground-truth areas by the product of the x/y resize factors."""
    sx, sy = scale
    return area * sx * sy
+
def apply_joints(self, joints, scale, size):
    """Rescale keypoint x/y in place by the resize factors and clamp
    them to the resized image extent ``size`` (w, h)."""
    # axis 0 <-> x uses (scale_x, resize_w); axis 1 <-> y the y pair.
    for axis in (0, 1):
        joints[..., axis] *= scale[axis]
        joints[..., axis] = np.clip(joints[..., axis], 0, size[axis])
    return joints
+
def apply_segm(self, segms, im_size, scale):
def _resize_poly(poly, im_scale_x, im_scale_y):
resized_poly = np.array(poly).astype('float32')
@@ -833,8 +948,8 @@ def apply(self, sample, context=None):
im_scale = min(target_size_min / im_size_min,
target_size_max / im_size_max)
- resize_h = im_scale * float(im_shape[0])
- resize_w = im_scale * float(im_shape[1])
+ resize_h = int(im_scale * float(im_shape[0]) + 0.5)
+ resize_w = int(im_scale * float(im_shape[1]) + 0.5)
im_scale_x = im_scale
im_scale_y = im_scale
@@ -878,6 +993,11 @@ def apply(self, sample, context=None):
[im_scale_x, im_scale_y],
[resize_w, resize_h])
+ # apply areas
+ if 'gt_areas' in sample:
+ sample['gt_areas'] = self.apply_area(sample['gt_areas'],
+ [im_scale_x, im_scale_y])
+
# apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
@@ -911,6 +1031,11 @@ def apply(self, sample, context=None):
]
sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
+ if 'gt_joints' in sample:
+ sample['gt_joints'] = self.apply_joints(sample['gt_joints'],
+ [im_scale_x, im_scale_y],
+ [resize_w, resize_h])
+
return sample
@@ -1362,7 +1487,8 @@ def __init__(self,
num_attempts=50,
allow_no_crop=True,
cover_all_box=False,
- is_mask_crop=False):
+ is_mask_crop=False,
+ ioumode="iou"):
super(RandomCrop, self).__init__()
self.aspect_ratio = aspect_ratio
self.thresholds = thresholds
@@ -1371,6 +1497,7 @@ def __init__(self,
self.allow_no_crop = allow_no_crop
self.cover_all_box = cover_all_box
self.is_mask_crop = is_mask_crop
+ self.ioumode = ioumode
def crop_segms(self, segms, valid_ids, crop, height, width):
def _crop_poly(segm, crop):
@@ -1516,9 +1643,14 @@ def random_crop(self, sample, fake_bboxes=False):
crop_y = np.random.randint(0, h - crop_h)
crop_x = np.random.randint(0, w - crop_w)
crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
- iou = self._iou_matrix(
- gt_bbox, np.array(
- [crop_box], dtype=np.float32))
+ if self.ioumode == "iof":
+ iou = self._gtcropiou_matrix(
+ gt_bbox, np.array(
+ [crop_box], dtype=np.float32))
+ elif self.ioumode == "iou":
+ iou = self._iou_matrix(
+ gt_bbox, np.array(
+ [crop_box], dtype=np.float32))
if iou.max() < thresh:
continue
@@ -1582,6 +1714,10 @@ def random_crop(self, sample, fake_bboxes=False):
sample['difficult'] = np.take(
sample['difficult'], valid_ids, axis=0)
+ if 'gt_joints' in sample:
+ sample['gt_joints'] = self._crop_joints(sample['gt_joints'],
+ crop_box)
+
return sample
return sample
@@ -1596,6 +1732,16 @@ def _iou_matrix(self, a, b):
area_o = (area_a[:, np.newaxis] + area_b - area_i)
return area_i / (area_o + 1e-10)
+ def _gtcropiou_matrix(self, a, b):
+ tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+ br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+
+ area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+ area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
+ area_o = (area_a[:, np.newaxis] + area_b - area_i)
+ return area_i / (area_a + 1e-10)
+
def _crop_box_with_center_constraint(self, box, crop):
cropped_box = box.copy()
@@ -1620,6 +1766,16 @@ def _crop_segm(self, segm, crop):
x1, y1, x2, y2 = crop
return segm[:, y1:y2, x1:x2]
+ def _crop_joints(self, joints, crop):
+ x1, y1, x2, y2 = crop
+ joints[joints[..., 0] > x2, :] = 0
+ joints[joints[..., 1] > y2, :] = 0
+ joints[joints[..., 0] < x1, :] = 0
+ joints[joints[..., 1] < y1, :] = 0
+ joints[..., 0] -= x1
+ joints[..., 1] -= y1
+ return joints
+
@register_op
class RandomScaledCrop(BaseOperator):
@@ -1648,8 +1804,8 @@ def apply(self, sample, context=None):
random_dim = int(dim * random_scale)
dim_max = max(h, w)
scale = random_dim / dim_max
- resize_w = w * scale
- resize_h = h * scale
+ resize_w = int(w * scale + 0.5)
+ resize_h = int(h * scale + 0.5)
offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
@@ -2316,25 +2472,26 @@ class RandomResizeCrop(BaseOperator):
is_mask_crop(bool): whether crop the segmentation.
"""
- def __init__(
- self,
- resizes,
- cropsizes,
- prob=0.5,
- mode='short',
- keep_ratio=True,
- interp=cv2.INTER_LINEAR,
- num_attempts=3,
- cover_all_box=False,
- allow_no_crop=False,
- thresholds=[0.3, 0.5, 0.7],
- is_mask_crop=False, ):
+ def __init__(self,
+ resizes,
+ cropsizes,
+ prob=0.5,
+ mode='short',
+ keep_ratio=True,
+ interp=cv2.INTER_LINEAR,
+ num_attempts=3,
+ cover_all_box=False,
+ allow_no_crop=False,
+ thresholds=[0.3, 0.5, 0.7],
+ is_mask_crop=False,
+ ioumode="iou"):
super(RandomResizeCrop, self).__init__()
self.resizes = resizes
self.cropsizes = cropsizes
self.prob = prob
self.mode = mode
+ self.ioumode = ioumode
self.resizer = Resize(0, keep_ratio=keep_ratio, interp=interp)
self.croper = RandomCrop(
@@ -2389,9 +2546,14 @@ def _random_crop(croper, sample, size, context=None):
crop_x = random.randint(0, w - crop_w)
crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
- iou = self._iou_matrix(
- gt_bbox, np.array(
- [crop_box], dtype=np.float32))
+ if self.ioumode == "iof":
+ iou = self._gtcropiou_matrix(
+ gt_bbox, np.array(
+ [crop_box], dtype=np.float32))
+ elif self.ioumode == "iou":
+ iou = self._iou_matrix(
+ gt_bbox, np.array(
+ [crop_box], dtype=np.float32))
if iou.max() < thresh:
continue
@@ -2447,6 +2609,14 @@ def _random_crop(croper, sample, size, context=None):
if 'is_crowd' in sample:
sample['is_crowd'] = np.take(
sample['is_crowd'], valid_ids, axis=0)
+
+ if 'gt_areas' in sample:
+ sample['gt_areas'] = np.take(
+ sample['gt_areas'], valid_ids, axis=0)
+
+ if 'gt_joints' in sample:
+ gt_joints = self._crop_joints(sample['gt_joints'], crop_box)
+ sample['gt_joints'] = gt_joints[valid_ids]
return sample
return sample
@@ -2479,8 +2649,8 @@ def _resize(resizer, sample, size, mode='short', context=None):
im_scale = max(target_size_min / im_size_min,
target_size_max / im_size_max)
- resize_h = im_scale * float(im_shape[0])
- resize_w = im_scale * float(im_shape[1])
+ resize_h = int(im_scale * float(im_shape[0]) + 0.5)
+ resize_w = int(im_scale * float(im_shape[1]) + 0.5)
im_scale_x = im_scale
im_scale_y = im_scale
@@ -2540,6 +2710,11 @@ def _resize(resizer, sample, size, mode='short', context=None):
]
sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
+ if 'gt_joints' in sample:
+ sample['gt_joints'] = self.apply_joints(sample['gt_joints'],
+ [im_scale_x, im_scale_y],
+ [resize_w, resize_h])
+
return sample
@@ -2612,10 +2787,10 @@ def get_size_with_aspect_ratio(self, image_shape, size, max_size=None):
if w < h:
ow = size
- oh = int(size * h / w)
+ oh = int(round(size * h / w))
else:
oh = size
- ow = int(size * w / h)
+ ow = int(round(size * w / h))
return (ow, oh)
@@ -2672,6 +2847,16 @@ def resize(self,
for gt_segm in sample['gt_segm']
]
sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
+
+ if 'gt_joints' in sample:
+ sample['gt_joints'] = self.apply_joints(
+ sample['gt_joints'], [im_scale_x, im_scale_y], target_size)
+
+ # apply areas
+ if 'gt_areas' in sample:
+ sample['gt_areas'] = self.apply_area(sample['gt_areas'],
+ [im_scale_x, im_scale_y])
+
return sample
def apply_bbox(self, bbox, scale, size):
@@ -2683,6 +2868,23 @@ def apply_bbox(self, bbox, scale, size):
bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
return bbox.astype('float32')
+ def apply_joints(self, joints, scale, size):
+ im_scale_x, im_scale_y = scale
+ resize_w, resize_h = size
+ joints[..., 0] *= im_scale_x
+ joints[..., 1] *= im_scale_y
+ # joints[joints[..., 0] >= resize_w, :] = 0
+ # joints[joints[..., 1] >= resize_h, :] = 0
+ # joints[joints[..., 0] < 0, :] = 0
+ # joints[joints[..., 1] < 0, :] = 0
+ joints[..., 0] = np.clip(joints[..., 0], 0, resize_w)
+ joints[..., 1] = np.clip(joints[..., 1], 0, resize_h)
+ return joints
+
+ def apply_area(self, area, scale):
+ im_scale_x, im_scale_y = scale
+ return area * im_scale_x * im_scale_y
+
def apply_segm(self, segms, im_size, scale):
def _resize_poly(poly, im_scale_x, im_scale_y):
resized_poly = np.array(poly).astype('float32')
@@ -2730,6 +2932,44 @@ def apply(self, sample, context=None):
return self.resize(sample, target_size, self.max_size, interp)
+@register_op
+class RandomShortSideRangeResize(RandomShortSideResize):
+ def __init__(self, scales, interp=cv2.INTER_LINEAR, random_interp=False):
+ """
+        Resize the image by sampling a (long, short) target-size pair from the
+        ranges spanned by `scales`. The whole process keeps the aspect ratio.
+        Args:
+            scales (list[tuple]): Candidate (long, short) side size pairs.
+            interp (int): The interpolation method.
+            random_interp (bool): Whether to randomly select the interpolation method.
+ """
+ super(RandomShortSideRangeResize, self).__init__(scales, None, interp,
+ random_interp)
+
+ assert isinstance(scales,
+ Sequence), "short_side_sizes must be List or Tuple"
+
+ self.scales = scales
+
+ def random_sample(self, img_scales):
+ img_scale_long = [max(s) for s in img_scales]
+ img_scale_short = [min(s) for s in img_scales]
+ long_edge = np.random.randint(
+ min(img_scale_long), max(img_scale_long) + 1)
+ short_edge = np.random.randint(
+ min(img_scale_short), max(img_scale_short) + 1)
+ img_scale = (long_edge, short_edge)
+ return img_scale
+
+ def apply(self, sample, context=None):
+ long_edge, short_edge = self.random_sample(self.short_side_sizes)
+ # print("target size:{}".format((long_edge, short_edge)))
+ interp = random.choice(
+ self.interps) if self.random_interp else self.interp
+
+ return self.resize(sample, short_edge, long_edge, interp)
+
+
@register_op
class RandomSizeCrop(BaseOperator):
"""
@@ -2805,6 +3045,9 @@ def crop(self, sample, region):
sample['is_crowd'] = sample['is_crowd'][keep_index] if len(
keep_index) > 0 else np.zeros(
[0, 1], dtype=np.float32)
+ if 'gt_areas' in sample:
+ sample['gt_areas'] = np.take(
+ sample['gt_areas'], keep_index, axis=0)
image_shape = sample['image'].shape[:2]
sample['image'] = self.paddle_crop(sample['image'], *region)
@@ -2826,6 +3069,12 @@ def crop(self, sample, region):
if keep_index is not None and len(keep_index) > 0:
sample['gt_segm'] = sample['gt_segm'][keep_index]
+ if 'gt_joints' in sample:
+ gt_joints = self._crop_joints(sample['gt_joints'], region)
+ sample['gt_joints'] = gt_joints
+ if keep_index is not None:
+ sample['gt_joints'] = sample['gt_joints'][keep_index]
+
return sample
def apply_bbox(self, bbox, region):
@@ -2836,6 +3085,19 @@ def apply_bbox(self, bbox, region):
crop_bbox = crop_bbox.clip(min=0)
return crop_bbox.reshape([-1, 4]).astype('float32')
+ def _crop_joints(self, joints, region):
+ y1, x1, h, w = region
+ x2 = x1 + w
+ y2 = y1 + h
+ # x1, y1, x2, y2 = crop
+ joints[..., 0] -= x1
+ joints[..., 1] -= y1
+ joints[joints[..., 0] > w, :] = 0
+ joints[joints[..., 1] > h, :] = 0
+ joints[joints[..., 0] < 0, :] = 0
+ joints[joints[..., 1] < 0, :] = 0
+ return joints
+
def apply_segm(self, segms, region, image_shape):
def _crop_poly(segm, crop):
xmin, ymin, xmax, ymax = crop
diff --git a/ppdet/modeling/architectures/__init__.py b/ppdet/modeling/architectures/__init__.py
index 5efdec0775e..8899e5c0b4c 100644
--- a/ppdet/modeling/architectures/__init__.py
+++ b/ppdet/modeling/architectures/__init__.py
@@ -72,3 +72,4 @@
from .pose3d_metro import *
from .centertrack import *
from .queryinst import *
+from .keypoint_petr import *
diff --git a/ppdet/modeling/architectures/keypoint_petr.py b/ppdet/modeling/architectures/keypoint_petr.py
new file mode 100644
index 00000000000..b587c1f0668
--- /dev/null
+++ b/ppdet/modeling/architectures/keypoint_petr.py
@@ -0,0 +1,217 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+this code is based on https://github.com/hikvision-research/opera/blob/main/opera/models/detectors/petr.py
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from ppdet.core.workspace import register
+from .meta_arch import BaseArch
+from .. import layers as L
+
+__all__ = ['PETR']
+
+
+@register
+class PETR(BaseArch):
+ __category__ = 'architecture'
+ __inject__ = ['backbone', 'neck', 'bbox_head']
+
+ def __init__(self,
+ backbone='ResNet',
+ neck='ChannelMapper',
+ bbox_head='PETRHead'):
+ """
+ PETR, see https://openaccess.thecvf.com/content/CVPR2022/papers/Shi_End-to-End_Multi-Person_Pose_Estimation_With_Transformers_CVPR_2022_paper.pdf
+
+ Args:
+ backbone (nn.Layer): backbone instance
+ neck (nn.Layer): neck between backbone and head
+ bbox_head (nn.Layer): model output and loss
+ """
+ super(PETR, self).__init__()
+ self.backbone = backbone
+ if neck is not None:
+ self.with_neck = True
+ self.neck = neck
+ self.bbox_head = bbox_head
+ self.deploy = False
+
+ def extract_feat(self, img):
+ """Directly extract features from the backbone+neck."""
+ x = self.backbone(img)
+ if self.with_neck:
+ x = self.neck(x)
+ return x
+
+ def get_inputs(self):
+ img_metas = []
+ gt_bboxes = []
+ gt_labels = []
+ gt_keypoints = []
+ gt_areas = []
+ pad_gt_mask = self.inputs['pad_gt_mask'].astype("bool").squeeze(-1)
+ for idx, im_shape in enumerate(self.inputs['im_shape']):
+ img_meta = {
+ 'img_shape': im_shape.astype("int32").tolist() + [1, ],
+ 'batch_input_shape': self.inputs['image'].shape[-2:],
+ 'image_name': self.inputs['image_file'][idx]
+ }
+ img_metas.append(img_meta)
+ if (not pad_gt_mask[idx].any()):
+ gt_keypoints.append(self.inputs['gt_joints'][idx][:1])
+ gt_labels.append(self.inputs['gt_class'][idx][:1])
+ gt_bboxes.append(self.inputs['gt_bbox'][idx][:1])
+ gt_areas.append(self.inputs['gt_areas'][idx][:1])
+ continue
+
+ gt_keypoints.append(self.inputs['gt_joints'][idx][pad_gt_mask[idx]])
+ gt_labels.append(self.inputs['gt_class'][idx][pad_gt_mask[idx]])
+ gt_bboxes.append(self.inputs['gt_bbox'][idx][pad_gt_mask[idx]])
+ gt_areas.append(self.inputs['gt_areas'][idx][pad_gt_mask[idx]])
+
+ return img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas
+
+ def get_loss(self):
+ """
+ Args:
+ img (Tensor): Input images of shape (N, C, H, W).
+ Typically these should be mean centered and std scaled.
+ img_metas (list[dict]): A List of image info dict where each dict
+ has: 'img_shape', 'scale_factor', 'flip', and may also contain
+ 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+ For details on the values of these keys see
+ :class:`mmdet.datasets.pipelines.Collect`.
+ gt_bboxes (list[Tensor]): Each item are the truth boxes for each
+ image in [tl_x, tl_y, br_x, br_y] format.
+ gt_labels (list[Tensor]): Class indices corresponding to each box.
+ gt_keypoints (list[Tensor]): Each item are the truth keypoints for
+ each image in [p^{1}_x, p^{1}_y, p^{1}_v, ..., p^{K}_x,
+ p^{K}_y, p^{K}_v] format.
+ gt_areas (list[Tensor]): mask areas corresponding to each box.
+ gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
+ boxes can be ignored when computing the loss.
+
+ Returns:
+ dict[str, Tensor]: A dictionary of loss components.
+ """
+
+ img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas = self.get_inputs(
+ )
+ gt_bboxes_ignore = getattr(self.inputs, 'gt_bboxes_ignore', None)
+
+ x = self.extract_feat(self.inputs)
+ losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
+ gt_labels, gt_keypoints, gt_areas,
+ gt_bboxes_ignore)
+ loss = 0
+ for k, v in losses.items():
+ loss += v
+ losses['loss'] = loss
+
+ return losses
+
+ def get_pred_numpy(self):
+ """Used for computing network flops.
+ """
+
+ img = self.inputs['image']
+ batch_size, _, height, width = img.shape
+ dummy_img_metas = [
+ dict(
+ batch_input_shape=(height, width),
+ img_shape=(height, width, 3),
+ scale_factor=(1., 1., 1., 1.)) for _ in range(batch_size)
+ ]
+ x = self.extract_feat(img)
+ outs = self.bbox_head(x, img_metas=dummy_img_metas)
+ bbox_list = self.bbox_head.get_bboxes(
+ *outs, dummy_img_metas, rescale=True)
+ return bbox_list
+
+ def get_pred(self):
+ """
+ """
+ img = self.inputs['image']
+ batch_size, _, height, width = img.shape
+ img_metas = [
+ dict(
+ batch_input_shape=(height, width),
+ img_shape=(height, width, 3),
+ scale_factor=self.inputs['scale_factor'][i])
+ for i in range(batch_size)
+ ]
+ kptpred = self.simple_test(
+ self.inputs, img_metas=img_metas, rescale=True)
+ keypoints = kptpred[0][1][0]
+ bboxs = kptpred[0][0][0]
+ keypoints[..., 2] = bboxs[:, None, 4]
+ res_lst = [[keypoints, bboxs[:, 4]]]
+ outputs = {'keypoint': res_lst}
+ return outputs
+
+ def simple_test(self, inputs, img_metas, rescale=False):
+ """Test function without test time augmentation.
+
+ Args:
+ inputs (list[paddle.Tensor]): List of multiple images.
+ img_metas (list[dict]): List of image information.
+ rescale (bool, optional): Whether to rescale the results.
+ Defaults to False.
+
+ Returns:
+ list[list[np.ndarray]]: BBox and keypoint results of each image
+ and classes. The outer list corresponds to each image.
+ The inner list corresponds to each class.
+ """
+ batch_size = len(img_metas)
+ assert batch_size == 1, 'Currently only batch_size 1 for inference ' \
+ f'mode is supported. Found batch_size {batch_size}.'
+ feat = self.extract_feat(inputs)
+ results_list = self.bbox_head.simple_test(
+ feat, img_metas, rescale=rescale)
+
+ bbox_kpt_results = [
+ self.bbox_kpt2result(det_bboxes, det_labels, det_kpts,
+ self.bbox_head.num_classes)
+ for det_bboxes, det_labels, det_kpts in results_list
+ ]
+ return bbox_kpt_results
+
+ def bbox_kpt2result(self, bboxes, labels, kpts, num_classes):
+ """Convert detection results to a list of numpy arrays.
+
+ Args:
+ bboxes (paddle.Tensor | np.ndarray): shape (n, 5).
+ labels (paddle.Tensor | np.ndarray): shape (n, ).
+ kpts (paddle.Tensor | np.ndarray): shape (n, K, 3).
+ num_classes (int): class number, including background class.
+
+ Returns:
+ list(ndarray): bbox and keypoint results of each class.
+ """
+ if bboxes.shape[0] == 0:
+ return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)], \
+ [np.zeros((0, kpts.size(1), 3), dtype=np.float32)
+ for i in range(num_classes)]
+ else:
+ if isinstance(bboxes, paddle.Tensor):
+ bboxes = bboxes.numpy()
+ labels = labels.numpy()
+ kpts = kpts.numpy()
+ return [bboxes[labels == i, :] for i in range(num_classes)], \
+ [kpts[labels == i, :, :] for i in range(num_classes)]
diff --git a/ppdet/modeling/assigners/__init__.py b/ppdet/modeling/assigners/__init__.py
index da548298ae7..f462a9fd351 100644
--- a/ppdet/modeling/assigners/__init__.py
+++ b/ppdet/modeling/assigners/__init__.py
@@ -31,3 +31,5 @@
from .rotated_task_aligned_assigner import *
from .task_aligned_assigner_cr import *
from .uniform_assigner import *
+from .hungarian_assigner import *
+from .pose_utils import *
diff --git a/ppdet/modeling/assigners/hungarian_assigner.py b/ppdet/modeling/assigners/hungarian_assigner.py
new file mode 100644
index 00000000000..154c27ce978
--- /dev/null
+++ b/ppdet/modeling/assigners/hungarian_assigner.py
@@ -0,0 +1,316 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+try:
+ from scipy.optimize import linear_sum_assignment
+except ImportError:
+ linear_sum_assignment = None
+
+import paddle
+
+from ppdet.core.workspace import register
+
+__all__ = ['PoseHungarianAssigner', 'PseudoSampler']
+
+
+class AssignResult:
+ """Stores assignments between predicted and truth boxes.
+
+ Attributes:
+ num_gts (int): the number of truth boxes considered when computing this
+ assignment
+
+ gt_inds (LongTensor): for each predicted box indicates the 1-based
+ index of the assigned truth box. 0 means unassigned and -1 means
+ ignore.
+
+ max_overlaps (FloatTensor): the iou between the predicted box and its
+ assigned truth box.
+
+ labels (None | LongTensor): If specified, for each predicted box
+ indicates the category label of the assigned truth box.
+ """
+
+ def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
+ self.num_gts = num_gts
+ self.gt_inds = gt_inds
+ self.max_overlaps = max_overlaps
+ self.labels = labels
+ # Interface for possible user-defined properties
+ self._extra_properties = {}
+
+ @property
+ def num_preds(self):
+ """int: the number of predictions in this assignment"""
+ return len(self.gt_inds)
+
+ def set_extra_property(self, key, value):
+ """Set user-defined new property."""
+ assert key not in self.info
+ self._extra_properties[key] = value
+
+ def get_extra_property(self, key):
+ """Get user-defined property."""
+ return self._extra_properties.get(key, None)
+
+ @property
+ def info(self):
+ """dict: a dictionary of info about the object"""
+ basic_info = {
+ 'num_gts': self.num_gts,
+ 'num_preds': self.num_preds,
+ 'gt_inds': self.gt_inds,
+ 'max_overlaps': self.max_overlaps,
+ 'labels': self.labels,
+ }
+ basic_info.update(self._extra_properties)
+ return basic_info
+
+
+@register
+class PoseHungarianAssigner:
+ """Computes one-to-one matching between predictions and ground truth.
+
+ This class computes an assignment between the targets and the predictions
+ based on the costs. The costs are weighted sum of three components:
+ classification cost, regression L1 cost and regression oks cost. The
+ targets don't include the no_object, so generally there are more
+ predictions than targets. After the one-to-one matching, the un-matched
+ are treated as backgrounds. Thus each query prediction will be assigned
+ with `0` or a positive integer indicating the ground truth index:
+
+ - 0: negative sample, no assigned gt.
+ - positive integer: positive sample, index (1-based) of assigned gt.
+
+ Args:
+        cls_cost (str|object, optional): Classification cost module name or
+            instance (injected). Default 'ClassificationCost'.
+        kpt_cost (str|object, optional): Keypoint L1 regression cost module
+            name or instance (injected). Default 'KptL1Cost'.
+        oks_cost (str|object, optional): Keypoint OKS cost module name or
+            instance (injected). Default 'OksCost'.
+ """
+ __inject__ = ['cls_cost', 'kpt_cost', 'oks_cost']
+
+ def __init__(self,
+ cls_cost='ClassificationCost',
+ kpt_cost='KptL1Cost',
+ oks_cost='OksCost'):
+ self.cls_cost = cls_cost
+ self.kpt_cost = kpt_cost
+ self.oks_cost = oks_cost
+
+ def assign(self,
+ cls_pred,
+ kpt_pred,
+ gt_labels,
+ gt_keypoints,
+ gt_areas,
+ img_meta,
+ eps=1e-7):
+ """Computes one-to-one matching based on the weighted costs.
+
+ This method assign each query prediction to a ground truth or
+ background. The `assigned_gt_inds` with -1 means don't care,
+ 0 means negative sample, and positive number is the index (1-based)
+ of assigned gt.
+ The assignment is done in the following steps, the order matters.
+
+ 1. assign every prediction to -1
+ 2. compute the weighted costs
+ 3. do Hungarian matching on CPU based on the costs
+ 4. assign all to 0 (background) first, then for each matched pair
+ between predictions and gts, treat this prediction as foreground
+ and assign the corresponding gt index (plus 1) to it.
+
+ Args:
+ cls_pred (Tensor): Predicted classification logits, shape
+ [num_query, num_class].
+ kpt_pred (Tensor): Predicted keypoints with normalized coordinates
+ (x_{i}, y_{i}), which are all in range [0, 1]. Shape
+ [num_query, K*2].
+ gt_labels (Tensor): Label of `gt_keypoints`, shape (num_gt,).
+ gt_keypoints (Tensor): Ground truth keypoints with unnormalized
+ coordinates [p^{1}_x, p^{1}_y, p^{1}_v, ..., \
+ p^{K}_x, p^{K}_y, p^{K}_v]. Shape [num_gt, K*3].
+ gt_areas (Tensor): Ground truth mask areas, shape (num_gt,).
+ img_meta (dict): Meta information for current image.
+ eps (int | float, optional): A value added to the denominator for
+ numerical stability. Default 1e-7.
+
+ Returns:
+ :obj:`AssignResult`: The assigned result.
+ """
+ num_gts, num_kpts = gt_keypoints.shape[0], kpt_pred.shape[0]
+ if not gt_keypoints.astype('bool').any():
+ num_gts = 0
+
+ # 1. assign -1 by default
+ assigned_gt_inds = paddle.full((num_kpts, ), -1, dtype="int64")
+ assigned_labels = paddle.full((num_kpts, ), -1, dtype="int64")
+ if num_gts == 0 or num_kpts == 0:
+ # No ground truth or keypoints, return empty assignment
+ if num_gts == 0:
+ # No ground truth, assign all to background
+ assigned_gt_inds[:] = 0
+ return AssignResult(
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
+ img_h, img_w, _ = img_meta['img_shape']
+ factor = paddle.to_tensor(
+ [img_w, img_h, img_w, img_h], dtype=gt_keypoints.dtype).reshape(
+ (1, -1))
+
+ # 2. compute the weighted costs
+ # classification cost
+ cls_cost = self.cls_cost(cls_pred, gt_labels)
+
+ # keypoint regression L1 cost
+ gt_keypoints_reshape = gt_keypoints.reshape((gt_keypoints.shape[0], -1,
+ 3))
+ valid_kpt_flag = gt_keypoints_reshape[..., -1]
+ kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
+ 2))
+ normalize_gt_keypoints = gt_keypoints_reshape[
+ ..., :2] / factor[:, :2].unsqueeze(0)
+ kpt_cost = self.kpt_cost(kpt_pred_tmp, normalize_gt_keypoints,
+ valid_kpt_flag)
+ # keypoint OKS cost
+ kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
+ 2))
+ kpt_pred_tmp = kpt_pred_tmp * factor[:, :2].unsqueeze(0)
+ oks_cost = self.oks_cost(kpt_pred_tmp, gt_keypoints_reshape[..., :2],
+ valid_kpt_flag, gt_areas)
+ # weighted sum of above three costs
+ cost = cls_cost + kpt_cost + oks_cost
+
+ # 3. do Hungarian matching on CPU using linear_sum_assignment
+ cost = cost.detach().cpu()
+ if linear_sum_assignment is None:
+ raise ImportError('Please run "pip install scipy" '
+ 'to install scipy first.')
+ matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
+ matched_row_inds = paddle.to_tensor(matched_row_inds)
+ matched_col_inds = paddle.to_tensor(matched_col_inds)
+
+ # 4. assign backgrounds and foregrounds
+ # assign all indices to backgrounds first
+ assigned_gt_inds[:] = 0
+ # assign foregrounds based on matching results
+ assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
+ assigned_labels[matched_row_inds] = gt_labels[matched_col_inds][
+ ..., 0].astype("int64")
+ return AssignResult(
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
+
+
+class SamplingResult:
+ """Bbox sampling result.
+ """
+
+ def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
+ gt_flags):
+ self.pos_inds = pos_inds
+ self.neg_inds = neg_inds
+ if pos_inds.size > 0:
+ self.pos_bboxes = bboxes[pos_inds]
+ self.neg_bboxes = bboxes[neg_inds]
+ self.pos_is_gt = gt_flags[pos_inds]
+
+ self.num_gts = gt_bboxes.shape[0]
+ self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
+
+ if gt_bboxes.numel() == 0:
+ # hack for index error case
+ assert self.pos_assigned_gt_inds.numel() == 0
+ self.pos_gt_bboxes = paddle.zeros(
+ gt_bboxes.shape, dtype=gt_bboxes.dtype).reshape((-1, 4))
+ else:
+ if len(gt_bboxes.shape) < 2:
+ gt_bboxes = gt_bboxes.reshape((-1, 4))
+
+ self.pos_gt_bboxes = paddle.index_select(
+ gt_bboxes,
+ self.pos_assigned_gt_inds.astype('int64'),
+ axis=0)
+
+ if assign_result.labels is not None:
+ self.pos_gt_labels = assign_result.labels[pos_inds]
+ else:
+ self.pos_gt_labels = None
+
+ @property
+ def bboxes(self):
+ """paddle.Tensor: concatenated positive and negative boxes"""
+ return paddle.concat([self.pos_bboxes, self.neg_bboxes])
+
+ def __nice__(self):
+ data = self.info.copy()
+ data['pos_bboxes'] = data.pop('pos_bboxes').shape
+ data['neg_bboxes'] = data.pop('neg_bboxes').shape
+ parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())]
+ body = ' ' + ',\n '.join(parts)
+ return '{\n' + body + '\n}'
+
+ @property
+ def info(self):
+ """Returns a dictionary of info about the object."""
+ return {
+ 'pos_inds': self.pos_inds,
+ 'neg_inds': self.neg_inds,
+ 'pos_bboxes': self.pos_bboxes,
+ 'neg_bboxes': self.neg_bboxes,
+ 'pos_is_gt': self.pos_is_gt,
+ 'num_gts': self.num_gts,
+ 'pos_assigned_gt_inds': self.pos_assigned_gt_inds,
+ }
+
+
+@register
+class PseudoSampler:
+ """A pseudo sampler that does not do sampling actually."""
+
+ def __init__(self, **kwargs):
+ pass
+
+ def _sample_pos(self, **kwargs):
+ """Sample positive samples."""
+ raise NotImplementedError
+
+ def _sample_neg(self, **kwargs):
+ """Sample negative samples."""
+ raise NotImplementedError
+
+ def sample(self, assign_result, bboxes, gt_bboxes, *args, **kwargs):
+ """Directly returns the positive and negative indices of samples.
+
+ Args:
+ assign_result (:obj:`AssignResult`): Assigned results
+ bboxes (paddle.Tensor): Bounding boxes
+ gt_bboxes (paddle.Tensor): Ground truth boxes
+
+ Returns:
+ :obj:`SamplingResult`: sampler results
+ """
+ pos_inds = paddle.nonzero(
+ assign_result.gt_inds > 0, as_tuple=False).squeeze(-1)
+ neg_inds = paddle.nonzero(
+ assign_result.gt_inds == 0, as_tuple=False).squeeze(-1)
+ gt_flags = paddle.zeros([bboxes.shape[0]], dtype='int32')
+ sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
+ assign_result, gt_flags)
+ return sampling_result
diff --git a/ppdet/modeling/assigners/pose_utils.py b/ppdet/modeling/assigners/pose_utils.py
new file mode 100644
index 00000000000..313215a4dd4
--- /dev/null
+++ b/ppdet/modeling/assigners/pose_utils.py
@@ -0,0 +1,275 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+import paddle.nn.functional as F
+
+from ppdet.core.workspace import register
+
+__all__ = ['KptL1Cost', 'OksCost', 'ClassificationCost']
+
+
+def masked_fill(x, mask, value):
+ y = paddle.full(x.shape, value, x.dtype)
+ return paddle.where(mask, y, x)
+
+
+@register
+class KptL1Cost(object):
+ """KptL1Cost.
+
+    This class is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
+
+ Args:
+ weight (int | float, optional): loss_weight.
+ """
+
+ def __init__(self, weight=1.0):
+ self.weight = weight
+
+ def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag):
+ """
+ Args:
+ kpt_pred (Tensor): Predicted keypoints with normalized coordinates
+ (x_{i}, y_{i}), which are all in range [0, 1]. Shape
+ [num_query, K, 2].
+ gt_keypoints (Tensor): Ground truth keypoints with normalized
+ coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
+ valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
+ Shape [num_gt, K].
+
+ Returns:
+ paddle.Tensor: kpt_cost value with weight.
+ """
+ kpt_cost = []
+ for i in range(len(gt_keypoints)):
+ if gt_keypoints[i].size == 0:
+ kpt_cost.append(kpt_pred.sum() * 0)
+ kpt_pred_tmp = kpt_pred.clone()
+ valid_flag = valid_kpt_flag[i] > 0
+ valid_flag_expand = valid_flag.unsqueeze(0).unsqueeze(-1).expand_as(
+ kpt_pred_tmp)
+ if not valid_flag_expand.all():
+ kpt_pred_tmp = masked_fill(kpt_pred_tmp, ~valid_flag_expand, 0)
+ cost = F.pairwise_distance(
+ kpt_pred_tmp.reshape((kpt_pred_tmp.shape[0], -1)),
+ gt_keypoints[i].reshape((-1, )).unsqueeze(0),
+ p=1,
+ keepdim=True)
+ avg_factor = paddle.clip(
+ valid_flag.astype('float32').sum() * 2, 1.0)
+ cost = cost / avg_factor
+ kpt_cost.append(cost)
+ kpt_cost = paddle.concat(kpt_cost, axis=1)
+ return kpt_cost * self.weight
+
+
+@register
+class OksCost(object):
+ """OksCost.
+
+    This class is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
+
+ Args:
+ num_keypoints (int): number of keypoints
+ weight (int | float, optional): loss_weight.
+ """
+
+ def __init__(self, num_keypoints=17, weight=1.0):
+ self.weight = weight
+ if num_keypoints == 17:
+ self.sigmas = np.array(
+ [
+ .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
+ 1.07, .87, .87, .89, .89
+ ],
+ dtype=np.float32) / 10.0
+ elif num_keypoints == 14:
+ self.sigmas = np.array(
+ [
+ .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89,
+ .89, .79, .79
+ ],
+ dtype=np.float32) / 10.0
+ else:
+ raise ValueError(f'Unsupported keypoints number {num_keypoints}')
+
+ def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag, gt_areas):
+ """
+ Args:
+ kpt_pred (Tensor): Predicted keypoints with unnormalized
+ coordinates (x_{i}, y_{i}). Shape [num_query, K, 2].
+ gt_keypoints (Tensor): Ground truth keypoints with unnormalized
+ coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
+ valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
+ Shape [num_gt, K].
+ gt_areas (Tensor): Ground truth mask areas. Shape [num_gt,].
+
+ Returns:
+ paddle.Tensor: oks_cost value with weight.
+ """
+ sigmas = paddle.to_tensor(self.sigmas)
+ variances = (sigmas * 2)**2
+
+ oks_cost = []
+ assert len(gt_keypoints) == len(gt_areas)
+ for i in range(len(gt_keypoints)):
+ if gt_keypoints[i].size == 0:
+ oks_cost.append(kpt_pred.sum() * 0)
+ squared_distance = \
+ (kpt_pred[:, :, 0] - gt_keypoints[i, :, 0].unsqueeze(0)) ** 2 + \
+ (kpt_pred[:, :, 1] - gt_keypoints[i, :, 1].unsqueeze(0)) ** 2
+ vis_flag = (valid_kpt_flag[i] > 0).astype('int')
+ vis_ind = vis_flag.nonzero(as_tuple=False)[:, 0]
+ num_vis_kpt = vis_ind.shape[0]
+ # assert num_vis_kpt > 0
+ if num_vis_kpt == 0:
+ oks_cost.append(paddle.zeros((squared_distance.shape[0], 1)))
+ continue
+ area = gt_areas[i]
+
+ squared_distance0 = squared_distance / (area * variances * 2)
+ squared_distance0 = paddle.index_select(
+ squared_distance0, vis_ind, axis=1)
+ squared_distance1 = paddle.exp(-squared_distance0).sum(axis=1,
+ keepdim=True)
+ oks = squared_distance1 / num_vis_kpt
+ # The 1 is a constant that doesn't change the matching, so omitted.
+ oks_cost.append(-oks)
+ oks_cost = paddle.concat(oks_cost, axis=1)
+ return oks_cost * self.weight
+
+
+@register
+class ClassificationCost:
+ """ClsSoftmaxCost.
+
+ Args:
+ weight (int | float, optional): loss_weight
+ """
+
+ def __init__(self, weight=1.):
+ self.weight = weight
+
+ def __call__(self, cls_pred, gt_labels):
+ """
+ Args:
+ cls_pred (Tensor): Predicted classification logits, shape
+ (num_query, num_class).
+ gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
+
+ Returns:
+ paddle.Tensor: cls_cost value with weight
+ """
+ # Following the official DETR repo, contrary to the loss that
+ # NLL is used, we approximate it in 1 - cls_score[gt_label].
+ # The 1 is a constant that doesn't change the matching,
+ # so it can be omitted.
+ cls_score = cls_pred.softmax(-1)
+ cls_cost = -cls_score[:, gt_labels]
+ return cls_cost * self.weight
+
+
+@register
+class FocalLossCost:
+ """FocalLossCost.
+
+ Args:
+ weight (int | float, optional): loss_weight
+ alpha (int | float, optional): focal_loss alpha
+ gamma (int | float, optional): focal_loss gamma
+ eps (float, optional): default 1e-12
+ binary_input (bool, optional): Whether the input is binary,
+ default False.
+ """
+
+ def __init__(self,
+ weight=1.,
+ alpha=0.25,
+ gamma=2,
+ eps=1e-12,
+ binary_input=False):
+ self.weight = weight
+ self.alpha = alpha
+ self.gamma = gamma
+ self.eps = eps
+ self.binary_input = binary_input
+
+ def _focal_loss_cost(self, cls_pred, gt_labels):
+ """
+ Args:
+ cls_pred (Tensor): Predicted classification logits, shape
+ (num_query, num_class).
+ gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
+
+ Returns:
+ paddle.Tensor: cls_cost value with weight
+ """
+ if gt_labels.size == 0:
+ return cls_pred.sum() * 0
+ cls_pred = F.sigmoid(cls_pred)
+ neg_cost = -(1 - cls_pred + self.eps).log() * (
+ 1 - self.alpha) * cls_pred.pow(self.gamma)
+ pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
+ 1 - cls_pred).pow(self.gamma)
+
+ cls_cost = paddle.index_select(
+ pos_cost, gt_labels, axis=1) - paddle.index_select(
+ neg_cost, gt_labels, axis=1)
+ return cls_cost * self.weight
+
+ def _mask_focal_loss_cost(self, cls_pred, gt_labels):
+ """
+ Args:
+            cls_pred (Tensor): Predicted classification logits
+ in shape (num_query, d1, ..., dn), dtype=paddle.float32.
+ gt_labels (Tensor): Ground truth in shape (num_gt, d1, ..., dn),
+ dtype=paddle.long. Labels should be binary.
+
+ Returns:
+ Tensor: Focal cost matrix with weight in shape\
+ (num_query, num_gt).
+ """
+ cls_pred = cls_pred.flatten(1)
+ gt_labels = gt_labels.flatten(1).float()
+ n = cls_pred.shape[1]
+ cls_pred = F.sigmoid(cls_pred)
+ neg_cost = -(1 - cls_pred + self.eps).log() * (
+ 1 - self.alpha) * cls_pred.pow(self.gamma)
+ pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
+ 1 - cls_pred).pow(self.gamma)
+
+ cls_cost = paddle.einsum('nc,mc->nm', pos_cost, gt_labels) + \
+ paddle.einsum('nc,mc->nm', neg_cost, (1 - gt_labels))
+ return cls_cost / n * self.weight
+
+ def __call__(self, cls_pred, gt_labels):
+ """
+ Args:
+            cls_pred (Tensor): Predicted classification logits.
+            gt_labels (Tensor): Labels.
+
+ Returns:
+ Tensor: Focal cost matrix with weight in shape\
+ (num_query, num_gt).
+ """
+ if self.binary_input:
+ return self._mask_focal_loss_cost(cls_pred, gt_labels)
+ else:
+ return self._focal_loss_cost(cls_pred, gt_labels)
diff --git a/ppdet/modeling/backbones/resnet.py b/ppdet/modeling/backbones/resnet.py
index 6f8eb0b89cc..3b9508c49f9 100755
--- a/ppdet/modeling/backbones/resnet.py
+++ b/ppdet/modeling/backbones/resnet.py
@@ -285,36 +285,6 @@ def __init__(self,
# ResNeXt
width = int(ch_out * (base_width / 64.)) * groups
- self.shortcut = shortcut
- if not shortcut:
- if variant == 'd' and stride == 2:
- self.short = nn.Sequential()
- self.short.add_sublayer(
- 'pool',
- nn.AvgPool2D(
- kernel_size=2, stride=2, padding=0, ceil_mode=True))
- self.short.add_sublayer(
- 'conv',
- ConvNormLayer(
- ch_in=ch_in,
- ch_out=ch_out * self.expansion,
- filter_size=1,
- stride=1,
- norm_type=norm_type,
- norm_decay=norm_decay,
- freeze_norm=freeze_norm,
- lr=lr))
- else:
- self.short = ConvNormLayer(
- ch_in=ch_in,
- ch_out=ch_out * self.expansion,
- filter_size=1,
- stride=stride,
- norm_type=norm_type,
- norm_decay=norm_decay,
- freeze_norm=freeze_norm,
- lr=lr)
-
self.branch2a = ConvNormLayer(
ch_in=ch_in,
ch_out=width,
@@ -351,6 +321,36 @@ def __init__(self,
freeze_norm=freeze_norm,
lr=lr)
+ self.shortcut = shortcut
+ if not shortcut:
+ if variant == 'd' and stride == 2:
+ self.short = nn.Sequential()
+ self.short.add_sublayer(
+ 'pool',
+ nn.AvgPool2D(
+ kernel_size=2, stride=2, padding=0, ceil_mode=True))
+ self.short.add_sublayer(
+ 'conv',
+ ConvNormLayer(
+ ch_in=ch_in,
+ ch_out=ch_out * self.expansion,
+ filter_size=1,
+ stride=1,
+ norm_type=norm_type,
+ norm_decay=norm_decay,
+ freeze_norm=freeze_norm,
+ lr=lr))
+ else:
+ self.short = ConvNormLayer(
+ ch_in=ch_in,
+ ch_out=ch_out * self.expansion,
+ filter_size=1,
+ stride=stride,
+ norm_type=norm_type,
+ norm_decay=norm_decay,
+ freeze_norm=freeze_norm,
+ lr=lr)
+
self.std_senet = std_senet
if self.std_senet:
self.se = SELayer(ch_out * self.expansion)
diff --git a/ppdet/modeling/backbones/vision_transformer.py b/ppdet/modeling/backbones/vision_transformer.py
index 825724fa4b5..a21eefc7aca 100644
--- a/ppdet/modeling/backbones/vision_transformer.py
+++ b/ppdet/modeling/backbones/vision_transformer.py
@@ -284,9 +284,9 @@ def __init__(self, window_size, num_heads):
def forward(self):
relative_position_bias = \
- self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
+ self.relative_position_bias_table[self.relative_position_index.reshape([-1])].reshape([
self.window_size[0] * self.window_size[1] + 1,
- self.window_size[0] * self.window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH
+ self.window_size[0] * self.window_size[1] + 1, -1]) # Wh*Ww,Wh*Ww,nH
return relative_position_bias.transpose((2, 0, 1)) # nH, Wh*Ww, Wh*Ww
diff --git a/ppdet/modeling/heads/__init__.py b/ppdet/modeling/heads/__init__.py
index 9cceb268a7d..07df124cd3a 100644
--- a/ppdet/modeling/heads/__init__.py
+++ b/ppdet/modeling/heads/__init__.py
@@ -67,3 +67,4 @@
from .ppyoloe_contrast_head import *
from .centertrack_head import *
from .sparse_roi_head import *
+from .petr_head import *
diff --git a/ppdet/modeling/heads/petr_head.py b/ppdet/modeling/heads/petr_head.py
new file mode 100644
index 00000000000..90760c66515
--- /dev/null
+++ b/ppdet/modeling/heads/petr_head.py
@@ -0,0 +1,1161 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is based on https://github.com/hikvision-research/opera/blob/main/opera/models/dense_heads/petr_head.py
+"""
+import copy
+import numpy as np
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from ppdet.core.workspace import register
+import paddle.distributed as dist
+
+from ..transformers.petr_transformer import inverse_sigmoid, masked_fill
+from ..initializer import constant_, normal_
+
+__all__ = ["PETRHead"]
+
+from functools import partial
+
+
+def bias_init_with_prob(prior_prob: float) -> float:
+ """initialize conv/fc bias value according to a given probability value."""
+ bias_init = float(-np.log((1 - prior_prob) / prior_prob))
+ return bias_init
+
+
+def multi_apply(func, *args, **kwargs):
+ """Apply function to a list of arguments.
+
+ Note:
+ This function applies the ``func`` to multiple inputs and
+ map the multiple outputs of the ``func`` into different
+ list. Each list contains the same type of outputs corresponding
+ to different inputs.
+
+ Args:
+ func (Function): A function that will be applied to a list of
+ arguments
+
+ Returns:
+        tuple(list): A tuple containing multiple lists, each list contains \
+            one kind of result returned by the function
+ """
+ pfunc = partial(func, **kwargs) if kwargs else func
+ map_results = map(pfunc, *args)
+ res = tuple(map(list, zip(*map_results)))
+ return res
+
+
+def reduce_mean(tensor):
+ """"Obtain the mean of tensor on different GPUs."""
+ if not (dist.get_world_size() and dist.is_initialized()):
+ return tensor
+ tensor = tensor.clone()
+ dist.all_reduce(
+ tensor.divide(
+ paddle.to_tensor(
+ dist.get_world_size(), dtype='float32')),
+ op=dist.ReduceOp.SUM)
+ return tensor
+
+
+def gaussian_radius(det_size, min_overlap=0.7):
+ """calculate gaussian radius according to object size.
+ """
+ height, width = det_size
+
+ a1 = 1
+ b1 = (height + width)
+ c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
+ sq1 = paddle.sqrt(b1**2 - 4 * a1 * c1)
+ r1 = (b1 + sq1) / 2
+
+ a2 = 4
+ b2 = 2 * (height + width)
+ c2 = (1 - min_overlap) * width * height
+ sq2 = paddle.sqrt(b2**2 - 4 * a2 * c2)
+ r2 = (b2 + sq2) / 2
+
+ a3 = 4 * min_overlap
+ b3 = -2 * min_overlap * (height + width)
+ c3 = (min_overlap - 1) * width * height
+ sq3 = paddle.sqrt(b3**2 - 4 * a3 * c3)
+ r3 = (b3 + sq3) / 2
+ return min(r1, r2, r3)
+
+
+def gaussian2D(shape, sigma=1):
+ m, n = [(ss - 1.) / 2. for ss in shape]
+ y = paddle.arange(-m, m + 1, dtype="float32")[:, None]
+ x = paddle.arange(-n, n + 1, dtype="float32")[None, :]
+ # y, x = np.ogrid[-m:m + 1, -n:n + 1]
+
+ h = paddle.exp(-(x * x + y * y) / (2 * sigma * sigma))
+ h[h < np.finfo(np.float32).eps * h.max()] = 0
+ return h
+
+
+def draw_umich_gaussian(heatmap, center, radius, k=1):
+ diameter = 2 * radius + 1
+ gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
+ gaussian = paddle.to_tensor(gaussian, dtype=heatmap.dtype)
+
+ x, y = int(center[0]), int(center[1])
+ radius = int(radius)
+
+ height, width = heatmap.shape[0:2]
+
+ left, right = min(x, radius), min(width - x, radius + 1)
+ top, bottom = min(y, radius), min(height - y, radius + 1)
+
+ masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
+ masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
+ radius + right]
+ # assert masked_gaussian.equal(1).float().sum() == 1
+ if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
+ heatmap[y - top:y + bottom, x - left:x + right] = paddle.maximum(
+ masked_heatmap, masked_gaussian * k)
+ return heatmap
+
+
+@register
+class PETRHead(nn.Layer):
+ """Head of `End-to-End Multi-Person Pose Estimation with Transformers`.
+
+ Args:
+ num_classes (int): Number of categories excluding the background.
+ in_channels (int): Number of channels in the input feature map.
+ num_query (int): Number of query in Transformer.
+ num_kpt_fcs (int, optional): Number of fully-connected layers used in
+ `FFN`, which is then used for the keypoint regression head.
+ Default 2.
+ transformer (obj:`mmcv.ConfigDict`|dict): ConfigDict is used for
+ building the Encoder and Decoder. Default: None.
+ sync_cls_avg_factor (bool): Whether to sync the avg_factor of
+ all ranks. Default to False.
+ positional_encoding (obj:`mmcv.ConfigDict`|dict):
+ Config for position encoding.
+ loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the
+ classification loss. Default `CrossEntropyLoss`.
+ loss_kpt (obj:`mmcv.ConfigDict`|dict): Config of the
+ regression loss. Default `L1Loss`.
+ loss_oks (obj:`mmcv.ConfigDict`|dict): Config of the
+ regression oks loss. Default `OKSLoss`.
+ loss_hm (obj:`mmcv.ConfigDict`|dict): Config of the
+ regression heatmap loss. Default `NegLoss`.
+ as_two_stage (bool) : Whether to generate the proposal from
+ the outputs of encoder.
+ with_kpt_refine (bool): Whether to refine the reference points
+ in the decoder. Defaults to True.
+ test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of
+ transformer head.
+ init_cfg (dict or list[dict], optional): Initialization config dict.
+ Default: None.
+ """
+ __inject__ = [
+ "transformer", "positional_encoding", "assigner", "sampler", "loss_cls",
+ "loss_kpt", "loss_oks", "loss_hm", "loss_kpt_rpn", "loss_kpt_refine",
+ "loss_oks_refine"
+ ]
+
+ def __init__(self,
+ num_classes,
+ in_channels,
+ num_query=100,
+ num_kpt_fcs=2,
+ num_keypoints=17,
+ transformer=None,
+ sync_cls_avg_factor=True,
+ positional_encoding='SinePositionalEncoding',
+ loss_cls='FocalLoss',
+ loss_kpt='L1Loss',
+ loss_oks='OKSLoss',
+ loss_hm='CenterFocalLoss',
+ with_kpt_refine=True,
+ assigner='PoseHungarianAssigner',
+ sampler='PseudoSampler',
+ loss_kpt_rpn='L1Loss',
+ loss_kpt_refine='L1Loss',
+ loss_oks_refine='opera.OKSLoss',
+ test_cfg=dict(max_per_img=100),
+ init_cfg=None,
+ **kwargs):
+ # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,
+ # since it brings inconvenience when the initialization of
+ # `AnchorFreeHead` is called.
+ super().__init__()
+ self.bg_cls_weight = 0
+ self.sync_cls_avg_factor = sync_cls_avg_factor
+ self.assigner = assigner
+ self.sampler = sampler
+ self.num_query = num_query
+ self.num_classes = num_classes
+ self.in_channels = in_channels
+ self.num_kpt_fcs = num_kpt_fcs
+ self.test_cfg = test_cfg
+ self.fp16_enabled = False
+ self.as_two_stage = transformer.as_two_stage
+ self.with_kpt_refine = with_kpt_refine
+ self.num_keypoints = num_keypoints
+ self.loss_cls = loss_cls
+ self.loss_kpt = loss_kpt
+ self.loss_kpt_rpn = loss_kpt_rpn
+ self.loss_kpt_refine = loss_kpt_refine
+ self.loss_oks = loss_oks
+ self.loss_oks_refine = loss_oks_refine
+ self.loss_hm = loss_hm
+ if self.loss_cls.use_sigmoid:
+ self.cls_out_channels = num_classes
+ else:
+ self.cls_out_channels = num_classes + 1
+ self.positional_encoding = positional_encoding
+ self.transformer = transformer
+ self.embed_dims = self.transformer.embed_dims
+ # assert 'num_feats' in positional_encoding
+ num_feats = positional_encoding.num_pos_feats
+ assert num_feats * 2 == self.embed_dims, 'embed_dims should' \
+ f' be exactly 2 times of num_feats. Found {self.embed_dims}' \
+ f' and {num_feats}.'
+ self._init_layers()
+ self.init_weights()
+
+ def _init_layers(self):
+ """Initialize classification branch and keypoint branch of head."""
+
+ fc_cls = nn.Linear(self.embed_dims, self.cls_out_channels)
+
+ kpt_branch = []
+ kpt_branch.append(nn.Linear(self.embed_dims, 512))
+ kpt_branch.append(nn.ReLU())
+ for _ in range(self.num_kpt_fcs):
+ kpt_branch.append(nn.Linear(512, 512))
+ kpt_branch.append(nn.ReLU())
+ kpt_branch.append(nn.Linear(512, 2 * self.num_keypoints))
+ kpt_branch = nn.Sequential(*kpt_branch)
+
+ def _get_clones(module, N):
+ return nn.LayerList([copy.deepcopy(module) for i in range(N)])
+
+ # last kpt_branch is used to generate proposal from
+ # encode feature map when as_two_stage is True.
+ num_pred = (self.transformer.decoder.num_layers + 1) if \
+ self.as_two_stage else self.transformer.decoder.num_layers
+
+ if self.with_kpt_refine:
+ self.cls_branches = _get_clones(fc_cls, num_pred)
+ self.kpt_branches = _get_clones(kpt_branch, num_pred)
+ else:
+ self.cls_branches = nn.LayerList([fc_cls for _ in range(num_pred)])
+ self.kpt_branches = nn.LayerList(
+ [kpt_branch for _ in range(num_pred)])
+
+ self.query_embedding = nn.Embedding(self.num_query, self.embed_dims * 2)
+
+ refine_kpt_branch = []
+ for _ in range(self.num_kpt_fcs):
+ refine_kpt_branch.append(
+ nn.Linear(self.embed_dims, self.embed_dims))
+ refine_kpt_branch.append(nn.ReLU())
+ refine_kpt_branch.append(nn.Linear(self.embed_dims, 2))
+ refine_kpt_branch = nn.Sequential(*refine_kpt_branch)
+ if self.with_kpt_refine:
+ num_pred = self.transformer.refine_decoder.num_layers
+ self.refine_kpt_branches = _get_clones(refine_kpt_branch, num_pred)
+ self.fc_hm = nn.Linear(self.embed_dims, self.num_keypoints)
+
+ def init_weights(self):
+ """Initialize weights of the PETR head."""
+ self.transformer.init_weights()
+ if self.loss_cls.use_sigmoid:
+ bias_init = bias_init_with_prob(0.01)
+ for m in self.cls_branches:
+ constant_(m.bias, bias_init)
+ for m in self.kpt_branches:
+ constant_(m[-1].bias, 0)
+ # initialization of keypoint refinement branch
+ if self.with_kpt_refine:
+ for m in self.refine_kpt_branches:
+ constant_(m[-1].bias, 0)
+ # initialize bias for heatmap prediction
+ bias_init = bias_init_with_prob(0.1)
+ normal_(self.fc_hm.weight, std=0.01)
+ constant_(self.fc_hm.bias, bias_init)
+
+ def forward(self, mlvl_feats, img_metas):
+ """Forward function.
+
+ Args:
+ mlvl_feats (tuple[Tensor]): Features from the upstream
+ network, each is a 4D-tensor with shape
+ (N, C, H, W).
+ img_metas (list[dict]): List of image information.
+
+ Returns:
+ outputs_classes (Tensor): Outputs from the classification head,
+ shape [nb_dec, bs, num_query, cls_out_channels]. Note
+ cls_out_channels should include background.
+ outputs_kpts (Tensor): Sigmoid outputs from the regression
+ head with normalized coordinate format (cx, cy, w, h).
+ Shape [nb_dec, bs, num_query, K*2].
+ enc_outputs_class (Tensor): The score of each point on encode
+ feature map, has shape (N, h*w, num_class). Only when
+                as_two_stage is True it would be returned, otherwise
+ `None` would be returned.
+ enc_outputs_kpt (Tensor): The proposal generate from the
+ encode feature map, has shape (N, h*w, K*2). Only when
+                as_two_stage is True it would be returned, otherwise
+ `None` would be returned.
+ """
+
+ batch_size = mlvl_feats[0].shape[0]
+ input_img_h, input_img_w = img_metas[0]['batch_input_shape']
+ img_masks = paddle.zeros(
+ (batch_size, input_img_h, input_img_w), dtype=mlvl_feats[0].dtype)
+ for img_id in range(batch_size):
+ img_h, img_w, _ = img_metas[img_id]['img_shape']
+ img_masks[img_id, :img_h, :img_w] = 1
+
+ mlvl_masks = []
+ mlvl_positional_encodings = []
+ for feat in mlvl_feats:
+ mlvl_masks.append(
+ F.interpolate(
+ img_masks[None], size=feat.shape[-2:]).squeeze(0))
+ mlvl_positional_encodings.append(
+ self.positional_encoding(mlvl_masks[-1]).transpose(
+ [0, 3, 1, 2]))
+
+ query_embeds = self.query_embedding.weight
+ hs, init_reference, inter_references, \
+ enc_outputs_class, enc_outputs_kpt, hm_proto, memory = \
+ self.transformer(
+ mlvl_feats,
+ mlvl_masks,
+ query_embeds,
+ mlvl_positional_encodings,
+ kpt_branches=self.kpt_branches \
+ if self.with_kpt_refine else None, # noqa:E501
+ cls_branches=self.cls_branches \
+ if self.as_two_stage else None # noqa:E501
+ )
+
+ outputs_classes = []
+ outputs_kpts = []
+
+ for lvl in range(hs.shape[0]):
+ if lvl == 0:
+ reference = init_reference
+ else:
+ reference = inter_references[lvl - 1]
+ reference = inverse_sigmoid(reference)
+ outputs_class = self.cls_branches[lvl](hs[lvl])
+ tmp_kpt = self.kpt_branches[lvl](hs[lvl])
+ assert reference.shape[-1] == self.num_keypoints * 2
+ tmp_kpt += reference
+ outputs_kpt = F.sigmoid(tmp_kpt)
+ outputs_classes.append(outputs_class)
+ outputs_kpts.append(outputs_kpt)
+
+ outputs_classes = paddle.stack(outputs_classes)
+ outputs_kpts = paddle.stack(outputs_kpts)
+
+ if hm_proto is not None:
+ # get heatmap prediction (training phase)
+ hm_memory, hm_mask = hm_proto
+ hm_pred = self.fc_hm(hm_memory)
+ hm_proto = (hm_pred.transpose((0, 3, 1, 2)), hm_mask)
+
+ if self.as_two_stage:
+ return outputs_classes, outputs_kpts, \
+ enc_outputs_class, F.sigmoid(enc_outputs_kpt), \
+ hm_proto, memory, mlvl_masks
+ else:
+ raise RuntimeError('only "as_two_stage=True" is supported.')
+
+ def forward_refine(self, memory, mlvl_masks, refine_targets, losses,
+ img_metas):
+ """Forward function.
+
+ Args:
+ mlvl_masks (tuple[Tensor]): The key_padding_mask from
+ different level used for encoder and decoder,
+ each is a 3D-tensor with shape (bs, H, W).
+ losses (dict[str, Tensor]): A dictionary of loss components.
+ img_metas (list[dict]): List of image information.
+
+ Returns:
+ dict[str, Tensor]: A dictionary of loss components.
+ """
+ kpt_preds, kpt_targets, area_targets, kpt_weights = refine_targets
+ pos_inds = kpt_weights.sum(-1) > 0
+ if not pos_inds.any():
+ pos_kpt_preds = paddle.zeros_like(kpt_preds[:1])
+ pos_img_inds = paddle.zeros([1], dtype="int64")
+ else:
+ pos_kpt_preds = kpt_preds[pos_inds]
+ pos_img_inds = (pos_inds.nonzero() /
+ self.num_query).squeeze(1).astype("int64")
+ hs, init_reference, inter_references = self.transformer.forward_refine(
+ mlvl_masks,
+ memory,
+ pos_kpt_preds.detach(),
+ pos_img_inds,
+ kpt_branches=self.refine_kpt_branches
+ if self.with_kpt_refine else None, # noqa:E501
+ )
+
+ outputs_kpts = []
+
+ for lvl in range(hs.shape[0]):
+ if lvl == 0:
+ reference = init_reference
+ else:
+ reference = inter_references[lvl - 1]
+ reference = inverse_sigmoid(reference)
+ tmp_kpt = self.refine_kpt_branches[lvl](hs[lvl])
+ assert reference.shape[-1] == 2
+ tmp_kpt += reference
+ outputs_kpt = F.sigmoid(tmp_kpt)
+ outputs_kpts.append(outputs_kpt)
+ outputs_kpts = paddle.stack(outputs_kpts)
+
+ if not self.training:
+ return outputs_kpts
+
+ num_valid_kpt = paddle.clip(
+ reduce_mean(kpt_weights.sum()), min=1).item()
+ num_total_pos = paddle.to_tensor(
+ [outputs_kpts.shape[1]], dtype=kpt_weights.dtype)
+ num_total_pos = paddle.clip(reduce_mean(num_total_pos), min=1).item()
+
+ if not pos_inds.any():
+ for i, kpt_refine_preds in enumerate(outputs_kpts):
+ loss_kpt = loss_oks = kpt_refine_preds.sum() * 0
+ losses[f'd{i}.loss_kpt_refine'] = loss_kpt
+ losses[f'd{i}.loss_oks_refine'] = loss_oks
+ continue
+ return losses
+
+ batch_size = mlvl_masks[0].shape[0]
+ factors = []
+ for img_id in range(batch_size):
+ img_h, img_w, _ = img_metas[img_id]['img_shape']
+ factor = paddle.to_tensor(
+ [img_w, img_h, img_w, img_h],
+ dtype="float32").squeeze(-1).unsqueeze(0).tile(
+ (self.num_query, 1))
+ factors.append(factor)
+ factors = paddle.concat(factors, 0)
+ factors = factors[pos_inds][:, :2].tile((1, kpt_preds.shape[-1] // 2))
+
+ pos_kpt_weights = kpt_weights[pos_inds]
+ pos_kpt_targets = kpt_targets[pos_inds]
+ pos_kpt_targets_scaled = pos_kpt_targets * factors
+ pos_areas = area_targets[pos_inds]
+ pos_valid = kpt_weights[pos_inds][:, 0::2]
+ for i, kpt_refine_preds in enumerate(outputs_kpts):
+ if not pos_inds.any():
+ print("refine kpt and oks skip")
+ loss_kpt = loss_oks = kpt_refine_preds.sum() * 0
+ losses[f'd{i}.loss_kpt_refine'] = loss_kpt
+ losses[f'd{i}.loss_oks_refine'] = loss_oks
+ continue
+
+ # kpt L1 Loss
+ pos_refine_preds = kpt_refine_preds.reshape(
+ (kpt_refine_preds.shape[0], -1))
+ loss_kpt = self.loss_kpt_refine(
+ pos_refine_preds,
+ pos_kpt_targets,
+ pos_kpt_weights,
+ avg_factor=num_valid_kpt)
+ losses[f'd{i}.loss_kpt_refine'] = loss_kpt
+ # kpt oks loss
+ pos_refine_preds_scaled = pos_refine_preds * factors
+ assert (pos_areas > 0).all()
+ loss_oks = self.loss_oks_refine(
+ pos_refine_preds_scaled,
+ pos_kpt_targets_scaled,
+ pos_valid,
+ pos_areas,
+ avg_factor=num_total_pos)
+ losses[f'd{i}.loss_oks_refine'] = loss_oks
+ return losses
+
+ # over-write because img_metas are needed as inputs for bbox_head.
+ def forward_train(self,
+ x,
+ img_metas,
+ gt_bboxes,
+ gt_labels=None,
+ gt_keypoints=None,
+ gt_areas=None,
+ gt_bboxes_ignore=None,
+ proposal_cfg=None,
+ **kwargs):
+ """Forward function for training mode.
+
+ Args:
+ x (list[Tensor]): Features from backbone.
+ img_metas (list[dict]): Meta information of each image, e.g.,
+ image size, scaling factor, etc.
+ gt_bboxes (list[Tensor]): Ground truth bboxes of the image,
+ shape (num_gts, 4).
+ gt_labels (list[Tensor]): Ground truth labels of each box,
+ shape (num_gts,).
+ gt_keypoints (list[Tensor]): Ground truth keypoints of the image,
+ shape (num_gts, K*3).
+ gt_areas (list[Tensor]): Ground truth mask areas of each box,
+ shape (num_gts,).
+ gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be
+ ignored, shape (num_ignored_gts, 4).
+ proposal_cfg (mmcv.Config): Test / postprocessing configuration,
+ if None, test_cfg would be used.
+
+ Returns:
+ dict[str, Tensor]: A dictionary of loss components.
+ """
+ assert proposal_cfg is None, '"proposal_cfg" must be None'
+ outs = self(x, img_metas)
+ memory, mlvl_masks = outs[-2:]
+ outs = outs[:-2]
+ if gt_labels is None:
+ loss_inputs = outs + (gt_bboxes, gt_keypoints, gt_areas, img_metas)
+ else:
+ loss_inputs = outs + (gt_bboxes, gt_labels, gt_keypoints, gt_areas,
+ img_metas)
+ losses_and_targets = self.loss(
+ *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+ # losses = losses_and_targets
+ losses, refine_targets = losses_and_targets
+ # get pose refinement loss
+ losses = self.forward_refine(memory, mlvl_masks, refine_targets, losses,
+ img_metas)
+ return losses
+
+ def loss(self,
+ all_cls_scores,
+ all_kpt_preds,
+ enc_cls_scores,
+ enc_kpt_preds,
+ enc_hm_proto,
+ gt_bboxes_list,
+ gt_labels_list,
+ gt_keypoints_list,
+ gt_areas_list,
+ img_metas,
+ gt_bboxes_ignore=None):
+ """Loss function.
+
+ Args:
+ all_cls_scores (Tensor): Classification score of all
+ decoder layers, has shape
+ [nb_dec, bs, num_query, cls_out_channels].
+ all_kpt_preds (Tensor): Sigmoid regression
+ outputs of all decode layers. Each is a 4D-tensor with
+ normalized coordinate format (x_{i}, y_{i}) and shape
+ [nb_dec, bs, num_query, K*2].
+ enc_cls_scores (Tensor): Classification scores of
+ points on encode feature map, has shape
+ (N, h*w, num_classes). Only be passed when as_two_stage is
+ True, otherwise is None.
+ enc_kpt_preds (Tensor): Regression results of each points
+ on the encode feature map, has shape (N, h*w, K*2). Only be
+ passed when as_two_stage is True, otherwise is None.
+ gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image
+ with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
+ gt_labels_list (list[Tensor]): Ground truth class indices for each
+ image with shape (num_gts, ).
+ gt_keypoints_list (list[Tensor]): Ground truth keypoints for each
+ image with shape (num_gts, K*3) in [p^{1}_x, p^{1}_y, p^{1}_v,
+ ..., p^{K}_x, p^{K}_y, p^{K}_v] format.
+ gt_areas_list (list[Tensor]): Ground truth mask areas for each
+ image with shape (num_gts, ).
+ img_metas (list[dict]): List of image meta information.
+ gt_bboxes_ignore (list[Tensor], optional): Bounding boxes
+ which can be ignored for each image. Default None.
+
+ Returns:
+ dict[str, Tensor]: A dictionary of loss components.
+ """
+ assert gt_bboxes_ignore is None, \
+ f'{self.__class__.__name__} only supports ' \
+ f'for gt_bboxes_ignore setting to None.'
+
+ num_dec_layers = len(all_cls_scores)
+ all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]
+ all_gt_keypoints_list = [
+ gt_keypoints_list for _ in range(num_dec_layers)
+ ]
+ all_gt_areas_list = [gt_areas_list for _ in range(num_dec_layers)]
+ img_metas_list = [img_metas for _ in range(num_dec_layers)]
+
+ losses_cls, losses_kpt, losses_oks, kpt_preds_list, kpt_targets_list, \
+ area_targets_list, kpt_weights_list = multi_apply(
+ self.loss_single, all_cls_scores, all_kpt_preds,
+ all_gt_labels_list, all_gt_keypoints_list,
+ all_gt_areas_list, img_metas_list)
+
+ loss_dict = dict()
+ # loss of proposal generated from encode feature map.
+ if enc_cls_scores is not None:
+ binary_labels_list = [
+ paddle.zeros_like(gt_labels_list[i])
+ for i in range(len(img_metas))
+ ]
+ enc_loss_cls, enc_losses_kpt = \
+ self.loss_single_rpn(
+ enc_cls_scores, enc_kpt_preds, binary_labels_list,
+ gt_keypoints_list, gt_areas_list, img_metas)
+ loss_dict['enc_loss_cls'] = enc_loss_cls
+ loss_dict['enc_loss_kpt'] = enc_losses_kpt
+
+ # loss from the last decoder layer
+ loss_dict['loss_cls'] = losses_cls[-1]
+ loss_dict['loss_kpt'] = losses_kpt[-1]
+ loss_dict['loss_oks'] = losses_oks[-1]
+ # loss from other decoder layers
+ num_dec_layer = 0
+ for loss_cls_i, loss_kpt_i, loss_oks_i in zip(
+ losses_cls[:-1], losses_kpt[:-1], losses_oks[:-1]):
+ loss_dict[f'd{num_dec_layer}.loss_cls'] = loss_cls_i
+ loss_dict[f'd{num_dec_layer}.loss_kpt'] = loss_kpt_i
+ loss_dict[f'd{num_dec_layer}.loss_oks'] = loss_oks_i
+ num_dec_layer += 1
+
+ # losses of heatmap generated from P3 feature map
+ hm_pred, hm_mask = enc_hm_proto
+ loss_hm = self.loss_heatmap(hm_pred, hm_mask, gt_keypoints_list,
+ gt_labels_list, gt_bboxes_list)
+ loss_dict['loss_hm'] = loss_hm
+
+ return loss_dict, (kpt_preds_list[-1], kpt_targets_list[-1],
+ area_targets_list[-1], kpt_weights_list[-1])
+
+ def loss_heatmap(self, hm_pred, hm_mask, gt_keypoints, gt_labels,
+ gt_bboxes):
+ assert hm_pred.shape[-2:] == hm_mask.shape[-2:]
+ num_img, _, h, w = hm_pred.shape
+ # placeholder of heatmap target (Gaussian distribution)
+ hm_target = paddle.zeros(hm_pred.shape, hm_pred.dtype)
+ for i, (gt_label, gt_bbox, gt_keypoint
+ ) in enumerate(zip(gt_labels, gt_bboxes, gt_keypoints)):
+ if gt_label.shape[0] == 0:
+ continue
+ gt_keypoint = gt_keypoint.reshape((gt_keypoint.shape[0], -1,
+ 3)).clone()
+ gt_keypoint[..., :2] /= 8
+
+ assert gt_keypoint[..., 0].max() <= w + 0.5 # new coordinate system
+ assert gt_keypoint[..., 1].max() <= h + 0.5 # new coordinate system
+ gt_bbox /= 8
+ gt_w = gt_bbox[:, 2] - gt_bbox[:, 0]
+ gt_h = gt_bbox[:, 3] - gt_bbox[:, 1]
+ for j in range(gt_label.shape[0]):
+ # get heatmap radius
+ kp_radius = paddle.clip(
+ paddle.floor(
+ gaussian_radius(
+ (gt_h[j], gt_w[j]), min_overlap=0.9)),
+ min=0,
+ max=3)
+ for k in range(self.num_keypoints):
+ if gt_keypoint[j, k, 2] > 0:
+ gt_kp = gt_keypoint[j, k, :2]
+ gt_kp_int = paddle.floor(gt_kp)
+ hm_target[i, k] = draw_umich_gaussian(
+ hm_target[i, k], gt_kp_int, kp_radius)
+ # compute heatmap loss
+ hm_pred = paddle.clip(
+ F.sigmoid(hm_pred), min=1e-4, max=1 - 1e-4) # refer to CenterNet
+ loss_hm = self.loss_hm(
+ hm_pred,
+ hm_target.detach(),
+ mask=~hm_mask.astype("bool").unsqueeze(1))
+ return loss_hm
+
+ def loss_single(self, cls_scores, kpt_preds, gt_labels_list,
+ gt_keypoints_list, gt_areas_list, img_metas):
+ """Loss function for outputs from a single decoder layer of a single
+ feature level.
+
+ Args:
+ cls_scores (Tensor): Box score logits from a single decoder layer
+ for all images. Shape [bs, num_query, cls_out_channels].
+ kpt_preds (Tensor): Sigmoid outputs from a single decoder layer
+ for all images, with normalized coordinate (x_{i}, y_{i}) and
+ shape [bs, num_query, K*2].
+ gt_labels_list (list[Tensor]): Ground truth class indices for each
+ image with shape (num_gts, ).
+ gt_keypoints_list (list[Tensor]): Ground truth keypoints for each
+ image with shape (num_gts, K*3) in [p^{1}_x, p^{1}_y, p^{1}_v,
+ ..., p^{K}_x, p^{K}_y, p^{K}_v] format.
+ gt_areas_list (list[Tensor]): Ground truth mask areas for each
+ image with shape (num_gts, ).
+ img_metas (list[dict]): List of image meta information.
+
+ Returns:
+ dict[str, Tensor]: A dictionary of loss components for outputs from
+ a single decoder layer.
+ """
+ num_imgs = cls_scores.shape[0]
+ cls_scores_list = [cls_scores[i] for i in range(num_imgs)]
+ kpt_preds_list = [kpt_preds[i] for i in range(num_imgs)]
+ cls_reg_targets = self.get_targets(cls_scores_list, kpt_preds_list,
+ gt_labels_list, gt_keypoints_list,
+ gt_areas_list, img_metas)
+ (labels_list, label_weights_list, kpt_targets_list, kpt_weights_list,
+ area_targets_list, num_total_pos, num_total_neg) = cls_reg_targets
+ labels = paddle.concat(labels_list, 0)
+ label_weights = paddle.concat(label_weights_list, 0)
+ kpt_targets = paddle.concat(kpt_targets_list, 0)
+ kpt_weights = paddle.concat(kpt_weights_list, 0)
+ area_targets = paddle.concat(area_targets_list, 0)
+
+ # classification loss
+ cls_scores = cls_scores.reshape((-1, self.cls_out_channels))
+ # construct weighted avg_factor to match with the official DETR repo
+ cls_avg_factor = num_total_pos * 1.0 + \
+ num_total_neg * self.bg_cls_weight
+ if self.sync_cls_avg_factor:
+ cls_avg_factor = reduce_mean(
+ paddle.to_tensor(
+ [cls_avg_factor], dtype=cls_scores.dtype))
+ cls_avg_factor = max(cls_avg_factor, 1)
+
+ loss_cls = self.loss_cls(
+ cls_scores, labels, label_weights, avg_factor=cls_avg_factor)
+
+        # Compute the average number of gt keypoints across all GPUs, for
+ # normalization purposes
+ num_total_pos = paddle.to_tensor([num_total_pos], dtype=loss_cls.dtype)
+ num_total_pos = paddle.clip(reduce_mean(num_total_pos), min=1).item()
+
+ # construct factors used for rescale keypoints
+ factors = []
+ for img_meta, kpt_pred in zip(img_metas, kpt_preds):
+ img_h, img_w, _ = img_meta['img_shape']
+ factor = paddle.to_tensor(
+ [img_w, img_h, img_w, img_h],
+ dtype=kpt_pred.dtype).squeeze().unsqueeze(0).tile(
+ (kpt_pred.shape[0], 1))
+ factors.append(factor)
+ factors = paddle.concat(factors, 0)
+
+ # keypoint regression loss
+ kpt_preds = kpt_preds.reshape((-1, kpt_preds.shape[-1]))
+ num_valid_kpt = paddle.clip(
+ reduce_mean(kpt_weights.sum()), min=1).item()
+ # assert num_valid_kpt == (kpt_targets>0).sum().item()
+ loss_kpt = self.loss_kpt(
+ kpt_preds,
+ kpt_targets.detach(),
+ kpt_weights.detach(),
+ avg_factor=num_valid_kpt)
+
+ # keypoint oks loss
+ pos_inds = kpt_weights.sum(-1) > 0
+ if not pos_inds.any():
+ loss_oks = kpt_preds.sum() * 0
+ else:
+ factors = factors[pos_inds][:, :2].tile((
+ (1, kpt_preds.shape[-1] // 2)))
+ pos_kpt_preds = kpt_preds[pos_inds] * factors
+ pos_kpt_targets = kpt_targets[pos_inds] * factors
+ pos_areas = area_targets[pos_inds]
+ pos_valid = kpt_weights[pos_inds][..., 0::2]
+ assert (pos_areas > 0).all()
+ loss_oks = self.loss_oks(
+ pos_kpt_preds,
+ pos_kpt_targets,
+ pos_valid,
+ pos_areas,
+ avg_factor=num_total_pos)
+ return loss_cls, loss_kpt, loss_oks, kpt_preds, kpt_targets, \
+ area_targets, kpt_weights
+
+ def get_targets(self, cls_scores_list, kpt_preds_list, gt_labels_list,
+ gt_keypoints_list, gt_areas_list, img_metas):
+ """Compute regression and classification targets for a batch image.
+
+ Outputs from a single decoder layer of a single feature level are used.
+
+ Args:
+ cls_scores_list (list[Tensor]): Box score logits from a single
+ decoder layer for each image with shape [num_query,
+ cls_out_channels].
+ kpt_preds_list (list[Tensor]): Sigmoid outputs from a single
+ decoder layer for each image, with normalized coordinate
+ (x_{i}, y_{i}) and shape [num_query, K*2].
+ gt_labels_list (list[Tensor]): Ground truth class indices for each
+ image with shape (num_gts, ).
+ gt_keypoints_list (list[Tensor]): Ground truth keypoints for each
+ image with shape (num_gts, K*3).
+ gt_areas_list (list[Tensor]): Ground truth mask areas for each
+ image with shape (num_gts, ).
+ img_metas (list[dict]): List of image meta information.
+
+ Returns:
+ tuple: a tuple containing the following targets.
+
+ - labels_list (list[Tensor]): Labels for all images.
+ - label_weights_list (list[Tensor]): Label weights for all
+ images.
+ - kpt_targets_list (list[Tensor]): Keypoint targets for all
+ images.
+ - kpt_weights_list (list[Tensor]): Keypoint weights for all
+ images.
+ - area_targets_list (list[Tensor]): area targets for all
+ images.
+ - num_total_pos (int): Number of positive samples in all
+ images.
+ - num_total_neg (int): Number of negative samples in all
+ images.
+ """
+ (labels_list, label_weights_list, kpt_targets_list, kpt_weights_list,
+ area_targets_list, pos_inds_list, neg_inds_list) = multi_apply(
+ self._get_target_single, cls_scores_list, kpt_preds_list,
+ gt_labels_list, gt_keypoints_list, gt_areas_list, img_metas)
+ num_total_pos = sum((inds.numel() for inds in pos_inds_list))
+ num_total_neg = sum((inds.numel() for inds in neg_inds_list))
+ return (labels_list, label_weights_list, kpt_targets_list,
+ kpt_weights_list, area_targets_list, num_total_pos,
+ num_total_neg)
+
+ def _get_target_single(self, cls_score, kpt_pred, gt_labels, gt_keypoints,
+ gt_areas, img_meta):
+ """Compute regression and classification targets for one image.
+
+ Outputs from a single decoder layer of a single feature level are used.
+
+ Args:
+ cls_score (Tensor): Box score logits from a single decoder layer
+ for one image. Shape [num_query, cls_out_channels].
+ kpt_pred (Tensor): Sigmoid outputs from a single decoder layer
+ for one image, with normalized coordinate (x_{i}, y_{i}) and
+ shape [num_query, K*2].
+ gt_labels (Tensor): Ground truth class indices for one image
+ with shape (num_gts, ).
+ gt_keypoints (Tensor): Ground truth keypoints for one image with
+ shape (num_gts, K*3) in [p^{1}_x, p^{1}_y, p^{1}_v, ..., \
+ p^{K}_x, p^{K}_y, p^{K}_v] format.
+ gt_areas (Tensor): Ground truth mask areas for one image
+ with shape (num_gts, ).
+ img_meta (dict): Meta information for one image.
+
+ Returns:
+ tuple[Tensor]: a tuple containing the following for one image.
+
+ - labels (Tensor): Labels of each image.
+ - label_weights (Tensor): Label weights of each image.
+ - kpt_targets (Tensor): Keypoint targets of each image.
+ - kpt_weights (Tensor): Keypoint weights of each image.
+ - area_targets (Tensor): Area targets of each image.
+ - pos_inds (Tensor): Sampled positive indices for each image.
+ - neg_inds (Tensor): Sampled negative indices for each image.
+ """
+ num_bboxes = kpt_pred.shape[0]
+ # assigner and sampler
+ assign_result = self.assigner.assign(cls_score, kpt_pred, gt_labels,
+ gt_keypoints, gt_areas, img_meta)
+ sampling_result = self.sampler.sample(assign_result, kpt_pred,
+ gt_keypoints)
+
+ pos_inds = sampling_result.pos_inds
+ neg_inds = sampling_result.neg_inds
+
+ # label targets
+ labels = paddle.full((num_bboxes, ), self.num_classes, dtype="int64")
+ label_weights = paddle.ones((num_bboxes, ), dtype=gt_labels.dtype)
+ kpt_targets = paddle.zeros_like(kpt_pred)
+ kpt_weights = paddle.zeros_like(kpt_pred)
+ area_targets = paddle.zeros((kpt_pred.shape[0], ), dtype=kpt_pred.dtype)
+
+ if pos_inds.size == 0:
+ return (labels, label_weights, kpt_targets, kpt_weights,
+ area_targets, pos_inds, neg_inds)
+
+ labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds][
+ ..., 0].astype("int64")
+
+ img_h, img_w, _ = img_meta['img_shape']
+ # keypoint targets
+ pos_gt_kpts = gt_keypoints[sampling_result.pos_assigned_gt_inds]
+ pos_gt_kpts = pos_gt_kpts.reshape(
+ (len(sampling_result.pos_assigned_gt_inds), -1, 3))
+ valid_idx = pos_gt_kpts[:, :, 2] > 0
+ pos_kpt_weights = kpt_weights[pos_inds].reshape(
+ (pos_gt_kpts.shape[0], kpt_weights.shape[-1] // 2, 2))
+ # pos_kpt_weights[valid_idx][...] = 1.0
+ pos_kpt_weights = masked_fill(pos_kpt_weights,
+ valid_idx.unsqueeze(-1), 1.0)
+ kpt_weights[pos_inds] = pos_kpt_weights.reshape(
+ (pos_kpt_weights.shape[0], kpt_pred.shape[-1]))
+
+ factor = paddle.to_tensor(
+ [img_w, img_h], dtype=kpt_pred.dtype).squeeze().unsqueeze(0)
+ pos_gt_kpts_normalized = pos_gt_kpts[..., :2]
+ pos_gt_kpts_normalized[..., 0] = pos_gt_kpts_normalized[..., 0] / \
+ factor[:, 0:1]
+ pos_gt_kpts_normalized[..., 1] = pos_gt_kpts_normalized[..., 1] / \
+ factor[:, 1:2]
+ kpt_targets[pos_inds] = pos_gt_kpts_normalized.reshape(
+ (pos_gt_kpts.shape[0], kpt_pred.shape[-1]))
+
+ pos_gt_areas = gt_areas[sampling_result.pos_assigned_gt_inds][..., 0]
+ area_targets[pos_inds] = pos_gt_areas
+
+ return (labels, label_weights, kpt_targets, kpt_weights, area_targets,
+ pos_inds, neg_inds)
+
+ def loss_single_rpn(self, cls_scores, kpt_preds, gt_labels_list,
+ gt_keypoints_list, gt_areas_list, img_metas):
+ """Loss function for outputs from a single decoder layer of a single
+ feature level.
+
+ Args:
+ cls_scores (Tensor): Box score logits from a single decoder layer
+ for all images. Shape [bs, num_query, cls_out_channels].
+ kpt_preds (Tensor): Sigmoid outputs from a single decoder layer
+ for all images, with normalized coordinate (x_{i}, y_{i}) and
+ shape [bs, num_query, K*2].
+ gt_labels_list (list[Tensor]): Ground truth class indices for each
+ image with shape (num_gts, ).
+ gt_keypoints_list (list[Tensor]): Ground truth keypoints for each
+ image with shape (num_gts, K*3) in [p^{1}_x, p^{1}_y, p^{1}_v,
+ ..., p^{K}_x, p^{K}_y, p^{K}_v] format.
+ gt_areas_list (list[Tensor]): Ground truth mask areas for each
+ image with shape (num_gts, ).
+ img_metas (list[dict]): List of image meta information.
+
+ Returns:
+ dict[str, Tensor]: A dictionary of loss components for outputs from
+ a single decoder layer.
+ """
+ num_imgs = cls_scores.shape[0]
+ cls_scores_list = [cls_scores[i] for i in range(num_imgs)]
+ kpt_preds_list = [kpt_preds[i] for i in range(num_imgs)]
+ cls_reg_targets = self.get_targets(cls_scores_list, kpt_preds_list,
+ gt_labels_list, gt_keypoints_list,
+ gt_areas_list, img_metas)
+ (labels_list, label_weights_list, kpt_targets_list, kpt_weights_list,
+ area_targets_list, num_total_pos, num_total_neg) = cls_reg_targets
+ labels = paddle.concat(labels_list, 0)
+ label_weights = paddle.concat(label_weights_list, 0)
+ kpt_targets = paddle.concat(kpt_targets_list, 0)
+ kpt_weights = paddle.concat(kpt_weights_list, 0)
+
+ # classification loss
+ cls_scores = cls_scores.reshape((-1, self.cls_out_channels))
+ # construct weighted avg_factor to match with the official DETR repo
+ cls_avg_factor = num_total_pos * 1.0 + \
+ num_total_neg * self.bg_cls_weight
+ if self.sync_cls_avg_factor:
+ cls_avg_factor = reduce_mean(
+ paddle.to_tensor(
+ [cls_avg_factor], dtype=cls_scores.dtype))
+ cls_avg_factor = max(cls_avg_factor, 1)
+
+ cls_avg_factor = max(cls_avg_factor, 1)
+ loss_cls = self.loss_cls(
+ cls_scores, labels, label_weights, avg_factor=cls_avg_factor)
+
+        # Compute the average number of gt keypoints across all gpus, for
+ # normalization purposes
+ # num_total_pos = loss_cls.to_tensor([num_total_pos])
+ # num_total_pos = paddle.clip(reduce_mean(num_total_pos), min=1).item()
+
+ # keypoint regression loss
+ kpt_preds = kpt_preds.reshape((-1, kpt_preds.shape[-1]))
+ num_valid_kpt = paddle.clip(
+ reduce_mean(kpt_weights.sum()), min=1).item()
+ # assert num_valid_kpt == (kpt_targets>0).sum().item()
+ loss_kpt = self.loss_kpt_rpn(
+ kpt_preds, kpt_targets, kpt_weights, avg_factor=num_valid_kpt)
+
+ return loss_cls, loss_kpt
+
+ def get_bboxes(self,
+ all_cls_scores,
+ all_kpt_preds,
+ enc_cls_scores,
+ enc_kpt_preds,
+ hm_proto,
+ memory,
+ mlvl_masks,
+ img_metas,
+ rescale=False):
+ """Transform network outputs for a batch into bbox predictions.
+
+ Args:
+ all_cls_scores (Tensor): Classification score of all
+ decoder layers, has shape
+ [nb_dec, bs, num_query, cls_out_channels].
+ all_kpt_preds (Tensor): Sigmoid regression
+ outputs of all decode layers. Each is a 4D-tensor with
+ normalized coordinate format (x_{i}, y_{i}) and shape
+ [nb_dec, bs, num_query, K*2].
+ enc_cls_scores (Tensor): Classification scores of points on
+ encode feature map, has shape (N, h*w, num_classes).
+ Only be passed when as_two_stage is True, otherwise is None.
+ enc_kpt_preds (Tensor): Regression results of each points
+ on the encode feature map, has shape (N, h*w, K*2). Only be
+ passed when as_two_stage is True, otherwise is None.
+ img_metas (list[dict]): Meta information of each image.
+ rescale (bool, optional): If True, return boxes in original
+                image space. Default False.
+
+ Returns:
+ list[list[Tensor, Tensor]]: Each item in result_list is 3-tuple.
+ The first item is an (n, 5) tensor, where the first 4 columns
+ are bounding box positions (tl_x, tl_y, br_x, br_y) and the
+ 5-th column is a score between 0 and 1. The second item is a
+ (n,) tensor where each item is the predicted class label of
+ the corresponding box. The third item is an (n, K, 3) tensor
+ with [p^{1}_x, p^{1}_y, p^{1}_v, ..., p^{K}_x, p^{K}_y,
+ p^{K}_v] format.
+ """
+ cls_scores = all_cls_scores[-1]
+ kpt_preds = all_kpt_preds[-1]
+
+ result_list = []
+ for img_id in range(len(img_metas)):
+ cls_score = cls_scores[img_id]
+ kpt_pred = kpt_preds[img_id]
+ img_shape = img_metas[img_id]['img_shape']
+ scale_factor = img_metas[img_id]['scale_factor']
+ # TODO: only support single image test
+ # memory_i = memory[:, img_id, :]
+ # mlvl_mask = mlvl_masks[img_id]
+ proposals = self._get_bboxes_single(cls_score, kpt_pred, img_shape,
+ scale_factor, memory,
+ mlvl_masks, rescale)
+ result_list.append(proposals)
+ return result_list
+
+ def _get_bboxes_single(self,
+ cls_score,
+ kpt_pred,
+ img_shape,
+ scale_factor,
+ memory,
+ mlvl_masks,
+ rescale=False):
+ """Transform outputs from the last decoder layer into bbox predictions
+ for each image.
+
+ Args:
+ cls_score (Tensor): Box score logits from the last decoder layer
+ for each image. Shape [num_query, cls_out_channels].
+ kpt_pred (Tensor): Sigmoid outputs from the last decoder layer
+ for each image, with coordinate format (x_{i}, y_{i}) and
+ shape [num_query, K*2].
+ img_shape (tuple[int]): Shape of input image, (height, width, 3).
+            scale_factor (ndarray, optional): Scale factor of the image arranged
+ as (w_scale, h_scale, w_scale, h_scale).
+ rescale (bool, optional): If True, return boxes in original image
+ space. Default False.
+
+ Returns:
+ tuple[Tensor]: Results of detected bboxes and labels.
+
+ - det_bboxes: Predicted bboxes with shape [num_query, 5],
+ where the first 4 columns are bounding box positions
+ (tl_x, tl_y, br_x, br_y) and the 5-th column are scores
+ between 0 and 1.
+ - det_labels: Predicted labels of the corresponding box with
+ shape [num_query].
+ - det_kpts: Predicted keypoints with shape [num_query, K, 3].
+ """
+ assert len(cls_score) == len(kpt_pred)
+ max_per_img = self.test_cfg.get('max_per_img', self.num_query)
+ # exclude background
+ if self.loss_cls.use_sigmoid:
+ cls_score = F.sigmoid(cls_score)
+ scores, indexs = cls_score.reshape([-1]).topk(max_per_img)
+ det_labels = indexs % self.num_classes
+ bbox_index = indexs // self.num_classes
+ kpt_pred = kpt_pred[bbox_index]
+ else:
+ scores, det_labels = F.softmax(cls_score, axis=-1)[..., :-1].max(-1)
+ scores, bbox_index = scores.topk(max_per_img)
+ kpt_pred = kpt_pred[bbox_index]
+ det_labels = det_labels[bbox_index]
+
+ # ----- results after pose decoder -----
+ # det_kpts = kpt_pred.reshape((kpt_pred.shape[0], -1, 2))
+
+ # ----- results after joint decoder (default) -----
+ # import time
+ # start = time.time()
+ refine_targets = (kpt_pred, None, None, paddle.ones_like(kpt_pred))
+ refine_outputs = self.forward_refine(memory, mlvl_masks, refine_targets,
+ None, None)
+ # end = time.time()
+ # print(f'refine time: {end - start:.6f}')
+ det_kpts = refine_outputs[-1]
+
+ det_kpts[..., 0] = det_kpts[..., 0] * img_shape[1]
+ det_kpts[..., 1] = det_kpts[..., 1] * img_shape[0]
+ det_kpts[..., 0].clip_(min=0, max=img_shape[1])
+ det_kpts[..., 1].clip_(min=0, max=img_shape[0])
+ if rescale:
+ det_kpts /= paddle.to_tensor(
+ scale_factor[:2],
+ dtype=det_kpts.dtype).unsqueeze(0).unsqueeze(0)
+
+ # use circumscribed rectangle box of keypoints as det bboxes
+ x1 = det_kpts[..., 0].min(axis=1, keepdim=True)
+ y1 = det_kpts[..., 1].min(axis=1, keepdim=True)
+ x2 = det_kpts[..., 0].max(axis=1, keepdim=True)
+ y2 = det_kpts[..., 1].max(axis=1, keepdim=True)
+ det_bboxes = paddle.concat([x1, y1, x2, y2], axis=1)
+ det_bboxes = paddle.concat((det_bboxes, scores.unsqueeze(1)), -1)
+
+ det_kpts = paddle.concat(
+ (det_kpts, paddle.ones(
+ det_kpts[..., :1].shape, dtype=det_kpts.dtype)),
+ axis=2)
+
+ return det_bboxes, det_labels, det_kpts
+
+ def simple_test(self, feats, img_metas, rescale=False):
+ """Test det bboxes without test-time augmentation.
+
+ Args:
+ feats (tuple[paddle.Tensor]): Multi-level features from the
+ upstream network, each is a 4D-tensor.
+ img_metas (list[dict]): List of image information.
+ rescale (bool, optional): Whether to rescale the results.
+ Defaults to False.
+
+ Returns:
+ list[tuple[Tensor, Tensor, Tensor]]: Each item in result_list is
+ 3-tuple. The first item is ``bboxes`` with shape (n, 5),
+ where 5 represent (tl_x, tl_y, br_x, br_y, score).
+ The shape of the second tensor in the tuple is ``labels``
+ with shape (n,). The third item is ``kpts`` with shape
+                (n, K, 3), in [p^{1}_x, p^{1}_y, p^{1}_v, ..., p^{K}_x, p^{K}_y,
+ p^{K}_v] format.
+ """
+ # forward of this head requires img_metas
+ outs = self.forward(feats, img_metas)
+ results_list = self.get_bboxes(*outs, img_metas, rescale=rescale)
+ return results_list
+
+ def get_loss(self, boxes, scores, gt_bbox, gt_class, prior_boxes):
+ return self.loss(boxes, scores, gt_bbox, gt_class, prior_boxes)
diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py
index 388be3cc9b9..16368e81e62 100644
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -1135,7 +1135,7 @@ def _convert_attention_mask(attn_mask, dtype):
"""
return nn.layer.transformer._convert_attention_mask(attn_mask, dtype)
-
+@register
class MultiHeadAttention(nn.Layer):
"""
Attention mapps queries and a set of key-value pairs to outputs, and
diff --git a/ppdet/modeling/losses/focal_loss.py b/ppdet/modeling/losses/focal_loss.py
index 083e1dd3dbd..b9a64e1bc22 100644
--- a/ppdet/modeling/losses/focal_loss.py
+++ b/ppdet/modeling/losses/focal_loss.py
@@ -21,7 +21,7 @@
import paddle.nn as nn
from ppdet.core.workspace import register
-__all__ = ['FocalLoss']
+__all__ = ['FocalLoss', 'Weighted_FocalLoss']
@register
class FocalLoss(nn.Layer):
@@ -59,3 +59,80 @@ def forward(self, pred, target, reduction='none'):
pred, target, alpha=self.alpha, gamma=self.gamma,
reduction=reduction)
return loss * self.loss_weight
+
+
+@register
+class Weighted_FocalLoss(FocalLoss):
+ """A wrapper around paddle.nn.functional.sigmoid_focal_loss.
+ Args:
+ use_sigmoid (bool): currently only support use_sigmoid=True
+ alpha (float): parameter alpha in Focal Loss
+ gamma (float): parameter gamma in Focal Loss
+ loss_weight (float): final loss will be multiplied by this
+ """
+ def __init__(self,
+ use_sigmoid=True,
+ alpha=0.25,
+ gamma=2.0,
+ loss_weight=1.0,
+ reduction="mean"):
+ super(FocalLoss, self).__init__()
+ assert use_sigmoid == True, \
+ 'Focal Loss only supports sigmoid at the moment'
+ self.use_sigmoid = use_sigmoid
+ self.alpha = alpha
+ self.gamma = gamma
+ self.loss_weight = loss_weight
+ self.reduction = reduction
+
+ def forward(self, pred, target, weight=None, avg_factor=None, reduction_override=None):
+ """forward function.
+ Args:
+ pred (Tensor): logits of class prediction, of shape (N, num_classes)
+ target (Tensor): target class label, of shape (N, )
+ reduction (str): the way to reduce loss, one of (none, sum, mean)
+ """
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ num_classes = pred.shape[1]
+ target = F.one_hot(target, num_classes + 1).astype(pred.dtype)
+ target = target[:, :-1].detach()
+ loss = F.sigmoid_focal_loss(
+ pred, target, alpha=self.alpha, gamma=self.gamma,
+ reduction='none')
+
+ if weight is not None:
+ if weight.shape != loss.shape:
+ if weight.shape[0] == loss.shape[0]:
+ # For most cases, weight is of shape (num_priors, ),
+ # which means it does not have the second axis num_class
+ weight = weight.reshape((-1, 1))
+ else:
+ # Sometimes, weight per anchor per class is also needed. e.g.
+ # in FSAF. But it may be flattened of shape
+ # (num_priors x num_class, ), while loss is still of shape
+ # (num_priors, num_class).
+ assert weight.numel() == loss.numel()
+ weight = weight.reshape((loss.shape[0], -1))
+ assert weight.ndim == loss.ndim
+ loss = loss * weight
+
+ # if avg_factor is not specified, just reduce the loss
+ if avg_factor is None:
+ if reduction == 'mean':
+ loss = loss.mean()
+ elif reduction == 'sum':
+ loss = loss.sum()
+ else:
+ # if reduction is mean, then average the loss by avg_factor
+ if reduction == 'mean':
+ # Avoid causing ZeroDivisionError when avg_factor is 0.0,
+ # i.e., all labels of an image belong to ignore index.
+ eps = 1e-10
+ loss = loss.sum() / (avg_factor + eps)
+ # if reduction is 'none', then do nothing, otherwise raise an error
+ elif reduction != 'none':
+ raise ValueError('avg_factor can not be used with reduction="sum"')
+
+ return loss * self.loss_weight
diff --git a/ppdet/modeling/losses/keypoint_loss.py b/ppdet/modeling/losses/keypoint_loss.py
index 9c3c113db36..37a24102a85 100644
--- a/ppdet/modeling/losses/keypoint_loss.py
+++ b/ppdet/modeling/losses/keypoint_loss.py
@@ -18,12 +18,13 @@
from itertools import cycle, islice
from collections import abc
+import numpy as np
import paddle
import paddle.nn as nn
from ppdet.core.workspace import register, serializable
-__all__ = ['HrHRNetLoss', 'KeyPointMSELoss']
+__all__ = ['HrHRNetLoss', 'KeyPointMSELoss', 'OKSLoss', 'CenterFocalLoss', 'L1Loss']
@register
@@ -226,3 +227,406 @@ def recursive_sum(inputs):
if isinstance(inputs, abc.Sequence):
return sum([recursive_sum(x) for x in inputs])
return inputs
+
+
+def oks_overlaps(kpt_preds, kpt_gts, kpt_valids, kpt_areas, sigmas):
+ if not kpt_gts.astype('bool').any():
+ return kpt_preds.sum()*0
+
+ sigmas = paddle.to_tensor(sigmas, dtype=kpt_preds.dtype)
+ variances = (sigmas * 2)**2
+
+ assert kpt_preds.shape[0] == kpt_gts.shape[0]
+ kpt_preds = kpt_preds.reshape((-1, kpt_preds.shape[-1] // 2, 2))
+ kpt_gts = kpt_gts.reshape((-1, kpt_gts.shape[-1] // 2, 2))
+
+ squared_distance = (kpt_preds[:, :, 0] - kpt_gts[:, :, 0]) ** 2 + \
+ (kpt_preds[:, :, 1] - kpt_gts[:, :, 1]) ** 2
+ assert (kpt_valids.sum(-1) > 0).all()
+ squared_distance0 = squared_distance / (
+ kpt_areas[:, None] * variances[None, :] * 2)
+ squared_distance1 = paddle.exp(-squared_distance0)
+ squared_distance1 = squared_distance1 * kpt_valids
+ oks = squared_distance1.sum(axis=1) / kpt_valids.sum(axis=1)
+
+ return oks
+
+
+def oks_loss(pred,
+ target,
+ weight,
+ valid=None,
+ area=None,
+ linear=False,
+ sigmas=None,
+ eps=1e-6,
+ avg_factor=None,
+ reduction=None):
+ """Oks loss.
+
+ Computing the oks loss between a set of predicted poses and target poses.
+ The loss is calculated as negative log of oks.
+
+ Args:
+ pred (Tensor): Predicted poses of format (x1, y1, x2, y2, ...),
+ shape (n, K*2).
+ target (Tensor): Corresponding gt poses, shape (n, K*2).
+ linear (bool, optional): If True, use linear scale of loss instead of
+ log scale. Default: False.
+ eps (float): Eps to avoid log(0).
+
+ Returns:
+ Tensor: Loss tensor.
+ """
+ oks = oks_overlaps(pred, target, valid, area, sigmas).clip(min=eps)
+ if linear:
+ loss = 1 - oks
+ else:
+ loss = -oks.log()
+
+ if weight is not None:
+ if weight.shape != loss.shape:
+ if weight.shape[0] == loss.shape[0]:
+ # For most cases, weight is of shape (num_priors, ),
+ # which means it does not have the second axis num_class
+ weight = weight.reshape((-1, 1))
+ else:
+ # Sometimes, weight per anchor per class is also needed. e.g.
+ # in FSAF. But it may be flattened of shape
+ # (num_priors x num_class, ), while loss is still of shape
+ # (num_priors, num_class).
+ assert weight.numel() == loss.numel()
+ weight = weight.reshape((loss.shape[0], -1))
+ assert weight.ndim == loss.ndim
+ loss = loss * weight
+
+ # if avg_factor is not specified, just reduce the loss
+ if avg_factor is None:
+ if reduction == 'mean':
+ loss = loss.mean()
+ elif reduction == 'sum':
+ loss = loss.sum()
+ else:
+ # if reduction is mean, then average the loss by avg_factor
+ if reduction == 'mean':
+ # Avoid causing ZeroDivisionError when avg_factor is 0.0,
+ # i.e., all labels of an image belong to ignore index.
+ eps = 1e-10
+ loss = loss.sum() / (avg_factor + eps)
+ # if reduction is 'none', then do nothing, otherwise raise an error
+ elif reduction != 'none':
+ raise ValueError('avg_factor can not be used with reduction="sum"')
+
+
+ return loss
+
+@register
+@serializable
+class OKSLoss(nn.Layer):
+ """OKSLoss.
+
+ Computing the oks loss between a set of predicted poses and target poses.
+
+ Args:
+ linear (bool): If True, use linear scale of loss instead of log scale.
+ Default: False.
+ eps (float): Eps to avoid log(0).
+ reduction (str): Options are "none", "mean" and "sum".
+ loss_weight (float): Weight of loss.
+ """
+
+ def __init__(self,
+ linear=False,
+ num_keypoints=17,
+ eps=1e-6,
+ reduction='mean',
+ loss_weight=1.0):
+ super(OKSLoss, self).__init__()
+ self.linear = linear
+ self.eps = eps
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+ if num_keypoints == 17:
+ self.sigmas = np.array([
+ .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
+ 1.07, .87, .87, .89, .89
+ ], dtype=np.float32) / 10.0
+ elif num_keypoints == 14:
+ self.sigmas = np.array([
+ .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89,
+ .79, .79
+ ]) / 10.0
+ else:
+ raise ValueError(f'Unsupported keypoints number {num_keypoints}')
+
+ def forward(self,
+ pred,
+ target,
+ valid,
+ area,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None,
+ **kwargs):
+ """Forward function.
+
+ Args:
+ pred (Tensor): The prediction.
+ target (Tensor): The learning target of the prediction.
+ valid (Tensor): The visible flag of the target pose.
+ area (Tensor): The area of the target pose.
+ weight (Tensor, optional): The weight of loss for each
+ prediction. Defaults to None.
+ avg_factor (int, optional): Average factor that is used to average
+ the loss. Defaults to None.
+ reduction_override (str, optional): The reduction method used to
+ override the original reduction method of the loss.
+ Defaults to None. Options are "none", "mean" and "sum".
+ """
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ if (weight is not None) and (not paddle.any(weight > 0)) and (
+ reduction != 'none'):
+ if pred.dim() == weight.dim() + 1:
+ weight = weight.unsqueeze(1)
+ return (pred * weight).sum() # 0
+ if weight is not None and weight.dim() > 1:
+ # TODO: remove this in the future
+ # reduce the weight of shape (n, 4) to (n,) to match the
+ # iou_loss of shape (n,)
+ assert weight.shape == pred.shape
+ weight = weight.mean(-1)
+ loss = self.loss_weight * oks_loss(
+ pred,
+ target,
+ weight,
+ valid=valid,
+ area=area,
+ linear=self.linear,
+ sigmas=self.sigmas,
+ eps=self.eps,
+ reduction=reduction,
+ avg_factor=avg_factor,
+ **kwargs)
+ return loss
+
+
+def center_focal_loss(pred, gt, weight=None, mask=None, avg_factor=None, reduction=None):
+ """Modified focal loss. Exactly the same as CornerNet.
+ Runs faster and costs a little bit more memory.
+
+ Args:
+ pred (Tensor): The prediction with shape [bs, c, h, w].
+ gt (Tensor): The learning target of the prediction in gaussian
+ distribution, with shape [bs, c, h, w].
+ mask (Tensor): The valid mask. Defaults to None.
+ """
+ if not gt.astype('bool').any():
+ return pred.sum()*0
+ pos_inds = gt.equal(1).astype('float32')
+ if mask is None:
+ neg_inds = gt.less_than(paddle.to_tensor([1], dtype='float32')).astype('float32')
+ else:
+ neg_inds = gt.less_than(paddle.to_tensor([1], dtype='float32')).astype('float32') * mask.equal(0).astype('float32')
+
+ neg_weights = paddle.pow(1 - gt, 4)
+
+ loss = 0
+
+ pos_loss = paddle.log(pred) * paddle.pow(1 - pred, 2) * pos_inds
+ neg_loss = paddle.log(1 - pred) * paddle.pow(pred, 2) * neg_weights * \
+ neg_inds
+
+ num_pos = pos_inds.astype('float32').sum()
+ pos_loss = pos_loss.sum()
+ neg_loss = neg_loss.sum()
+
+ if num_pos == 0:
+ loss = loss - neg_loss
+ else:
+ loss = loss - (pos_loss + neg_loss) / num_pos
+
+ if weight is not None:
+ if weight.shape != loss.shape:
+ if weight.shape[0] == loss.shape[0]:
+ # For most cases, weight is of shape (num_priors, ),
+ # which means it does not have the second axis num_class
+ weight = weight.reshape((-1, 1))
+ else:
+ # Sometimes, weight per anchor per class is also needed. e.g.
+ # in FSAF. But it may be flattened of shape
+ # (num_priors x num_class, ), while loss is still of shape
+ # (num_priors, num_class).
+ assert weight.numel() == loss.numel()
+ weight = weight.reshape((loss.shape[0], -1))
+ assert weight.ndim == loss.ndim
+ loss = loss * weight
+
+ # if avg_factor is not specified, just reduce the loss
+ if avg_factor is None:
+ if reduction == 'mean':
+ loss = loss.mean()
+ elif reduction == 'sum':
+ loss = loss.sum()
+ else:
+ # if reduction is mean, then average the loss by avg_factor
+ if reduction == 'mean':
+ # Avoid causing ZeroDivisionError when avg_factor is 0.0,
+ # i.e., all labels of an image belong to ignore index.
+ eps = 1e-10
+ loss = loss.sum() / (avg_factor + eps)
+ # if reduction is 'none', then do nothing, otherwise raise an error
+ elif reduction != 'none':
+ raise ValueError('avg_factor can not be used with reduction="sum"')
+
+ return loss
+
+@register
+@serializable
+class CenterFocalLoss(nn.Layer):
+ """CenterFocalLoss is a variant of focal loss.
+
+    More details can be found in the CornerNet paper:
+    https://arxiv.org/abs/1808.01244
+
+ Args:
+ reduction (str): Options are "none", "mean" and "sum".
+ loss_weight (float): Loss weight of current loss.
+ """
+
+ def __init__(self,
+ reduction='none',
+ loss_weight=1.0):
+ super(CenterFocalLoss, self).__init__()
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ mask=None,
+ avg_factor=None,
+ reduction_override=None):
+ """Forward function.
+
+ Args:
+ pred (Tensor): The prediction.
+ target (Tensor): The learning target of the prediction in gaussian
+ distribution.
+ weight (Tensor, optional): The weight of loss for each
+ prediction. Defaults to None.
+ mask (Tensor): The valid mask. Defaults to None.
+ avg_factor (int, optional): Average factor that is used to average
+ the loss. Defaults to None.
+ reduction_override (str, optional): The reduction method used to
+ override the original reduction method of the loss.
+ Defaults to None.
+ """
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss_reg = self.loss_weight * center_focal_loss(
+ pred,
+ target,
+ weight,
+ mask=mask,
+ reduction=reduction,
+ avg_factor=avg_factor)
+ return loss_reg
+
+def l1_loss(pred, target, weight=None, reduction='mean', avg_factor=None):
+ """L1 loss.
+
+ Args:
+ pred (Tensor): The prediction.
+ target (Tensor): The learning target of the prediction.
+
+ Returns:
+ Tensor: Calculated loss
+ """
+ if not target.astype('bool').any():
+ return pred.sum() * 0
+
+ assert pred.shape == target.shape
+ loss = paddle.abs(pred - target)
+
+ if weight is not None:
+ if weight.shape != loss.shape:
+ if weight.shape[0] == loss.shape[0]:
+ # For most cases, weight is of shape (num_priors, ),
+ # which means it does not have the second axis num_class
+ weight = weight.reshape((-1, 1))
+ else:
+ # Sometimes, weight per anchor per class is also needed. e.g.
+ # in FSAF. But it may be flattened of shape
+ # (num_priors x num_class, ), while loss is still of shape
+ # (num_priors, num_class).
+ assert weight.numel() == loss.numel()
+ weight = weight.reshape((loss.shape[0], -1))
+ assert weight.ndim == loss.ndim
+ loss = loss * weight
+
+ # if avg_factor is not specified, just reduce the loss
+ if avg_factor is None:
+ if reduction == 'mean':
+ loss = loss.mean()
+ elif reduction == 'sum':
+ loss = loss.sum()
+ else:
+ # if reduction is mean, then average the loss by avg_factor
+ if reduction == 'mean':
+ # Avoid causing ZeroDivisionError when avg_factor is 0.0,
+ # i.e., all labels of an image belong to ignore index.
+ eps = 1e-10
+ loss = loss.sum() / (avg_factor + eps)
+ # if reduction is 'none', then do nothing, otherwise raise an error
+ elif reduction != 'none':
+ raise ValueError('avg_factor can not be used with reduction="sum"')
+
+
+ return loss
+
+@register
+@serializable
+class L1Loss(nn.Layer):
+ """L1 loss.
+
+ Args:
+ reduction (str, optional): The method to reduce the loss.
+ Options are "none", "mean" and "sum".
+ loss_weight (float, optional): The weight of loss.
+ """
+
+ def __init__(self, reduction='mean', loss_weight=1.0):
+ super(L1Loss, self).__init__()
+ self.reduction = reduction
+ self.loss_weight = loss_weight
+
+ def forward(self,
+ pred,
+ target,
+ weight=None,
+ avg_factor=None,
+ reduction_override=None):
+ """Forward function.
+
+ Args:
+ pred (Tensor): The prediction.
+ target (Tensor): The learning target of the prediction.
+ weight (Tensor, optional): The weight of loss for each
+ prediction. Defaults to None.
+ avg_factor (int, optional): Average factor that is used to average
+ the loss. Defaults to None.
+ reduction_override (str, optional): The reduction method used to
+ override the original reduction method of the loss.
+ Defaults to None.
+ """
+ assert reduction_override in (None, 'none', 'mean', 'sum')
+ reduction = (
+ reduction_override if reduction_override else self.reduction)
+ loss_bbox = self.loss_weight * l1_loss(
+ pred, target, weight, reduction=reduction, avg_factor=avg_factor)
+ return loss_bbox
+
diff --git a/ppdet/modeling/necks/__init__.py b/ppdet/modeling/necks/__init__.py
index 51d367b27d4..478efec98e3 100644
--- a/ppdet/modeling/necks/__init__.py
+++ b/ppdet/modeling/necks/__init__.py
@@ -36,3 +36,4 @@
from .lc_pan import *
from .custom_pan import *
from .dilated_encoder import *
+from .channel_mapper import *
diff --git a/ppdet/modeling/necks/channel_mapper.py b/ppdet/modeling/necks/channel_mapper.py
new file mode 100644
index 00000000000..6eff3f85476
--- /dev/null
+++ b/ppdet/modeling/necks/channel_mapper.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+this code is based on mmdet: git@github.com:open-mmlab/mmdetection.git
+"""
+import paddle.nn as nn
+
+from ppdet.core.workspace import register, serializable
+from ..backbones.hrnet import ConvNormLayer
+from ..shape_spec import ShapeSpec
+from ..initializer import xavier_uniform_, constant_
+
+__all__ = ['ChannelMapper']
+
+
+@register
+@serializable
+class ChannelMapper(nn.Layer):
+ """Channel Mapper to reduce/increase channels of backbone features.
+
+ This is used to reduce/increase channels of backbone features.
+
+ Args:
+ in_channels (List[int]): Number of input channels per scale.
+ out_channels (int): Number of output channels (used at each scale).
+ kernel_size (int, optional): kernel_size for reducing channels (used
+ at each scale). Default: 3.
+ conv_cfg (dict, optional): Config dict for convolution layer.
+ Default: None.
+ norm_cfg (dict, optional): Config dict for normalization layer.
+ Default: None.
+ act_cfg (dict, optional): Config dict for activation layer in
+ ConvModule. Default: dict(type='ReLU').
+ num_outs (int, optional): Number of output feature maps. There
+ would be extra_convs when num_outs larger than the length
+ of in_channels.
+ init_cfg (dict or list[dict], optional): Initialization config dict.
+
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ norm_type="gn",
+ norm_groups=32,
+ act='relu',
+ num_outs=None,
+ init_cfg=dict(
+ type='Xavier', layer='Conv2d', distribution='uniform')):
+ super(ChannelMapper, self).__init__()
+ assert isinstance(in_channels, list)
+ self.extra_convs = None
+ if num_outs is None:
+ num_outs = len(in_channels)
+ self.convs = nn.LayerList()
+ for in_channel in in_channels:
+ self.convs.append(
+ ConvNormLayer(
+ ch_in=in_channel,
+ ch_out=out_channels,
+ filter_size=kernel_size,
+ norm_type='gn',
+ norm_groups=32,
+ act=act))
+
+ if num_outs > len(in_channels):
+ self.extra_convs = nn.LayerList()
+ for i in range(len(in_channels), num_outs):
+ if i == len(in_channels):
+ in_channel = in_channels[-1]
+ else:
+ in_channel = out_channels
+ self.extra_convs.append(
+ ConvNormLayer(
+ ch_in=in_channel,
+ ch_out=out_channels,
+ filter_size=3,
+ stride=2,
+ norm_type='gn',
+ norm_groups=32,
+ act=act))
+ self.init_weights()
+
+ def forward(self, inputs):
+ """Forward function."""
+ assert len(inputs) == len(self.convs)
+ outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]
+ if self.extra_convs:
+ for i in range(len(self.extra_convs)):
+ if i == 0:
+ outs.append(self.extra_convs[0](inputs[-1]))
+ else:
+ outs.append(self.extra_convs[i](outs[-1]))
+ return tuple(outs)
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=self.out_channel, stride=1. / s)
+ for s in self.spatial_scales
+ ]
+
+ def init_weights(self):
+ """Initialize the transformer weights."""
+ for p in self.parameters():
+ if p.rank() > 1:
+ xavier_uniform_(p)
+ if hasattr(p, 'bias') and p.bias is not None:
+                constant_(p.bias)
diff --git a/ppdet/modeling/transformers/__init__.py b/ppdet/modeling/transformers/__init__.py
index 9be26fc3463..e55cb0c1de9 100644
--- a/ppdet/modeling/transformers/__init__.py
+++ b/ppdet/modeling/transformers/__init__.py
@@ -25,3 +25,4 @@
from .position_encoding import *
from .deformable_transformer import *
from .dino_transformer import *
+from .petr_transformer import *
diff --git a/ppdet/modeling/transformers/petr_transformer.py b/ppdet/modeling/transformers/petr_transformer.py
new file mode 100644
index 00000000000..7859b0df028
--- /dev/null
+++ b/ppdet/modeling/transformers/petr_transformer.py
@@ -0,0 +1,1198 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+this code is base on https://github.com/hikvision-research/opera/blob/main/opera/models/utils/transformer.py
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+from ppdet.core.workspace import register
+from ..layers import MultiHeadAttention, _convert_attention_mask
+from .utils import _get_clones
+from ..initializer import linear_init_, normal_, constant_, xavier_uniform_
+
+__all__ = [
+ 'PETRTransformer', 'MultiScaleDeformablePoseAttention',
+ 'PETR_TransformerDecoderLayer', 'PETR_TransformerDecoder',
+ 'PETR_DeformableDetrTransformerDecoder',
+ 'PETR_DeformableTransformerDecoder', 'TransformerEncoderLayer',
+ 'TransformerEncoder', 'MSDeformableAttention'
+]
+
+
+def masked_fill(x, mask, value):
+ y = paddle.full(x.shape, value, x.dtype)
+ return paddle.where(mask, y, x)
+
+
+def inverse_sigmoid(x, eps=1e-5):
+ """Inverse function of sigmoid.
+
+ Args:
+ x (Tensor): The tensor to do the
+ inverse.
+ eps (float): EPS avoid numerical
+ overflow. Defaults 1e-5.
+ Returns:
+ Tensor: The x has passed the inverse
+ function of sigmoid, has same
+ shape with input.
+ """
+ x = x.clip(min=0, max=1)
+ x1 = x.clip(min=eps)
+ x2 = (1 - x).clip(min=eps)
+ return paddle.log(x1 / x2)
+
+
+@register
+class TransformerEncoderLayer(nn.Layer):
+    """One transformer encoder layer: self-attention followed by a feed-forward
+    block, with residual connections and LayerNorm.
+
+    Supports both pre-norm (normalize_before=True) and post-norm residual
+    arrangements. The attention module can be injected via config (`attn`);
+    when None, a standard MultiHeadAttention is built from d_model/nhead.
+    """
+    __inject__ = ['attn']
+
+    def __init__(self,
+                 d_model,
+                 attn=None,
+                 nhead=8,
+                 dim_feedforward=2048,
+                 dropout=0.1,
+                 activation="relu",
+                 attn_dropout=None,
+                 act_dropout=None,
+                 normalize_before=False):
+        super(TransformerEncoderLayer, self).__init__()
+        # attention/activation dropout rates fall back to the shared rate
+        attn_dropout = dropout if attn_dropout is None else attn_dropout
+        act_dropout = dropout if act_dropout is None else act_dropout
+        self.normalize_before = normalize_before
+        self.embed_dims = d_model
+
+        if attn is None:
+            self.self_attn = MultiHeadAttention(d_model, nhead, attn_dropout)
+        else:
+            self.self_attn = attn
+        # Implementation of Feedforward model
+        self.linear1 = nn.Linear(d_model, dim_feedforward)
+        self.dropout = nn.Dropout(act_dropout, mode="upscale_in_train")
+        self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+        self.norm1 = nn.LayerNorm(d_model)
+        self.norm2 = nn.LayerNorm(d_model)
+        self.dropout1 = nn.Dropout(dropout, mode="upscale_in_train")
+        self.dropout2 = nn.Dropout(dropout, mode="upscale_in_train")
+        self.activation = getattr(F, activation)
+        self._reset_parameters()
+
+    def _reset_parameters(self):
+        linear_init_(self.linear1)
+        linear_init_(self.linear2)
+
+    @staticmethod
+    def with_pos_embed(tensor, pos_embed):
+        # positional embedding is additive; None means "no positions"
+        return tensor if pos_embed is None else tensor + pos_embed
+
+    def forward(self, src, src_mask=None, pos_embed=None, **kwargs):
+        """Run one encoder layer over `src`; query/key carry the positional
+        embedding while the value does not."""
+        residual = src
+        if self.normalize_before:
+            src = self.norm1(src)
+        q = k = self.with_pos_embed(src, pos_embed)
+        src = self.self_attn(q, k, value=src, attn_mask=src_mask, **kwargs)
+
+        src = residual + self.dropout1(src)
+        if not self.normalize_before:
+            src = self.norm1(src)
+
+        residual = src
+        if self.normalize_before:
+            src = self.norm2(src)
+        src = self.linear2(self.dropout(self.activation(self.linear1(src))))
+        src = residual + self.dropout2(src)
+        if not self.normalize_before:
+            src = self.norm2(src)
+        return src
+
+
+@register
+class TransformerEncoder(nn.Layer):
+ __inject__ = ['encoder_layer']
+
+ def __init__(self, encoder_layer, num_layers, norm=None):
+ super(TransformerEncoder, self).__init__()
+ self.layers = _get_clones(encoder_layer, num_layers)
+ self.num_layers = num_layers
+ self.norm = norm
+ self.embed_dims = encoder_layer.embed_dims
+
+ def forward(self, src, src_mask=None, pos_embed=None, **kwargs):
+ output = src
+ for layer in self.layers:
+ output = layer(
+ output, src_mask=src_mask, pos_embed=pos_embed, **kwargs)
+
+ if self.norm is not None:
+ output = self.norm(output)
+
+ return output
+
+
+@register
+class MSDeformableAttention(nn.Layer):
+ def __init__(self,
+ embed_dim=256,
+ num_heads=8,
+ num_levels=4,
+ num_points=4,
+ lr_mult=0.1):
+ """
+ Multi-Scale Deformable Attention Module
+ """
+ super(MSDeformableAttention, self).__init__()
+ self.embed_dim = embed_dim
+ self.num_heads = num_heads
+ self.num_levels = num_levels
+ self.num_points = num_points
+ self.total_points = num_heads * num_levels * num_points
+
+ self.head_dim = embed_dim // num_heads
+ assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
+
+ self.sampling_offsets = nn.Linear(
+ embed_dim,
+ self.total_points * 2,
+ weight_attr=ParamAttr(learning_rate=lr_mult),
+ bias_attr=ParamAttr(learning_rate=lr_mult))
+
+ self.attention_weights = nn.Linear(embed_dim, self.total_points)
+ self.value_proj = nn.Linear(embed_dim, embed_dim)
+ self.output_proj = nn.Linear(embed_dim, embed_dim)
+ try:
+ # use cuda op
+ print("use deformable_detr_ops in ms_deformable_attn")
+ from deformable_detr_ops import ms_deformable_attn
+ except:
+ # use paddle func
+ from .utils import deformable_attention_core_func as ms_deformable_attn
+ self.ms_deformable_attn_core = ms_deformable_attn
+
+ self._reset_parameters()
+
+ def _reset_parameters(self):
+ # sampling_offsets
+ constant_(self.sampling_offsets.weight)
+ thetas = paddle.arange(
+ self.num_heads,
+ dtype=paddle.float32) * (2.0 * math.pi / self.num_heads)
+ grid_init = paddle.stack([thetas.cos(), thetas.sin()], -1)
+ grid_init = grid_init / grid_init.abs().max(-1, keepdim=True)
+ grid_init = grid_init.reshape([self.num_heads, 1, 1, 2]).tile(
+ [1, self.num_levels, self.num_points, 1])
+ scaling = paddle.arange(
+ 1, self.num_points + 1,
+ dtype=paddle.float32).reshape([1, 1, -1, 1])
+ grid_init *= scaling
+ self.sampling_offsets.bias.set_value(grid_init.flatten())
+ # attention_weights
+ constant_(self.attention_weights.weight)
+ constant_(self.attention_weights.bias)
+ # proj
+ xavier_uniform_(self.value_proj.weight)
+ constant_(self.value_proj.bias)
+ xavier_uniform_(self.output_proj.weight)
+ constant_(self.output_proj.bias)
+
+ def forward(self,
+ query,
+ key,
+ value,
+ reference_points,
+ value_spatial_shapes,
+ value_level_start_index,
+ attn_mask=None,
+ **kwargs):
+ """
+ Args:
+ query (Tensor): [bs, query_length, C]
+ reference_points (Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
+ bottom-right (1, 1), including padding area
+ value (Tensor): [bs, value_length, C]
+ value_spatial_shapes (Tensor): [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})]
+ value_level_start_index (Tensor(int64)): [n_levels], [0, H_0*W_0, H_0*W_0+H_1*W_1, ...]
+ attn_mask (Tensor): [bs, value_length], True for non-padding elements, False for padding elements
+
+ Returns:
+ output (Tensor): [bs, Length_{query}, C]
+ """
+ bs, Len_q = query.shape[:2]
+ Len_v = value.shape[1]
+ assert int(value_spatial_shapes.prod(1).sum()) == Len_v
+
+ value = self.value_proj(value)
+ if attn_mask is not None:
+ attn_mask = attn_mask.astype(value.dtype).unsqueeze(-1)
+ value *= attn_mask
+ value = value.reshape([bs, Len_v, self.num_heads, self.head_dim])
+
+ sampling_offsets = self.sampling_offsets(query).reshape(
+ [bs, Len_q, self.num_heads, self.num_levels, self.num_points, 2])
+ attention_weights = self.attention_weights(query).reshape(
+ [bs, Len_q, self.num_heads, self.num_levels * self.num_points])
+ attention_weights = F.softmax(attention_weights).reshape(
+ [bs, Len_q, self.num_heads, self.num_levels, self.num_points])
+
+ if reference_points.shape[-1] == 2:
+ offset_normalizer = value_spatial_shapes.flip([1]).reshape(
+ [1, 1, 1, self.num_levels, 1, 2])
+ sampling_locations = reference_points.reshape([
+ bs, Len_q, 1, self.num_levels, 1, 2
+ ]) + sampling_offsets / offset_normalizer
+ elif reference_points.shape[-1] == 4:
+ sampling_locations = (
+ reference_points[:, :, None, :, None, :2] + sampling_offsets /
+ self.num_points * reference_points[:, :, None, :, None, 2:] *
+ 0.5)
+ else:
+ raise ValueError(
+ "Last dim of reference_points must be 2 or 4, but get {} instead.".
+ format(reference_points.shape[-1]))
+
+ output = self.ms_deformable_attn_core(
+ value, value_spatial_shapes, value_level_start_index,
+ sampling_locations, attention_weights)
+ output = self.output_proj(output)
+
+ return output
+
+
+@register
+class MultiScaleDeformablePoseAttention(nn.Layer):
+ """An attention module used in PETR. `End-to-End Multi-Person
+ Pose Estimation with Transformers`.
+
+ Args:
+ embed_dims (int): The embedding dimension of Attention.
+ Default: 256.
+ num_heads (int): Parallel attention heads. Default: 8.
+ num_levels (int): The number of feature map used in
+ Attention. Default: 4.
+ num_points (int): The number of sampling points for
+ each query in each head. Default: 17.
+ im2col_step (int): The step used in image_to_column.
+ Default: 64.
+ dropout (float): A Dropout layer on `inp_residual`.
+ Default: 0.1.
+ init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+ Default: None.
+ """
+
+ def __init__(self,
+ embed_dims=256,
+ num_heads=8,
+ num_levels=4,
+ num_points=17,
+ im2col_step=64,
+ dropout=0.1,
+ norm_cfg=None,
+ init_cfg=None,
+ batch_first=False,
+ lr_mult=0.1):
+ super().__init__()
+ if embed_dims % num_heads != 0:
+ raise ValueError(f'embed_dims must be divisible by num_heads, '
+ f'but got {embed_dims} and {num_heads}')
+ dim_per_head = embed_dims // num_heads
+ self.norm_cfg = norm_cfg
+ self.init_cfg = init_cfg
+ self.dropout = nn.Dropout(dropout)
+ self.batch_first = batch_first
+
+ # you'd better set dim_per_head to a power of 2
+ # which is more efficient in the CUDA implementation
+ def _is_power_of_2(n):
+ if (not isinstance(n, int)) or (n < 0):
+ raise ValueError(
+ 'invalid input for _is_power_of_2: {} (type: {})'.format(
+ n, type(n)))
+ return (n & (n - 1) == 0) and n != 0
+
+ if not _is_power_of_2(dim_per_head):
+ warnings.warn("You'd better set embed_dims in "
+ 'MultiScaleDeformAttention to make '
+ 'the dimension of each attention head a power of 2 '
+ 'which is more efficient in our CUDA implementation.')
+
+ self.im2col_step = im2col_step
+ self.embed_dims = embed_dims
+ self.num_levels = num_levels
+ self.num_heads = num_heads
+ self.num_points = num_points
+ self.sampling_offsets = nn.Linear(
+ embed_dims,
+ num_heads * num_levels * num_points * 2,
+ weight_attr=ParamAttr(learning_rate=lr_mult),
+ bias_attr=ParamAttr(learning_rate=lr_mult))
+ self.attention_weights = nn.Linear(embed_dims,
+ num_heads * num_levels * num_points)
+ self.value_proj = nn.Linear(embed_dims, embed_dims)
+ self.output_proj = nn.Linear(embed_dims, embed_dims)
+
+ try:
+ # use cuda op
+ from deformable_detr_ops import ms_deformable_attn
+ except:
+ # use paddle func
+ from .utils import deformable_attention_core_func as ms_deformable_attn
+ self.ms_deformable_attn_core = ms_deformable_attn
+
+ self.init_weights()
+
+ def init_weights(self):
+ """Default initialization for Parameters of Module."""
+ constant_(self.sampling_offsets.weight)
+ constant_(self.sampling_offsets.bias)
+ constant_(self.attention_weights.weight)
+ constant_(self.attention_weights.bias)
+ xavier_uniform_(self.value_proj.weight)
+ constant_(self.value_proj.bias)
+ xavier_uniform_(self.output_proj.weight)
+ constant_(self.output_proj.bias)
+
+ def forward(self,
+ query,
+ key,
+ value,
+ residual=None,
+ attn_mask=None,
+ reference_points=None,
+ value_spatial_shapes=None,
+ value_level_start_index=None,
+ **kwargs):
+ """Forward Function of MultiScaleDeformAttention.
+
+ Args:
+ query (Tensor): Query of Transformer with shape
+ (num_query, bs, embed_dims).
+ key (Tensor): The key tensor with shape (num_key, bs, embed_dims).
+ value (Tensor): The value tensor with shape
+ (num_key, bs, embed_dims).
+ residual (Tensor): The tensor used for addition, with the
+ same shape as `x`. Default None. If None, `x` will be used.
+ reference_points (Tensor): The normalized reference points with
+ shape (bs, num_query, num_levels, K*2), all elements is range
+ in [0, 1], top-left (0,0), bottom-right (1, 1), including
+ padding area.
+ attn_mask (Tensor): ByteTensor for `query`, with
+ shape [bs, num_key].
+ value_spatial_shapes (Tensor): Spatial shape of features in
+ different level. With shape (num_levels, 2),
+ last dimension represent (h, w).
+ value_level_start_index (Tensor): The start index of each level.
+ A tensor has shape (num_levels) and can be represented
+ as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
+
+ Returns:
+ Tensor: forwarded results with shape [num_query, bs, embed_dims].
+ """
+
+ if key is None:
+ key = query
+ if value is None:
+ value = key
+
+ bs, num_query, _ = query.shape
+ bs, num_key, _ = value.shape
+ assert (value_spatial_shapes[:, 0].numpy() *
+ value_spatial_shapes[:, 1].numpy()).sum() == num_key
+
+ value = self.value_proj(value)
+ if attn_mask is not None:
+ # value = value.masked_fill(attn_mask[..., None], 0.0)
+ value *= attn_mask.unsqueeze(-1)
+ value = value.reshape([bs, num_key, self.num_heads, -1])
+ sampling_offsets = self.sampling_offsets(query).reshape([
+ bs, num_query, self.num_heads, self.num_levels, self.num_points, 2
+ ])
+ attention_weights = self.attention_weights(query).reshape(
+ [bs, num_query, self.num_heads, self.num_levels * self.num_points])
+ attention_weights = F.softmax(attention_weights, axis=-1)
+
+ attention_weights = attention_weights.reshape(
+ [bs, num_query, self.num_heads, self.num_levels, self.num_points])
+ if reference_points.shape[-1] == self.num_points * 2:
+ reference_points_reshape = reference_points.reshape(
+ (bs, num_query, self.num_levels, -1, 2)).unsqueeze(2)
+ x1 = reference_points[:, :, :, 0::2].min(axis=-1, keepdim=True)
+ y1 = reference_points[:, :, :, 1::2].min(axis=-1, keepdim=True)
+ x2 = reference_points[:, :, :, 0::2].max(axis=-1, keepdim=True)
+ y2 = reference_points[:, :, :, 1::2].max(axis=-1, keepdim=True)
+ w = paddle.clip(x2 - x1, min=1e-4)
+ h = paddle.clip(y2 - y1, min=1e-4)
+ wh = paddle.concat([w, h], axis=-1)[:, :, None, :, None, :]
+
+ sampling_locations = reference_points_reshape \
+ + sampling_offsets * wh * 0.5
+ else:
+ raise ValueError(
+ f'Last dim of reference_points must be'
+ f' 2K, but get {reference_points.shape[-1]} instead.')
+
+ output = self.ms_deformable_attn_core(
+ value, value_spatial_shapes, value_level_start_index,
+ sampling_locations, attention_weights)
+
+ output = self.output_proj(output)
+ return output
+
+
+@register
+class PETR_TransformerDecoderLayer(nn.Layer):
+    """One PETR decoder layer: self-attention over queries, cross-attention
+    into the encoder memory, then a feed-forward block, each with residual +
+    LayerNorm (pre- or post-norm per `normalize_before`).
+
+    Both attention modules can be injected via config; either falls back to
+    MultiHeadAttention when None.
+    """
+    __inject__ = ['self_attn', 'cross_attn']
+
+    def __init__(self,
+                 d_model,
+                 nhead=8,
+                 self_attn=None,
+                 cross_attn=None,
+                 dim_feedforward=2048,
+                 dropout=0.1,
+                 activation="relu",
+                 attn_dropout=None,
+                 act_dropout=None,
+                 normalize_before=False):
+        super(PETR_TransformerDecoderLayer, self).__init__()
+        # attention/activation dropout rates fall back to the shared rate
+        attn_dropout = dropout if attn_dropout is None else attn_dropout
+        act_dropout = dropout if act_dropout is None else act_dropout
+        self.normalize_before = normalize_before
+
+        if self_attn is None:
+            self.self_attn = MultiHeadAttention(d_model, nhead, attn_dropout)
+        else:
+            self.self_attn = self_attn
+        if cross_attn is None:
+            self.cross_attn = MultiHeadAttention(d_model, nhead, attn_dropout)
+        else:
+            self.cross_attn = cross_attn
+        # Implementation of Feedforward model
+        self.linear1 = nn.Linear(d_model, dim_feedforward)
+        self.dropout = nn.Dropout(act_dropout, mode="upscale_in_train")
+        self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+        self.norm1 = nn.LayerNorm(d_model)
+        self.norm2 = nn.LayerNorm(d_model)
+        self.norm3 = nn.LayerNorm(d_model)
+        self.dropout1 = nn.Dropout(dropout, mode="upscale_in_train")
+        self.dropout2 = nn.Dropout(dropout, mode="upscale_in_train")
+        self.dropout3 = nn.Dropout(dropout, mode="upscale_in_train")
+        self.activation = getattr(F, activation)
+        self._reset_parameters()
+
+    def _reset_parameters(self):
+        linear_init_(self.linear1)
+        linear_init_(self.linear2)
+
+    @staticmethod
+    def with_pos_embed(tensor, pos_embed):
+        # positional embedding is additive; None means "no positions"
+        return tensor if pos_embed is None else tensor + pos_embed
+
+    def forward(self,
+                tgt,
+                memory,
+                tgt_mask=None,
+                memory_mask=None,
+                pos_embed=None,
+                query_pos_embed=None,
+                **kwargs):
+        """Run one decoder layer: self-attn -> cross-attn (into `memory`) -> FFN."""
+        tgt_mask = _convert_attention_mask(tgt_mask, tgt.dtype)
+
+        residual = tgt
+        if self.normalize_before:
+            tgt = self.norm1(tgt)
+        q = k = self.with_pos_embed(tgt, query_pos_embed)
+        tgt = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask)
+        tgt = residual + self.dropout1(tgt)
+        if not self.normalize_before:
+            tgt = self.norm1(tgt)
+
+        residual = tgt
+        if self.normalize_before:
+            tgt = self.norm2(tgt)
+        q = self.with_pos_embed(tgt, query_pos_embed)
+        # cross-attention keys are the (un-positioned) queries themselves;
+        # extra kwargs (e.g. reference points) are forwarded to the module
+        key_tmp = tgt
+        # k = self.with_pos_embed(memory, pos_embed)
+        tgt = self.cross_attn(
+            q, key=key_tmp, value=memory, attn_mask=memory_mask, **kwargs)
+        tgt = residual + self.dropout2(tgt)
+        if not self.normalize_before:
+            tgt = self.norm2(tgt)
+
+        residual = tgt
+        if self.normalize_before:
+            tgt = self.norm3(tgt)
+        tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
+        tgt = residual + self.dropout3(tgt)
+        if not self.normalize_before:
+            tgt = self.norm3(tgt)
+        return tgt
+
+
+@register
+class PETR_TransformerDecoder(nn.Layer):
+    """Implements the decoder in PETR transformer.
+
+    Args:
+        return_intermediate (bool): Whether to return intermediate outputs.
+        coder_norm_cfg (dict): Config of last normalization layer. Default:
+            `LN`.
+    """
+    __inject__ = ['decoder_layer']
+
+    def __init__(self,
+                 decoder_layer,
+                 num_layers,
+                 norm=None,
+                 return_intermediate=False,
+                 num_keypoints=17,
+                 **kwargs):
+        super(PETR_TransformerDecoder, self).__init__()
+        self.layers = _get_clones(decoder_layer, num_layers)
+        self.num_layers = num_layers
+        self.norm = norm
+        self.return_intermediate = return_intermediate
+        self.num_keypoints = num_keypoints
+
+    def forward(self,
+                query,
+                *args,
+                reference_points=None,
+                valid_ratios=None,
+                kpt_branches=None,
+                **kwargs):
+        """Forward function for `TransformerDecoder`.
+
+        Args:
+            query (Tensor): Input query with shape (num_query, bs, embed_dims).
+            reference_points (Tensor): The reference points of offset,
+                has shape (bs, num_query, K*2).
+            valid_ratios (Tensor): The radios of valid points on the feature
+                map, has shape (bs, num_levels, 2).
+            kpt_branches: (obj:`nn.LayerList`): Used for refining the
+                regression results. Only would be passed when `with_box_refine`
+                is True, otherwise would be passed a `None`.
+
+        Returns:
+            tuple (Tensor): Results with shape [1, num_query, bs, embed_dims] when
+                return_intermediate is `False`, otherwise it has shape
+                [num_layers, num_query, bs, embed_dims] and
+                [num_layers, bs, num_query, K*2].
+        """
+        output = query
+        intermediate = []
+        intermediate_reference_points = []
+        for lid, layer in enumerate(self.layers):
+            # scale the (per-keypoint or single) reference points by the valid
+            # ratio of every level before handing them to deformable attention
+            if reference_points.shape[-1] == self.num_keypoints * 2:
+                reference_points_input = \
+                    reference_points[:, :, None] * \
+                    valid_ratios.tile((1, 1, self.num_keypoints))[:, None]
+            else:
+                assert reference_points.shape[-1] == 2
+                reference_points_input = reference_points[:, :, None] * \
+                                         valid_ratios[:, None]
+            output = layer(
+                output,
+                *args,
+                reference_points=reference_points_input,
+                **kwargs)
+
+            if kpt_branches is not None:
+                # iterative refinement: add the branch's delta in logit space,
+                # then detach so gradients do not flow through the reference
+                tmp = kpt_branches[lid](output)
+                if reference_points.shape[-1] == self.num_keypoints * 2:
+                    new_reference_points = tmp + inverse_sigmoid(
+                        reference_points)
+                    new_reference_points = F.sigmoid(new_reference_points)
+                else:
+                    raise NotImplementedError
+                reference_points = new_reference_points.detach()
+
+            if self.return_intermediate:
+                intermediate.append(output)
+                intermediate_reference_points.append(reference_points)
+
+        if self.return_intermediate:
+            return paddle.stack(intermediate), paddle.stack(
+                intermediate_reference_points)
+
+        return output, reference_points
+
+
+@register
+class PETR_DeformableTransformerDecoder(nn.Layer):
+    """Plain stack of deformable decoder layers; optionally collects the
+    output of every layer when `return_intermediate` is True."""
+    __inject__ = ['decoder_layer']
+
+    def __init__(self, decoder_layer, num_layers, return_intermediate=False):
+        super(PETR_DeformableTransformerDecoder, self).__init__()
+        self.layers = _get_clones(decoder_layer, num_layers)
+        self.num_layers = num_layers
+        self.return_intermediate = return_intermediate
+
+    def forward(self,
+                tgt,
+                reference_points,
+                memory,
+                memory_spatial_shapes,
+                memory_mask=None,
+                query_pos_embed=None):
+        """Run all layers; returns stacked per-layer outputs when
+        return_intermediate, else the final output with a leading axis of 1."""
+        output = tgt
+        intermediate = []
+        for lid, layer in enumerate(self.layers):
+            output = layer(output, reference_points, memory,
+                           memory_spatial_shapes, memory_mask, query_pos_embed)
+
+            if self.return_intermediate:
+                intermediate.append(output)
+
+        if self.return_intermediate:
+            return paddle.stack(intermediate)
+
+        # keep the return shape consistent with the intermediate case
+        return output.unsqueeze(0)
+
+
+@register
+class PETR_DeformableDetrTransformerDecoder(PETR_DeformableTransformerDecoder):
+    """Implements the decoder in DETR transformer.
+
+    Args:
+        return_intermediate (bool): Whether to return intermediate outputs.
+        coder_norm_cfg (dict): Config of last normalization layer. Default:
+            `LN`.
+    """
+
+    def __init__(self, *args, return_intermediate=False, **kwargs):
+
+        super(PETR_DeformableDetrTransformerDecoder, self).__init__(*args,
+                                                                    **kwargs)
+        self.return_intermediate = return_intermediate
+
+    def forward(self,
+                query,
+                *args,
+                reference_points=None,
+                valid_ratios=None,
+                reg_branches=None,
+                **kwargs):
+        """Forward function for `TransformerDecoder`.
+
+        Args:
+            query (Tensor): Input query with shape
+                `(num_query, bs, embed_dims)`.
+            reference_points (Tensor): The reference
+                points of offset. has shape
+                (bs, num_query, 4) when as_two_stage,
+                otherwise has shape ((bs, num_query, 2).
+            valid_ratios (Tensor): The radios of valid
+                points on the feature map, has shape
+                (bs, num_levels, 2)
+            reg_branch: (obj:`nn.LayerList`): Used for
+                refining the regression results. Only would
+                be passed when with_box_refine is True,
+                otherwise would be passed a `None`.
+
+        Returns:
+            Tensor: Results with shape [1, num_query, bs, embed_dims] when
+                return_intermediate is `False`, otherwise it has shape
+                [num_layers, num_query, bs, embed_dims].
+        """
+        output = query
+        intermediate = []
+        intermediate_reference_points = []
+        for lid, layer in enumerate(self.layers):
+            # scale references by per-level valid ratios (boxes need the
+            # ratio duplicated for the w/h components)
+            if reference_points.shape[-1] == 4:
+                reference_points_input = reference_points[:, :, None] * \
+                    paddle.concat([valid_ratios, valid_ratios], -1)[:, None]
+            else:
+                assert reference_points.shape[-1] == 2
+                reference_points_input = reference_points[:, :, None] * \
+                                         valid_ratios[:, None]
+            output = layer(
+                output,
+                *args,
+                reference_points=reference_points_input,
+                **kwargs)
+
+            if reg_branches is not None:
+                # iterative box/point refinement in logit space
+                tmp = reg_branches[lid](output)
+                if reference_points.shape[-1] == 4:
+                    new_reference_points = tmp + inverse_sigmoid(
+                        reference_points)
+                    new_reference_points = F.sigmoid(new_reference_points)
+                else:
+                    assert reference_points.shape[-1] == 2
+                    # NOTE(review): `new_reference_points` aliases `tmp`, so the
+                    # in-place slice assignment below also mutates `tmp` —
+                    # looks intentional here, but confirm if `tmp` is reused
+                    new_reference_points = tmp
+                    new_reference_points[..., :2] = tmp[
+                        ..., :2] + inverse_sigmoid(reference_points)
+                    new_reference_points = F.sigmoid(new_reference_points)
+                reference_points = new_reference_points.detach()
+
+            if self.return_intermediate:
+                intermediate.append(output)
+                intermediate_reference_points.append(reference_points)
+
+        if self.return_intermediate:
+            return paddle.stack(intermediate), paddle.stack(
+                intermediate_reference_points)
+
+        return output, reference_points
+
+
+@register
+class PETRTransformer(nn.Layer):
+ """Implements the PETR transformer.
+
+ Args:
+ as_two_stage (bool): Generate query from encoder features.
+ Default: False.
+ num_feature_levels (int): Number of feature maps from FPN:
+ Default: 4.
+ two_stage_num_proposals (int): Number of proposals when set
+ `as_two_stage` as True. Default: 300.
+ """
+ __inject__ = ["encoder", "decoder", "hm_encoder", "refine_decoder"]
+
+    def __init__(self,
+                 encoder="",
+                 decoder="",
+                 hm_encoder="",
+                 refine_decoder="",
+                 as_two_stage=True,
+                 num_feature_levels=4,
+                 two_stage_num_proposals=300,
+                 num_keypoints=17,
+                 **kwargs):
+        # encoder/decoder/hm_encoder/refine_decoder are injected sub-modules
+        # (see __inject__); the embedding width is taken from the encoder.
+        super(PETRTransformer, self).__init__(**kwargs)
+        self.as_two_stage = as_two_stage
+        self.num_feature_levels = num_feature_levels
+        self.two_stage_num_proposals = two_stage_num_proposals
+        self.num_keypoints = num_keypoints
+        self.encoder = encoder
+        self.decoder = decoder
+        self.embed_dims = self.encoder.embed_dims
+        self.hm_encoder = hm_encoder
+        self.refine_decoder = refine_decoder
+        self.init_layers()
+        self.init_weights()
+
+    def init_layers(self):
+        """Initialize layers of the DeformableDetrTransformer."""
+        #paddle.create_parameter
+        # learnable per-level embedding added to the positional encoding
+        self.level_embeds = paddle.create_parameter(
+            (self.num_feature_levels, self.embed_dims), dtype="float32")
+
+        if self.as_two_stage:
+            # two-stage: proposals come from the encoder output
+            self.enc_output = nn.Linear(self.embed_dims, self.embed_dims)
+            self.enc_output_norm = nn.LayerNorm(self.embed_dims)
+            self.refine_query_embedding = nn.Embedding(self.num_keypoints,
+                                                       self.embed_dims * 2)
+        else:
+            # single-stage: references are regressed from the query embedding
+            self.reference_points = nn.Linear(self.embed_dims,
+                                              2 * self.num_keypoints)
+
+ def init_weights(self):
+ """Initialize the transformer weights."""
+ for p in self.parameters():
+ if p.rank() > 1:
+ xavier_uniform_(p)
+ if hasattr(p, 'bias') and p.bias is not None:
+ constant_(p.bais)
+ for m in self.sublayers():
+ if isinstance(m, MSDeformableAttention):
+ m._reset_parameters()
+ for m in self.sublayers():
+ if isinstance(m, MultiScaleDeformablePoseAttention):
+ m.init_weights()
+ if not self.as_two_stage:
+ xavier_uniform_(self.reference_points.weight)
+ constant_(self.reference_points.bias)
+ normal_(self.level_embeds)
+ normal_(self.refine_query_embedding.weight)
+
+    def gen_encoder_output_proposals(self, memory, memory_padding_mask,
+                                     spatial_shapes):
+        """Generate proposals from encoded memory.
+
+        Args:
+            memory (Tensor): The output of encoder, has shape
+                (bs, num_key, embed_dim). num_key is equal the number of points
+                on feature map from all level.
+            memory_padding_mask (Tensor): Padding mask for memory.
+                has shape (bs, num_key).
+            spatial_shapes (Tensor): The shape of all feature maps.
+                has shape (num_level, 2).
+
+        Returns:
+            tuple: A tuple of feature map and bbox prediction.
+
+                - output_memory (Tensor): The input of decoder, has shape
+                  (bs, num_key, embed_dim). num_key is equal the number of
+                  points on feature map from all levels.
+                - output_proposals (Tensor): The normalized proposal
+                  after a inverse sigmoid, has shape (bs, num_keys, 4).
+        """
+
+        N, S, C = memory.shape
+        proposals = []
+        _cur = 0
+        for lvl, (H, W) in enumerate(spatial_shapes):
+            # count valid rows/columns of this level from the padding mask
+            mask_flatten_ = memory_padding_mask[:, _cur:(_cur + H * W)].reshape(
+                [N, H, W, 1])
+            valid_H = paddle.sum(mask_flatten_[:, :, 0, 0], 1)
+            valid_W = paddle.sum(mask_flatten_[:, 0, :, 0], 1)
+
+            # regular grid of cell centers, normalized by the valid extent
+            grid_y, grid_x = paddle.meshgrid(
+                paddle.linspace(
+                    0, H - 1, H, dtype="float32"),
+                paddle.linspace(
+                    0, W - 1, W, dtype="float32"))
+            grid = paddle.concat([grid_x.unsqueeze(-1), grid_y.unsqueeze(-1)],
+                                 -1)
+
+            scale = paddle.concat(
+                [valid_W.unsqueeze(-1),
+                 valid_H.unsqueeze(-1)], 1).reshape([N, 1, 1, 2])
+            grid = (grid.unsqueeze(0).expand((N, -1, -1, -1)) + 0.5) / scale
+            proposal = grid.reshape([N, -1, 2])
+            proposals.append(proposal)
+            _cur += (H * W)
+        output_proposals = paddle.concat(proposals, 1)
+        # keep only proposals comfortably inside the image (0.01..0.99)
+        output_proposals_valid = ((output_proposals > 0.01) &
+                                  (output_proposals < 0.99)).all(
+                                      -1, keepdim=True).astype("bool")
+        # inverse sigmoid; invalid/padded positions are pushed to +inf
+        output_proposals = paddle.log(output_proposals / (1 - output_proposals))
+        output_proposals = masked_fill(
+            output_proposals, ~memory_padding_mask.astype("bool").unsqueeze(-1),
+            float('inf'))
+        output_proposals = masked_fill(output_proposals,
+                                       ~output_proposals_valid, float('inf'))
+
+        # zero the memory at padded/invalid positions before projecting
+        output_memory = memory
+        output_memory = masked_fill(
+            output_memory, ~memory_padding_mask.astype("bool").unsqueeze(-1),
+            float(0))
+        output_memory = masked_fill(output_memory, ~output_proposals_valid,
+                                    float(0))
+        output_memory = self.enc_output_norm(self.enc_output(output_memory))
+        return output_memory, output_proposals
+
+    @staticmethod
+    def get_reference_points(spatial_shapes, valid_ratios):
+        """Get the reference points used in decoder.
+
+        Args:
+            spatial_shapes (Tensor): The shape of all feature maps,
+                has shape (num_level, 2).
+            valid_ratios (Tensor): The radios of valid points on the
+                feature map, has shape (bs, num_levels, 2).
+
+        Returns:
+            Tensor: reference points used in decoder, has \
+                shape (bs, num_keys, num_levels, 2).
+        """
+        reference_points_list = []
+        for lvl, (H, W) in enumerate(spatial_shapes):
+            # cell-center grid, normalized by each sample's valid extent
+            ref_y, ref_x = paddle.meshgrid(
+                paddle.linspace(
+                    0.5, H - 0.5, H, dtype="float32"),
+                paddle.linspace(
+                    0.5, W - 0.5, W, dtype="float32"))
+            ref_y = ref_y.reshape(
+                (-1, ))[None] / (valid_ratios[:, None, lvl, 1] * H)
+            ref_x = ref_x.reshape(
+                (-1, ))[None] / (valid_ratios[:, None, lvl, 0] * W)
+            ref = paddle.stack((ref_x, ref_y), -1)
+            reference_points_list.append(ref)
+        reference_points = paddle.concat(reference_points_list, 1)
+        # broadcast each point across levels, rescaled by the valid ratios
+        reference_points = reference_points[:, :, None] * valid_ratios[:, None]
+        return reference_points
+
+ def get_valid_ratio(self, mask):
+ """Get the valid radios of feature maps of all level."""
+ _, H, W = mask.shape
+ valid_H = paddle.sum(mask[:, :, 0].astype('float'), 1)
+ valid_W = paddle.sum(mask[:, 0, :].astype('float'), 1)
+ valid_ratio_h = valid_H.astype('float') / H
+ valid_ratio_w = valid_W.astype('float') / W
+ valid_ratio = paddle.stack([valid_ratio_w, valid_ratio_h], -1)
+ return valid_ratio
+
+    def get_proposal_pos_embed(self,
+                               proposals,
+                               num_pos_feats=128,
+                               temperature=10000):
+        """Get the position embedding of proposal."""
+        # sinusoidal embedding over sigmoid-normalized proposal coordinates
+        scale = 2 * math.pi
+        dim_t = paddle.arange(num_pos_feats, dtype="float32")
+        dim_t = temperature**(2 * (dim_t // 2) / num_pos_feats)
+        # N, L, 4
+        proposals = F.sigmoid(proposals) * scale
+        # N, L, 4, 128
+        pos = proposals[:, :, :, None] / dim_t
+        # N, L, 4, 64, 2
+        pos = paddle.stack(
+            (pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()),
+            axis=4).flatten(2)
+        return pos
+
+ def forward(self,
+ mlvl_feats,
+ mlvl_masks,
+ query_embed,
+ mlvl_pos_embeds,
+ kpt_branches=None,
+ cls_branches=None):
+ """Forward function for `Transformer`.
+
+ Args:
+ mlvl_feats (list(Tensor)): Input queries from different level.
+ Each element has shape [bs, embed_dims, h, w].
+ mlvl_masks (list(Tensor)): The key_padding_mask from different
+ level used for encoder and decoder, each element has shape
+ [bs, h, w].
+ query_embed (Tensor): The query embedding for decoder,
+ with shape [num_query, c].
+ mlvl_pos_embeds (list(Tensor)): The positional encoding
+ of feats from different level, has the shape
+ [bs, embed_dims, h, w].
+ kpt_branches (obj:`nn.LayerList`): Keypoint Regression heads for
+ feature maps from each decoder layer. Only would be passed when
+ `with_box_refine` is Ture. Default to None.
+ cls_branches (obj:`nn.LayerList`): Classification heads for
+ feature maps from each decoder layer. Only would be passed when
+ `as_two_stage` is Ture. Default to None.
+
+ Returns:
+ tuple[Tensor]: results of decoder containing the following tensor.
+
+ - inter_states: Outputs from decoder. If
+ `return_intermediate_dec` is True output has shape \
+ (num_dec_layers, bs, num_query, embed_dims), else has \
+ shape (1, bs, num_query, embed_dims).
+ - init_reference_out: The initial value of reference \
+ points, has shape (bs, num_queries, 4).
+ - inter_references_out: The internal value of reference \
+ points in decoder, has shape \
+                    (num_dec_layers, bs, num_query, embed_dims)
+ - enc_outputs_class: The classification score of proposals \
+ generated from encoder's feature maps, has shape \
+ (batch, h*w, num_classes). \
+ Only would be returned when `as_two_stage` is True, \
+ otherwise None.
+ - enc_outputs_kpt_unact: The regression results generated from \
+                encoder's feature maps, has shape (batch, h*w, K*2).
+ Only would be returned when `as_two_stage` is True, \
+ otherwise None.
+ """
+ assert self.as_two_stage or query_embed is not None
+
+ feat_flatten = []
+ mask_flatten = []
+ lvl_pos_embed_flatten = []
+ spatial_shapes = []
+ for lvl, (feat, mask, pos_embed
+ ) in enumerate(zip(mlvl_feats, mlvl_masks, mlvl_pos_embeds)):
+ bs, c, h, w = feat.shape
+ spatial_shape = (h, w)
+ spatial_shapes.append(spatial_shape)
+ feat = feat.flatten(2).transpose((0, 2, 1))
+ mask = mask.flatten(1)
+ pos_embed = pos_embed.flatten(2).transpose((0, 2, 1))
+ lvl_pos_embed = pos_embed + self.level_embeds[lvl].reshape(
+ [1, 1, -1])
+ lvl_pos_embed_flatten.append(lvl_pos_embed)
+ feat_flatten.append(feat)
+ mask_flatten.append(mask)
+ feat_flatten = paddle.concat(feat_flatten, 1)
+ mask_flatten = paddle.concat(mask_flatten, 1)
+ lvl_pos_embed_flatten = paddle.concat(lvl_pos_embed_flatten, 1)
+ spatial_shapes_cumsum = paddle.to_tensor(
+ np.array(spatial_shapes).prod(1).cumsum(0))
+ spatial_shapes = paddle.to_tensor(spatial_shapes, dtype="int64")
+ level_start_index = paddle.concat((paddle.zeros(
+ (1, ), dtype=spatial_shapes.dtype), spatial_shapes_cumsum[:-1]))
+ valid_ratios = paddle.stack(
+ [self.get_valid_ratio(m) for m in mlvl_masks], 1)
+
+ reference_points = \
+ self.get_reference_points(spatial_shapes,
+ valid_ratios)
+
+ memory = self.encoder(
+ src=feat_flatten,
+ pos_embed=lvl_pos_embed_flatten,
+ src_mask=mask_flatten,
+ value_spatial_shapes=spatial_shapes,
+ reference_points=reference_points,
+ value_level_start_index=level_start_index,
+ valid_ratios=valid_ratios)
+
+ bs, _, c = memory.shape
+
+ hm_proto = None
+ if self.training:
+ hm_memory = paddle.slice(
+ memory,
+ starts=level_start_index[0],
+ ends=level_start_index[1],
+ axes=[1])
+ hm_pos_embed = paddle.slice(
+ lvl_pos_embed_flatten,
+ starts=level_start_index[0],
+ ends=level_start_index[1],
+ axes=[1])
+ hm_mask = paddle.slice(
+ mask_flatten,
+ starts=level_start_index[0],
+ ends=level_start_index[1],
+ axes=[1])
+ hm_reference_points = paddle.slice(
+ reference_points,
+ starts=level_start_index[0],
+ ends=level_start_index[1],
+ axes=[1])[:, :, :1, :]
+
+            # NOTE: the official code mistakenly passes pos_embed via a misspelled
+            # `pose_embed` argument, which effectively disables the positional embedding
+ hm_memory = self.hm_encoder(
+ src=hm_memory,
+ pose_embed=hm_pos_embed,
+ src_mask=hm_mask,
+ value_spatial_shapes=spatial_shapes[[0]],
+ reference_points=hm_reference_points,
+ value_level_start_index=level_start_index[0],
+ valid_ratios=valid_ratios[:, :1, :])
+ hm_memory = hm_memory.reshape((bs, spatial_shapes[0, 0],
+ spatial_shapes[0, 1], -1))
+ hm_proto = (hm_memory, mlvl_masks[0])
+
+ if self.as_two_stage:
+ output_memory, output_proposals = \
+ self.gen_encoder_output_proposals(
+ memory, mask_flatten, spatial_shapes)
+ enc_outputs_class = cls_branches[self.decoder.num_layers](
+ output_memory)
+ enc_outputs_kpt_unact = \
+ kpt_branches[self.decoder.num_layers](output_memory)
+ enc_outputs_kpt_unact[..., 0::2] += output_proposals[..., 0:1]
+ enc_outputs_kpt_unact[..., 1::2] += output_proposals[..., 1:2]
+
+ topk = self.two_stage_num_proposals
+ topk_proposals = paddle.topk(
+ enc_outputs_class[..., 0], topk, axis=1)[1].unsqueeze(-1)
+
+            # paddle.take_along_axis is the equivalent of torch.gather
+ topk_kpts_unact = paddle.take_along_axis(enc_outputs_kpt_unact,
+ topk_proposals, 1)
+ topk_kpts_unact = topk_kpts_unact.detach()
+
+ reference_points = F.sigmoid(topk_kpts_unact)
+ init_reference_out = reference_points
+ # learnable query and query_pos
+ query_pos, query = paddle.split(
+ query_embed, query_embed.shape[1] // c, axis=1)
+ query_pos = query_pos.unsqueeze(0).expand((bs, -1, -1))
+ query = query.unsqueeze(0).expand((bs, -1, -1))
+ else:
+ query_pos, query = paddle.split(
+ query_embed, query_embed.shape[1] // c, axis=1)
+ query_pos = query_pos.unsqueeze(0).expand((bs, -1, -1))
+ query = query.unsqueeze(0).expand((bs, -1, -1))
+ reference_points = F.sigmoid(self.reference_points(query_pos))
+ init_reference_out = reference_points
+
+ # decoder
+ inter_states, inter_references = self.decoder(
+ query=query,
+ memory=memory,
+ query_pos_embed=query_pos,
+ memory_mask=mask_flatten,
+ reference_points=reference_points,
+ value_spatial_shapes=spatial_shapes,
+ value_level_start_index=level_start_index,
+ valid_ratios=valid_ratios,
+ kpt_branches=kpt_branches)
+
+ inter_references_out = inter_references
+ if self.as_two_stage:
+ return inter_states, init_reference_out, \
+ inter_references_out, enc_outputs_class, \
+ enc_outputs_kpt_unact, hm_proto, memory
+ return inter_states, init_reference_out, \
+ inter_references_out, None, None, None, None, None, hm_proto
+
+ def forward_refine(self,
+ mlvl_masks,
+ memory,
+ reference_points_pose,
+ img_inds,
+ kpt_branches=None,
+ **kwargs):
+ mask_flatten = []
+ spatial_shapes = []
+ for lvl, mask in enumerate(mlvl_masks):
+ bs, h, w = mask.shape
+ spatial_shape = (h, w)
+ spatial_shapes.append(spatial_shape)
+ mask = mask.flatten(1)
+ mask_flatten.append(mask)
+ mask_flatten = paddle.concat(mask_flatten, 1)
+ spatial_shapes_cumsum = paddle.to_tensor(
+ np.array(
+ spatial_shapes, dtype='int64').prod(1).cumsum(0))
+ spatial_shapes = paddle.to_tensor(spatial_shapes, dtype="int64")
+ level_start_index = paddle.concat((paddle.zeros(
+ (1, ), dtype=spatial_shapes.dtype), spatial_shapes_cumsum[:-1]))
+ valid_ratios = paddle.stack(
+ [self.get_valid_ratio(m) for m in mlvl_masks], 1)
+
+ # pose refinement (17 queries corresponding to 17 keypoints)
+ # learnable query and query_pos
+ refine_query_embedding = self.refine_query_embedding.weight
+ query_pos, query = paddle.split(refine_query_embedding, 2, axis=1)
+ pos_num = reference_points_pose.shape[0]
+ query_pos = query_pos.unsqueeze(0).expand((pos_num, -1, -1))
+ query = query.unsqueeze(0).expand((pos_num, -1, -1))
+ reference_points = reference_points_pose.reshape(
+ (pos_num, reference_points_pose.shape[1] // 2, 2))
+ pos_memory = memory[img_inds]
+ mask_flatten = mask_flatten[img_inds]
+ valid_ratios = valid_ratios[img_inds]
+ if img_inds.size == 1:
+ pos_memory = pos_memory.unsqueeze(0)
+ mask_flatten = mask_flatten.unsqueeze(0)
+ valid_ratios = valid_ratios.unsqueeze(0)
+ inter_states, inter_references = self.refine_decoder(
+ query=query,
+ memory=pos_memory,
+ query_pos_embed=query_pos,
+ memory_mask=mask_flatten,
+ reference_points=reference_points,
+ value_spatial_shapes=spatial_shapes,
+ value_level_start_index=level_start_index,
+ valid_ratios=valid_ratios,
+ reg_branches=kpt_branches,
+ **kwargs)
+ # [num_decoder, num_query, bs, embed_dim]
+
+ init_reference_out = reference_points
+ return inter_states, init_reference_out, inter_references
diff --git a/ppdet/utils/visualizer.py b/ppdet/utils/visualizer.py
index f7193306c93..1c8560a7453 100644
--- a/ppdet/utils/visualizer.py
+++ b/ppdet/utils/visualizer.py
@@ -238,7 +238,7 @@ def draw_pose(image,
'for example: `pip install matplotlib`.')
raise e
- skeletons = np.array([item['keypoints'] for item in results])
+ skeletons = np.array([item['keypoints'] for item in results]).reshape((-1, 51))
kpt_nums = 17
if len(skeletons) > 0:
kpt_nums = int(skeletons.shape[1] / 3)
From ea682ac4c35216e0aecb8486888cb0977095526b Mon Sep 17 00:00:00 2001
From: XYZ <1290573099@qq.com>
Date: Wed, 22 Feb 2023 19:07:36 +0800
Subject: [PATCH 024/116] add config and architecture for human36m (#7802)
* add config and architecture for human36m
* modify TinyPose3DHRNet to support human3.6M dataset
* delete useless class
---
configs/pose3d/tinypose3d_human36M.yml | 123 ++++++++++++++++++
.../modeling/architectures/keypoint_hrnet.py | 11 +-
2 files changed, 125 insertions(+), 9 deletions(-)
create mode 100644 configs/pose3d/tinypose3d_human36M.yml
diff --git a/configs/pose3d/tinypose3d_human36M.yml b/configs/pose3d/tinypose3d_human36M.yml
new file mode 100644
index 00000000000..a3ccdbbbd58
--- /dev/null
+++ b/configs/pose3d/tinypose3d_human36M.yml
@@ -0,0 +1,123 @@
+use_gpu: true
+log_iter: 5
+save_dir: output
+snapshot_epoch: 1
+weights: output/tinypose3d_human36M/model_final
+epoch: 220
+num_joints: &num_joints 24
+pixel_std: &pixel_std 200
+metric: Pose3DEval
+num_classes: 1
+train_height: &train_height 128
+train_width: &train_width 128
+trainsize: &trainsize [*train_width, *train_height]
+
+#####model
+architecture: TinyPose3DHRNet
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/tinypose_128x96.pdparams
+
+TinyPose3DHRNet:
+ backbone: LiteHRNet
+ post_process: HR3DNetPostProcess
+ fc_channel: 1024
+ num_joints: *num_joints
+ width: &width 40
+ loss: Pose3DLoss
+
+LiteHRNet:
+ network_type: wider_naive
+ freeze_at: -1
+ freeze_norm: false
+ return_idx: [0]
+
+Pose3DLoss:
+ weight_3d: 1.0
+ weight_2d: 0.0
+
+#####optimizer
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ milestones: [17, 21]
+ gamma: 0.1
+ - !LinearWarmup
+ start_factor: 0.01
+ steps: 1000
+
+OptimizerBuilder:
+ optimizer:
+ type: Adam
+ regularizer:
+ factor: 0.0
+ type: L2
+
+
+#####data
+TrainDataset:
+ !Pose3DDataset
+ dataset_dir: Human3.6M
+ image_dirs: ["Images"]
+ anno_list: ['Human3.6m_train.json']
+ num_joints: *num_joints
+ test_mode: False
+
+EvalDataset:
+ !Pose3DDataset
+ dataset_dir: Human3.6M
+ image_dirs: ["Images"]
+ anno_list: ['Human3.6m_valid.json']
+ num_joints: *num_joints
+ test_mode: True
+
+TestDataset:
+ !ImageFolder
+ anno_path: dataset/coco/keypoint_imagelist.txt
+
+worker_num: 4
+global_mean: &global_mean [0.485, 0.456, 0.406]
+global_std: &global_std [0.229, 0.224, 0.225]
+TrainReader:
+ sample_transforms:
+ - SinglePoseAffine:
+ trainsize: *trainsize
+ rotate: [0.5, 30] #[prob, rotate range]
+ scale: [0.5, 0.25] #[prob, scale range]
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 128
+ shuffle: true
+ drop_last: true
+
+EvalReader:
+ sample_transforms:
+ - SinglePoseAffine:
+ trainsize: *trainsize
+ rotate: [0., 30]
+ scale: [0., 0.25]
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 128
+
+TestReader:
+ inputs_def:
+ image_shape: [3, *train_height, *train_width]
+ sample_transforms:
+ - Decode: {}
+ - TopDownEvalAffine:
+ trainsize: *trainsize
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 1
+ fuse_normalize: false
diff --git a/ppdet/modeling/architectures/keypoint_hrnet.py b/ppdet/modeling/architectures/keypoint_hrnet.py
index fa3541d7d78..1d93e3af5f5 100644
--- a/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -394,6 +394,7 @@ class TinyPose3DHRNet(BaseArch):
def __init__(self,
width,
num_joints,
+ fc_channel=768,
backbone='HRNet',
loss='KeyPointRegressionMSELoss',
post_process=TinyPose3DPostProcess):
@@ -411,21 +412,13 @@ def __init__(self,
self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
- self.final_conv_new = L.Conv2d(
- width, num_joints * 32, 1, 1, 0, bias=True)
-
self.flatten = paddle.nn.Flatten(start_axis=2, stop_axis=3)
- self.fc1 = paddle.nn.Linear(768, 256)
+ self.fc1 = paddle.nn.Linear(fc_channel, 256)
self.act1 = paddle.nn.ReLU()
self.fc2 = paddle.nn.Linear(256, 64)
self.act2 = paddle.nn.ReLU()
self.fc3 = paddle.nn.Linear(64, 3)
- # for human3.6M
- self.fc1_1 = paddle.nn.Linear(3136, 1024)
- self.fc2_1 = paddle.nn.Linear(1024, 256)
- self.fc3_1 = paddle.nn.Linear(256, 3)
-
@classmethod
def from_config(cls, cfg, *args, **kwargs):
# backbone
From b830f474827baed0187cc8b5d4115ba21df2f259 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Wed, 22 Feb 2023 20:26:54 +0800
Subject: [PATCH 025/116] update version (#7813)
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index ad2da38434a..991b861d089 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
# ============== version definition ==============
-PPDET_VERSION = "2.4.0"
+PPDET_VERSION = "0.0.0"
def parse_version():
From c15cdb40046922857508d4736ab44e25c17e324a Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Thu, 23 Feb 2023 10:47:30 +0800
Subject: [PATCH 026/116] fix run_benchmark (#7812)
* fix run_benchmark for small model accurate speed
* fix run_benchmark for other det models
---
deploy/python/infer.py | 46 ++++++++++++++++++++++++++++++++++--------
1 file changed, 38 insertions(+), 8 deletions(-)
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index e86761abc55..31e491b1278 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -181,7 +181,7 @@ def filter_box(self, result, threshold):
filter_res = {'boxes': boxes, 'boxes_num': filter_num}
return filter_res
- def predict(self, repeats=1):
+ def predict(self, repeats=1, run_benchmark=False):
'''
Args:
repeats (int): repeats number for prediction
@@ -193,6 +193,15 @@ def predict(self, repeats=1):
'''
# model prediction
np_boxes_num, np_boxes, np_masks = np.array([0]), None, None
+
+ if run_benchmark:
+ for i in range(repeats):
+ self.predictor.run()
+ paddle.device.cuda.synchronize()
+ result = dict(
+ boxes=np_boxes, masks=np_masks, boxes_num=np_boxes_num)
+ return result
+
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
@@ -272,9 +281,9 @@ def predict_image_slice(self,
self.det_times.preprocess_time_s.end()
# model prediction
- result = self.predict(repeats=50) # warmup
+ result = self.predict(repeats=50, run_benchmark=True) # warmup
self.det_times.inference_time_s.start()
- result = self.predict(repeats=repeats)
+ result = self.predict(repeats=repeats, run_benchmark=True)
self.det_times.inference_time_s.end(repeats=repeats)
# postprocess
@@ -370,9 +379,9 @@ def predict_image(self,
self.det_times.preprocess_time_s.end()
# model prediction
- result = self.predict(repeats=50) # warmup
+ result = self.predict(repeats=50, run_benchmark=True) # warmup
self.det_times.inference_time_s.start()
- result = self.predict(repeats=repeats)
+ result = self.predict(repeats=repeats, run_benchmark=True)
self.det_times.inference_time_s.end(repeats=repeats)
# postprocess
@@ -568,7 +577,7 @@ def __init__(
output_dir=output_dir,
threshold=threshold, )
- def predict(self, repeats=1):
+ def predict(self, repeats=1, run_benchmark=False):
'''
Args:
repeats (int): repeat number for prediction
@@ -577,7 +586,20 @@ def predict(self, repeats=1):
'cate_label': label of segm, shape:[N]
'cate_score': confidence score of segm, shape:[N]
'''
- np_label, np_score, np_segms = None, None, None
+ np_segms, np_label, np_score, np_boxes_num = None, None, None, np.array(
+ [0])
+
+ if run_benchmark:
+ for i in range(repeats):
+ self.predictor.run()
+ paddle.device.cuda.synchronize()
+ result = dict(
+ segm=np_segms,
+ label=np_label,
+ score=np_score,
+ boxes_num=np_boxes_num)
+ return result
+
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
@@ -659,7 +681,7 @@ def postprocess(self, inputs, result):
result = dict(boxes=np_boxes, boxes_num=np_boxes_num)
return result
- def predict(self, repeats=1):
+ def predict(self, repeats=1, run_benchmark=False):
'''
Args:
repeats (int): repeat number for prediction
@@ -668,6 +690,14 @@ def predict(self, repeats=1):
matix element:[class, score, x_min, y_min, x_max, y_max]
'''
np_score_list, np_boxes_list = [], []
+
+ if run_benchmark:
+ for i in range(repeats):
+ self.predictor.run()
+ paddle.device.cuda.synchronize()
+ result = dict(boxes=np_score_list, boxes_num=np_boxes_list)
+ return result
+
for i in range(repeats):
self.predictor.run()
np_score_list.clear()
From 5984726bb54d6a9d3b597a089b7f4aa85529fd80 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Tue, 28 Feb 2023 18:48:29 +0800
Subject: [PATCH 027/116] adapted between higherhrnet and petr (#7839)
* new adapted
* test ok
---
.../higherhrnet/higherhrnet_hrnet_w32_512.yml | 8 +++++++-
.../higherhrnet_hrnet_w32_512_swahr.yml | 8 +++++++-
.../higherhrnet/higherhrnet_hrnet_w32_640.yml | 10 ++++++++--
.../keypoint/petr/petr_resnet50_16x2_coco.yml | 3 +--
ppdet/data/transform/keypoint_operators.py | 18 +++++++++---------
ppdet/utils/visualizer.py | 2 +-
6 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
index 7cea9d4a20c..5dedfb32bb1 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
@@ -66,6 +66,9 @@ TrainDataset:
anno_path: annotations/person_keypoints_train2017.json
dataset_dir: dataset/coco
num_joints: *num_joints
+ return_bbox: False
+ return_area: False
+ return_class: False
EvalDataset:
!KeypointBottomUpCocoDataset
@@ -74,6 +77,9 @@ EvalDataset:
dataset_dir: dataset/coco
num_joints: *num_joints
test_mode: true
+ return_bbox: False
+ return_area: False
+ return_class: False
TestDataset:
!ImageFolder
@@ -88,7 +94,7 @@ TrainReader:
max_degree: 30
scale: [0.75, 1.5]
max_shift: 0.2
- trainsize: *input_size
+ trainsize: [*input_size, *input_size]
hmsize: [*hm_size, *hm_size_2x]
- KeyPointFlip:
flip_prob: 0.5
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
index 2677d20bced..7b0f7560a0c 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
@@ -67,6 +67,9 @@ TrainDataset:
anno_path: annotations/person_keypoints_train2017.json
dataset_dir: dataset/coco
num_joints: *num_joints
+ return_bbox: False
+ return_area: False
+ return_class: False
EvalDataset:
!KeypointBottomUpCocoDataset
@@ -75,6 +78,9 @@ EvalDataset:
dataset_dir: dataset/coco
num_joints: *num_joints
test_mode: true
+ return_bbox: False
+ return_area: False
+ return_class: False
TestDataset:
!ImageFolder
@@ -89,7 +95,7 @@ TrainReader:
max_degree: 30
scale: [0.75, 1.5]
max_shift: 0.2
- trainsize: *input_size
+ trainsize: [*input_size, *input_size]
hmsize: [*hm_size, *hm_size_2x]
- KeyPointFlip:
flip_prob: 0.5
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
index 7cbeb01d1f5..edd66e55d52 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
@@ -66,6 +66,9 @@ TrainDataset:
anno_path: annotations/person_keypoints_train2017.json
dataset_dir: dataset/coco
num_joints: *num_joints
+ return_bbox: False
+ return_area: False
+ return_class: False
EvalDataset:
!KeypointBottomUpCocoDataset
@@ -74,12 +77,15 @@ EvalDataset:
dataset_dir: dataset/coco
num_joints: *num_joints
test_mode: true
+ return_bbox: False
+ return_area: False
+ return_class: False
TestDataset:
!ImageFolder
anno_path: dataset/coco/keypoint_imagelist.txt
-worker_num: 0
+worker_num: 8
global_mean: &global_mean [0.485, 0.456, 0.406]
global_std: &global_std [0.229, 0.224, 0.225]
TrainReader:
@@ -88,7 +94,7 @@ TrainReader:
max_degree: 30
scale: [0.75, 1.5]
max_shift: 0.2
- trainsize: *input_size
+ trainsize: [*input_size, *input_size]
hmsize: [*hm_size, *hm_size_2x]
- KeyPointFlip:
flip_prob: 0.5
diff --git a/configs/keypoint/petr/petr_resnet50_16x2_coco.yml b/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
index d6415ad3b8b..a97eff63ab1 100644
--- a/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
+++ b/configs/keypoint/petr/petr_resnet50_16x2_coco.yml
@@ -245,8 +245,7 @@ EvalReader:
TestReader:
sample_transforms:
- Decode: {}
- - EvalAffine:
- size: *trainsize
+ - EvalAffine: {size: 800}
- NormalizeImage:
mean: *global_mean
std: *global_std
diff --git a/ppdet/data/transform/keypoint_operators.py b/ppdet/data/transform/keypoint_operators.py
index 24cf63b8860..fea23d696c2 100644
--- a/ppdet/data/transform/keypoint_operators.py
+++ b/ppdet/data/transform/keypoint_operators.py
@@ -76,7 +76,7 @@ def _flipjoints(self, records, sizelst):
'''
records['gt_joints'] is Sequence in higherhrnet
'''
- if not ('gt_joints' in records and records['gt_joints'].size > 0):
+ if not ('gt_joints' in records and len(records['gt_joints']) > 0):
return records
kpts_lst = records['gt_joints']
@@ -147,7 +147,7 @@ class RandomAffine(object):
max_scale (list[2]): the scale range to apply, transform range is [min, max]
max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
- trainsize (int): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
+ trainsize (list[2]): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
records(dict): the dict contained the image, mask and coords
@@ -161,7 +161,7 @@ def __init__(self,
scale=[0.75, 1.5],
max_shift=0.2,
hmsize=None,
- trainsize=512,
+ trainsize=[512, 512],
scale_type='short',
boldervalue=[114, 114, 114]):
super(RandomAffine, self).__init__()
@@ -304,7 +304,7 @@ def __call__(self, records):
input_size = 2 * center
if self.trainsize != -1:
dsize = self.trainsize
- imgshape = (dsize, dsize)
+ imgshape = (dsize)
else:
dsize = scale
imgshape = (shape.tolist())
@@ -379,6 +379,7 @@ def __call__(self, records):
if 'gt_joints' in records:
del records['gt_joints']
records['image'] = image_resized
+ records['scale_factor'] = self.size / min(h, w)
return records
@@ -1574,14 +1575,13 @@ def __call__(self, results):
dict: Resized results, 'im_shape', 'pad_shape', 'scale_factor', \
'keep_ratio' keys are added into result dict.
"""
-
if 'scale' not in results:
if 'scale_factor' in results:
img_shape = results['image'].shape[:2]
- scale_factor = results['scale_factor']
- assert isinstance(scale_factor, float)
- results['scale'] = tuple(
- [int(x * scale_factor) for x in img_shape][::-1])
+ scale_factor = results['scale_factor'][0]
+ # assert isinstance(scale_factor, float)
+ results['scale'] = [int(x * scale_factor)
+ for x in img_shape][::-1]
else:
self._random_scale(results)
else:
diff --git a/ppdet/utils/visualizer.py b/ppdet/utils/visualizer.py
index 1c8560a7453..f7193306c93 100644
--- a/ppdet/utils/visualizer.py
+++ b/ppdet/utils/visualizer.py
@@ -238,7 +238,7 @@ def draw_pose(image,
'for example: `pip install matplotlib`.')
raise e
- skeletons = np.array([item['keypoints'] for item in results]).reshape((-1, 51))
+ skeletons = np.array([item['keypoints'] for item in results])
kpt_nums = 17
if len(skeletons) > 0:
kpt_nums = int(skeletons.shape[1] / 3)
From 716755d24ace3098917a272ab8699a27de82c8d4 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Thu, 2 Mar 2023 14:06:47 +0800
Subject: [PATCH 028/116] tinypose3d && modelzoo (#7844)
* metro con reverse
tinypose3d fix
readme modelzoo
* fix tinypose3d
---
configs/pose3d/README.md | 15 +++---
configs/pose3d/tinypose3d_human36M.yml | 17 ++++---
ppdet/data/source/pose3d_cmb.py | 6 +--
ppdet/metrics/pose3d_metrics.py | 5 --
.../modeling/architectures/keypoint_hrnet.py | 47 ++++++++++---------
ppdet/modeling/architectures/pose3d_metro.py | 4 +-
6 files changed, 45 insertions(+), 49 deletions(-)
diff --git a/configs/pose3d/README.md b/configs/pose3d/README.md
index b5da08f1cf8..0b9dec7e9c5 100644
--- a/configs/pose3d/README.md
+++ b/configs/pose3d/README.md
@@ -24,12 +24,12 @@
PaddleDetection 中提供了两种3D Pose算法(稀疏关键点),分别是适用于服务器端的大模型Metro3D和移动端的TinyPose3D。其中Metro3D基于[End-to-End Human Pose and Mesh Reconstruction with Transformers](https://arxiv.org/abs/2012.09760)进行了稀疏化改造,TinyPose3D是在TinyPose基础上修改输出3D关键点。
-## 模型推荐(待补充)
+## 模型推荐
-|模型|适用场景|human3.6m精度|模型下载|
-|:--:|:--:|:--:|:--:|
-|Metro3D|服务器端|-|-|
-|TinyPose3D|移动端|-|-|
+|模型|适用场景|human3.6m精度(14关键点)|human3.6m精度(17关键点)|模型下载|
+|:--:|:--:|:--:|:--:|:--:|
+|Metro3D|服务器端|56.014|46.619|[metro3d_24kpts.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/metro3d_24kpts.pdparams)|
+|TinyPose3D|移动端|86.381|71.223|[tinypose3d_human36m.pdparams](https://bj.bcebos.com/v1/paddledet/models/pose3d/tinypose3d_human36M.pdparams)|
注:
1. 训练数据基于 [MeshTransfomer](https://github.com/microsoft/MeshTransformer) 中的训练数据。
@@ -137,13 +137,14 @@ CUDA_VISIBLE_DEVICES=0 python3 tools/infer.py -c configs/pose3d/metro3d_24kpts.y
我们的训练数据提供了大量的低精度自动生成式的数据,用户可以在此数据训练的基础上,标注自己高精度的目标动作数据进行finetune,即可得到相对稳定较好的模型。
- 我们在医疗康复高精度数据上的训练效果展示如下
+ 我们在医疗康复高精度数据上的训练效果展示如下 [高清视频](https://user-images.githubusercontent.com/31800336/218949226-22e6ab25-facb-4cc6-8eca-38d4bfd973e5.mp4)
-

+
+
## 引用
```
diff --git a/configs/pose3d/tinypose3d_human36M.yml b/configs/pose3d/tinypose3d_human36M.yml
index a3ccdbbbd58..05c6656d145 100644
--- a/configs/pose3d/tinypose3d_human36M.yml
+++ b/configs/pose3d/tinypose3d_human36M.yml
@@ -13,13 +13,12 @@ train_width: &train_width 128
trainsize: &trainsize [*train_width, *train_height]
#####model
-architecture: TinyPose3DHRNet
+architecture: TinyPose3DHRHeatmapNet
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/tinypose_128x96.pdparams
-TinyPose3DHRNet:
+TinyPose3DHRHeatmapNet:
backbone: LiteHRNet
post_process: HR3DNetPostProcess
- fc_channel: 1024
num_joints: *num_joints
width: &width 40
loss: Pose3DLoss
@@ -56,17 +55,17 @@ OptimizerBuilder:
#####data
TrainDataset:
!Pose3DDataset
- dataset_dir: Human3.6M
- image_dirs: ["Images"]
- anno_list: ['Human3.6m_train.json']
+ dataset_dir: dataset/traindata/
+ image_dirs: ["human3.6m"]
+ anno_list: ['pose3d/Human3.6m_train.json']
num_joints: *num_joints
test_mode: False
EvalDataset:
!Pose3DDataset
- dataset_dir: Human3.6M
- image_dirs: ["Images"]
- anno_list: ['Human3.6m_valid.json']
+ dataset_dir: dataset/traindata/
+ image_dirs: ["human3.6m"]
+ anno_list: ['pose3d/Human3.6m_valid.json']
num_joints: *num_joints
test_mode: True
diff --git a/ppdet/data/source/pose3d_cmb.py b/ppdet/data/source/pose3d_cmb.py
index 3c465a325d6..06dbdd9e9ab 100644
--- a/ppdet/data/source/pose3d_cmb.py
+++ b/ppdet/data/source/pose3d_cmb.py
@@ -11,9 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-"""
-this code is base on https://github.com/open-mmlab/mmpose
-"""
+
import os
import cv2
import numpy as np
@@ -80,7 +78,7 @@ def get_mask(self, mvm_percent=0.3):
mjm_mask[indices, :] = 0.0
# return mjm_mask
- num_joints = 1
+ num_joints = 10
mvm_mask = np.ones((num_joints, 1)).astype(np.float)
if self.test_mode == False:
num_vertices = num_joints
diff --git a/ppdet/metrics/pose3d_metrics.py b/ppdet/metrics/pose3d_metrics.py
index 32e1deb615b..ea21de90b07 100644
--- a/ppdet/metrics/pose3d_metrics.py
+++ b/ppdet/metrics/pose3d_metrics.py
@@ -137,11 +137,6 @@ def all_gather(data):
class Pose3DEval(object):
- """refer to
- https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
- Copyright (c) Microsoft, under the MIT License.
- """
-
def __init__(self, output_eval, save_prediction_only=False):
super(Pose3DEval, self).__init__()
self.output_eval = output_eval
diff --git a/ppdet/modeling/architectures/keypoint_hrnet.py b/ppdet/modeling/architectures/keypoint_hrnet.py
index 1d93e3af5f5..8d50502e711 100644
--- a/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -46,7 +46,7 @@ def __init__(self,
use_dark=True):
"""
HRNet network, see https://arxiv.org/abs/1902.09212
-
+
Args:
backbone (nn.Layer): backbone instance
post_process (object): `HRNetPostProcess` instance
@@ -132,10 +132,10 @@ def __init__(self, use_dark=True):
def get_max_preds(self, heatmaps):
'''get predictions from score maps
-
+
Args:
heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
-
+
Returns:
preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
@@ -220,12 +220,12 @@ def dark_postprocess(self, hm, coords, kernelsize):
def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
"""the highest heatvalue location with a quarter offset in the
direction from the highest response to the second highest response.
-
+
Args:
heatmaps (numpy.ndarray): The predicted heatmaps
center (numpy.ndarray): The boxes center
scale (numpy.ndarray): The scale factor
-
+
Returns:
preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
@@ -341,10 +341,7 @@ def __init__(
self.deploy = False
self.num_joints = num_joints
- self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
- # for heatmap output
- self.final_conv_new = L.Conv2d(
- width, num_joints * 32, 1, 1, 0, bias=True)
+ self.final_conv = L.Conv2d(width, num_joints * 32, 1, 1, 0, bias=True)
@classmethod
def from_config(cls, cfg, *args, **kwargs):
@@ -356,20 +353,19 @@ def from_config(cls, cfg, *args, **kwargs):
def _forward(self):
feats = self.backbone(self.inputs) # feats:[[batch_size, 40, 32, 24]]
- hrnet_outputs = self.final_conv_new(feats[0])
+ hrnet_outputs = self.final_conv(feats[0])
res = soft_argmax(hrnet_outputs, self.num_joints)
-
- if self.training:
- return self.loss(res, self.inputs)
- else: # export model need
- return res
+ return res
def get_loss(self):
- return self._forward()
+ pose3d = self._forward()
+ loss = self.loss(pose3d, None, self.inputs)
+ outputs = {'loss': loss}
+ return outputs
def get_pred(self):
res_lst = self._forward()
- outputs = {'keypoint': res_lst}
+ outputs = {'pose3d': res_lst}
return outputs
def flip_back(self, output_flipped, matched_parts):
@@ -427,16 +423,23 @@ def from_config(cls, cfg, *args, **kwargs):
return {'backbone': backbone, }
def _forward(self):
- feats = self.backbone(self.inputs) # feats:[[batch_size, 40, 32, 24]]
+ '''
+ self.inputs is a dict
+ '''
+ feats = self.backbone(
+ self.inputs) # feats:[[batch_size, 40, width/4, height/4]]
+
+ hrnet_outputs = self.final_conv(
+ feats[0]) # hrnet_outputs: [batch_size, num_joints*32,32,32]
- hrnet_outputs = self.final_conv(feats[0])
flatten_res = self.flatten(
- hrnet_outputs) # [batch_size, 24, (height/4)*(width/4)]
+ hrnet_outputs) # [batch_size,num_joints*32,32*32]
+
res = self.fc1(flatten_res)
res = self.act1(res)
res = self.fc2(res)
res = self.act2(res)
- res = self.fc3(res) # [batch_size, 24, 3]
+ res = self.fc3(res)
if self.training:
return self.loss(res, self.inputs)
@@ -448,7 +451,7 @@ def get_loss(self):
def get_pred(self):
res_lst = self._forward()
- outputs = {'keypoint': res_lst}
+ outputs = {'pose3d': res_lst}
return outputs
def flip_back(self, output_flipped, matched_parts):
diff --git a/ppdet/modeling/architectures/pose3d_metro.py b/ppdet/modeling/architectures/pose3d_metro.py
index b56280981ef..4275154d137 100644
--- a/ppdet/modeling/architectures/pose3d_metro.py
+++ b/ppdet/modeling/architectures/pose3d_metro.py
@@ -53,7 +53,7 @@ def __init__(
trans_encoder='',
loss='Pose3DLoss', ):
"""
- METRO network, see https://arxiv.org/abs/
+ Modified from METRO network, see https://arxiv.org/abs/2012.09760
Args:
backbone (nn.Layer): backbone instance
@@ -65,7 +65,7 @@ def __init__(
self.deploy = False
self.trans_encoder = trans_encoder
- self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 1, 1)
+ self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 10, 1)
self.cam_param_fc = paddle.nn.Linear(3, 2)
@classmethod
From 203ad52b138e5dd8bfd505cf6cd3a13ce33608aa Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Thu, 2 Mar 2023 16:36:39 +0800
Subject: [PATCH 029/116] Support FocalNet backbone and dino_focalnet (#7856)
* add raw codes
* add pretrained weights
* fix reader
* fix
* fix focalnet code comments and format
* fix focalnet code comments and format
* fix focalnet format
---
configs/dino/_base_/dino_focalnet.yml | 45 ++
ppdet/modeling/architectures/detr.py | 2 +-
ppdet/modeling/backbones/__init__.py | 3 +-
ppdet/modeling/backbones/focalnet.py | 720 ++++++++++++++++++++++++++
4 files changed, 768 insertions(+), 2 deletions(-)
create mode 100644 configs/dino/_base_/dino_focalnet.yml
create mode 100644 ppdet/modeling/backbones/focalnet.py
diff --git a/configs/dino/_base_/dino_focalnet.yml b/configs/dino/_base_/dino_focalnet.yml
new file mode 100644
index 00000000000..7313dcf4e18
--- /dev/null
+++ b/configs/dino/_base_/dino_focalnet.yml
@@ -0,0 +1,45 @@
+architecture: DETR
+# pretrain_weights: # rewrite in FocalNet.pretrained in ppdet/modeling/backbones/focalnet.py
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+DETR:
+ backbone: FocalNet
+ transformer: DINOTransformer
+ detr_head: DINOHead
+ post_process: DETRBBoxPostProcess
+
+FocalNet:
+ arch: 'focalnet_L_384_22k_fl4'
+ out_indices: [1, 2, 3]
+ pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
+
+DINOTransformer:
+ num_queries: 900
+ position_embed_type: sine
+ num_levels: 4
+ nhead: 8
+ num_encoder_layers: 6
+ num_decoder_layers: 6
+ dim_feedforward: 2048
+ dropout: 0.0
+ activation: relu
+ pe_temperature: 20
+ pe_offset: 0.0
+ num_denoising: 100
+ label_noise_ratio: 0.5
+ box_noise_scale: 1.0
+ learnt_init_query: True
+
+DINOHead:
+ loss:
+ name: DINOLoss
+ loss_coeff: {class: 1, bbox: 5, giou: 2}
+ aux_loss: True
+ matcher:
+ name: HungarianMatcher
+ matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRBBoxPostProcess:
+ num_top_queries: 300
diff --git a/ppdet/modeling/architectures/detr.py b/ppdet/modeling/architectures/detr.py
index 953c8f627a9..419a44377b8 100644
--- a/ppdet/modeling/architectures/detr.py
+++ b/ppdet/modeling/architectures/detr.py
@@ -69,7 +69,7 @@ def _forward(self):
body_feats = self.backbone(self.inputs)
# Transformer
- pad_mask = self.inputs['pad_mask'] if self.training else None
+ pad_mask = self.inputs.get('pad_mask', None)
out_transformer = self.transformer(body_feats, pad_mask, self.inputs)
# DETR Head
diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py
index fcca7159fc7..388ba04583b 100644
--- a/ppdet/modeling/backbones/__init__.py
+++ b/ppdet/modeling/backbones/__init__.py
@@ -35,6 +35,7 @@
from . import vision_transformer
from . import mobileone
from . import trans_encoder
+from . import focalnet
from .vgg import *
from .resnet import *
@@ -57,6 +58,6 @@
from .csp_darknet import *
from .convnext import *
from .vision_transformer import *
-from .vision_transformer import *
from .mobileone import *
from .trans_encoder import *
+from .focalnet import *
diff --git a/ppdet/modeling/backbones/focalnet.py b/ppdet/modeling/backbones/focalnet.py
new file mode 100644
index 00000000000..54c2877623f
--- /dev/null
+++ b/ppdet/modeling/backbones/focalnet.py
@@ -0,0 +1,720 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is based on https://github.com/microsoft/FocalNet/blob/main/classification/focalnet.py
+"""
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from ppdet.modeling.shape_spec import ShapeSpec
+from ppdet.core.workspace import register, serializable
+from .transformer_utils import DropPath, Identity
+from .transformer_utils import add_parameter, to_2tuple
+from .transformer_utils import ones_, zeros_, trunc_normal_
+from .swin_transformer import Mlp
+
+__all__ = ['FocalNet']
+
+MODEL_cfg = {
+ 'focalnet_T_224_1k_srf': dict(
+ embed_dim=96,
+ depths=[2, 2, 6, 2],
+ focal_levels=[2, 2, 2, 2],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.2,
+ use_conv_embed=False,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ use_layerscale=False,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_tiny_srf_pretrained.pdparams',
+ ),
+ 'focalnet_S_224_1k_srf': dict(
+ embed_dim=96,
+ depths=[2, 2, 18, 2],
+ focal_levels=[2, 2, 2, 2],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.3,
+ use_conv_embed=False,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ use_layerscale=False,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_small_srf_pretrained.pdparams',
+ ),
+ 'focalnet_B_224_1k_srf': dict(
+ embed_dim=128,
+ depths=[2, 2, 18, 2],
+ focal_levels=[2, 2, 2, 2],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.5,
+ use_conv_embed=False,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ use_layerscale=False,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_base_srf_pretrained.pdparams',
+ ),
+ 'focalnet_T_224_1k_lrf': dict(
+ embed_dim=96,
+ depths=[2, 2, 6, 2],
+ focal_levels=[3, 3, 3, 3],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.2,
+ use_conv_embed=False,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ use_layerscale=False,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_tiny_lrf_pretrained.pdparams',
+ ),
+ 'focalnet_S_224_1k_lrf': dict(
+ embed_dim=96,
+ depths=[2, 2, 18, 2],
+ focal_levels=[3, 3, 3, 3],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.3,
+ use_conv_embed=False,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ use_layerscale=False,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_small_lrf_pretrained.pdparams',
+ ),
+ 'focalnet_B_224_1k_lrf': dict(
+ embed_dim=128,
+ depths=[2, 2, 18, 2],
+ focal_levels=[3, 3, 3, 3],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.5,
+ use_conv_embed=False,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ use_layerscale=False,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_base_lrf_pretrained.pdparams',
+ ),
+ 'focalnet_L_384_22k_fl3': dict(
+ embed_dim=192,
+ depths=[2, 2, 18, 2],
+ focal_levels=[3, 3, 3, 3],
+ focal_windows=[5, 5, 5, 5],
+ drop_path_rate=0.5,
+ use_conv_embed=True,
+ use_postln=True,
+ use_postln_in_modulation=False,
+ use_layerscale=True,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_pretrained.pdparams',
+ ),
+ 'focalnet_L_384_22k_fl4': dict(
+ embed_dim=192,
+ depths=[2, 2, 18, 2],
+ focal_levels=[4, 4, 4, 4],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.5,
+ use_conv_embed=True,
+ use_postln=True,
+ use_postln_in_modulation=False,
+ use_layerscale=True,
+ normalize_modulator=True, #
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams',
+ ),
+ 'focalnet_XL_384_22k_fl3': dict(
+ embed_dim=256,
+ depths=[2, 2, 18, 2],
+ focal_levels=[3, 3, 3, 3],
+ focal_windows=[5, 5, 5, 5],
+ drop_path_rate=0.5,
+ use_conv_embed=True,
+ use_postln=True,
+ use_postln_in_modulation=False,
+ use_layerscale=True,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_xlarge_lrf_384_pretrained.pdparams',
+ ),
+ 'focalnet_XL_384_22k_fl4': dict(
+ embed_dim=256,
+ depths=[2, 2, 18, 2],
+ focal_levels=[4, 4, 4, 4],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.5,
+ use_conv_embed=True,
+ use_postln=True,
+ use_postln_in_modulation=False,
+ use_layerscale=True,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_xlarge_lrf_384_fl4_pretrained.pdparams',
+ ),
+ 'focalnet_H_224_22k_fl3': dict(
+ embed_dim=352,
+ depths=[2, 2, 18, 2],
+ focal_levels=[3, 3, 3, 3],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.5,
+ use_conv_embed=True,
+ use_postln=True,
+ use_postln_in_modulation=True, #
+ use_layerscale=True,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_huge_lrf_224_pretrained.pdparams',
+ ),
+ 'focalnet_H_224_22k_fl4': dict(
+ embed_dim=352,
+ depths=[2, 2, 18, 2],
+ focal_levels=[4, 4, 4, 4],
+ focal_windows=[3, 3, 3, 3],
+ drop_path_rate=0.5,
+ use_conv_embed=True,
+ use_postln=True,
+ use_postln_in_modulation=True, #
+ use_layerscale=True,
+ normalize_modulator=False,
+ pretrained='https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_huge_lrf_224_fl4_pretrained.pdparams',
+ ),
+}
+
+
+class FocalModulation(nn.Layer):
+ """
+ Args:
+ dim (int): Number of input channels.
+ proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+ focal_level (int): Number of focal levels
+ focal_window (int): Focal window size at focal level 1
+ focal_factor (int): Step to increase the focal window. Default: 2
+ use_postln_in_modulation (bool): Whether use post-modulation layernorm
+ normalize_modulator (bool): Whether use normalize in modulator
+ """
+
+ def __init__(self,
+ dim,
+ proj_drop=0.,
+ focal_level=2,
+ focal_window=7,
+ focal_factor=2,
+ use_postln_in_modulation=False,
+ normalize_modulator=False):
+ super().__init__()
+ self.dim = dim
+
+ # specific args for focalv3
+ self.focal_level = focal_level
+ self.focal_window = focal_window
+ self.focal_factor = focal_factor
+ self.use_postln_in_modulation = use_postln_in_modulation
+ self.normalize_modulator = normalize_modulator
+
+ self.f = nn.Linear(
+ dim, 2 * dim + (self.focal_level + 1), bias_attr=True)
+ self.h = nn.Conv2D(
+ dim,
+ dim,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ groups=1,
+ bias_attr=True)
+
+ self.act = nn.GELU()
+ self.proj = nn.Linear(dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+ self.focal_layers = nn.LayerList()
+
+ if self.use_postln_in_modulation:
+ self.ln = nn.LayerNorm(dim)
+
+ for k in range(self.focal_level):
+ kernel_size = self.focal_factor * k + self.focal_window
+ self.focal_layers.append(
+ nn.Sequential(
+ nn.Conv2D(
+ dim,
+ dim,
+ kernel_size=kernel_size,
+ stride=1,
+ groups=dim,
+ padding=kernel_size // 2,
+ bias_attr=False),
+ nn.GELU()))
+
+ def forward(self, x):
+ """ Forward function.
+ Args:
+ x: input features with shape of (B, H, W, C)
+ """
+ _, _, _, C = x.shape
+ x = self.f(x)
+ x = x.transpose([0, 3, 1, 2])
+ q, ctx, gates = paddle.split(x, (C, C, self.focal_level + 1), 1)
+
+ ctx_all = 0
+ for l in range(self.focal_level):
+ ctx = self.focal_layers[l](ctx)
+ ctx_all = ctx_all + ctx * gates[:, l:l + 1]
+ ctx_global = self.act(ctx.mean(2, keepdim=True).mean(3, keepdim=True))
+ ctx_all = ctx_all + ctx_global * gates[:, self.focal_level:]
+ if self.normalize_modulator:
+ ctx_all = ctx_all / (self.focal_level + 1)
+
+ x_out = q * self.h(ctx_all)
+ x_out = x_out.transpose([0, 2, 3, 1])
+ if self.use_postln_in_modulation:
+ x_out = self.ln(x_out)
+ x_out = self.proj(x_out)
+ x_out = self.proj_drop(x_out)
+ return x_out
+
+
+class FocalModulationBlock(nn.Layer):
+ """ Focal Modulation Block.
+ Args:
+ dim (int): Number of input channels.
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+ drop (float, optional): Dropout rate. Default: 0.0
+ drop_path (float, optional): Stochastic depth rate. Default: 0.0
+ act_layer (nn.Layer, optional): Activation layer. Default: nn.GELU
+ norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm
+ focal_level (int): number of focal levels
+ focal_window (int): focal kernel size at level 1
+ use_postln (bool): Whether use layernorm after modulation. Default: False.
+ use_postln_in_modulation (bool): Whether use post-modulation layernorm. Default: False.
+ normalize_modulator (bool): Whether use normalize in modulator
+ use_layerscale (bool): Whether use layerscale proposed in CaiT. Default: False
+ layerscale_value (float): Value for layer scale. Default: 1e-4
+ """
+
+ def __init__(self,
+ dim,
+ mlp_ratio=4.,
+ drop=0.,
+ drop_path=0.,
+ act_layer=nn.GELU,
+ norm_layer=nn.LayerNorm,
+ focal_level=2,
+ focal_window=9,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ normalize_modulator=False,
+ use_layerscale=False,
+ layerscale_value=1e-4):
+ super().__init__()
+ self.dim = dim
+ self.mlp_ratio = mlp_ratio
+ self.focal_window = focal_window
+ self.focal_level = focal_level
+ self.use_postln = use_postln
+ self.use_layerscale = use_layerscale
+
+ self.norm1 = norm_layer(dim)
+ self.modulation = FocalModulation(
+ dim,
+ proj_drop=drop,
+ focal_level=self.focal_level,
+ focal_window=self.focal_window,
+ use_postln_in_modulation=use_postln_in_modulation,
+ normalize_modulator=normalize_modulator)
+
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+ self.norm2 = norm_layer(dim)
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(in_features=dim,
+ hidden_features=mlp_hidden_dim,
+ act_layer=act_layer,
+ drop=drop)
+ self.H = None
+ self.W = None
+
+ self.gamma_1 = 1.0
+ self.gamma_2 = 1.0
+ if self.use_layerscale:
+ self.gamma_1 = add_parameter(self,
+ layerscale_value * paddle.ones([dim]))
+ self.gamma_2 = add_parameter(self,
+ layerscale_value * paddle.ones([dim]))
+
+ def forward(self, x):
+ """
+ Args:
+ x: Input feature, tensor size (B, H*W, C).
+ """
+ B, L, C = x.shape
+ H, W = self.H, self.W
+ assert L == H * W, "input feature has wrong size"
+
+ shortcut = x
+ if not self.use_postln:
+ x = self.norm1(x)
+ x = x.reshape([-1, H, W, C])
+
+ # FM
+ x = self.modulation(x).reshape([-1, H * W, C])
+ if self.use_postln:
+ x = self.norm1(x)
+
+ # FFN
+ x = shortcut + self.drop_path(self.gamma_1 * x)
+
+ if self.use_postln:
+ x = x + self.drop_path(self.gamma_2 * self.norm2(self.mlp(x)))
+ else:
+ x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+ return x
+
+
+class BasicLayer(nn.Layer):
+ """ A basic focal modulation layer for one stage.
+ Args:
+ dim (int): Number of feature channels
+ depth (int): Depths of this stage.
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
+ drop (float, optional): Dropout rate. Default: 0.0
+ drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
+ norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm
+ downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None
+ focal_level (int): Number of focal levels
+ focal_window (int): Focal window size at focal level 1
+ use_conv_embed (bool): Whether use overlapped convolution for patch embedding
+ use_layerscale (bool): Whether use layerscale proposed in CaiT. Default: False
+ layerscale_value (float): Value of layerscale
+ use_postln (bool): Whether use layernorm after modulation. Default: False.
+ use_postln_in_modulation (bool): Whether use post-modulation layernorm. Default: False.
+ normalize_modulator (bool): Whether use normalize in modulator
+ use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+ """
+
+ def __init__(self,
+ dim,
+ depth,
+ mlp_ratio=4.,
+ drop=0.,
+ drop_path=0.,
+ norm_layer=nn.LayerNorm,
+ downsample=None,
+ focal_level=2,
+ focal_window=9,
+ use_conv_embed=False,
+ use_layerscale=False,
+ layerscale_value=1e-4,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ normalize_modulator=False,
+ use_checkpoint=False):
+ super().__init__()
+ self.depth = depth
+ self.use_checkpoint = use_checkpoint
+
+ # build blocks
+ self.blocks = nn.LayerList([
+ FocalModulationBlock(
+ dim=dim,
+ mlp_ratio=mlp_ratio,
+ drop=drop,
+ drop_path=drop_path[i]
+ if isinstance(drop_path, np.ndarray) else drop_path,
+ act_layer=nn.GELU,
+ norm_layer=norm_layer,
+ focal_level=focal_level,
+ focal_window=focal_window,
+ use_postln=use_postln,
+ use_postln_in_modulation=use_postln_in_modulation,
+ normalize_modulator=normalize_modulator,
+ use_layerscale=use_layerscale,
+ layerscale_value=layerscale_value) for i in range(depth)
+ ])
+
+ # patch merging layer
+ if downsample is not None:
+ self.downsample = downsample(
+ patch_size=2,
+ in_chans=dim,
+ embed_dim=2 * dim,
+ use_conv_embed=use_conv_embed,
+ norm_layer=norm_layer,
+ is_stem=False)
+ else:
+ self.downsample = None
+
+ def forward(self, x, H, W):
+ """
+ Args:
+ x: Input feature, tensor size (B, H*W, C).
+ """
+ for blk in self.blocks:
+ blk.H, blk.W = H, W
+ x = blk(x)
+
+ if self.downsample is not None:
+ x_reshaped = x.transpose([0, 2, 1]).reshape(
+ [x.shape[0], x.shape[-1], H, W])
+ x_down = self.downsample(x_reshaped)
+ x_down = x_down.flatten(2).transpose([0, 2, 1])
+ Wh, Ww = (H + 1) // 2, (W + 1) // 2
+ return x, H, W, x_down, Wh, Ww
+ else:
+ return x, H, W, x, H, W
+
+
+class PatchEmbed(nn.Layer):
+ """ Image to Patch Embedding
+ Args:
+ patch_size (int): Patch token size. Default: 4.
+ in_chans (int): Number of input image channels. Default: 3.
+ embed_dim (int): Number of linear projection output channels. Default: 96.
+ norm_layer (nn.Layer, optional): Normalization layer. Default: None
+ use_conv_embed (bool): Whether use overlapped convolution for patch embedding. Default: False
+ is_stem (bool): Is the stem block or not.
+ """
+
+ def __init__(self,
+ patch_size=4,
+ in_chans=3,
+ embed_dim=96,
+ norm_layer=None,
+ use_conv_embed=False,
+ is_stem=False):
+ super().__init__()
+ patch_size = to_2tuple(patch_size)
+ self.patch_size = patch_size
+
+ self.in_chans = in_chans
+ self.embed_dim = embed_dim
+
+ if use_conv_embed:
+ # if we choose to use conv embedding, then we treat the stem and non-stem differently
+ if is_stem:
+ kernel_size = 7
+ padding = 2
+ stride = 4
+ else:
+ kernel_size = 3
+ padding = 1
+ stride = 2
+ self.proj = nn.Conv2D(
+ in_chans,
+ embed_dim,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding)
+ else:
+ self.proj = nn.Conv2D(
+ in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+ if norm_layer is not None:
+ self.norm = norm_layer(embed_dim)
+ else:
+ self.norm = None
+
+ def forward(self, x):
+ _, _, H, W = x.shape
+
+ if W % self.patch_size[1] != 0:
+ # for 3D tensor: [pad_left, pad_right]
+ # for 4D tensor: [pad_left, pad_right, pad_top, pad_bottom]
+ x = F.pad(x, [0, self.patch_size[1] - W % self.patch_size[1], 0, 0])
+ W += W % self.patch_size[1]
+ if H % self.patch_size[0] != 0:
+ x = F.pad(x, [0, 0, 0, self.patch_size[0] - H % self.patch_size[0]])
+ H += H % self.patch_size[0]
+
+ x = self.proj(x)
+ if self.norm is not None:
+ _, _, Wh, Ww = x.shape
+ x = x.flatten(2).transpose([0, 2, 1])
+ x = self.norm(x)
+ x = x.transpose([0, 2, 1]).reshape([-1, self.embed_dim, Wh, Ww])
+
+ return x
+
+
+@register
+@serializable
+class FocalNet(nn.Layer):
+ """ FocalNet backbone
+ Args:
+ arch (str): Architecture of FocalNet
+ out_indices (Sequence[int]): Output from which stages.
+ frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+ -1 means not freezing any parameters.
+ patch_size (int | tuple(int)): Patch size. Default: 4.
+ in_chans (int): Number of input image channels. Default: 3.
+ embed_dim (int): Number of linear projection output channels. Default: 96.
+ depths (tuple[int]): Depths of each FocalNet Transformer stage.
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
+ drop_rate (float): Dropout rate.
+ drop_path_rate (float): Stochastic depth rate. Default: 0.2.
+ norm_layer (nn.Layer): Normalization layer. Default: nn.LayerNorm.
+ patch_norm (bool): If True, add normalization after patch embedding. Default: True.
+ focal_levels (Sequence[int]): Number of focal levels at four stages
+ focal_windows (Sequence[int]): Focal window sizes at first focal level at four stages
+ use_conv_embed (bool): Whether use overlapped convolution for patch embedding
+ use_layerscale (bool): Whether use layerscale proposed in CaiT. Default: False
+ layerscale_value (float): Value of layerscale
+ use_postln (bool): Whether use layernorm after modulation. Default: False.
+ use_postln_in_modulation (bool): Whether use post-modulation layernorm. Default: False.
+ normalize_modulator (bool): Whether use normalize in modulator
+ use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+ """
+
+ def __init__(
+ self,
+ arch='focalnet_T_224_1k_srf',
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=-1,
+ patch_size=4,
+ in_chans=3,
+ embed_dim=96,
+ depths=[2, 2, 6, 2],
+ mlp_ratio=4.,
+ drop_rate=0.,
+ drop_path_rate=0.2, # 0.5 better for large+ models
+ norm_layer=nn.LayerNorm,
+ patch_norm=True,
+ focal_levels=[2, 2, 2, 2],
+ focal_windows=[3, 3, 3, 3],
+ use_conv_embed=False,
+ use_layerscale=False,
+ layerscale_value=1e-4,
+ use_postln=False,
+ use_postln_in_modulation=False,
+ normalize_modulator=False,
+ use_checkpoint=False,
+ pretrained=None):
+ super(FocalNet, self).__init__()
+ assert arch in MODEL_cfg.keys(), "Unsupported arch: {}".format(arch)
+
+ embed_dim = MODEL_cfg[arch]['embed_dim']
+ depths = MODEL_cfg[arch]['depths']
+ drop_path_rate = MODEL_cfg[arch]['drop_path_rate']
+ focal_levels = MODEL_cfg[arch]['focal_levels']
+ focal_windows = MODEL_cfg[arch]['focal_windows']
+ use_conv_embed = MODEL_cfg[arch]['use_conv_embed']
+ use_layerscale = MODEL_cfg[arch]['use_layerscale']
+ use_postln = MODEL_cfg[arch]['use_postln']
+ use_postln_in_modulation = MODEL_cfg[arch]['use_postln_in_modulation']
+ normalize_modulator = MODEL_cfg[arch]['normalize_modulator']
+ if pretrained is None:
+ pretrained = MODEL_cfg[arch]['pretrained']
+
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.num_layers = len(depths)
+ self.patch_norm = patch_norm
+
+ # split image into non-overlapping patches
+ self.patch_embed = PatchEmbed(
+ patch_size=patch_size,
+ in_chans=in_chans,
+ embed_dim=embed_dim,
+ norm_layer=norm_layer if self.patch_norm else None,
+ use_conv_embed=use_conv_embed,
+ is_stem=True)
+
+ self.pos_drop = nn.Dropout(p=drop_rate)
+
+ # stochastic depth decay rule
+ dpr = np.linspace(0, drop_path_rate, sum(depths))
+
+ # build layers
+ self.layers = nn.LayerList()
+ for i_layer in range(self.num_layers):
+ layer = BasicLayer(
+ dim=int(embed_dim * 2**i_layer),
+ depth=depths[i_layer],
+ mlp_ratio=mlp_ratio,
+ drop=drop_rate,
+ drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+ norm_layer=norm_layer,
+ downsample=PatchEmbed
+ if (i_layer < self.num_layers - 1) else None,
+ focal_level=focal_levels[i_layer],
+ focal_window=focal_windows[i_layer],
+ use_conv_embed=use_conv_embed,
+ use_layerscale=use_layerscale,
+ layerscale_value=layerscale_value,
+ use_postln=use_postln,
+ use_postln_in_modulation=use_postln_in_modulation,
+ normalize_modulator=normalize_modulator,
+ use_checkpoint=use_checkpoint)
+ self.layers.append(layer)
+
+ num_features = [int(embed_dim * 2**i) for i in range(self.num_layers)]
+ self.num_features = num_features
+
+ # add a norm layer for each output
+ for i_layer in out_indices:
+ layer = norm_layer(num_features[i_layer])
+ layer_name = f'norm{i_layer}'
+ self.add_sublayer(layer_name, layer)
+
+ self.apply(self._init_weights)
+ self._freeze_stages()
+ if pretrained:
+ if 'http' in pretrained: #URL
+ path = paddle.utils.download.get_weights_path_from_url(
+ pretrained)
+ else: #model in local path
+ path = pretrained
+ self.set_state_dict(paddle.load(path))
+
+ def _freeze_stages(self):
+ if self.frozen_stages >= 0:
+ self.patch_embed.eval()
+ for param in self.patch_embed.parameters():
+ param.stop_gradient = True
+
+ if self.frozen_stages >= 2:
+ self.pos_drop.eval()
+ for i in range(0, self.frozen_stages - 1):
+ m = self.layers[i]
+ m.eval()
+ for param in m.parameters():
+ param.stop_gradient = True
+
+ def _init_weights(self, m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ zeros_(m.bias)
+ elif isinstance(m, nn.LayerNorm):
+ zeros_(m.bias)
+ ones_(m.weight)
+
+ def forward(self, x):
+ x = self.patch_embed(x['image'])
+ B, _, Wh, Ww = x.shape
+ x = x.flatten(2).transpose([0, 2, 1])
+ x = self.pos_drop(x)
+ outs = []
+ for i in range(self.num_layers):
+ layer = self.layers[i]
+ x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww)
+ if i in self.out_indices:
+ norm_layer = getattr(self, f'norm{i}')
+ x_out = norm_layer(x_out)
+ out = x_out.reshape([-1, H, W, self.num_features[i]]).transpose(
+ (0, 3, 1, 2))
+ outs.append(out)
+
+ return outs
+
+ @property
+ def out_shape(self):
+ out_strides = [4, 8, 16, 32]
+ return [
+ ShapeSpec(
+ channels=self.num_features[i], stride=out_strides[i])
+ for i in self.out_indices
+ ]
From bb1ba0334df22a45208471439f74606c31681168 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Thu, 2 Mar 2023 16:51:45 +0800
Subject: [PATCH 030/116] To static (#7851)
* [TIPC] fcos add static train
* tinypose_128x96 add static training
* [TIPC] tinypose add static training
* disable maskrcnn static training
---
ppdet/engine/export_utils.py | 60 +++++++++++++++++++
ppdet/modeling/losses/fcos_loss.py | 18 +++---
...cos_r50_fpn_1x_coco_train_infer_python.txt | 4 +-
.../tinypose_128x96_train_infer_python.txt | 4 +-
4 files changed, 75 insertions(+), 11 deletions(-)
diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py
index d7d2e883d2d..800c1faf498 100644
--- a/ppdet/engine/export_utils.py
+++ b/ppdet/engine/export_utils.py
@@ -82,6 +82,66 @@
'target2': paddle.static.InputSpec(
name='target2', shape=[-1, 3, 86, -1, -1], dtype='float32'),
}],
+ 'tinypose_128x96': [{
+ 'center': paddle.static.InputSpec(
+ name='center', shape=[-1, 2], dtype='float32'),
+ 'scale': paddle.static.InputSpec(
+ name='scale', shape=[-1, 2], dtype='float32'),
+ 'im_id': paddle.static.InputSpec(
+ name='im_id', shape=[-1, 1], dtype='float32'),
+ 'image': paddle.static.InputSpec(
+ name='image', shape=[-1, 3, 128, 96], dtype='float32'),
+ 'score': paddle.static.InputSpec(
+ name='score', shape=[-1], dtype='float32'),
+ 'rotate': paddle.static.InputSpec(
+ name='rotate', shape=[-1], dtype='float32'),
+ 'target': paddle.static.InputSpec(
+ name='target', shape=[-1, 17, 32, 24], dtype='float32'),
+ 'target_weight': paddle.static.InputSpec(
+ name='target_weight', shape=[-1, 17, 1], dtype='float32'),
+ }],
+ 'fcos_r50_fpn_1x_coco': [{
+ 'im_id': paddle.static.InputSpec(
+ name='im_id', shape=[-1, 1], dtype='float32'),
+ 'curr_iter': paddle.static.InputSpec(
+ name='curr_iter', shape=[-1], dtype='float32'),
+ 'image': paddle.static.InputSpec(
+ name='image', shape=[-1, 3, -1, -1], dtype='float32'),
+ 'im_shape': paddle.static.InputSpec(
+ name='im_shape', shape=[-1, 2], dtype='float32'),
+ 'scale_factor': paddle.static.InputSpec(
+ name='scale_factor', shape=[-1, 2], dtype='float32'),
+ 'reg_target0': paddle.static.InputSpec(
+ name='reg_target0', shape=[-1, 160, 160, 4], dtype='float32'),
+ 'labels0': paddle.static.InputSpec(
+ name='labels0', shape=[-1, 160, 160, 1], dtype='int32'),
+ 'centerness0': paddle.static.InputSpec(
+ name='centerness0', shape=[-1, 160, 160, 1], dtype='float32'),
+ 'reg_target1': paddle.static.InputSpec(
+ name='reg_target1', shape=[-1, 80, 80, 4], dtype='float32'),
+ 'labels1': paddle.static.InputSpec(
+ name='labels1', shape=[-1, 80, 80, 1], dtype='int32'),
+ 'centerness1': paddle.static.InputSpec(
+ name='centerness1', shape=[-1, 80, 80, 1], dtype='float32'),
+ 'reg_target2': paddle.static.InputSpec(
+ name='reg_target2', shape=[-1, 40, 40, 4], dtype='float32'),
+ 'labels2': paddle.static.InputSpec(
+ name='labels2', shape=[-1, 40, 40, 1], dtype='int32'),
+ 'centerness2': paddle.static.InputSpec(
+ name='centerness2', shape=[-1, 40, 40, 1], dtype='float32'),
+ 'reg_target3': paddle.static.InputSpec(
+ name='reg_target3', shape=[-1, 20, 20, 4], dtype='float32'),
+ 'labels3': paddle.static.InputSpec(
+ name='labels3', shape=[-1, 20, 20, 1], dtype='int32'),
+ 'centerness3': paddle.static.InputSpec(
+ name='centerness3', shape=[-1, 20, 20, 1], dtype='float32'),
+ 'reg_target4': paddle.static.InputSpec(
+ name='reg_target4', shape=[-1, 10, 10, 4], dtype='float32'),
+ 'labels4': paddle.static.InputSpec(
+ name='labels4', shape=[-1, 10, 10, 1], dtype='int32'),
+ 'centerness4': paddle.static.InputSpec(
+ name='centerness4', shape=[-1, 10, 10, 1], dtype='float32'),
+ }],
}
diff --git a/ppdet/modeling/losses/fcos_loss.py b/ppdet/modeling/losses/fcos_loss.py
index 6ff52bc2a59..b3eac7b4ecb 100644
--- a/ppdet/modeling/losses/fcos_loss.py
+++ b/ppdet/modeling/losses/fcos_loss.py
@@ -69,12 +69,12 @@ def __init__(self,
self.reg_weights = reg_weights
self.quality = quality
- def __iou_loss(self,
- pred,
- targets,
- positive_mask,
- weights=None,
- return_iou=False):
+ def _iou_loss(self,
+ pred,
+ targets,
+ positive_mask,
+ weights=None,
+ return_iou=False):
"""
Calculate the loss for location prediction
Args:
@@ -216,7 +216,7 @@ def forward(self, cls_logits, bboxes_reg, centerness, tag_labels,
# 2. bboxes_reg: giou_loss
mask_positive_float = paddle.squeeze(mask_positive_float, axis=-1)
tag_center_flatten = paddle.squeeze(tag_center_flatten, axis=-1)
- reg_loss = self.__iou_loss(
+ reg_loss = self._iou_loss(
bboxes_reg_flatten,
tag_bboxes_flatten,
mask_positive_float,
@@ -233,7 +233,7 @@ def forward(self, cls_logits, bboxes_reg, centerness, tag_labels,
# 2. bboxes_reg: giou_loss
mask_positive_float = paddle.squeeze(mask_positive_float, axis=-1)
tag_center_flatten = paddle.squeeze(tag_center_flatten, axis=-1)
- reg_loss = self.__iou_loss(
+ reg_loss = self._iou_loss(
bboxes_reg_flatten,
tag_bboxes_flatten,
mask_positive_float,
@@ -243,7 +243,7 @@ def forward(self, cls_logits, bboxes_reg, centerness, tag_labels,
# 3. centerness: sigmoid_cross_entropy_with_logits_loss
centerness_flatten = paddle.squeeze(centerness_flatten, axis=-1)
- gt_ious = self.__iou_loss(
+ gt_ious = self._iou_loss(
bboxes_reg_flatten,
tag_bboxes_flatten,
mask_positive_float,
diff --git a/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt b/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt
index 42426964832..f95e1a04032 100644
--- a/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt
@@ -57,4 +57,6 @@ repeat:3
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x800x1344_2.npy
\ No newline at end of file
+numpy_infer_input:3x800x1344_2.npy
+===========================to_static_train_benchmark_params===========================
+to_static_train:--to_static
\ No newline at end of file
diff --git a/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt b/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt
index 3040ed53757..7a61216a596 100644
--- a/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt
+++ b/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt
@@ -57,4 +57,6 @@ repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-random_infer_input:[{float32,[3,128,96]}]
\ No newline at end of file
+random_infer_input:[{float32,[3,128,96]}]
+===========================to_static_train_benchmark_params===========================
+to_static_train:--to_static
\ No newline at end of file
From b352ef88a8d9175504d7407fafa169eb0fbf28e7 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Fri, 3 Mar 2023 10:15:47 +0800
Subject: [PATCH 031/116] add picodet ppyoloe_crn_s_300e_coco static training
(#7859)
---
ppdet/engine/export_utils.py | 40 +++++++++++++++++++
ppdet/modeling/assigners/atss_assigner.py | 7 ++--
.../assigners/task_aligned_assigner.py | 2 +-
.../assigners/task_aligned_assigner_cr.py | 2 +-
ppdet/modeling/heads/pico_head.py | 4 +-
ppdet/modeling/heads/ppyoloe_contrast_head.py | 6 +--
ppdet/modeling/heads/ppyoloe_head.py | 29 +++++++-------
...et_s_320_coco_lcnet_train_infer_python.txt | 4 +-
...loe_crn_s_300e_coco_train_infer_python.txt | 2 +-
9 files changed, 69 insertions(+), 27 deletions(-)
diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py
index 800c1faf498..882dd5af65e 100644
--- a/ppdet/engine/export_utils.py
+++ b/ppdet/engine/export_utils.py
@@ -142,6 +142,46 @@
'centerness4': paddle.static.InputSpec(
name='centerness4', shape=[-1, 10, 10, 1], dtype='float32'),
}],
+ 'picodet_s_320_coco_lcnet': [{
+ 'im_id': paddle.static.InputSpec(
+ name='im_id', shape=[-1, 1], dtype='float32'),
+ 'is_crowd': paddle.static.InputSpec(
+ name='is_crowd', shape=[-1, -1, 1], dtype='float32'),
+ 'gt_class': paddle.static.InputSpec(
+ name='gt_class', shape=[-1, -1, 1], dtype='int32'),
+ 'gt_bbox': paddle.static.InputSpec(
+ name='gt_bbox', shape=[-1, -1, 4], dtype='float32'),
+ 'curr_iter': paddle.static.InputSpec(
+ name='curr_iter', shape=[-1], dtype='float32'),
+ 'image': paddle.static.InputSpec(
+ name='image', shape=[-1, 3, -1, -1], dtype='float32'),
+ 'im_shape': paddle.static.InputSpec(
+ name='im_shape', shape=[-1, 2], dtype='float32'),
+ 'scale_factor': paddle.static.InputSpec(
+ name='scale_factor', shape=[-1, 2], dtype='float32'),
+ 'pad_gt_mask': paddle.static.InputSpec(
+ name='pad_gt_mask', shape=[-1, -1, 1], dtype='float32'),
+ }],
+ 'ppyoloe_crn_s_300e_coco': [{
+ 'im_id': paddle.static.InputSpec(
+ name='im_id', shape=[-1, 1], dtype='float32'),
+ 'is_crowd': paddle.static.InputSpec(
+ name='is_crowd', shape=[-1, -1, 1], dtype='float32'),
+ 'gt_class': paddle.static.InputSpec(
+ name='gt_class', shape=[-1, -1, 1], dtype='int32'),
+ 'gt_bbox': paddle.static.InputSpec(
+ name='gt_bbox', shape=[-1, -1, 4], dtype='float32'),
+ 'curr_iter': paddle.static.InputSpec(
+ name='curr_iter', shape=[-1], dtype='float32'),
+ 'image': paddle.static.InputSpec(
+ name='image', shape=[-1, 3, -1, -1], dtype='float32'),
+ 'im_shape': paddle.static.InputSpec(
+ name='im_shape', shape=[-1, 2], dtype='float32'),
+ 'scale_factor': paddle.static.InputSpec(
+ name='scale_factor', shape=[-1, 2], dtype='float32'),
+ 'pad_gt_mask': paddle.static.InputSpec(
+ name='pad_gt_mask', shape=[-1, -1, 1], dtype='float32'),
+ }],
}
diff --git a/ppdet/modeling/assigners/atss_assigner.py b/ppdet/modeling/assigners/atss_assigner.py
index a1e753c9434..f1aae2bebc8 100644
--- a/ppdet/modeling/assigners/atss_assigner.py
+++ b/ppdet/modeling/assigners/atss_assigner.py
@@ -169,8 +169,9 @@ def forward(self,
# the one with the highest iou will be selected.
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
- mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
- [1, num_max_boxes, 1])
+ mask_multiple_gts = (
+ mask_positive_sum.unsqueeze(1) > 1).astype('int32').tile(
+ [1, num_max_boxes, 1]).astype('bool')
if self.sm_use:
is_max_iou = compute_max_iou_anchor(ious * mask_positive)
else:
@@ -221,4 +222,4 @@ def forward(self,
paddle.zeros_like(gather_scores))
assigned_scores *= gather_scores.unsqueeze(-1)
- return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
+ return assigned_labels, assigned_bboxes, assigned_scores
diff --git a/ppdet/modeling/assigners/task_aligned_assigner.py b/ppdet/modeling/assigners/task_aligned_assigner.py
index 5a756fa67da..23af79439ae 100644
--- a/ppdet/modeling/assigners/task_aligned_assigner.py
+++ b/ppdet/modeling/assigners/task_aligned_assigner.py
@@ -190,4 +190,4 @@ def forward(self,
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
- return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
+ return assigned_labels, assigned_bboxes, assigned_scores
diff --git a/ppdet/modeling/assigners/task_aligned_assigner_cr.py b/ppdet/modeling/assigners/task_aligned_assigner_cr.py
index 4558d6e8ec7..5c5097604d5 100644
--- a/ppdet/modeling/assigners/task_aligned_assigner_cr.py
+++ b/ppdet/modeling/assigners/task_aligned_assigner_cr.py
@@ -178,4 +178,4 @@ def forward(self,
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
- return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
+ return assigned_labels, assigned_bboxes, assigned_scores
diff --git a/ppdet/modeling/heads/pico_head.py b/ppdet/modeling/heads/pico_head.py
index e5232239910..adcd05fc6b2 100644
--- a/ppdet/modeling/heads/pico_head.py
+++ b/ppdet/modeling/heads/pico_head.py
@@ -651,7 +651,7 @@ def get_loss(self, head_outs, gt_meta):
# label assignment
if gt_meta['epoch_id'] < self.static_assigner_epoch:
- assigned_labels, assigned_bboxes, assigned_scores, _ = self.static_assigner(
+ assigned_labels, assigned_bboxes, assigned_scores = self.static_assigner(
anchors,
num_anchors_list,
gt_labels,
@@ -662,7 +662,7 @@ def get_loss(self, head_outs, gt_meta):
pred_bboxes=pred_bboxes.detach() * stride_tensor_list)
else:
- assigned_labels, assigned_bboxes, assigned_scores, _ = self.assigner(
+ assigned_labels, assigned_bboxes, assigned_scores = self.assigner(
pred_scores.detach(),
pred_bboxes.detach() * stride_tensor_list,
centers,
diff --git a/ppdet/modeling/heads/ppyoloe_contrast_head.py b/ppdet/modeling/heads/ppyoloe_contrast_head.py
index 3b8e26e63d0..8732c2ce32a 100644
--- a/ppdet/modeling/heads/ppyoloe_contrast_head.py
+++ b/ppdet/modeling/heads/ppyoloe_contrast_head.py
@@ -121,7 +121,7 @@ def get_loss(self, head_outs, gt_meta):
pad_gt_mask = gt_meta['pad_gt_mask']
# label assignment
if gt_meta['epoch_id'] < self.static_assigner_epoch:
- assigned_labels, assigned_bboxes, assigned_scores, _ = \
+ assigned_labels, assigned_bboxes, assigned_scores = \
self.static_assigner(
anchors,
num_anchors_list,
@@ -133,7 +133,7 @@ def get_loss(self, head_outs, gt_meta):
alpha_l = 0.25
else:
if self.sm_use:
- assigned_labels, assigned_bboxes, assigned_scores, _ = \
+ assigned_labels, assigned_bboxes, assigned_scores = \
self.assigner(
pred_scores.detach(),
pred_bboxes.detach() * stride_tensor,
@@ -144,7 +144,7 @@ def get_loss(self, head_outs, gt_meta):
pad_gt_mask,
bg_index=self.num_classes)
else:
- assigned_labels, assigned_bboxes, assigned_scores, _ = \
+ assigned_labels, assigned_bboxes, assigned_scores = \
self.assigner(
pred_scores.detach(),
pred_bboxes.detach() * stride_tensor,
diff --git a/ppdet/modeling/heads/ppyoloe_head.py b/ppdet/modeling/heads/ppyoloe_head.py
index 261c0c4933b..80f1bc479e7 100644
--- a/ppdet/modeling/heads/ppyoloe_head.py
+++ b/ppdet/modeling/heads/ppyoloe_head.py
@@ -337,7 +337,8 @@ def _bbox_loss(self, pred_dist, pred_bboxes, anchor_points, assigned_labels,
# pos/neg loss
if num_pos > 0:
# l1 + iou
- bbox_mask = mask_positive.unsqueeze(-1).tile([1, 1, 4])
+ bbox_mask = mask_positive.astype('int32').unsqueeze(-1).tile(
+ [1, 1, 4]).astype('bool')
pred_bboxes_pos = paddle.masked_select(pred_bboxes,
bbox_mask).reshape([-1, 4])
assigned_bboxes_pos = paddle.masked_select(
@@ -351,8 +352,8 @@ def _bbox_loss(self, pred_dist, pred_bboxes, anchor_points, assigned_labels,
assigned_bboxes_pos) * bbox_weight
loss_iou = loss_iou.sum() / assigned_scores_sum
- dist_mask = mask_positive.unsqueeze(-1).tile(
- [1, 1, self.reg_channels * 4])
+ dist_mask = mask_positive.unsqueeze(-1).astype('int32').tile(
+ [1, 1, self.reg_channels * 4]).astype('bool')
pred_dist_pos = paddle.masked_select(
pred_dist, dist_mask).reshape([-1, 4, self.reg_channels])
assigned_ltrb = self._bbox2distance(anchor_points, assigned_bboxes)
@@ -387,7 +388,7 @@ def get_loss(self, head_outs, gt_meta, aux_pred=None):
pad_gt_mask = gt_meta['pad_gt_mask']
# label assignment
if gt_meta['epoch_id'] < self.static_assigner_epoch:
- assigned_labels, assigned_bboxes, assigned_scores, mask_positive = \
+ assigned_labels, assigned_bboxes, assigned_scores = \
self.static_assigner(
anchors,
num_anchors_list,
@@ -400,7 +401,7 @@ def get_loss(self, head_outs, gt_meta, aux_pred=None):
else:
if self.sm_use:
# only used in smalldet of PPYOLOE-SOD model
- assigned_labels, assigned_bboxes, assigned_scores, mask_positive = \
+ assigned_labels, assigned_bboxes, assigned_scores = \
self.assigner(
pred_scores.detach(),
pred_bboxes.detach() * stride_tensor,
@@ -413,7 +414,7 @@ def get_loss(self, head_outs, gt_meta, aux_pred=None):
else:
if aux_pred is None:
if not hasattr(self, "assigned_labels"):
- assigned_labels, assigned_bboxes, assigned_scores, mask_positive = \
+ assigned_labels, assigned_bboxes, assigned_scores = \
self.assigner(
pred_scores.detach(),
pred_bboxes.detach() * stride_tensor,
@@ -427,15 +428,15 @@ def get_loss(self, head_outs, gt_meta, aux_pred=None):
self.assigned_labels = assigned_labels
self.assigned_bboxes = assigned_bboxes
self.assigned_scores = assigned_scores
- self.mask_positive = mask_positive
+
else:
# only used in distill
assigned_labels = self.assigned_labels
assigned_bboxes = self.assigned_bboxes
assigned_scores = self.assigned_scores
- mask_positive = self.mask_positive
+
else:
- assigned_labels, assigned_bboxes, assigned_scores, mask_positive = \
+ assigned_labels, assigned_bboxes, assigned_scores = \
self.assigner(
pred_scores_aux.detach(),
pred_bboxes_aux.detach() * stride_tensor,
@@ -451,14 +452,12 @@ def get_loss(self, head_outs, gt_meta, aux_pred=None):
assign_out_dict = self.get_loss_from_assign(
pred_scores, pred_distri, pred_bboxes, anchor_points_s,
- assigned_labels, assigned_bboxes, assigned_scores, mask_positive,
- alpha_l)
+ assigned_labels, assigned_bboxes, assigned_scores, alpha_l)
if aux_pred is not None:
assign_out_dict_aux = self.get_loss_from_assign(
aux_pred[0], aux_pred[1], pred_bboxes_aux, anchor_points_s,
- assigned_labels, assigned_bboxes, assigned_scores,
- mask_positive, alpha_l)
+ assigned_labels, assigned_bboxes, assigned_scores, alpha_l)
loss = {}
for key in assign_out_dict.keys():
loss[key] = assign_out_dict[key] + assign_out_dict_aux[key]
@@ -469,7 +468,7 @@ def get_loss(self, head_outs, gt_meta, aux_pred=None):
def get_loss_from_assign(self, pred_scores, pred_distri, pred_bboxes,
anchor_points_s, assigned_labels, assigned_bboxes,
- assigned_scores, mask_positive, alpha_l):
+ assigned_scores, alpha_l):
# cls loss
if self.use_varifocal_loss:
one_hot_label = F.one_hot(assigned_labels,
@@ -490,7 +489,7 @@ def get_loss_from_assign(self, pred_scores, pred_distri, pred_bboxes,
self.distill_pairs['pred_cls_scores'] = pred_scores
self.distill_pairs['pos_num'] = assigned_scores_sum
self.distill_pairs['assigned_scores'] = assigned_scores
- self.distill_pairs['mask_positive'] = mask_positive
+
one_hot_label = F.one_hot(assigned_labels,
self.num_classes + 1)[..., :-1]
self.distill_pairs['target_labels'] = one_hot_label
diff --git a/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt b/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt
index f51595a9e77..57e7e3c3cb9 100644
--- a/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt
+++ b/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt
@@ -57,4 +57,6 @@ repeat:25
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x320x320_2.npy
\ No newline at end of file
+numpy_infer_input:3x320x320_2.npy
+===========================to_static_train_benchmark_params===========================
+to_static_train:--to_static
\ No newline at end of file
diff --git a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
index 0694895ea60..19fa1673b0f 100644
--- a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
+++ b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
@@ -57,4 +57,4 @@ repeat:12
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
-numpy_infer_input:3x640x640_2.npy
\ No newline at end of file
+numpy_infer_input:3x640x640_2.npy
From d48a4bb9cfb61d46ba57974164bdf2d6249f0208 Mon Sep 17 00:00:00 2001
From: Zhao-Yian <77494834+Zhao-Yian@users.noreply.github.com>
Date: Mon, 6 Mar 2023 11:19:32 +0800
Subject: [PATCH 032/116] add group detr for dino (#7865)
---
.../group_detr/_base_/dino_2000_reader.yml | 48 +
configs/group_detr/_base_/dino_reader.yml | 48 +
configs/group_detr/_base_/group_dino_r50.yml | 53 ++
.../group_detr/_base_/group_dino_vit_huge.yml | 68 ++
configs/group_detr/_base_/optimizer_1x.yml | 16 +
.../group_dino_r50_4scale_1x_coco.yml | 11 +
.../group_dino_vit_huge_4scale_1x_coco.yml | 11 +
ppdet/modeling/architectures/detr.py | 13 +-
ppdet/modeling/backbones/__init__.py | 2 +
ppdet/modeling/backbones/transformer_utils.py | 50 +
ppdet/modeling/backbones/vit_mae.py | 749 +++++++++++++++
ppdet/modeling/heads/detr_head.py | 65 +-
ppdet/modeling/initializer.py | 3 +-
ppdet/modeling/post_process.py | 11 +-
ppdet/modeling/transformers/__init__.py | 2 +
.../transformers/group_detr_transformer.py | 857 ++++++++++++++++++
16 files changed, 2000 insertions(+), 7 deletions(-)
create mode 100644 configs/group_detr/_base_/dino_2000_reader.yml
create mode 100644 configs/group_detr/_base_/dino_reader.yml
create mode 100644 configs/group_detr/_base_/group_dino_r50.yml
create mode 100644 configs/group_detr/_base_/group_dino_vit_huge.yml
create mode 100644 configs/group_detr/_base_/optimizer_1x.yml
create mode 100644 configs/group_detr/group_dino_r50_4scale_1x_coco.yml
create mode 100644 configs/group_detr/group_dino_vit_huge_4scale_1x_coco.yml
create mode 100644 ppdet/modeling/backbones/vit_mae.py
create mode 100644 ppdet/modeling/transformers/group_detr_transformer.py
diff --git a/configs/group_detr/_base_/dino_2000_reader.yml b/configs/group_detr/_base_/dino_2000_reader.yml
new file mode 100644
index 00000000000..ef7620eb856
--- /dev/null
+++ b/configs/group_detr/_base_/dino_2000_reader.yml
@@ -0,0 +1,48 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomFlip: {prob: 0.5}
+ - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184], max_size: 2000 } ],
+ transforms2: [
+ RandomShortSideResize: { short_side_sizes: [400, 500, 600, 700, 800, 900] },
+ RandomSizeCrop: { min_size: 384, max_size: 900 },
+ RandomShortSideResize: { short_side_sizes: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184], max_size: 2000 } ]
+ }
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - NormalizeBox: {}
+ - BboxXYXY2XYWH: {}
+ - Permute: {}
+ batch_transforms:
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+ collate_batch: false
+ use_shared_memory: false
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [1184, 2000], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [1184, 2000], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/group_detr/_base_/dino_reader.yml b/configs/group_detr/_base_/dino_reader.yml
new file mode 100644
index 00000000000..c15a0f3b639
--- /dev/null
+++ b/configs/group_detr/_base_/dino_reader.yml
@@ -0,0 +1,48 @@
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomFlip: {prob: 0.5}
+ - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+ transforms2: [
+ RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+ RandomSizeCrop: { min_size: 384, max_size: 600 },
+ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+ }
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - NormalizeBox: {}
+ - BboxXYXY2XYWH: {}
+ - Permute: {}
+ batch_transforms:
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+ collate_batch: false
+ use_shared_memory: false
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/group_detr/_base_/group_dino_r50.yml b/configs/group_detr/_base_/group_dino_r50.yml
new file mode 100644
index 00000000000..587f7f51943
--- /dev/null
+++ b/configs/group_detr/_base_/group_dino_r50.yml
@@ -0,0 +1,53 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+
+DETR:
+ backbone: ResNet
+ transformer: GroupDINOTransformer
+ detr_head: DINOHead
+ post_process: DETRBBoxPostProcess
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [1, 2, 3]
+ lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+ num_stages: 4
+
+GroupDINOTransformer:
+ num_queries: 900
+ position_embed_type: sine
+ num_levels: 4
+ nhead: 8
+ num_encoder_layers: 6
+ num_decoder_layers: 6
+ dim_feedforward: 2048
+ dropout: 0.0
+ activation: relu
+ pe_temperature: 20
+ pe_offset: 0.0
+ num_denoising: 100
+ label_noise_ratio: 0.5
+ box_noise_scale: 1.0
+ learnt_init_query: True
+ dual_queries: True
+ dual_groups: 10
+
+DINOHead:
+ loss:
+ name: DINOLoss
+ loss_coeff: {class: 1, bbox: 5, giou: 2}
+ aux_loss: True
+ matcher:
+ name: HungarianMatcher
+ matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRBBoxPostProcess:
+ num_top_queries: 300
+ dual_queries: True
+ dual_groups: 10
diff --git a/configs/group_detr/_base_/group_dino_vit_huge.yml b/configs/group_detr/_base_/group_dino_vit_huge.yml
new file mode 100644
index 00000000000..8849f8a2d6f
--- /dev/null
+++ b/configs/group_detr/_base_/group_dino_vit_huge.yml
@@ -0,0 +1,68 @@
+architecture: DETR
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_huge_mae_patch14_dec512d8b_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+DETR:
+ backbone: VisionTransformer2D
+ neck: SimpleFeaturePyramid
+ transformer: GroupDINOTransformer
+ detr_head: DINOHead
+ post_process: DETRBBoxPostProcess
+
+VisionTransformer2D:
+ patch_size: 16
+ embed_dim: 1280
+ depth: 32
+ num_heads: 16
+ mlp_ratio: 4
+ attn_bias: True
+ drop_rate: 0.0
+ drop_path_rate: 0.1
+ lr_decay_rate: 0.7
+ global_attn_indexes: [7, 15, 23, 31]
+ use_abs_pos: False
+ use_rel_pos: True
+ rel_pos_zero_init: True
+ window_size: 14
+ out_indices: [ 31, ]
+
+SimpleFeaturePyramid:
+ out_channels: 256
+ num_levels: 4
+
+GroupDINOTransformer:
+ num_queries: 900
+ position_embed_type: sine
+ pe_temperature: 20
+ pe_offset: 0.0
+ num_levels: 4
+ nhead: 8
+ num_encoder_layers: 6
+ num_decoder_layers: 6
+ dim_feedforward: 2048
+ use_input_proj: False
+ dropout: 0.0
+ activation: relu
+ num_denoising: 100
+ label_noise_ratio: 0.5
+ box_noise_scale: 1.0
+ learnt_init_query: True
+ dual_queries: True
+ dual_groups: 10
+
+
+DINOHead:
+ loss:
+ name: DINOLoss
+ loss_coeff: {class: 1, bbox: 5, giou: 2}
+ aux_loss: True
+ matcher:
+ name: HungarianMatcher
+ matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+
+DETRBBoxPostProcess:
+ num_top_queries: 300
+ dual_queries: True
+ dual_groups: 10
diff --git a/configs/group_detr/_base_/optimizer_1x.yml b/configs/group_detr/_base_/optimizer_1x.yml
new file mode 100644
index 00000000000..63b3a9ed279
--- /dev/null
+++ b/configs/group_detr/_base_/optimizer_1x.yml
@@ -0,0 +1,16 @@
+epoch: 12
+
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [11]
+ use_warmup: false
+
+OptimizerBuilder:
+ clip_grad_by_norm: 0.1
+ regularizer: false
+ optimizer:
+ type: AdamW
+ weight_decay: 0.0001
diff --git a/configs/group_detr/group_dino_r50_4scale_1x_coco.yml b/configs/group_detr/group_dino_r50_4scale_1x_coco.yml
new file mode 100644
index 00000000000..1f38c690d3e
--- /dev/null
+++ b/configs/group_detr/group_dino_r50_4scale_1x_coco.yml
@@ -0,0 +1,11 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/group_dino_r50.yml',
+ '_base_/dino_reader.yml',
+]
+
+weights: output/group_dino_r50_4scale_1x_coco/model_final
+find_unused_parameters: True
+log_iter: 100
diff --git a/configs/group_detr/group_dino_vit_huge_4scale_1x_coco.yml b/configs/group_detr/group_dino_vit_huge_4scale_1x_coco.yml
new file mode 100644
index 00000000000..90d0c483e63
--- /dev/null
+++ b/configs/group_detr/group_dino_vit_huge_4scale_1x_coco.yml
@@ -0,0 +1,11 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_1x.yml',
+ '_base_/group_dino_vit_huge.yml',
+ '_base_/dino_2000_reader.yml',
+]
+
+weights: output/group_dino_vit_huge_4scale_1x_coco/model_final
+find_unused_parameters: True
+log_iter: 100
diff --git a/ppdet/modeling/architectures/detr.py b/ppdet/modeling/architectures/detr.py
index 419a44377b8..223eeda89cb 100644
--- a/ppdet/modeling/architectures/detr.py
+++ b/ppdet/modeling/architectures/detr.py
@@ -34,10 +34,12 @@ def __init__(self,
backbone,
transformer='DETRTransformer',
detr_head='DETRHead',
+ neck=None,
post_process='DETRBBoxPostProcess',
exclude_post_process=False):
super(DETR, self).__init__()
self.backbone = backbone
+ self.neck = neck
self.transformer = transformer
self.detr_head = detr_head
self.post_process = post_process
@@ -47,8 +49,12 @@ def __init__(self,
def from_config(cls, cfg, *args, **kwargs):
# backbone
backbone = create(cfg['backbone'])
- # transformer
+ # neck
kwargs = {'input_shape': backbone.out_shape}
+ neck = create(cfg['neck'], **kwargs) if cfg['neck'] else None
+ # transformer
+ if neck is not None:
+ kwargs = {'input_shape': neck.out_shape}
transformer = create(cfg['transformer'], **kwargs)
# head
kwargs = {
@@ -62,12 +68,17 @@ def from_config(cls, cfg, *args, **kwargs):
'backbone': backbone,
'transformer': transformer,
"detr_head": detr_head,
+ "neck": neck
}
def _forward(self):
# Backbone
body_feats = self.backbone(self.inputs)
+ # Neck
+ if self.neck is not None:
+ body_feats = self.neck(body_feats)
+
# Transformer
pad_mask = self.inputs.get('pad_mask', None)
out_transformer = self.transformer(body_feats, pad_mask, self.inputs)
diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py
index 388ba04583b..f8b183e27bd 100644
--- a/ppdet/modeling/backbones/__init__.py
+++ b/ppdet/modeling/backbones/__init__.py
@@ -36,6 +36,7 @@
from . import mobileone
from . import trans_encoder
from . import focalnet
+from . import vit_mae
from .vgg import *
from .resnet import *
@@ -61,3 +62,4 @@
from .mobileone import *
from .trans_encoder import *
from .focalnet import *
+from .vit_mae import *
diff --git a/ppdet/modeling/backbones/transformer_utils.py b/ppdet/modeling/backbones/transformer_utils.py
index 46d7b9f28e6..a0783e1e99a 100644
--- a/ppdet/modeling/backbones/transformer_utils.py
+++ b/ppdet/modeling/backbones/transformer_utils.py
@@ -14,6 +14,7 @@
import paddle
import paddle.nn as nn
+import paddle.nn.functional as F
from paddle.nn.initializer import TruncatedNormal, Constant, Assign
@@ -72,3 +73,52 @@ def add_parameter(layer, datas, name=None):
if name:
layer.add_parameter(name, parameter)
return parameter
+
+
+def window_partition(x, window_size):
+ """
+ Partition into non-overlapping windows with padding if needed.
+ Args:
+ x (tensor): input tokens with [B, H, W, C].
+ window_size (int): window size.
+ Returns:
+ windows: windows after partition with [B * num_windows, window_size, window_size, C].
+ (Hp, Wp): padded height and width before partition
+ """
+ B, H, W, C = paddle.shape(x)
+
+ pad_h = (window_size - H % window_size) % window_size
+ pad_w = (window_size - W % window_size) % window_size
+ x = F.pad(x.transpose([0, 3, 1, 2]),
+ paddle.to_tensor(
+ [0, int(pad_w), 0, int(pad_h)],
+ dtype='int32')).transpose([0, 2, 3, 1])
+ Hp, Wp = H + pad_h, W + pad_w
+
+ num_h, num_w = Hp // window_size, Wp // window_size
+
+ x = x.reshape([B, num_h, window_size, num_w, window_size, C])
+ windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape(
+ [-1, window_size, window_size, C])
+ return windows, (Hp, Wp), (num_h, num_w)
+
+
+def window_unpartition(x, pad_hw, num_hw, hw):
+ """
+ Window unpartition into original sequences and removing padding.
+ Args:
+ x (tensor): input tokens with [B * num_windows, window_size, window_size, C].
+ pad_hw (Tuple): padded height and width (Hp, Wp).
+ hw (Tuple): original height and width (H, W) before padding.
+ Returns:
+ x: unpartitioned sequences with [B, H, W, C].
+ """
+ Hp, Wp = pad_hw
+ num_h, num_w = num_hw
+ H, W = hw
+ B, window_size, _, C = paddle.shape(x)
+ B = B // (num_h * num_w)
+ x = x.reshape([B, num_h, num_w, window_size, window_size, C])
+ x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, Hp, Wp, C])
+
+ return x[:, :H, :W, :]
diff --git a/ppdet/modeling/backbones/vit_mae.py b/ppdet/modeling/backbones/vit_mae.py
new file mode 100644
index 00000000000..8d00da72b54
--- /dev/null
+++ b/ppdet/modeling/backbones/vit_mae.py
@@ -0,0 +1,749 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import numpy as np
+import math
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+from paddle.nn.initializer import Constant, TruncatedNormal
+
+from ppdet.modeling.shape_spec import ShapeSpec
+from ppdet.core.workspace import register, serializable
+
+from .transformer_utils import (zeros_, DropPath, Identity, window_partition,
+ window_unpartition)
+from ..initializer import linear_init_
+
+__all__ = ['VisionTransformer2D', 'SimpleFeaturePyramid']
+
+
+class Mlp(nn.Layer):
+    """Two-layer feed-forward block (Linear -> act -> Linear) with dropout,
+    used inside each transformer Block.
+
+    Args:
+        in_features (int): input channel dim.
+        hidden_features (int|None): hidden dim; defaults to in_features.
+        out_features (int|None): output dim; defaults to in_features.
+        act_layer (str): activation class expression, eval'd (e.g. 'nn.GELU').
+        drop (float): dropout rate applied after each linear layer.
+        lr_factor (float): per-parameter learning-rate multiplier.
+    """
+
+    def __init__(self,
+                 in_features,
+                 hidden_features=None,
+                 out_features=None,
+                 act_layer='nn.GELU',
+                 drop=0.,
+                 lr_factor=1.0):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(
+            in_features,
+            hidden_features,
+            weight_attr=ParamAttr(learning_rate=lr_factor),
+            bias_attr=ParamAttr(learning_rate=lr_factor))
+        # act_layer is a string so it can come from a YAML config.
+        self.act = eval(act_layer)()
+        self.fc2 = nn.Linear(
+            hidden_features,
+            out_features,
+            weight_attr=ParamAttr(learning_rate=lr_factor),
+            bias_attr=ParamAttr(learning_rate=lr_factor))
+        self.drop = nn.Dropout(drop)
+
+        self._init_weights()
+
+    def _init_weights(self):
+        # Uniform init for both linears (project convention).
+        linear_init_(self.fc1)
+        linear_init_(self.fc2)
+
+    def forward(self, x):
+        x = self.drop(self.act(self.fc1(x)))
+        x = self.drop(self.fc2(x))
+        return x
+
+
+class Attention(nn.Layer):
+    """Multi-head self-attention over [B, H, W, C] feature maps, with
+    optional decomposed relative position embeddings (MViTv2-style).
+    """
+
+    def __init__(self,
+                 dim,
+                 num_heads=8,
+                 qkv_bias=False,
+                 attn_bias=False,
+                 attn_drop=0.,
+                 proj_drop=0.,
+                 use_rel_pos=False,
+                 rel_pos_zero_init=True,
+                 window_size=None,
+                 input_size=None,
+                 qk_scale=None,
+                 lr_factor=1.0):
+        super().__init__()
+        self.num_heads = num_heads
+        self.head_dim = dim // num_heads
+        # q is scaled by 1/sqrt(head_dim) unless qk_scale overrides it.
+        self.scale = qk_scale or self.head_dim**-0.5
+        self.use_rel_pos = use_rel_pos
+        self.input_size = input_size
+        self.rel_pos_zero_init = rel_pos_zero_init
+        self.window_size = window_size
+        self.lr_factor = lr_factor
+
+        # Fused qkv projection. Its built-in bias is governed by attn_bias;
+        # qkv_bias instead adds separate learnable q/v biases below (k bias
+        # stays zero, see forward).
+        self.qkv = nn.Linear(
+            dim,
+            dim * 3,
+            weight_attr=ParamAttr(learning_rate=lr_factor),
+            bias_attr=ParamAttr(learning_rate=lr_factor)
+            if attn_bias else False)
+        if qkv_bias:
+            self.q_bias = self.create_parameter(
+                shape=([dim]), default_initializer=zeros_)
+            self.v_bias = self.create_parameter(
+                shape=([dim]), default_initializer=zeros_)
+        else:
+            self.q_bias = None
+            self.v_bias = None
+        self.proj = nn.Linear(
+            dim,
+            dim,
+            weight_attr=ParamAttr(learning_rate=lr_factor),
+            bias_attr=ParamAttr(learning_rate=lr_factor))
+        self.attn_drop = nn.Dropout(attn_drop)
+        # Global-attention blocks pass window_size=None; fall back to the
+        # full input height (assumes input_size is provided and square-ish —
+        # TODO confirm).
+        if window_size is None:
+            self.window_size = self.input_size[0]
+
+        self._init_weights()
+
+    def _init_weights(self):
+        linear_init_(self.qkv)
+        linear_init_(self.proj)
+
+        if self.use_rel_pos:
+            # One relative-position table per axis, covering the
+            # (2*window_size - 1) possible offsets.
+            self.rel_pos_h = self.create_parameter(
+                [2 * self.window_size - 1, self.head_dim],
+                attr=ParamAttr(learning_rate=self.lr_factor),
+                default_initializer=Constant(value=0.))
+            self.rel_pos_w = self.create_parameter(
+                [2 * self.window_size - 1, self.head_dim],
+                attr=ParamAttr(learning_rate=self.lr_factor),
+                default_initializer=Constant(value=0.))
+
+            if not self.rel_pos_zero_init:
+                # NOTE(review): TruncatedNormal(...) only constructs an
+                # initializer object and never applies it to the parameters,
+                # so this branch appears to be a no-op — verify intent.
+                TruncatedNormal(self.rel_pos_h, std=0.02)
+                TruncatedNormal(self.rel_pos_w, std=0.02)
+
+    def get_rel_pos(self, seq_size, rel_pos):
+        """Return the [seq_size, seq_size, head_dim] relative-position table
+        for one axis, linearly interpolating the stored table when its length
+        differs from 2*seq_size - 1."""
+        max_rel_dist = int(2 * seq_size - 1)
+        # Interpolate rel pos if needed.
+        if rel_pos.shape[0] != max_rel_dist:
+            # Interpolate rel pos.
+            rel_pos = rel_pos.reshape([1, rel_pos.shape[0], -1])
+            rel_pos = rel_pos.transpose([0, 2, 1])
+            rel_pos_resized = F.interpolate(
+                rel_pos,
+                size=(max_rel_dist, ),
+                mode="linear",
+                data_format='NCW')
+            rel_pos_resized = rel_pos_resized.reshape([-1, max_rel_dist])
+            rel_pos_resized = rel_pos_resized.transpose([1, 0])
+        else:
+            rel_pos_resized = rel_pos
+
+        # Pairwise offsets (i - j), shifted by seq_size - 1 so they become
+        # non-negative indices into the table.
+        coords = paddle.arange(seq_size, dtype='float32')
+        relative_coords = coords.unsqueeze(-1) - coords.unsqueeze(0)
+        relative_coords += (seq_size - 1)
+        relative_coords = relative_coords.astype('int64').flatten()
+
+        return paddle.index_select(rel_pos_resized, relative_coords).reshape(
+            [seq_size, seq_size, self.head_dim])
+
+    def add_decomposed_rel_pos(self, attn, q, h, w):
+        """
+        Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`.
+        Args:
+            attn (Tensor): attention map.
+            q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C).
+            h (int): height of the attention grid.
+            w (int): width of the attention grid.
+        Returns:
+            attn (Tensor): attention map with added relative positional embeddings.
+        """
+        Rh = self.get_rel_pos(h, self.rel_pos_h)
+        Rw = self.get_rel_pos(w, self.rel_pos_w)
+
+        B, _, dim = q.shape
+        r_q = q.reshape([B, h, w, dim])
+        # Per-axis contributions broadcast over the other axis:
+        # rel_h -> [B, h, w, h, 1], rel_w -> [B, h, w, 1, w]
+        rel_h = paddle.einsum("bhwc,hkc->bhwk", r_q, Rh).unsqueeze(-1)
+        rel_w = paddle.einsum("bhwc,wkc->bhwk", r_q, Rw).unsqueeze(-2)
+
+        attn = attn.reshape([B, h, w, h, w]) + rel_h + rel_w
+        return attn.reshape([B, h * w, h * w])
+
+    def forward(self, x):
+        B, H, W, C = paddle.shape(x)
+
+        if self.q_bias is not None:
+            # Shared qkv weight; k gets a fixed zero bias, q and v get the
+            # learnable biases created in __init__.
+            qkv_bias = paddle.concat(
+                (self.q_bias, paddle.zeros_like(self.v_bias), self.v_bias))
+            # NOTE(review): unlike the else branch, this path never reshapes
+            # qkv to [3, B*num_heads, H*W, head_dim] before the q/k/v split
+            # below — looks broken when qkv_bias=True; confirm.
+            qkv = F.linear(x, weight=self.qkv.weight, bias=qkv_bias)
+        else:
+            qkv = self.qkv(x).reshape(
+                [B, H * W, 3, self.num_heads, self.head_dim]).transpose(
+                    [2, 0, 3, 1, 4]).reshape(
+                        [3, B * self.num_heads, H * W, self.head_dim])
+
+        q, k, v = qkv[0], qkv[1], qkv[2]
+        # Scaled dot-product attention over the flattened H*W tokens.
+        attn = q.matmul(k.transpose([0, 2, 1])) * self.scale
+
+        if self.use_rel_pos:
+            attn = self.add_decomposed_rel_pos(attn, q, H, W)
+
+        attn = F.softmax(attn, axis=-1)
+        attn = self.attn_drop(attn)
+        x = attn.matmul(v).reshape(
+            [B, self.num_heads, H * W, self.head_dim]).transpose(
+                [0, 2, 1, 3]).reshape([B, H, W, C])
+        x = self.proj(x)
+        return x
+
+
+class Block(nn.Layer):
+    """Transformer block: (optionally windowed) attention + MLP, each behind
+    a pre-LayerNorm with a residual connection, plus optional layer-scale
+    (gamma_1/gamma_2) and stochastic depth (DropPath)."""
+
+    def __init__(self,
+                 dim,
+                 num_heads,
+                 mlp_ratio=4.,
+                 qkv_bias=False,
+                 attn_bias=False,
+                 qk_scale=None,
+                 init_values=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 use_rel_pos=True,
+                 rel_pos_zero_init=True,
+                 window_size=None,
+                 input_size=None,
+                 act_layer='nn.GELU',
+                 norm_layer='nn.LayerNorm',
+                 lr_factor=1.0,
+                 epsilon=1e-5):
+        super().__init__()
+        # None => global attention over the full feature map.
+        self.window_size = window_size
+
+        # Norm weights/biases are excluded from weight decay (L2Decay(0.0)).
+        self.norm1 = eval(norm_layer)(dim,
+                                      weight_attr=ParamAttr(
+                                          learning_rate=lr_factor,
+                                          regularizer=L2Decay(0.0)),
+                                      bias_attr=ParamAttr(
+                                          learning_rate=lr_factor,
+                                          regularizer=L2Decay(0.0)),
+                                      epsilon=epsilon)
+        self.attn = Attention(
+            dim,
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            attn_bias=attn_bias,
+            qk_scale=qk_scale,
+            attn_drop=attn_drop,
+            proj_drop=drop,
+            use_rel_pos=use_rel_pos,
+            rel_pos_zero_init=rel_pos_zero_init,
+            window_size=window_size,
+            input_size=input_size,
+            lr_factor=lr_factor)
+
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        self.norm2 = eval(norm_layer)(dim,
+                                      weight_attr=ParamAttr(
+                                          learning_rate=lr_factor,
+                                          regularizer=L2Decay(0.0)),
+                                      bias_attr=ParamAttr(
+                                          learning_rate=lr_factor,
+                                          regularizer=L2Decay(0.0)),
+                                      epsilon=epsilon)
+        self.mlp = Mlp(in_features=dim,
+                       hidden_features=int(dim * mlp_ratio),
+                       act_layer=act_layer,
+                       drop=drop,
+                       lr_factor=lr_factor)
+        if init_values is not None:
+            # Layer-scale: learnable per-channel weights on both residual
+            # branches, initialized to a small constant.
+            self.gamma_1 = self.create_parameter(
+                shape=([dim]), default_initializer=Constant(value=init_values))
+            self.gamma_2 = self.create_parameter(
+                shape=([dim]), default_initializer=Constant(value=init_values))
+        else:
+            self.gamma_1, self.gamma_2 = None, None
+
+    def forward(self, x):
+        # x: [B, H, W, C]
+        y = self.norm1(x)
+        if self.window_size is not None:
+            # Split into windows, attend within each window independently.
+            y, pad_hw, num_hw = window_partition(y, self.window_size)
+        y = self.attn(y)
+        if self.gamma_1 is not None:
+            y = self.gamma_1 * y
+
+        if self.window_size is not None:
+            # Merge windows back and drop the padding.
+            y = window_unpartition(y, pad_hw, num_hw, (x.shape[1], x.shape[2]))
+        x = x + self.drop_path(y)
+        if self.gamma_2 is None:
+            x = x + self.drop_path(self.mlp(self.norm2(x)))
+        else:
+            x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+
+        return x
+
+
+class PatchEmbed(nn.Layer):
+    """ Image to Patch Embedding
+
+    Projects an image to patch tokens with a strided convolution
+    (kernel_size == stride == patch_size), keeping NCHW layout.
+    """
+
+    def __init__(self,
+                 img_size=(224, 224),
+                 patch_size=16,
+                 in_chans=3,
+                 embed_dim=768,
+                 lr_factor=0.01):
+        super().__init__()
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.proj = nn.Conv2D(
+            in_chans,
+            embed_dim,
+            kernel_size=patch_size,
+            stride=patch_size,
+            weight_attr=ParamAttr(learning_rate=lr_factor),
+            bias_attr=ParamAttr(learning_rate=lr_factor))
+
+    @property
+    def num_patches_in_h(self):
+        # NOTE(review): indexes img_size[1] here while VisionTransformer2D
+        # treats img_size[0] as height — h/w may be swapped; harmless for
+        # square inputs, verify otherwise.
+        return self.img_size[1] // self.patch_size
+
+    @property
+    def num_patches_in_w(self):
+        return self.img_size[0] // self.patch_size
+
+    def forward(self, x):
+        # [B, in_chans, H, W] -> [B, embed_dim, H/patch, W/patch]
+        out = self.proj(x)
+        return out
+
+
+@register
+@serializable
+class VisionTransformer2D(nn.Layer):
+    """ Vision Transformer with support for patch input
+
+    A ViT backbone (ViTDet/MAE style) that keeps 2D [B, H, W, C] layout
+    through the blocks, mixing windowed and global attention, and emits
+    NCHW feature maps (optionally through a small FPN).
+    """
+
+    def __init__(self,
+                 img_size=(1024, 1024),
+                 patch_size=16,
+                 in_chans=3,
+                 embed_dim=768,
+                 depth=12,
+                 num_heads=12,
+                 mlp_ratio=4,
+                 qkv_bias=False,
+                 attn_bias=False,
+                 qk_scale=None,
+                 init_values=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.,
+                 act_layer='nn.GELU',
+                 norm_layer='nn.LayerNorm',
+                 lr_decay_rate=1.0,
+                 global_attn_indexes=(2, 5, 8, 11),
+                 use_abs_pos=False,
+                 use_rel_pos=False,
+                 use_abs_pos_emb=False,
+                 use_sincos_pos_emb=False,
+                 rel_pos_zero_init=True,
+                 epsilon=1e-5,
+                 final_norm=False,
+                 pretrained=None,
+                 window_size=None,
+                 out_indices=(11, ),
+                 with_fpn=False,
+                 use_checkpoint=False,
+                 *args,
+                 **kwargs):
+        super().__init__()
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.depth = depth
+        # Blocks at these indices use global attention; all others window.
+        self.global_attn_indexes = global_attn_indexes
+        self.epsilon = epsilon
+        self.with_fpn = with_fpn
+        self.use_checkpoint = use_checkpoint
+
+        # Patch-grid size; img_size[0] is treated as height here.
+        self.patch_h = img_size[0] // patch_size
+        self.patch_w = img_size[1] // patch_size
+        self.num_patches = self.patch_h * self.patch_w
+        self.use_abs_pos = use_abs_pos
+        self.use_abs_pos_emb = use_abs_pos_emb
+
+        self.patch_embed = PatchEmbed(
+            img_size=img_size,
+            patch_size=patch_size,
+            in_chans=in_chans,
+            embed_dim=embed_dim)
+
+        # Linearly increasing stochastic-depth rate across blocks.
+        dpr = np.linspace(0, drop_path_rate, depth)
+        if use_checkpoint:
+            # Fixed seed so recompute replays the same dropout masks.
+            paddle.seed(0)
+
+        if use_abs_pos_emb:
+            # Learnable absolute positions (+1 slot for a cls token,
+            # see resize_pos_embed).
+            self.pos_w = self.patch_embed.num_patches_in_w
+            self.pos_h = self.patch_embed.num_patches_in_h
+            self.pos_embed = self.create_parameter(
+                shape=(1, self.pos_w * self.pos_h + 1, embed_dim),
+                default_initializer=paddle.nn.initializer.TruncatedNormal(
+                    std=.02))
+        elif use_sincos_pos_emb:
+            # Fixed (non-trainable) 2D sin-cos positions.
+            pos_embed = self.get_2d_sincos_position_embedding(self.patch_h,
+                                                              self.patch_w)
+
+            self.pos_embed = pos_embed
+            self.pos_embed = self.create_parameter(shape=pos_embed.shape)
+            self.pos_embed.set_value(pos_embed.numpy())
+            self.pos_embed.stop_gradient = True
+        else:
+            self.pos_embed = None
+
+        self.blocks = nn.LayerList([
+            Block(
+                embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                attn_bias=attn_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_path=dpr[i],
+                use_rel_pos=use_rel_pos,
+                rel_pos_zero_init=rel_pos_zero_init,
+                window_size=None
+                if i in self.global_attn_indexes else window_size,
+                input_size=[self.patch_h, self.patch_w],
+                act_layer=act_layer,
+                lr_factor=self.get_vit_lr_decay_rate(i, lr_decay_rate),
+                norm_layer=norm_layer,
+                init_values=init_values,
+                epsilon=epsilon) for i in range(depth)
+        ])
+
+        assert len(out_indices) <= 4, 'out_indices out of bound'
+        self.out_indices = out_indices
+        self.pretrained = pretrained
+        self.init_weight()
+
+        self.out_channels = [embed_dim for _ in range(len(out_indices))]
+        # With FPN the outputs take the standard pyramid strides; otherwise
+        # every output keeps the raw patch stride.
+        self.out_strides = [4, 8, 16, 32][-len(out_indices):] if with_fpn else [
+            patch_size for _ in range(len(out_indices))
+        ]
+        self.norm = Identity()
+        if self.with_fpn:
+            self.init_fpn(
+                embed_dim=embed_dim,
+                patch_size=patch_size,
+                out_with_norm=final_norm)
+
+    def get_vit_lr_decay_rate(self, layer_id, lr_decay_rate):
+        """Layer-wise lr decay: earlier blocks get smaller multipliers."""
+        return lr_decay_rate**(self.depth - layer_id)
+
+    def init_weight(self):
+        """Load pretrained weights (local path or URL), resizing the
+        absolute position embedding when the grids differ."""
+        pretrained = self.pretrained
+        if pretrained:
+            if 'http' in pretrained:
+                path = paddle.utils.download.get_weights_path_from_url(
+                    pretrained)
+            else:
+                path = pretrained
+
+            load_state_dict = paddle.load(path)
+            model_state_dict = self.state_dict()
+            pos_embed_name = "pos_embed"
+
+            if pos_embed_name in load_state_dict.keys(
+            ) and self.use_abs_pos_emb:
+                load_pos_embed = paddle.to_tensor(
+                    load_state_dict[pos_embed_name], dtype="float32")
+                if self.pos_embed.shape != load_pos_embed.shape:
+                    # Checkpoint grid is assumed square (tokens minus cls).
+                    pos_size = int(math.sqrt(load_pos_embed.shape[1] - 1))
+                    model_state_dict[pos_embed_name] = self.resize_pos_embed(
+                        load_pos_embed, (pos_size, pos_size),
+                        (self.pos_h, self.pos_w))
+
+                    # self.set_state_dict(model_state_dict)
+                    load_state_dict[pos_embed_name] = model_state_dict[
+                        pos_embed_name]
+
+                    print("Load pos_embed and resize it from {} to {} .".format(
+                        load_pos_embed.shape, self.pos_embed.shape))
+
+            self.set_state_dict(load_state_dict)
+            print("Load load_state_dict....")
+
+    def init_fpn(self, embed_dim=768, patch_size=16, out_with_norm=False):
+        """Build the 4 scale branches (fpn1..fpn4) that turn the single
+        patch-stride feature into a small pyramid."""
+        if patch_size == 16:
+            # stride 16 -> strides 4 / 8 / 16 / 32
+            self.fpn1 = nn.Sequential(
+                nn.Conv2DTranspose(
+                    embed_dim, embed_dim, kernel_size=2, stride=2),
+                nn.BatchNorm2D(embed_dim),
+                nn.GELU(),
+                nn.Conv2DTranspose(
+                    embed_dim, embed_dim, kernel_size=2, stride=2), )
+
+            self.fpn2 = nn.Sequential(
+                nn.Conv2DTranspose(
+                    embed_dim, embed_dim, kernel_size=2, stride=2), )
+
+            self.fpn3 = Identity()
+
+            self.fpn4 = nn.MaxPool2D(kernel_size=2, stride=2)
+        elif patch_size == 8:
+            # stride 8 -> strides 4 / 8 / 16 / 32
+            self.fpn1 = nn.Sequential(
+                nn.Conv2DTranspose(
+                    embed_dim, embed_dim, kernel_size=2, stride=2), )
+
+            self.fpn2 = Identity()
+
+            self.fpn3 = nn.Sequential(nn.MaxPool2D(kernel_size=2, stride=2), )
+
+            self.fpn4 = nn.Sequential(nn.MaxPool2D(kernel_size=4, stride=4), )
+
+        if not out_with_norm:
+            self.norm = Identity()
+        else:
+            self.norm = nn.LayerNorm(embed_dim, epsilon=self.epsilon)
+
+    def resize_pos_embed(self, pos_embed, old_hw, new_hw):
+        """
+        Resize pos_embed weight.
+        Args:
+            pos_embed (Tensor): the pos_embed weight
+            old_hw (list[int]): the height and width of old pos_embed
+            new_hw (list[int]): the height and width of new pos_embed
+        Returns:
+            Tensor: the resized pos_embed weight
+        """
+        # The first token is the cls position; only the grid is resized.
+        cls_pos_embed = pos_embed[:, :1, :]
+        pos_embed = pos_embed[:, 1:, :]
+
+        pos_embed = pos_embed.transpose([0, 2, 1])
+        pos_embed = pos_embed.reshape([1, -1, old_hw[0], old_hw[1]])
+        pos_embed = F.interpolate(
+            pos_embed, new_hw, mode='bicubic', align_corners=False)
+        pos_embed = pos_embed.flatten(2).transpose([0, 2, 1])
+        pos_embed = paddle.concat([cls_pos_embed, pos_embed], axis=1)
+
+        return pos_embed
+
+    def get_2d_sincos_position_embedding(self, h, w, temperature=10000.):
+        """Build a fixed 2D sin-cos position embedding of shape
+        [1, h, w, embed_dim]."""
+        grid_y, grid_x = paddle.meshgrid(
+            paddle.arange(
+                h, dtype=paddle.float32),
+            paddle.arange(
+                w, dtype=paddle.float32))
+        assert self.embed_dim % 4 == 0, 'Embed dimension must be divisible by 4 for 2D sin-cos position embedding'
+        pos_dim = self.embed_dim // 4
+        omega = paddle.arange(pos_dim, dtype=paddle.float32) / pos_dim
+        omega = (1. / (temperature**omega)).unsqueeze(0)
+
+        out_x = grid_x.reshape([-1, 1]).matmul(omega)
+        out_y = grid_y.reshape([-1, 1]).matmul(omega)
+
+        # Quarter of the channels each: sin(y), cos(y), sin(x), cos(x).
+        pos_emb = paddle.concat(
+            [
+                paddle.sin(out_y), paddle.cos(out_y), paddle.sin(out_x),
+                paddle.cos(out_x)
+            ],
+            axis=1)
+
+        return pos_emb.reshape([1, h, w, self.embed_dim])
+
+    def forward(self, inputs):
+        # Patchify and move to channels-last: [B, Hp, Wp, C]
+        x = self.patch_embed(inputs['image']).transpose([0, 2, 3, 1])
+        B, Hp, Wp, _ = paddle.shape(x)
+
+        if self.use_abs_pos:
+            # Sin-cos positions recomputed for the actual grid size.
+            x = x + self.get_2d_sincos_position_embedding(Hp, Wp)
+
+        if self.use_abs_pos_emb:
+            x = x + self.resize_pos_embed(self.pos_embed,
+                                          (self.pos_h, self.pos_w), (Hp, Wp))
+
+        feats = []
+        for idx, blk in enumerate(self.blocks):
+            if self.use_checkpoint and self.training:
+                # Gradient checkpointing: trade compute for activation memory.
+                x = paddle.distributed.fleet.utils.recompute(
+                    blk, x, **{"preserve_rng_state": True})
+            else:
+                x = blk(x)
+            if idx in self.out_indices:
+                # Emit NCHW feature maps.
+                feats.append(self.norm(x.transpose([0, 3, 1, 2])))
+
+        if self.with_fpn:
+            fpns = [self.fpn1, self.fpn2, self.fpn3, self.fpn4]
+            for i in range(len(feats)):
+                feats[i] = fpns[i](feats[i])
+        return feats
+
+    @property
+    def num_layers(self):
+        return len(self.blocks)
+
+    @property
+    def no_weight_decay(self):
+        # Parameter names excluded from weight decay by the optimizer.
+        return {'pos_embed', 'cls_token'}
+
+    @property
+    def out_shape(self):
+        return [
+            ShapeSpec(
+                channels=c, stride=s)
+            for c, s in zip(self.out_channels, self.out_strides)
+        ]
+
+
+class LayerNorm(nn.Layer):
+    """
+    A LayerNorm variant, popularized by Transformers, that performs point-wise mean and
+    variance normalization over the channel dimension for inputs that have shape
+    (batch_size, channels, height, width).
+    Note that this modified LayerNorm is only used in ResBlock and SimpleFeaturePyramid.
+
+    In ViT, we use the nn.LayerNorm
+    """
+
+    def __init__(self, normalized_shape, eps=1e-6):
+        super().__init__()
+        # Learnable per-channel affine parameters.
+        self.weight = self.create_parameter([normalized_shape])
+        self.bias = self.create_parameter([normalized_shape])
+        self.eps = eps
+        self.normalized_shape = (normalized_shape, )
+
+    def forward(self, x):
+        # Normalize over the channel axis (dim 1) of NCHW input.
+        u = x.mean(1, keepdim=True)
+        s = (x - u).pow(2).mean(1, keepdim=True)
+        x = (x - u) / paddle.sqrt(s + self.eps)
+        # Broadcast [C] affine params over H and W.
+        x = self.weight[:, None, None] * x + self.bias[:, None, None]
+        return x
+
+
+@register
+@serializable
+class SimpleFeaturePyramid(nn.Layer):
+    """ViTDet-style feature pyramid: builds multiple scales from a single
+    backbone feature map via transposed convs / pooling, plus a max-pool
+    top block for the coarsest level."""
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 spatial_scales,
+                 num_levels=4,
+                 use_bias=False):
+        """
+        Args:
+            in_channels (list[int]): input channels of each level which can be
+                derived from the output shape of backbone by from_config
+            out_channel (int): output channel of each level.
+            spatial_scales (list[float]): list of scaling factors to upsample or downsample
+                the input features for creating pyramid features which can be derived from
+                the output shape of backbone by from_config
+            num_levels (int): number of levels of output features.
+            use_bias (bool): whether use bias or not.
+        """
+        super(SimpleFeaturePyramid, self).__init__()
+
+        self.in_channels = in_channels[0]
+        self.out_channels = out_channels
+        self.num_levels = num_levels
+
+        self.stages = []
+        dim = self.in_channels
+        # One stage per scale factor; the last level comes from top_block.
+        if num_levels == 4:
+            scale_factors = [2.0, 1.0, 0.5]
+        elif num_levels == 5:
+            scale_factors = [4.0, 2.0, 1.0, 0.5]
+        else:
+            raise NotImplementedError(
+                f"num_levels={num_levels} is not supported yet.")
+
+        dim = in_channels[0]
+        for idx, scale in enumerate(scale_factors):
+            out_dim = dim
+            if scale == 4.0:
+                # Two 2x deconvs; channels shrink as resolution grows.
+                layers = [
+                    nn.Conv2DTranspose(
+                        dim, dim // 2, kernel_size=2, stride=2),
+                    nn.LayerNorm(dim // 2),
+                    nn.GELU(),
+                    nn.Conv2DTranspose(
+                        dim // 2, dim // 4, kernel_size=2, stride=2),
+                ]
+                out_dim = dim // 4
+            elif scale == 2.0:
+                layers = [
+                    nn.Conv2DTranspose(
+                        dim, dim // 2, kernel_size=2, stride=2)
+                ]
+                out_dim = dim // 2
+            elif scale == 1.0:
+                layers = []
+            elif scale == 0.5:
+                layers = [nn.MaxPool2D(kernel_size=2, stride=2)]
+
+            # Shared lateral head: 1x1 then 3x3 conv, each followed by the
+            # channels-first LayerNorm defined above.
+            layers.extend([
+                nn.Conv2D(
+                    out_dim,
+                    out_channels,
+                    kernel_size=1,
+                    bias_attr=use_bias, ), LayerNorm(out_channels), nn.Conv2D(
+                        out_channels,
+                        out_channels,
+                        kernel_size=3,
+                        padding=1,
+                        bias_attr=use_bias, ), LayerNorm(out_channels)
+            ])
+            layers = nn.Sequential(*layers)
+
+            # Name stages by their pyramid level (e.g. simfp_2 for stride 4).
+            stage = -int(math.log2(spatial_scales[0] * scale_factors[idx]))
+            self.add_sublayer(f"simfp_{stage}", layers)
+            self.stages.append(layers)
+
+        # top block output feature maps.
+        self.top_block = nn.Sequential(
+            nn.MaxPool2D(
+                kernel_size=1, stride=2, padding=0))
+
+    @classmethod
+    def from_config(cls, cfg, input_shape):
+        return {
+            'in_channels': [i.channels for i in input_shape],
+            'spatial_scales': [1.0 / i.stride for i in input_shape],
+        }
+
+    @property
+    def out_shape(self):
+        # NOTE(review): strides are not filled in here, only channels.
+        return [
+            ShapeSpec(channels=self.out_channels)
+            for _ in range(self.num_levels)
+        ]
+
+    def forward(self, feats):
+        """
+        Args:
+            feats (list[Tensor]): single-level backbone output; only
+                feats[0] of shape (N, C, H, W) is used.
+        Returns:
+            list[Tensor]: num_levels feature maps, finest first.
+        """
+        features = feats[0]
+        results = []
+
+        for stage in self.stages:
+            results.append(stage(features))
+
+        # Coarsest level: stride-2 subsample of the previous one.
+        top_block_in_feature = results[-1]
+        results.append(self.top_block(top_block_in_feature))
+        assert self.num_levels == len(results)
+
+        return results
diff --git a/ppdet/modeling/heads/detr_head.py b/ppdet/modeling/heads/detr_head.py
index 6b9d8d8db91..61448e4e0ba 100644
--- a/ppdet/modeling/heads/detr_head.py
+++ b/ppdet/modeling/heads/detr_head.py
@@ -380,10 +380,67 @@ def forward(self, out_transformer, body_feats, inputs=None):
assert 'gt_bbox' in inputs and 'gt_class' in inputs
if dn_meta is not None:
- dn_out_bboxes, dec_out_bboxes = paddle.split(
- dec_out_bboxes, dn_meta['dn_num_split'], axis=2)
- dn_out_logits, dec_out_logits = paddle.split(
- dec_out_logits, dn_meta['dn_num_split'], axis=2)
+ if isinstance(dn_meta, list):
+ dual_groups = len(dn_meta) - 1
+ dec_out_bboxes = paddle.split(
+ dec_out_bboxes, dual_groups + 1, axis=2)
+ dec_out_logits = paddle.split(
+ dec_out_logits, dual_groups + 1, axis=2)
+ enc_topk_bboxes = paddle.split(
+ enc_topk_bboxes, dual_groups + 1, axis=1)
+ enc_topk_logits = paddle.split(
+ enc_topk_logits, dual_groups + 1, axis=1)
+
+ dec_out_bboxes_list = []
+ dec_out_logits_list = []
+ dn_out_bboxes_list = []
+ dn_out_logits_list = []
+ loss = {}
+ for g_id in range(dual_groups + 1):
+ if dn_meta[g_id] is not None:
+ dn_out_bboxes_gid, dec_out_bboxes_gid = paddle.split(
+ dec_out_bboxes[g_id],
+ dn_meta[g_id]['dn_num_split'],
+ axis=2)
+ dn_out_logits_gid, dec_out_logits_gid = paddle.split(
+ dec_out_logits[g_id],
+ dn_meta[g_id]['dn_num_split'],
+ axis=2)
+ else:
+ dn_out_bboxes_gid, dn_out_logits_gid = None, None
+ dec_out_bboxes_gid = dec_out_bboxes[g_id]
+ dec_out_logits_gid = dec_out_logits[g_id]
+ out_bboxes_gid = paddle.concat([
+ enc_topk_bboxes[g_id].unsqueeze(0),
+ dec_out_bboxes_gid
+ ])
+ out_logits_gid = paddle.concat([
+ enc_topk_logits[g_id].unsqueeze(0),
+ dec_out_logits_gid
+ ])
+ loss_gid = self.loss(
+ out_bboxes_gid,
+ out_logits_gid,
+ inputs['gt_bbox'],
+ inputs['gt_class'],
+ dn_out_bboxes=dn_out_bboxes_gid,
+ dn_out_logits=dn_out_logits_gid,
+ dn_meta=dn_meta[g_id])
+ # sum loss
+ for key, value in loss_gid.items():
+ loss.update({
+ key: loss.get(key, paddle.zeros([1])) + value
+ })
+
+ # average across (dual_groups + 1)
+ for key, value in loss.items():
+ loss.update({key: value / (dual_groups + 1)})
+ return loss
+ else:
+ dn_out_bboxes, dec_out_bboxes = paddle.split(
+ dec_out_bboxes, dn_meta['dn_num_split'], axis=2)
+ dn_out_logits, dec_out_logits = paddle.split(
+ dec_out_logits, dn_meta['dn_num_split'], axis=2)
else:
dn_out_bboxes, dn_out_logits = None, None
diff --git a/ppdet/modeling/initializer.py b/ppdet/modeling/initializer.py
index 758eed240ea..308c51baf89 100644
--- a/ppdet/modeling/initializer.py
+++ b/ppdet/modeling/initializer.py
@@ -273,7 +273,8 @@ def kaiming_normal_(tensor,
def linear_init_(module):
bound = 1 / math.sqrt(module.weight.shape[0])
uniform_(module.weight, -bound, bound)
- uniform_(module.bias, -bound, bound)
+ if hasattr(module, "bias") and module.bias is not None:
+ uniform_(module.bias, -bound, bound)
def conv_init_(module):
diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py
index 933d012de18..b48cc98a71f 100644
--- a/ppdet/modeling/post_process.py
+++ b/ppdet/modeling/post_process.py
@@ -67,7 +67,8 @@ def __call__(self, head_out, rois, im_shape, scale_factor):
"""
if self.nms is not None:
bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
- bbox_pred, bbox_num, before_nms_indexes = self.nms(bboxes, score, self.num_classes)
+ bbox_pred, bbox_num, before_nms_indexes = self.nms(bboxes, score,
+ self.num_classes)
else:
bbox_pred, bbox_num = self.decode(head_out, rois, im_shape,
@@ -449,10 +450,14 @@ class DETRBBoxPostProcess(object):
def __init__(self,
num_classes=80,
num_top_queries=100,
+ dual_queries=False,
+ dual_groups=0,
use_focal_loss=False):
super(DETRBBoxPostProcess, self).__init__()
self.num_classes = num_classes
self.num_top_queries = num_top_queries
+ self.dual_queries = dual_queries
+ self.dual_groups = dual_groups
self.use_focal_loss = use_focal_loss
def __call__(self, head_out, im_shape, scale_factor):
@@ -471,6 +476,10 @@ def __call__(self, head_out, im_shape, scale_factor):
shape [bs], and is N.
"""
bboxes, logits, masks = head_out
+ if self.dual_queries:
+ num_queries = logits.shape[1]
+ logits, bboxes = logits[:, :int(num_queries // (self.dual_groups + 1)), :], \
+ bboxes[:, :int(num_queries // (self.dual_groups + 1)), :]
bbox_pred = bbox_cxcywh_to_xyxy(bboxes)
origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
diff --git a/ppdet/modeling/transformers/__init__.py b/ppdet/modeling/transformers/__init__.py
index e55cb0c1de9..0457e041423 100644
--- a/ppdet/modeling/transformers/__init__.py
+++ b/ppdet/modeling/transformers/__init__.py
@@ -18,6 +18,7 @@
from . import position_encoding
from . import deformable_transformer
from . import dino_transformer
+from . import group_detr_transformer
from .detr_transformer import *
from .utils import *
@@ -26,3 +27,4 @@
from .deformable_transformer import *
from .dino_transformer import *
from .petr_transformer import *
+from .group_detr_transformer import *
diff --git a/ppdet/modeling/transformers/group_detr_transformer.py b/ppdet/modeling/transformers/group_detr_transformer.py
new file mode 100644
index 00000000000..31ec6172eb4
--- /dev/null
+++ b/ppdet/modeling/transformers/group_detr_transformer.py
@@ -0,0 +1,857 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Modified from Deformable-DETR (https://github.com/fundamentalvision/Deformable-DETR)
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Modified from detrex (https://github.com/IDEA-Research/detrex)
+# Copyright 2022 The IDEA Authors. All rights reserved.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+
+from ppdet.core.workspace import register
+from ..layers import MultiHeadAttention
+from .position_encoding import PositionEmbedding
+from ..heads.detr_head import MLP
+from .deformable_transformer import MSDeformableAttention
+from ..initializer import (linear_init_, constant_, xavier_uniform_, normal_,
+ bias_init_with_prob)
+from .utils import (_get_clones, get_valid_ratio,
+ get_contrastive_denoising_training_group,
+ get_sine_pos_embed, inverse_sigmoid)
+
+__all__ = ['GroupDINOTransformer']
+
+
+class DINOTransformerEncoderLayer(nn.Layer):
+    """Deformable-attention encoder layer: multi-scale deformable self-
+    attention followed by a feed-forward network, each with residual +
+    post-LayerNorm."""
+
+    def __init__(self,
+                 d_model=256,
+                 n_head=8,
+                 dim_feedforward=1024,
+                 dropout=0.,
+                 activation="relu",
+                 n_levels=4,
+                 n_points=4,
+                 weight_attr=None,
+                 bias_attr=None):
+        super(DINOTransformerEncoderLayer, self).__init__()
+        # self attention
+        self.self_attn = MSDeformableAttention(d_model, n_head, n_levels,
+                                               n_points, 1.0)
+        self.dropout1 = nn.Dropout(dropout)
+        # Norm params excluded from weight decay.
+        self.norm1 = nn.LayerNorm(
+            d_model,
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        # ffn
+        self.linear1 = nn.Linear(d_model, dim_feedforward, weight_attr,
+                                 bias_attr)
+        # Activation resolved by name from paddle.nn.functional.
+        self.activation = getattr(F, activation)
+        self.dropout2 = nn.Dropout(dropout)
+        self.linear2 = nn.Linear(dim_feedforward, d_model, weight_attr,
+                                 bias_attr)
+        self.dropout3 = nn.Dropout(dropout)
+        self.norm2 = nn.LayerNorm(
+            d_model,
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self._reset_parameters()
+
+    def _reset_parameters(self):
+        # NOTE(review): linear_init_ initializes weight and bias, then
+        # xavier_uniform_ overwrites the weights — the first weight init is
+        # redundant; kept as-is to match the project convention.
+        linear_init_(self.linear1)
+        linear_init_(self.linear2)
+        xavier_uniform_(self.linear1.weight)
+        xavier_uniform_(self.linear2.weight)
+
+    def with_pos_embed(self, tensor, pos):
+        # Add positional embedding when provided.
+        return tensor if pos is None else tensor + pos
+
+    def forward_ffn(self, src):
+        # FFN with residual and post-norm.
+        src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
+        src = src + self.dropout3(src2)
+        src = self.norm2(src)
+        return src
+
+    def forward(self,
+                src,
+                reference_points,
+                spatial_shapes,
+                level_start_index,
+                src_mask=None,
+                query_pos_embed=None):
+        # self attention
+        src2 = self.self_attn(
+            self.with_pos_embed(src, query_pos_embed), reference_points, src,
+            spatial_shapes, level_start_index, src_mask)
+        src = src + self.dropout1(src2)
+        src = self.norm1(src)
+        # ffn
+        src = self.forward_ffn(src)
+
+        return src
+
+
+class DINOTransformerEncoder(nn.Layer):
+    """Stack of DINOTransformerEncoderLayer applied to the flattened
+    multi-scale feature sequence."""
+
+    def __init__(self, encoder_layer, num_layers):
+        super(DINOTransformerEncoder, self).__init__()
+        self.layers = _get_clones(encoder_layer, num_layers)
+        self.num_layers = num_layers
+
+    @staticmethod
+    def get_reference_points(spatial_shapes, valid_ratios, offset=0.5):
+        """Return normalized per-level pixel-center reference points of
+        shape [bs, sum(H*W), n_levels, 2], scaled by valid_ratios."""
+        valid_ratios = valid_ratios.unsqueeze(1)
+        reference_points = []
+        for i, (H, W) in enumerate(spatial_shapes):
+            # Pixel centers (offset 0.5), normalized by the valid extent.
+            ref_y, ref_x = paddle.meshgrid(
+                paddle.arange(end=H) + offset, paddle.arange(end=W) + offset)
+            ref_y = ref_y.flatten().unsqueeze(0) / (valid_ratios[:, :, i, 1] *
+                                                    H)
+            ref_x = ref_x.flatten().unsqueeze(0) / (valid_ratios[:, :, i, 0] *
+                                                    W)
+            reference_points.append(paddle.stack((ref_x, ref_y), axis=-1))
+        reference_points = paddle.concat(reference_points, 1).unsqueeze(2)
+        reference_points = reference_points * valid_ratios
+        return reference_points
+
+    def forward(self,
+                feat,
+                spatial_shapes,
+                level_start_index,
+                feat_mask=None,
+                query_pos_embed=None,
+                valid_ratios=None):
+        # Without a mask, assume every position is valid at every level.
+        if valid_ratios is None:
+            valid_ratios = paddle.ones(
+                [feat.shape[0], spatial_shapes.shape[0], 2])
+        reference_points = self.get_reference_points(spatial_shapes,
+                                                     valid_ratios)
+        for layer in self.layers:
+            feat = layer(feat, reference_points, spatial_shapes,
+                         level_start_index, feat_mask, query_pos_embed)
+
+        return feat
+
+
+class DINOTransformerDecoderLayer(nn.Layer):
+ def __init__(self,
+ d_model=256,
+ n_head=8,
+ dim_feedforward=1024,
+ dropout=0.,
+ activation="relu",
+ n_levels=4,
+ n_points=4,
+ dual_queries=False,
+ dual_groups=0,
+ weight_attr=None,
+ bias_attr=None):
+ super(DINOTransformerDecoderLayer, self).__init__()
+
+ # self attention
+ self.self_attn = MultiHeadAttention(d_model, n_head, dropout=dropout)
+ self.dropout1 = nn.Dropout(dropout)
+ self.norm1 = nn.LayerNorm(
+ d_model,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+ # cross attention
+ self.cross_attn = MSDeformableAttention(d_model, n_head, n_levels,
+ n_points, 1.0)
+ self.dropout2 = nn.Dropout(dropout)
+ self.norm2 = nn.LayerNorm(
+ d_model,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+ # ffn
+ self.linear1 = nn.Linear(d_model, dim_feedforward, weight_attr,
+ bias_attr)
+ self.activation = getattr(F, activation)
+ self.dropout3 = nn.Dropout(dropout)
+ self.linear2 = nn.Linear(dim_feedforward, d_model, weight_attr,
+ bias_attr)
+ self.dropout4 = nn.Dropout(dropout)
+ self.norm3 = nn.LayerNorm(
+ d_model,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+ # for dual groups
+ self.dual_queries = dual_queries
+ self.dual_groups = dual_groups
+ self.n_head = n_head
+
+ self._reset_parameters()
+
+ def _reset_parameters(self):
+ linear_init_(self.linear1)
+ linear_init_(self.linear2)
+ xavier_uniform_(self.linear1.weight)
+ xavier_uniform_(self.linear2.weight)
+
+ def with_pos_embed(self, tensor, pos):
+ return tensor if pos is None else tensor + pos
+
+ def forward_ffn(self, tgt):
+ return self.linear2(self.dropout3(self.activation(self.linear1(tgt))))
+
+ def forward(self,
+ tgt,
+ reference_points,
+ memory,
+ memory_spatial_shapes,
+ memory_level_start_index,
+ attn_mask=None,
+ memory_mask=None,
+ query_pos_embed=None):
+ # self attention
+ q = k = self.with_pos_embed(tgt, query_pos_embed)
+ if self.dual_queries:
+ dual_groups = self.dual_groups
+ bs, num_queries, n_model = paddle.shape(q)
+ q = paddle.concat(q.split(dual_groups + 1, axis=1), axis=0)
+ k = paddle.concat(k.split(dual_groups + 1, axis=1), axis=0)
+ tgt = paddle.concat(tgt.split(dual_groups + 1, axis=1), axis=0)
+
+ g_num_queries = num_queries // (dual_groups + 1)
+ if attn_mask is None or attn_mask[0] is None:
+ attn_mask = None
+ else:
+ # [(dual_groups + 1), g_num_queries, g_num_queries]
+ attn_mask = paddle.concat(
+ [sa_mask.unsqueeze(0) for sa_mask in attn_mask], axis=0)
+ # [1, (dual_groups + 1), 1, g_num_queries, g_num_queries]
+ # --> [bs, (dual_groups + 1), nhead, g_num_queries, g_num_queries]
+ # --> [bs * (dual_groups + 1), nhead, g_num_queries, g_num_queries]
+ attn_mask = attn_mask.unsqueeze(0).unsqueeze(2).tile(
+ [bs, 1, self.n_head, 1, 1])
+ attn_mask = attn_mask.reshape([
+ bs * (dual_groups + 1), self.n_head, g_num_queries,
+ g_num_queries
+ ])
+
+ if attn_mask is not None:
+ attn_mask = attn_mask.astype('bool')
+
+ tgt2 = self.self_attn(q, k, value=tgt, attn_mask=attn_mask)
+ tgt = tgt + self.dropout1(tgt2)
+ tgt = self.norm2(tgt)
+
+ # trace back
+ if self.dual_queries:
+ tgt = paddle.concat(tgt.split(dual_groups + 1, axis=0), axis=1)
+
+ # cross attention
+ tgt2 = self.cross_attn(
+ self.with_pos_embed(tgt, query_pos_embed), reference_points, memory,
+ memory_spatial_shapes, memory_level_start_index, memory_mask)
+ tgt = tgt + self.dropout2(tgt2)
+ tgt = self.norm1(tgt)
+
+ # ffn
+ tgt2 = self.forward_ffn(tgt)
+ tgt = tgt + self.dropout4(tgt2)
+ tgt = self.norm3(tgt)
+
+ return tgt
+
+
+class DINOTransformerDecoder(nn.Layer):
+ def __init__(self,
+ hidden_dim,
+ decoder_layer,
+ num_layers,
+ return_intermediate=True):
+ super(DINOTransformerDecoder, self).__init__()
+ self.layers = _get_clones(decoder_layer, num_layers)
+ self.hidden_dim = hidden_dim
+ self.num_layers = num_layers
+ self.return_intermediate = return_intermediate
+
+ self.norm = nn.LayerNorm(
+ hidden_dim,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+ def forward(self,
+ tgt,
+ reference_points,
+ memory,
+ memory_spatial_shapes,
+ memory_level_start_index,
+ bbox_head,
+ query_pos_head,
+ valid_ratios=None,
+ attn_mask=None,
+ memory_mask=None):
+ if valid_ratios is None:
+ valid_ratios = paddle.ones(
+ [memory.shape[0], memory_spatial_shapes.shape[0], 2])
+
+ output = tgt
+ intermediate = []
+ inter_ref_bboxes = []
+ for i, layer in enumerate(self.layers):
+ reference_points_input = reference_points.unsqueeze(
+ 2) * valid_ratios.tile([1, 1, 2]).unsqueeze(1)
+ query_pos_embed = get_sine_pos_embed(
+ reference_points_input[..., 0, :], self.hidden_dim // 2)
+ query_pos_embed = query_pos_head(query_pos_embed)
+
+ output = layer(output, reference_points_input, memory,
+ memory_spatial_shapes, memory_level_start_index,
+ attn_mask, memory_mask, query_pos_embed)
+ inter_ref_bbox = F.sigmoid(bbox_head[i](output) + inverse_sigmoid(
+ reference_points))
+
+ if self.return_intermediate:
+ intermediate.append(self.norm(output))
+ inter_ref_bboxes.append(inter_ref_bbox)
+
+ reference_points = inter_ref_bbox.detach()
+
+ if self.return_intermediate:
+ return paddle.stack(intermediate), paddle.stack(inter_ref_bboxes)
+
+ return output, reference_points
+
+
+@register
+class GroupDINOTransformer(nn.Layer):
+ __shared__ = ['num_classes', 'hidden_dim']
+
+ def __init__(self,
+ num_classes=80,
+ hidden_dim=256,
+ num_queries=900,
+ position_embed_type='sine',
+ return_intermediate_dec=True,
+ backbone_feat_channels=[512, 1024, 2048],
+ num_levels=4,
+ num_encoder_points=4,
+ num_decoder_points=4,
+ nhead=8,
+ num_encoder_layers=6,
+ num_decoder_layers=6,
+ dim_feedforward=1024,
+ dropout=0.,
+ activation="relu",
+ pe_temperature=10000,
+ pe_offset=-0.5,
+ num_denoising=100,
+ label_noise_ratio=0.5,
+ box_noise_scale=1.0,
+ learnt_init_query=True,
+ use_input_proj=True,
+ dual_queries=False,
+ dual_groups=0,
+ eps=1e-2):
+ super(GroupDINOTransformer, self).__init__()
+ assert position_embed_type in ['sine', 'learned'], \
+ f'ValueError: position_embed_type not supported {position_embed_type}!'
+ assert len(backbone_feat_channels) <= num_levels
+
+ self.hidden_dim = hidden_dim
+ self.nhead = nhead
+ self.num_levels = num_levels
+ self.num_classes = num_classes
+ self.num_queries = num_queries
+ self.eps = eps
+ self.num_decoder_layers = num_decoder_layers
+ self.use_input_proj = use_input_proj
+
+ if use_input_proj:
+ # backbone feature projection
+ self._build_input_proj_layer(backbone_feat_channels)
+
+ # Transformer module
+ encoder_layer = DINOTransformerEncoderLayer(
+ hidden_dim, nhead, dim_feedforward, dropout, activation, num_levels,
+ num_encoder_points)
+ self.encoder = DINOTransformerEncoder(encoder_layer, num_encoder_layers)
+ decoder_layer = DINOTransformerDecoderLayer(
+ hidden_dim,
+ nhead,
+ dim_feedforward,
+ dropout,
+ activation,
+ num_levels,
+ num_decoder_points,
+ dual_queries=dual_queries,
+ dual_groups=dual_groups)
+ self.decoder = DINOTransformerDecoder(hidden_dim, decoder_layer,
+ num_decoder_layers,
+ return_intermediate_dec)
+
+ # denoising part
+ self.denoising_class_embed = nn.Embedding(
+ num_classes,
+ hidden_dim,
+ weight_attr=ParamAttr(initializer=nn.initializer.Normal()))
+ self.num_denoising = num_denoising
+ self.label_noise_ratio = label_noise_ratio
+ self.box_noise_scale = box_noise_scale
+
+ # for dual group
+ self.dual_queries = dual_queries
+ self.dual_groups = dual_groups
+ if self.dual_queries:
+ self.denoising_class_embed_groups = nn.LayerList([
+ nn.Embedding(
+ num_classes,
+ hidden_dim,
+ weight_attr=ParamAttr(initializer=nn.initializer.Normal()))
+ for _ in range(self.dual_groups)
+ ])
+
+ # position embedding
+ self.position_embedding = PositionEmbedding(
+ hidden_dim // 2,
+ temperature=pe_temperature,
+ normalize=True if position_embed_type == 'sine' else False,
+ embed_type=position_embed_type,
+ offset=pe_offset)
+ self.level_embed = nn.Embedding(num_levels, hidden_dim)
+ # decoder embedding
+ self.learnt_init_query = learnt_init_query
+ if learnt_init_query:
+ self.tgt_embed = nn.Embedding(num_queries, hidden_dim)
+ normal_(self.tgt_embed.weight)
+ if self.dual_queries:
+ self.tgt_embed_dual = nn.LayerList([
+ nn.Embedding(num_queries, hidden_dim)
+ for _ in range(self.dual_groups)
+ ])
+ for dual_tgt_module in self.tgt_embed_dual:
+ normal_(dual_tgt_module.weight)
+ self.query_pos_head = MLP(2 * hidden_dim,
+ hidden_dim,
+ hidden_dim,
+ num_layers=2)
+
+ # encoder head
+ self.enc_output = nn.Sequential(
+ nn.Linear(hidden_dim, hidden_dim),
+ nn.LayerNorm(
+ hidden_dim,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0))))
+ if self.dual_queries:
+ self.enc_output = _get_clones(self.enc_output, self.dual_groups + 1)
+ else:
+ self.enc_output = _get_clones(self.enc_output, 1)
+
+ self.enc_score_head = nn.Linear(hidden_dim, num_classes)
+ self.enc_bbox_head = MLP(hidden_dim, hidden_dim, 4, num_layers=3)
+
+ if self.dual_queries:
+ self.enc_bbox_head_dq = nn.LayerList([
+ MLP(hidden_dim, hidden_dim, 4, num_layers=3)
+ for i in range(self.dual_groups)
+ ])
+ self.enc_score_head_dq = nn.LayerList([
+ nn.Linear(hidden_dim, num_classes)
+ for i in range(self.dual_groups)
+ ])
+
+ # decoder head
+ self.dec_score_head = nn.LayerList([
+ nn.Linear(hidden_dim, num_classes)
+ for _ in range(num_decoder_layers)
+ ])
+ self.dec_bbox_head = nn.LayerList([
+ MLP(hidden_dim, hidden_dim, 4, num_layers=3)
+ for _ in range(num_decoder_layers)
+ ])
+
+ self._reset_parameters()
+
+ def _reset_parameters(self):
+ # class and bbox head init
+ bias_cls = bias_init_with_prob(0.01)
+ linear_init_(self.enc_score_head)
+ constant_(self.enc_score_head.bias, bias_cls)
+ constant_(self.enc_bbox_head.layers[-1].weight)
+ constant_(self.enc_bbox_head.layers[-1].bias)
+ for cls_, reg_ in zip(self.dec_score_head, self.dec_bbox_head):
+ linear_init_(cls_)
+ constant_(cls_.bias, bias_cls)
+ constant_(reg_.layers[-1].weight)
+ constant_(reg_.layers[-1].bias)
+
+ for enc_output in self.enc_output:
+ linear_init_(enc_output[0])
+ xavier_uniform_(enc_output[0].weight)
+ normal_(self.level_embed.weight)
+ if self.learnt_init_query:
+ xavier_uniform_(self.tgt_embed.weight)
+ xavier_uniform_(self.query_pos_head.layers[0].weight)
+ xavier_uniform_(self.query_pos_head.layers[1].weight)
+ normal_(self.denoising_class_embed.weight)
+ if self.use_input_proj:
+ for l in self.input_proj:
+ xavier_uniform_(l[0].weight)
+ constant_(l[0].bias)
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'backbone_feat_channels': [i.channels for i in input_shape], }
+
+ def _build_input_proj_layer(self, backbone_feat_channels):
+ self.input_proj = nn.LayerList()
+ for in_channels in backbone_feat_channels:
+ self.input_proj.append(
+ nn.Sequential(
+ ('conv', nn.Conv2D(
+ in_channels, self.hidden_dim, kernel_size=1)),
+ ('norm', nn.GroupNorm(
+ 32,
+ self.hidden_dim,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0))))))
+ in_channels = backbone_feat_channels[-1]
+ for _ in range(self.num_levels - len(backbone_feat_channels)):
+ self.input_proj.append(
+ nn.Sequential(
+ ('conv', nn.Conv2D(
+ in_channels,
+ self.hidden_dim,
+ kernel_size=3,
+ stride=2,
+ padding=1)), ('norm', nn.GroupNorm(
+ 32,
+ self.hidden_dim,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0))))))
+ in_channels = self.hidden_dim
+
+ def _get_encoder_input(self, feats, pad_mask=None):
+ if self.use_input_proj:
+ # get projection features
+ proj_feats = [
+ self.input_proj[i](feat) for i, feat in enumerate(feats)
+ ]
+ if self.num_levels > len(proj_feats):
+ len_srcs = len(proj_feats)
+ for i in range(len_srcs, self.num_levels):
+ if i == len_srcs:
+ proj_feats.append(self.input_proj[i](feats[-1]))
+ else:
+ proj_feats.append(self.input_proj[i](proj_feats[-1]))
+ else:
+ proj_feats = feats
+ # get encoder inputs
+ feat_flatten = []
+ mask_flatten = []
+ lvl_pos_embed_flatten = []
+ spatial_shapes = []
+ valid_ratios = []
+ for i, feat in enumerate(proj_feats):
+ bs, _, h, w = paddle.shape(feat)
+ spatial_shapes.append(paddle.concat([h, w]))
+ # [b,c,h,w] -> [b,h*w,c]
+ feat_flatten.append(feat.flatten(2).transpose([0, 2, 1]))
+ if pad_mask is not None:
+ mask = F.interpolate(pad_mask.unsqueeze(0), size=(h, w))[0]
+ else:
+ mask = paddle.ones([bs, h, w])
+ valid_ratios.append(get_valid_ratio(mask))
+ # [b, h*w, c]
+ pos_embed = self.position_embedding(mask).flatten(1, 2)
+ lvl_pos_embed = pos_embed + self.level_embed.weight[i].reshape(
+ [1, 1, -1])
+ lvl_pos_embed_flatten.append(lvl_pos_embed)
+ if pad_mask is not None:
+ # [b, h*w]
+ mask_flatten.append(mask.flatten(1))
+
+ # [b, l, c]
+ feat_flatten = paddle.concat(feat_flatten, 1)
+ # [b, l]
+ mask_flatten = None if pad_mask is None else paddle.concat(mask_flatten,
+ 1)
+ # [b, l, c]
+ lvl_pos_embed_flatten = paddle.concat(lvl_pos_embed_flatten, 1)
+ # [num_levels, 2]
+ spatial_shapes = paddle.to_tensor(
+ paddle.stack(spatial_shapes).astype('int64'))
+ # [l] start index of each level
+ level_start_index = paddle.concat([
+ paddle.zeros(
+ [1], dtype='int64'), spatial_shapes.prod(1).cumsum(0)[:-1]
+ ])
+ # [b, num_levels, 2]
+ valid_ratios = paddle.stack(valid_ratios, 1)
+ return (feat_flatten, spatial_shapes, level_start_index, mask_flatten,
+ lvl_pos_embed_flatten, valid_ratios)
+
+ def forward(self, feats, pad_mask=None, gt_meta=None):
+ # input projection and embedding
+ (feat_flatten, spatial_shapes, level_start_index, mask_flatten,
+ lvl_pos_embed_flatten,
+ valid_ratios) = self._get_encoder_input(feats, pad_mask)
+
+ # encoder
+ memory = self.encoder(feat_flatten, spatial_shapes, level_start_index,
+ mask_flatten, lvl_pos_embed_flatten, valid_ratios)
+
+ # prepare denoising training
+ if self.training:
+ denoising_class, denoising_bbox, attn_mask, dn_meta = \
+ get_contrastive_denoising_training_group(gt_meta,
+ self.num_classes,
+ self.num_queries,
+ self.denoising_class_embed.weight,
+ self.num_denoising,
+ self.label_noise_ratio,
+ self.box_noise_scale)
+ if self.dual_queries:
+ denoising_class_groups = []
+ denoising_bbox_groups = []
+ attn_mask_groups = []
+ dn_meta_groups = []
+ for g_id in range(self.dual_groups):
+ denoising_class_gid, denoising_bbox_gid, attn_mask_gid, dn_meta_gid = \
+ get_contrastive_denoising_training_group(gt_meta,
+ self.num_classes,
+ self.num_queries,
+ self.denoising_class_embed_groups[g_id].weight,
+ self.num_denoising,
+ self.label_noise_ratio,
+ self.box_noise_scale)
+ denoising_class_groups.append(denoising_class_gid)
+ denoising_bbox_groups.append(denoising_bbox_gid)
+ attn_mask_groups.append(attn_mask_gid)
+ dn_meta_groups.append(dn_meta_gid)
+
+ # combine
+ denoising_class = [denoising_class] + denoising_class_groups
+ denoising_bbox = [denoising_bbox] + denoising_bbox_groups
+ attn_mask = [attn_mask] + attn_mask_groups
+ dn_meta = [dn_meta] + dn_meta_groups
+ else:
+ denoising_class, denoising_bbox, attn_mask, dn_meta = None, None, None, None
+
+ target, init_ref_points, enc_topk_bboxes, enc_topk_logits = \
+ self._get_decoder_input(
+ memory, spatial_shapes, mask_flatten, denoising_class,
+ denoising_bbox)
+
+ # decoder
+ inter_feats, inter_ref_bboxes = self.decoder(
+ target, init_ref_points, memory, spatial_shapes, level_start_index,
+ self.dec_bbox_head, self.query_pos_head, valid_ratios, attn_mask,
+ mask_flatten)
+ # solve hang during distributed training
+ inter_feats[0] += self.denoising_class_embed.weight[0, 0] * 0.
+ if self.dual_queries:
+ for g_id in range(self.dual_groups):
+ inter_feats[0] += self.denoising_class_embed_groups[
+ g_id].weight[0, 0] * 0.0
+
+ out_bboxes = []
+ out_logits = []
+ for i in range(self.num_decoder_layers):
+ out_logits.append(self.dec_score_head[i](inter_feats[i]))
+ if i == 0:
+ out_bboxes.append(
+ F.sigmoid(self.dec_bbox_head[i](inter_feats[i]) +
+ inverse_sigmoid(init_ref_points)))
+ else:
+ out_bboxes.append(
+ F.sigmoid(self.dec_bbox_head[i](inter_feats[i]) +
+ inverse_sigmoid(inter_ref_bboxes[i - 1])))
+
+ out_bboxes = paddle.stack(out_bboxes)
+ out_logits = paddle.stack(out_logits)
+ return (out_bboxes, out_logits, enc_topk_bboxes, enc_topk_logits,
+ dn_meta)
+
+ def _get_encoder_output_anchors(self,
+ memory,
+ spatial_shapes,
+ memory_mask=None,
+ grid_size=0.05):
+ output_anchors = []
+ idx = 0
+ for lvl, (h, w) in enumerate(spatial_shapes):
+ if memory_mask is not None:
+ mask_ = memory_mask[:, idx:idx + h * w].reshape([-1, h, w])
+ valid_H = paddle.sum(mask_[:, :, 0], 1)
+ valid_W = paddle.sum(mask_[:, 0, :], 1)
+ else:
+ valid_H, valid_W = h, w
+
+ grid_y, grid_x = paddle.meshgrid(
+ paddle.arange(
+ end=h, dtype=memory.dtype),
+ paddle.arange(
+ end=w, dtype=memory.dtype))
+ grid_xy = paddle.stack([grid_x, grid_y], -1)
+
+ valid_WH = paddle.stack([valid_W, valid_H], -1).reshape(
+ [-1, 1, 1, 2]).astype(grid_xy.dtype)
+ grid_xy = (grid_xy.unsqueeze(0) + 0.5) / valid_WH
+ wh = paddle.ones_like(grid_xy) * grid_size * (2.0**lvl)
+ output_anchors.append(
+ paddle.concat([grid_xy, wh], -1).reshape([-1, h * w, 4]))
+ idx += h * w
+
+ output_anchors = paddle.concat(output_anchors, 1)
+ valid_mask = ((output_anchors > self.eps) *
+ (output_anchors < 1 - self.eps)).all(-1, keepdim=True)
+ output_anchors = paddle.log(output_anchors / (1 - output_anchors))
+ if memory_mask is not None:
+ valid_mask = (valid_mask * (memory_mask.unsqueeze(-1) > 0)) > 0
+ output_anchors = paddle.where(valid_mask, output_anchors,
+ paddle.to_tensor(float("inf")))
+
+ memory = paddle.where(valid_mask, memory, paddle.to_tensor(0.))
+ if self.dual_queries:
+ output_memory = [
+ self.enc_output[g_id](memory)
+ for g_id in range(self.dual_groups + 1)
+ ]
+ else:
+ output_memory = self.enc_output[0](memory)
+ return output_memory, output_anchors
+
+ def _get_decoder_input(self,
+ memory,
+ spatial_shapes,
+ memory_mask=None,
+ denoising_class=None,
+ denoising_bbox=None):
+ bs, _, _ = memory.shape
+ # prepare input for decoder
+ output_memory, output_anchors = self._get_encoder_output_anchors(
+ memory, spatial_shapes, memory_mask)
+ if self.dual_queries:
+ enc_outputs_class = self.enc_score_head(output_memory[0])
+ enc_outputs_coord_unact = self.enc_bbox_head(output_memory[
+ 0]) + output_anchors
+ else:
+ enc_outputs_class = self.enc_score_head(output_memory)
+ enc_outputs_coord_unact = self.enc_bbox_head(
+ output_memory) + output_anchors
+
+ _, topk_ind = paddle.topk(
+ enc_outputs_class.max(-1), self.num_queries, axis=1)
+ # extract region proposal boxes
+ batch_ind = paddle.arange(end=bs, dtype=topk_ind.dtype)
+ batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_queries])
+ topk_ind = paddle.stack([batch_ind, topk_ind], axis=-1)
+ topk_coords_unact = paddle.gather_nd(enc_outputs_coord_unact,
+ topk_ind) # unsigmoided.
+ enc_topk_bboxes = F.sigmoid(topk_coords_unact)
+ reference_points = enc_topk_bboxes.detach()
+ enc_topk_logits = paddle.gather_nd(enc_outputs_class, topk_ind)
+
+ if self.dual_queries:
+ enc_topk_logits_groups = []
+ enc_topk_bboxes_groups = []
+ reference_points_groups = []
+ topk_ind_groups = []
+ for g_id in range(self.dual_groups):
+ enc_outputs_class_gid = self.enc_score_head_dq[g_id](
+ output_memory[g_id + 1])
+ enc_outputs_coord_unact_gid = self.enc_bbox_head_dq[g_id](
+ output_memory[g_id + 1]) + output_anchors
+ _, topk_ind_gid = paddle.topk(
+ enc_outputs_class_gid.max(-1), self.num_queries, axis=1)
+ # extract region proposal boxes
+ batch_ind = paddle.arange(end=bs, dtype=topk_ind_gid.dtype)
+ batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_queries])
+ topk_ind_gid = paddle.stack([batch_ind, topk_ind_gid], axis=-1)
+ topk_coords_unact_gid = paddle.gather_nd(
+ enc_outputs_coord_unact_gid, topk_ind_gid) # unsigmoided.
+ enc_topk_bboxes_gid = F.sigmoid(topk_coords_unact_gid)
+ reference_points_gid = enc_topk_bboxes_gid.detach()
+ enc_topk_logits_gid = paddle.gather_nd(enc_outputs_class_gid,
+ topk_ind_gid)
+
+ # append and combine
+ topk_ind_groups.append(topk_ind_gid)
+ enc_topk_logits_groups.append(enc_topk_logits_gid)
+ enc_topk_bboxes_groups.append(enc_topk_bboxes_gid)
+ reference_points_groups.append(reference_points_gid)
+
+ enc_topk_bboxes = paddle.concat(
+ [enc_topk_bboxes] + enc_topk_bboxes_groups, 1)
+ enc_topk_logits = paddle.concat(
+ [enc_topk_logits] + enc_topk_logits_groups, 1)
+ reference_points = paddle.concat(
+ [reference_points] + reference_points_groups, 1)
+ topk_ind = paddle.concat([topk_ind] + topk_ind_groups, 1)
+
+ # extract region features
+ if self.learnt_init_query:
+ target = self.tgt_embed.weight.unsqueeze(0).tile([bs, 1, 1])
+ if self.dual_queries:
+ target = paddle.concat([target] + [
+ self.tgt_embed_dual[g_id].weight.unsqueeze(0).tile(
+ [bs, 1, 1]) for g_id in range(self.dual_groups)
+ ], 1)
+ else:
+ if self.dual_queries:
+ target = paddle.gather_nd(output_memory[0], topk_ind)
+ target_groups = []
+ for g_id in range(self.dual_groups):
+ target_gid = paddle.gather_nd(output_memory[g_id + 1],
+ topk_ind_groups[g_id])
+ target_groups.append(target_gid)
+ target = paddle.concat([target] + target_groups, 1).detach()
+ else:
+ target = paddle.gather_nd(output_memory, topk_ind).detach()
+
+ if denoising_bbox is not None:
+ if isinstance(denoising_bbox, list) and isinstance(
+ denoising_class, list) and self.dual_queries:
+ if denoising_bbox[0] is not None:
+ reference_points_list = paddle.split(
+ reference_points, self.dual_groups + 1, axis=1)
+ reference_points = paddle.concat(
+ [
+ paddle.concat(
+ [ref, ref_], axis=1)
+ for ref, ref_ in zip(denoising_bbox,
+ reference_points_list)
+ ],
+ axis=1)
+
+ target_list = paddle.split(
+ target, self.dual_groups + 1, axis=1)
+ target = paddle.concat(
+ [
+ paddle.concat(
+ [tgt, tgt_], axis=1)
+ for tgt, tgt_ in zip(denoising_class, target_list)
+ ],
+ axis=1)
+ else:
+ reference_points, target = reference_points, target
+ else:
+ reference_points = paddle.concat(
+ [denoising_bbox, reference_points], 1)
+ target = paddle.concat([denoising_class, target], 1)
+
+ return target, reference_points, enc_topk_bboxes, enc_topk_logits
From 237b19d951091401d9d4be04daa681391196c077 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Tue, 7 Mar 2023 11:24:39 +0800
Subject: [PATCH 033/116] fix ppyoloe distill and docs (#7875)
* fix ppyoloe_distill and docs
* fix docs, test=document_fix
---
configs/ppyoloe/README.md | 25 +++++++++++++++++++++----
configs/ppyoloe/README_cn.md | 18 ++++++++++++++++++
configs/ppyoloe/objects365/README_cn.md | 8 ++++++++
configs/slim/README.md | 3 ++-
configs/slim/README_en.md | 3 ++-
ppdet/slim/distill_model.py | 5 ++---
6 files changed, 53 insertions(+), 9 deletions(-)
diff --git a/configs/ppyoloe/README.md b/configs/ppyoloe/README.md
index 1c90e8ad691..f8f79344e35 100644
--- a/configs/ppyoloe/README.md
+++ b/configs/ppyoloe/README.md
@@ -40,9 +40,13 @@ PP-YOLOE is composed of following methods:
|:--------------:|:-----:|:-------:|:----------:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
| PP-YOLOE+_s | 80 | 8 | 8 | cspresnet-s | 640 | 43.7 | 43.9 | 7.93 | 17.36 | 208.3 | 333.3 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_s_80e_coco.yml) |
| PP-YOLOE+_m | 80 | 8 | 8 | cspresnet-m | 640 | 49.8 | 50.0 | 23.43 | 49.91 | 123.4 | 208.3 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_m_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_m_80e_coco.yml) |
+| PP-YOLOE+_m(distill) | 80 | 8 | 8 | cspresnet-m | 640 | **51.0** | 51.2 | 23.43 | 49.91 | 123.4 | 208.3 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) | [config](distill/ppyoloe_plus_crn_m_80e_coco_distill.yml) |
| PP-YOLOE+_l | 80 | 8 | 8 | cspresnet-l | 640 | 52.9 | 53.3 | 52.20 | 110.07 | 78.1 | 149.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_l_80e_coco.yml) |
+| PP-YOLOE+_l(distill) | 80 | 8 | 8 | cspresnet-l | 640 | **54.0** | 54.4 | 52.20 | 110.07 | 78.1 | 149.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) | [config](distill/ppyoloe_plus_crn_l_80e_coco_distill.yml) |
| PP-YOLOE+_x | 80 | 8 | 8 | cspresnet-x | 640 | 54.7 | 54.9 | 98.42 | 206.59 | 45.0 | 95.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_x_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_x_80e_coco.yml) |
+**Note:**
+- M and L models use distillation, please refer to [distill](distill) for details.
#### Tiny model
@@ -57,10 +61,15 @@ PP-YOLOE is composed of following methods:
### Comprehensive Metrics
| Model | Epoch | AP0.5:0.95 | AP0.5 | AP0.75 | APsmall | APmedium | APlarge | ARsmall | ARmedium | ARlarge |
|:------------------------:|:-----:|:---------------:|:----------:|:------------:|:------------:| :-----------: |:------------:|:------------:|:-------------:|:------------:|
-| PP-YOLOE+_s | 80 | 43.7 | 60.6 | 47.9 | 26.5 | 47.5 | 59.0 | 46.7 | 71.4 | 81.7 |
-| PP-YOLOE+_m | 80 | 49.8 | 67.1 | 54.5 | 31.8 | 53.9 | 66.2 | 53.3 | 75.0 | 84.6 |
-| PP-YOLOE+_l | 80 | 52.9 | 70.1 | 57.9 | 35.2 | 57.5 | 69.1 | 56.0 | 77.9 | 86.9 |
-| PP-YOLOE+_x | 80 | 54.7 | 72.0 | 59.9 | 37.9 | 59.3 | 70.4 | 57.0 | 78.7 | 87.2 |
+| PP-YOLOE+_s | 80 | 43.7 | 60.6 | 47.9 | 26.5 | 47.5 | 59.0 | 46.7 | 71.4 | 81.7 |
+| PP-YOLOE+_m | 80 | 49.8 | 67.1 | 54.5 | 31.8 | 53.9 | 66.2 | 53.3 | 75.0 | 84.6 |
+| PP-YOLOE+_m(distill)| 80 | 51.0 | 68.1 | 55.8 | 32.5 | 55.7 | 67.4 | 51.9 | 76.1 | 86.4 |
+| PP-YOLOE+_l | 80 | 52.9 | 70.1 | 57.9 | 35.2 | 57.5 | 69.1 | 56.0 | 77.9 | 86.9 |
+| PP-YOLOE+_l(distill)| 80 | 54.0 | 71.2 | 59.2 | 36.1 | 58.8 | 70.4 | 55.0 | 78.7 | 87.7 |
+| PP-YOLOE+_x | 80 | 54.7 | 72.0 | 59.9 | 37.9 | 59.3 | 70.4 | 57.0 | 78.7 | 87.2 |
+
+**Note:**
+- M and L models use distillation, please refer to [distill](distill) for details.
### End-to-end Speed
@@ -92,6 +101,14 @@ PP-YOLOE is composed of following methods:
**Notes:**
- The Details for multiple machine and multi-gpu training, see [DistributedTraining](../../docs/tutorials/DistributedTraining_en.md)
+- For Objects365 dataset download, please refer to [objects365 official website](http://www.objects365.org/overview.html). The specific category list can be downloaded from [objects365_detection_label_list.txt](https://bj.bcebos.com/v1/paddledet/data/objects365/objects365_detection_label_list.txt) organized by PaddleDetection team. It should be stored in `dataset/objects365/`, and each line represents one category. The categories need to be read when exporting the model or doing inference. If the json file is not exist, you can make the following changes to `configs/datasets/objects365_detection.yml`:
+```
+TestDataset:
+ !ImageFolder
+ # anno_path: annotations/zhiyuan_objv2_val.json
+ anno_path: objects365_detection_label_list.txt
+ dataset_dir: dataset/objects365/
+```
### Model Zoo on VOC
diff --git a/configs/ppyoloe/README_cn.md b/configs/ppyoloe/README_cn.md
index 6f0288d126d..fcd0624a0ae 100644
--- a/configs/ppyoloe/README_cn.md
+++ b/configs/ppyoloe/README_cn.md
@@ -40,9 +40,14 @@ PP-YOLOE由以下方法组成
|:---------------:|:-----:|:---------:|:--------:|:----------:|:----------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
| PP-YOLOE+_s | 80 | 8 | 8 | cspresnet-s | 640 | 43.7 | 43.9 | 7.93 | 17.36 | 208.3 | 333.3 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_s_80e_coco.yml) |
| PP-YOLOE+_m | 80 | 8 | 8 | cspresnet-m | 640 | 49.8 | 50.0 | 23.43 | 49.91 | 123.4 | 208.3 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_m_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_m_80e_coco.yml) |
+| PP-YOLOE+_m(distill) | 80 | 8 | 8 | cspresnet-m | 640 | **51.0** | 51.2 | 23.43 | 49.91 | 123.4 | 208.3 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) | [config](distill/ppyoloe_plus_crn_m_80e_coco_distill.yml) |
| PP-YOLOE+_l | 80 | 8 | 8 | cspresnet-l | 640 | 52.9 | 53.3 | 52.20 | 110.07 | 78.1 | 149.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_l_80e_coco.yml) |
+| PP-YOLOE+_l(distill) | 80 | 8 | 8 | cspresnet-l | 640 | **54.0** | 54.4 | 52.20 | 110.07 | 78.1 | 149.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) | [config](distill/ppyoloe_plus_crn_l_80e_coco_distill.yml) |
| PP-YOLOE+_x | 80 | 8 | 8 | cspresnet-x | 640 | 54.7 | 54.9 | 98.42 | 206.59 | 45.0 | 95.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_x_80e_coco.pdparams) | [config](./ppyoloe_plus_crn_x_80e_coco.yml) |
+**注意:**
+- M和L模型使用了蒸馏,具体请参考[distill](distill)。
+
#### Tiny模型
| 模型 | Epoch | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
@@ -58,9 +63,14 @@ PP-YOLOE由以下方法组成
|:------------------------:|:-----:|:---------------:|:----------:|:-----------:|:------------:|:-------------:|:------------:|:------------:|:-------------:|:------------:|
| PP-YOLOE+_s | 80 | 43.7 | 60.6 | 47.9 | 26.5 | 47.5 | 59.0 | 46.7 | 71.4 | 81.7 |
| PP-YOLOE+_m | 80 | 49.8 | 67.1 | 54.5 | 31.8 | 53.9 | 66.2 | 53.3 | 75.0 | 84.6 |
+| PP-YOLOE+_m(distill)| 80 | 51.0 | 68.1 | 55.8 | 32.5 | 55.7 | 67.4 | 51.9 | 76.1 | 86.4 |
| PP-YOLOE+_l | 80 | 52.9 | 70.1 | 57.9 | 35.2 | 57.5 | 69.1 | 56.0 | 77.9 | 86.9 |
+| PP-YOLOE+_l(distill)| 80 | 54.0 | 71.2 | 59.2 | 36.1 | 58.8 | 70.4 | 55.0 | 78.7 | 87.7 |
| PP-YOLOE+_x | 80 | 54.7 | 72.0 | 59.9 | 37.9 | 59.3 | 70.4 | 57.0 | 78.7 | 87.2 |
+**注意:**
+- M和L模型使用了蒸馏,具体请参考[distill](distill)。
+
### 端到端速度
| 模型 | AP0.5:0.95 | TRT-FP32(fps) | TRT-FP16(fps) |
@@ -91,6 +101,14 @@ PP-YOLOE由以下方法组成
**注意:**
- 多机训练细节见[文档](../../docs/tutorials/DistributedTraining_cn.md)
+- Objects365数据集下载请参考[objects365官网](http://www.objects365.org/overview.html)。具体种类列表可下载由PaddleDetection团队整理的[objects365_detection_label_list.txt](https://bj.bcebos.com/v1/paddledet/data/objects365/objects365_detection_label_list.txt)并存放在`dataset/objects365/`,每一行即表示第几个种类。inference或导出模型时需要读取到种类数,如果没有标注json文件时,可以进行如下更改`configs/datasets/objects365_detection.yml`:
+```
+TestDataset:
+ !ImageFolder
+ # anno_path: annotations/zhiyuan_objv2_val.json
+ anno_path: objects365_detection_label_list.txt
+ dataset_dir: dataset/objects365/
+```
### VOC数据集模型库
diff --git a/configs/ppyoloe/objects365/README_cn.md b/configs/ppyoloe/objects365/README_cn.md
index 8018d03c62d..12f691bdaca 100644
--- a/configs/ppyoloe/objects365/README_cn.md
+++ b/configs/ppyoloe/objects365/README_cn.md
@@ -13,3 +13,11 @@
**注意:**
- 多机训练细节见[文档](../../../docs/tutorials/DistributedTraining_cn.md)
+- Objects365数据集下载请参考[objects365官网](http://www.objects365.org/overview.html)。具体种类列表可下载由PaddleDetection团队整理的[objects365_detection_label_list.txt](https://bj.bcebos.com/v1/paddledet/data/objects365/objects365_detection_label_list.txt)并存放在`dataset/objects365/`,每一行即表示第几个种类。inference或导出模型时需要读取到种类数,如果没有标注json文件时,可以进行如下更改`configs/datasets/objects365_detection.yml`:
+```
+TestDataset:
+ !ImageFolder
+ # anno_path: annotations/zhiyuan_objv2_val.json
+ anno_path: objects365_detection_label_list.txt
+ dataset_dir: dataset/objects365/
+```
diff --git a/configs/slim/README.md b/configs/slim/README.md
index 4eabd73b570..6d717f51330 100755
--- a/configs/slim/README.md
+++ b/configs/slim/README.md
@@ -48,6 +48,7 @@ python tools/train.py -c configs/{MODEL.yml} --slim_config configs/slim/{SLIM_CO
- `-c`: 指定模型配置文件。
- `--slim_config`: 指定压缩策略配置文件。
+- 如果选择使用蒸馏,具体蒸馏方法和更多检测模型的蒸馏,请参考[蒸馏策略文档](distill/README.md)
### 评估
@@ -169,7 +170,7 @@ python3.7 tools/post_quant.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --sli
| YOLOv3-MobileNetV1 | baseline | 608 | 29.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
| YOLOv3-MobileNetV1 | 蒸馏 | 608 | 31.0(+1.6) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml) |
-- 具体蒸馏方法请参考[蒸馏策略文档](distill/README.md)
+- 具体蒸馏方法和更多检测模型的蒸馏,请参考[蒸馏策略文档](distill/README.md)
### 蒸馏剪裁联合策略
diff --git a/configs/slim/README_en.md b/configs/slim/README_en.md
index 2aef83c4267..7bbb35f46d8 100755
--- a/configs/slim/README_en.md
+++ b/configs/slim/README_en.md
@@ -46,6 +46,7 @@ python tools/train.py -c configs/{MODEL.yml} --slim_config configs/slim/{SLIM_CO
- `-c`: Specify the model configuration file.
- `--slim_config`: Specify the compression policy profile.
+- If you want to use distillation, please refer to [Distillation Doc](distill/README.md) for specific distillation methods and more distillation of detection models.
### Evaluation
@@ -156,7 +157,7 @@ Description:
| YOLOv3-MobileNetV1 | baseline | 608 | 29.4 | [link](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [Configuration File ](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
| YOLOv3-MobileNetV1 | Distillation | 608 | 31.0(+1.6) | [link](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams) | [Configuration File ](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slimConfiguration File ](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml) |
-- Please refer to the specific distillation method[Distillation Policy Document](distill/README.md)
+- For the specific distillation methods and more distillation of detection models, please refer to the [Distillation Doc](distill/README.md).
### Distillation Pruning Combined Strategy
diff --git a/ppdet/slim/distill_model.py b/ppdet/slim/distill_model.py
index 96e13663813..4fa3ccc83e0 100644
--- a/ppdet/slim/distill_model.py
+++ b/ppdet/slim/distill_model.py
@@ -332,12 +332,11 @@ def forward(self, inputs, alpha=0.125):
with paddle.no_grad():
teacher_loss = self.teacher_model(inputs)
if hasattr(self.teacher_model.yolo_head, "assigned_labels"):
- self.student_model.yolo_head.assigned_labels, self.student_model.yolo_head.assigned_bboxes, self.student_model.yolo_head.assigned_scores, self.student_model.yolo_head.mask_positive = \
- self.teacher_model.yolo_head.assigned_labels, self.teacher_model.yolo_head.assigned_bboxes, self.teacher_model.yolo_head.assigned_scores, self.teacher_model.yolo_head.mask_positive
+ self.student_model.yolo_head.assigned_labels, self.student_model.yolo_head.assigned_bboxes, self.student_model.yolo_head.assigned_scores = \
+ self.teacher_model.yolo_head.assigned_labels, self.teacher_model.yolo_head.assigned_bboxes, self.teacher_model.yolo_head.assigned_scores
delattr(self.teacher_model.yolo_head, "assigned_labels")
delattr(self.teacher_model.yolo_head, "assigned_bboxes")
delattr(self.teacher_model.yolo_head, "assigned_scores")
- delattr(self.teacher_model.yolo_head, "mask_positive")
student_loss = self.student_model(inputs)
logits_loss, feat_loss = self.distill_loss(self.teacher_model,
From 653604c05ba15ee204b3b8a5642108758df78327 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Tue, 7 Mar 2023 11:45:00 +0800
Subject: [PATCH 034/116] Fix swin and add swin ppyoloe (#7857)
* refine swin configs and codes
* fix swin ppyoloe
* fix swin for ema and distill training
* fix configs for CI
* fix docs, test=document_fix
---
configs/faster_rcnn/README.md | 2 +-
.../faster_rcnn/_base_/optimizer_swin_1x.yml | 6 +-
.../faster_rcnn_swin_tiny_fpn_2x_coco.yml | 6 --
.../faster_rcnn_swin_tiny_fpn_3x_coco.yml | 22 -----
configs/swin/README.md | 26 ++++++
.../faster_rcnn_swin_tiny_fpn_3x_coco.yml | 82 +++++++++++++++++++
.../swin/ppyoloe_plus_swin_tiny_36e_coco.yml | 67 +++++++++++++++
ppdet/modeling/backbones/swin_transformer.py | 20 ++---
ppdet/modeling/transformers/utils.py | 4 +-
9 files changed, 186 insertions(+), 49 deletions(-)
delete mode 100644 configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_3x_coco.yml
create mode 100644 configs/swin/README.md
create mode 100644 configs/swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml
create mode 100644 configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml
diff --git a/configs/faster_rcnn/README.md b/configs/faster_rcnn/README.md
index da495599ce1..8ba30cbcbd7 100644
--- a/configs/faster_rcnn/README.md
+++ b/configs/faster_rcnn/README.md
@@ -23,7 +23,7 @@
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 2x | ---- | 42.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
| Swin-Tiny-FPN | Faster | 2 | 1x | ---- | 42.6 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_1x_coco.yml) |
| Swin-Tiny-FPN | Faster | 2 | 2x | ---- | 44.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_2x_coco.yml) |
-| Swin-Tiny-FPN | Faster | 2 | 3x | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
+| Swin-Tiny-FPN | Faster | 2 | 3x | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](../swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
## Citations
```
diff --git a/configs/faster_rcnn/_base_/optimizer_swin_1x.yml b/configs/faster_rcnn/_base_/optimizer_swin_1x.yml
index 5c1c6679940..66de8f0b5d9 100644
--- a/configs/faster_rcnn/_base_/optimizer_swin_1x.yml
+++ b/configs/faster_rcnn/_base_/optimizer_swin_1x.yml
@@ -15,8 +15,6 @@ OptimizerBuilder:
optimizer:
type: AdamW
weight_decay: 0.05
-
param_groups:
- -
- params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
- weight_decay: 0.
+ - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
+ weight_decay: 0.0
diff --git a/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_2x_coco.yml b/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_2x_coco.yml
index 5848c4943b4..902dcbe831a 100644
--- a/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_2x_coco.yml
+++ b/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_2x_coco.yml
@@ -14,9 +14,3 @@ LearningRate:
- !LinearWarmup
start_factor: 0.1
steps: 1000
-
-OptimizerBuilder:
- clip_grad_by_norm: 1.0
- optimizer:
- type: AdamW
- weight_decay: 0.05
diff --git a/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_3x_coco.yml b/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_3x_coco.yml
deleted file mode 100644
index a1b68cf4703..00000000000
--- a/configs/faster_rcnn/faster_rcnn_swin_tiny_fpn_3x_coco.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-_BASE_: [
- 'faster_rcnn_swin_tiny_fpn_1x_coco.yml',
-]
-weights: output/faster_rcnn_swin_tiny_fpn_3x_coco/model_final
-
-epoch: 36
-
-LearningRate:
- base_lr: 0.0001
- schedulers:
- - !PiecewiseDecay
- gamma: 0.1
- milestones: [24, 33]
- - !LinearWarmup
- start_factor: 0.1
- steps: 1000
-
-OptimizerBuilder:
- clip_grad_by_norm: 1.0
- optimizer:
- type: AdamW
- weight_decay: 0.05
diff --git a/configs/swin/README.md b/configs/swin/README.md
new file mode 100644
index 00000000000..617ee67d3ff
--- /dev/null
+++ b/configs/swin/README.md
@@ -0,0 +1,26 @@
+# Swin Transformer
+
+## COCO Model Zoo
+
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
+| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| swin_T_224 | Faster R-CNN | 2 | 36e | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
+| swin_T_224 | PP-YOLOE+ | 8 | 36e | ---- | 43.6 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_swin_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_plus_swin_tiny_36e_coco.yml) |
+
+
+## Citations
+```
+@article{liu2021Swin,
+ title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows},
+ author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
+ journal={arXiv preprint arXiv:2103.14030},
+ year={2021}
+}
+
+@inproceedings{liu2021swinv2,
+ title={Swin Transformer V2: Scaling Up Capacity and Resolution},
+ author={Ze Liu and Han Hu and Yutong Lin and Zhuliang Yao and Zhenda Xie and Yixuan Wei and Jia Ning and Yue Cao and Zheng Zhang and Li Dong and Furu Wei and Baining Guo},
+ booktitle={International Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year={2022}
+}
+```
diff --git a/configs/swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml b/configs/swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml
new file mode 100644
index 00000000000..3fb2da3dde1
--- /dev/null
+++ b/configs/swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml
@@ -0,0 +1,82 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
+ '../faster_rcnn/_base_/faster_fpn_reader.yml',
+]
+weights: output/faster_rcnn_swin_tiny_fpn_3x_coco/model_final
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
+
+
+FasterRCNN:
+ backbone: SwinTransformer
+ neck: FPN
+ rpn_head: RPNHead
+ bbox_head: BBoxHead
+ bbox_post_process: BBoxPostProcess
+
+SwinTransformer:
+ arch: 'swin_T_224' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
+ ape: false
+ drop_path_rate: 0.1
+ patch_norm: true
+ out_indices: [0, 1, 2, 3]
+
+
+worker_num: 2
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResizeCrop: {resizes: [400, 500, 600], cropsizes: [[384, 600], ], prob: 0.5}
+ - RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 2}
+ - RandomFlip: {prob: 0.5}
+ - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 2
+ shuffle: true
+ drop_last: true
+ collate_batch: false
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 1
+
+TestReader:
+ inputs_def:
+    image_shape: [-1, 3, 640, 640] # TODO deploy: set fixed shape currently
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: 640, keep_ratio: True}
+ - Pad: {size: 640}
+ - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+ - Permute: {}
+ batch_size: 1
+
+
+epoch: 36
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [24, 33]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+OptimizerBuilder:
+ clip_grad_by_norm: 1.0
+ optimizer:
+ type: AdamW
+ weight_decay: 0.05
+ param_groups:
+ - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
+ weight_decay: 0.0
diff --git a/configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml b/configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml
new file mode 100644
index 00000000000..a5403d86e84
--- /dev/null
+++ b/configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml
@@ -0,0 +1,67 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_crn.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_reader.yml',
+]
+depth_mult: 0.33 # s version
+width_mult: 0.50
+
+log_iter: 50
+snapshot_epoch: 4
+weights: output/ppyoloe_plus_swin_tiny_36e_coco/model_final
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
+
+
+architecture: PPYOLOE
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+PPYOLOE:
+ backbone: SwinTransformer
+ neck: CustomCSPPAN
+ yolo_head: PPYOLOEHead
+ post_process: ~
+
+SwinTransformer:
+ arch: 'swin_T_224' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
+ ape: false
+ drop_path_rate: 0.1
+ patch_norm: true
+ out_indices: [1, 2, 3]
+
+PPYOLOEHead:
+ static_assigner_epoch: 12
+ nms:
+ nms_top_k: 10000
+ keep_top_k: 300
+ score_threshold: 0.01
+ nms_threshold: 0.7
+
+
+TrainReader:
+ batch_size: 8
+
+
+epoch: 36
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [24, 33]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+OptimizerBuilder:
+ clip_grad_by_norm: 1.0
+ optimizer:
+ type: AdamW
+ weight_decay: 0.05
+ param_groups:
+ - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
+ weight_decay: 0.0
diff --git a/ppdet/modeling/backbones/swin_transformer.py b/ppdet/modeling/backbones/swin_transformer.py
index 8a581b763d6..64aabab4781 100644
--- a/ppdet/modeling/backbones/swin_transformer.py
+++ b/ppdet/modeling/backbones/swin_transformer.py
@@ -191,8 +191,6 @@ def __init__(self,
relative_coords[:, :, 1] += self.window_size[1] - 1
relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
self.relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
- self.register_buffer("relative_position_index",
- self.relative_position_index)
self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
@@ -425,7 +423,6 @@ class BasicLayer(nn.Layer):
""" A basic Swin Transformer layer for one stage.
Args:
dim (int): Number of input channels.
- input_resolution (tuple[int]): Input resolution.
depth (int): Number of blocks.
num_heads (int): Number of attention heads.
window_size (int): Local window size.
@@ -500,10 +497,7 @@ def forward(self, x, H, W):
cnt = 0
for h in h_slices:
for w in w_slices:
- try:
- img_mask[:, h, w, :] = cnt
- except:
- pass
+ img_mask[:, h, w, :] = cnt
cnt += 1
@@ -572,15 +566,12 @@ def forward(self, x):
@register
@serializable
class SwinTransformer(nn.Layer):
- """ Swin Transformer
- A PaddlePaddle impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` -
- https://arxiv.org/pdf/2103.14030
-
+ """ Swin Transformer backbone
Args:
- img_size (int | tuple(int)): Input image size. Default 224
+        arch (str): Architecture of SwinTransformer
+ pretrain_img_size (int | tuple(int)): Input image size. Default 224
patch_size (int | tuple(int)): Patch size. Default: 4
in_chans (int): Number of input image channels. Default: 3
- num_classes (int): Number of classes for classification head. Default: 1000
embed_dim (int): Patch embedding dimension. Default: 96
depths (tuple(int)): Depth of each Swin Transformer layer.
num_heads (tuple(int)): Number of attention heads in different layers.
@@ -619,6 +610,7 @@ def __init__(self,
pretrained=None):
super(SwinTransformer, self).__init__()
assert arch in MODEL_cfg.keys(), "Unsupported arch: {}".format(arch)
+
pretrain_img_size = MODEL_cfg[arch]['pretrain_img_size']
embed_dim = MODEL_cfg[arch]['embed_dim']
depths = MODEL_cfg[arch]['depths']
@@ -748,7 +740,7 @@ def forward(self, x):
(0, 3, 1, 2))
outs.append(out)
- return tuple(outs)
+ return outs
@property
def out_shape(self):
diff --git a/ppdet/modeling/transformers/utils.py b/ppdet/modeling/transformers/utils.py
index d8f869fbc27..c41b069cffb 100644
--- a/ppdet/modeling/transformers/utils.py
+++ b/ppdet/modeling/transformers/utils.py
@@ -236,7 +236,7 @@ def get_sine_pos_embed(pos_tensor,
"""generate sine position embedding from a position tensor
Args:
- pos_tensor (torch.Tensor): Shape as `(None, n)`.
+ pos_tensor (Tensor): Shape as `(None, n)`.
num_pos_feats (int): projected shape for each float in the tensor. Default: 128
temperature (int): The temperature used for scaling
the position embedding. Default: 10000.
@@ -245,7 +245,7 @@ def get_sine_pos_embed(pos_tensor,
be `[pos(y), pos(x)]`. Defaults: True.
Returns:
- torch.Tensor: Returned position embedding # noqa
+ Tensor: Returned position embedding # noqa
with shape `(None, n * num_pos_feats)`.
"""
scale = 2. * math.pi
From 6eb4784d8b1645ac94f629db09b1af7043902527 Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Tue, 7 Mar 2023 20:20:25 +0800
Subject: [PATCH 035/116] [mask dino] add mask_dino model (#7887)
align torch dn code
merge deformable dino mask-dino same code
reset norm attr
fix dino amp training
fix bbox_pred in detr postprocess
---
.../_base_/deformable_detr_r50.yml | 4 +-
.../_base_/deformable_detr_reader.yml | 4 -
configs/detr/_base_/detr_r50.yml | 4 +-
configs/detr/_base_/detr_reader.yml | 4 -
configs/dino/_base_/dino_focalnet.yml | 4 +-
configs/dino/_base_/dino_r50.yml | 4 +-
configs/dino/_base_/dino_reader.yml | 4 -
configs/dino/_base_/dino_swin.yml | 4 +-
.../group_detr/_base_/dino_2000_reader.yml | 4 -
configs/group_detr/_base_/dino_reader.yml | 4 -
configs/group_detr/_base_/group_dino_r50.yml | 4 +-
.../group_detr/_base_/group_dino_vit_huge.yml | 4 +-
ppdet/data/reader.py | 2 +-
ppdet/data/transform/operators.py | 118 ++--
ppdet/modeling/architectures/detr.py | 21 +-
ppdet/modeling/backbones/resnet.py | 8 +-
ppdet/modeling/heads/detr_head.py | 74 ++-
ppdet/modeling/layers.py | 2 +-
ppdet/modeling/losses/detr_loss.py | 345 ++++++++---
ppdet/modeling/post_process.py | 62 +-
ppdet/modeling/transformers/__init__.py | 2 +
.../transformers/deformable_transformer.py | 85 ++-
.../modeling/transformers/detr_transformer.py | 6 +-
.../modeling/transformers/dino_transformer.py | 240 ++------
.../transformers/mask_dino_transformer.py | 536 ++++++++++++++++++
ppdet/modeling/transformers/matchers.py | 79 ++-
ppdet/modeling/transformers/utils.py | 138 ++++-
27 files changed, 1363 insertions(+), 403 deletions(-)
create mode 100644 ppdet/modeling/transformers/mask_dino_transformer.py
diff --git a/configs/deformable_detr/_base_/deformable_detr_r50.yml b/configs/deformable_detr/_base_/deformable_detr_r50.yml
index 641129a6e51..81ae696f833 100644
--- a/configs/deformable_detr/_base_/deformable_detr_r50.yml
+++ b/configs/deformable_detr/_base_/deformable_detr_r50.yml
@@ -8,7 +8,7 @@ DETR:
backbone: ResNet
transformer: DeformableTransformer
detr_head: DeformableDETRHead
- post_process: DETRBBoxPostProcess
+ post_process: DETRPostProcess
ResNet:
@@ -40,7 +40,7 @@ DeformableDETRHead:
DETRLoss:
- loss_coeff: {class: 2, bbox: 5, giou: 2, mask: 1, dice: 1}
+ loss_coeff: {class: 2, bbox: 5, giou: 2}
aux_loss: True
diff --git a/configs/deformable_detr/_base_/deformable_detr_reader.yml b/configs/deformable_detr/_base_/deformable_detr_reader.yml
index c15a0f3b639..891a4979075 100644
--- a/configs/deformable_detr/_base_/deformable_detr_reader.yml
+++ b/configs/deformable_detr/_base_/deformable_detr_reader.yml
@@ -28,8 +28,6 @@ EvalReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
@@ -41,8 +39,6 @@ TestReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
diff --git a/configs/detr/_base_/detr_r50.yml b/configs/detr/_base_/detr_r50.yml
index 5006f11937c..17d9ef86a8e 100644
--- a/configs/detr/_base_/detr_r50.yml
+++ b/configs/detr/_base_/detr_r50.yml
@@ -7,7 +7,7 @@ DETR:
backbone: ResNet
transformer: DETRTransformer
detr_head: DETRHead
- post_process: DETRBBoxPostProcess
+ post_process: DETRPostProcess
ResNet:
@@ -36,7 +36,7 @@ DETRHead:
DETRLoss:
- loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1, mask: 1, dice: 1}
+ loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1}
aux_loss: True
diff --git a/configs/detr/_base_/detr_reader.yml b/configs/detr/_base_/detr_reader.yml
index 997ef724afc..26120694a19 100644
--- a/configs/detr/_base_/detr_reader.yml
+++ b/configs/detr/_base_/detr_reader.yml
@@ -28,8 +28,6 @@ EvalReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
@@ -41,8 +39,6 @@ TestReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
diff --git a/configs/dino/_base_/dino_focalnet.yml b/configs/dino/_base_/dino_focalnet.yml
index 7313dcf4e18..645d0a23589 100644
--- a/configs/dino/_base_/dino_focalnet.yml
+++ b/configs/dino/_base_/dino_focalnet.yml
@@ -8,7 +8,7 @@ DETR:
backbone: FocalNet
transformer: DINOTransformer
detr_head: DINOHead
- post_process: DETRBBoxPostProcess
+ post_process: DETRPostProcess
FocalNet:
arch: 'focalnet_L_384_22k_fl4'
@@ -41,5 +41,5 @@ DINOHead:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
-DETRBBoxPostProcess:
+DETRPostProcess:
num_top_queries: 300
diff --git a/configs/dino/_base_/dino_r50.yml b/configs/dino/_base_/dino_r50.yml
index 0b151bd4896..4b7dc2e1958 100644
--- a/configs/dino/_base_/dino_r50.yml
+++ b/configs/dino/_base_/dino_r50.yml
@@ -8,7 +8,7 @@ DETR:
backbone: ResNet
transformer: DINOTransformer
detr_head: DINOHead
- post_process: DETRBBoxPostProcess
+ post_process: DETRPostProcess
ResNet:
# index 0 stands for res2
@@ -45,5 +45,5 @@ DINOHead:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
-DETRBBoxPostProcess:
+DETRPostProcess:
num_top_queries: 300
diff --git a/configs/dino/_base_/dino_reader.yml b/configs/dino/_base_/dino_reader.yml
index 0fdf7a8985b..78ff8fd6543 100644
--- a/configs/dino/_base_/dino_reader.yml
+++ b/configs/dino/_base_/dino_reader.yml
@@ -28,8 +28,6 @@ EvalReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
@@ -39,6 +37,4 @@ TestReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
diff --git a/configs/dino/_base_/dino_swin.yml b/configs/dino/_base_/dino_swin.yml
index b2edb961ce5..91c970e64c9 100644
--- a/configs/dino/_base_/dino_swin.yml
+++ b/configs/dino/_base_/dino_swin.yml
@@ -7,7 +7,7 @@ DETR:
backbone: SwinTransformer
transformer: DINOTransformer
detr_head: DINOHead
- post_process: DETRBBoxPostProcess
+ post_process: DETRPostProcess
SwinTransformer:
arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
@@ -42,5 +42,5 @@ DINOHead:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
-DETRBBoxPostProcess:
+DETRPostProcess:
num_top_queries: 300
diff --git a/configs/group_detr/_base_/dino_2000_reader.yml b/configs/group_detr/_base_/dino_2000_reader.yml
index ef7620eb856..c37616115b0 100644
--- a/configs/group_detr/_base_/dino_2000_reader.yml
+++ b/configs/group_detr/_base_/dino_2000_reader.yml
@@ -28,8 +28,6 @@ EvalReader:
- Resize: {target_size: [1184, 2000], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
@@ -41,8 +39,6 @@ TestReader:
- Resize: {target_size: [1184, 2000], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
diff --git a/configs/group_detr/_base_/dino_reader.yml b/configs/group_detr/_base_/dino_reader.yml
index c15a0f3b639..891a4979075 100644
--- a/configs/group_detr/_base_/dino_reader.yml
+++ b/configs/group_detr/_base_/dino_reader.yml
@@ -28,8 +28,6 @@ EvalReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
@@ -41,8 +39,6 @@ TestReader:
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
- batch_transforms:
- - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 1
shuffle: false
drop_last: false
diff --git a/configs/group_detr/_base_/group_dino_r50.yml b/configs/group_detr/_base_/group_dino_r50.yml
index 587f7f51943..1feeabd7735 100644
--- a/configs/group_detr/_base_/group_dino_r50.yml
+++ b/configs/group_detr/_base_/group_dino_r50.yml
@@ -8,7 +8,7 @@ DETR:
backbone: ResNet
transformer: GroupDINOTransformer
detr_head: DINOHead
- post_process: DETRBBoxPostProcess
+ post_process: DETRPostProcess
ResNet:
# index 0 stands for res2
@@ -47,7 +47,7 @@ DINOHead:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
-DETRBBoxPostProcess:
+DETRPostProcess:
num_top_queries: 300
dual_queries: True
dual_groups: 10
diff --git a/configs/group_detr/_base_/group_dino_vit_huge.yml b/configs/group_detr/_base_/group_dino_vit_huge.yml
index 8849f8a2d6f..68f318d0542 100644
--- a/configs/group_detr/_base_/group_dino_vit_huge.yml
+++ b/configs/group_detr/_base_/group_dino_vit_huge.yml
@@ -8,7 +8,7 @@ DETR:
neck: SimpleFeaturePyramid
transformer: GroupDINOTransformer
detr_head: DINOHead
- post_process: DETRBBoxPostProcess
+ post_process: DETRPostProcess
VisionTransformer2D:
patch_size: 16
@@ -62,7 +62,7 @@ DINOHead:
matcher_coeff: {class: 2, bbox: 5, giou: 2}
-DETRBBoxPostProcess:
+DETRPostProcess:
num_top_queries: 300
dual_queries: True
dual_groups: 10
diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py
index 227fabca6dc..041f7735d7f 100644
--- a/ppdet/data/reader.py
+++ b/ppdet/data/reader.py
@@ -248,7 +248,7 @@ def __init__(self,
batch_transforms=[],
batch_size=1,
shuffle=False,
- drop_last=True,
+ drop_last=False,
num_classes=80,
**kwargs):
super(EvalReader, self).__init__(sample_transforms, batch_transforms,
diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py
index 61a4aacba02..25f3452993e 100644
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -1782,56 +1782,110 @@ class RandomScaledCrop(BaseOperator):
"""Resize image and bbox based on long side (with optional random scaling),
then crop or pad image to target size.
Args:
- target_dim (int): target size.
+ target_size (int|list): target size, "hw" format.
scale_range (list): random scale range.
interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
+ fill_value (float|list|tuple): color value used to fill the canvas,
+ in RGB order.
"""
def __init__(self,
- target_dim=512,
+ target_size=512,
scale_range=[.1, 2.],
- interp=cv2.INTER_LINEAR):
+ interp=cv2.INTER_LINEAR,
+ fill_value=(123.675, 116.28, 103.53)):
super(RandomScaledCrop, self).__init__()
- self.target_dim = target_dim
+ assert isinstance(target_size, (
+ Integral, Sequence)), "target_size must be Integer, List or Tuple"
+ if isinstance(target_size, Integral):
+ target_size = [target_size, ] * 2
+
+ self.target_size = target_size
self.scale_range = scale_range
self.interp = interp
+ assert isinstance(fill_value, (Number, Sequence)), \
+ "fill value must be either float or sequence"
+ if isinstance(fill_value, Number):
+ fill_value = (fill_value, ) * 3
+ if not isinstance(fill_value, tuple):
+ fill_value = tuple(fill_value)
+ self.fill_value = fill_value
+
+ def apply_image(self, img, output_size, offset_x, offset_y):
+ th, tw = self.target_size
+ rh, rw = output_size
+ img = cv2.resize(
+ img, (rw, rh), interpolation=self.interp).astype(np.float32)
+ canvas = np.ones([th, tw, 3], dtype=np.float32)
+ canvas *= np.array(self.fill_value, dtype=np.float32)
+ canvas[:min(th, rh), :min(tw, rw)] = \
+ img[offset_y:offset_y + th, offset_x:offset_x + tw]
+ return canvas
+
+ def apply_bbox(self, gt_bbox, gt_class, scale, offset_x, offset_y):
+ th, tw = self.target_size
+ shift_array = np.array(
+ [
+ offset_x,
+ offset_y,
+ ] * 2, dtype=np.float32)
+ boxes = gt_bbox * scale - shift_array
+ boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, tw)
+ boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, th)
+ # filter boxes with no area
+ area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
+ valid = (area > 1.).nonzero()[0]
+ return boxes[valid], gt_class[valid], valid
+
+ def apply_segm(self, segms, output_size, offset_x, offset_y, valid=None):
+ th, tw = self.target_size
+ rh, rw = output_size
+ out_segms = []
+ for segm in segms:
+ segm = cv2.resize(segm, (rw, rh), interpolation=cv2.INTER_NEAREST)
+ segm = segm.astype(np.float32)
+ canvas = np.zeros([th, tw], dtype=segm.dtype)
+ canvas[:min(th, rh), :min(tw, rw)] = \
+ segm[offset_y:offset_y + th, offset_x:offset_x + tw]
+ out_segms.append(canvas)
+ out_segms = np.stack(out_segms)
+ return out_segms if valid is None else out_segms[valid]
def apply(self, sample, context=None):
img = sample['image']
h, w = img.shape[:2]
random_scale = np.random.uniform(*self.scale_range)
- dim = self.target_dim
- random_dim = int(dim * random_scale)
- dim_max = max(h, w)
- scale = random_dim / dim_max
- resize_w = int(w * scale + 0.5)
- resize_h = int(h * scale + 0.5)
- offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
- offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
-
- img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
- img = np.array(img)
- canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
- canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
- offset_y:offset_y + dim, offset_x:offset_x + dim, :]
- sample['image'] = canvas
- sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
- scale_factor = sample['sacle_factor']
+ target_scale_size = [t * random_scale for t in self.target_size]
+ # Compute actual rescaling applied to image.
+ scale = min(target_scale_size[0] / h, target_scale_size[1] / w)
+ output_size = [int(round(h * scale)), int(round(w * scale))]
+ # get offset
+ offset_x = int(
+ max(0, np.random.uniform(0., output_size[1] - self.target_size[1])))
+ offset_y = int(
+ max(0, np.random.uniform(0., output_size[0] - self.target_size[0])))
+
+ # apply to image
+ sample['image'] = self.apply_image(img, output_size, offset_x, offset_y)
+
+ # apply to bbox
+ valid = None
+ if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+ sample['gt_bbox'], sample['gt_class'], valid = self.apply_bbox(
+ sample['gt_bbox'], sample['gt_class'], scale, offset_x,
+ offset_y)
+
+ # apply to segm
+ if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
+ sample['gt_segm'] = self.apply_segm(sample['gt_segm'], output_size,
+ offset_x, offset_y, valid)
+
+ sample['im_shape'] = np.asarray(output_size, dtype=np.float32)
+ scale_factor = sample['scale_factor']
sample['scale_factor'] = np.asarray(
[scale_factor[0] * scale, scale_factor[1] * scale],
dtype=np.float32)
- if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
- scale_array = np.array([scale, scale] * 2, dtype=np.float32)
- shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
- boxes = sample['gt_bbox'] * scale_array - shift_array
- boxes = np.clip(boxes, 0, dim - 1)
- # filter boxes with no area
- area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
- valid = (area > 1.).nonzero()[0]
- sample['gt_bbox'] = boxes[valid]
- sample['gt_class'] = sample['gt_class'][valid]
-
return sample
diff --git a/ppdet/modeling/architectures/detr.py b/ppdet/modeling/architectures/detr.py
index 223eeda89cb..2d599258592 100644
--- a/ppdet/modeling/architectures/detr.py
+++ b/ppdet/modeling/architectures/detr.py
@@ -28,14 +28,15 @@
class DETR(BaseArch):
__category__ = 'architecture'
__inject__ = ['post_process']
- __shared__ = ['exclude_post_process']
+ __shared__ = ['with_mask', 'exclude_post_process']
def __init__(self,
backbone,
transformer='DETRTransformer',
detr_head='DETRHead',
neck=None,
- post_process='DETRBBoxPostProcess',
+ post_process='DETRPostProcess',
+ with_mask=False,
exclude_post_process=False):
super(DETR, self).__init__()
self.backbone = backbone
@@ -43,6 +44,7 @@ def __init__(self,
self.transformer = transformer
self.detr_head = detr_head
self.post_process = post_process
+ self.with_mask = with_mask
self.exclude_post_process = exclude_post_process
@classmethod
@@ -95,13 +97,16 @@ def _forward(self):
else:
preds = self.detr_head(out_transformer, body_feats)
if self.exclude_post_process:
- bboxes, logits, masks = preds
- return bboxes, logits
+ bbox, bbox_num, mask = preds
else:
- bbox, bbox_num = self.post_process(
- preds, self.inputs['im_shape'], self.inputs['scale_factor'])
- output = {'bbox': bbox, 'bbox_num': bbox_num}
- return output
+ bbox, bbox_num, mask = self.post_process(
+ preds, self.inputs['im_shape'], self.inputs['scale_factor'],
+ paddle.shape(self.inputs['image'])[2:])
+
+ output = {'bbox': bbox, 'bbox_num': bbox_num}
+ if self.with_mask:
+ output['mask'] = mask
+ return output
def get_loss(self):
return self._forward()
diff --git a/ppdet/modeling/backbones/resnet.py b/ppdet/modeling/backbones/resnet.py
index 3b9508c49f9..a64f400d9ee 100755
--- a/ppdet/modeling/backbones/resnet.py
+++ b/ppdet/modeling/backbones/resnet.py
@@ -443,7 +443,8 @@ def __init__(self,
return_idx=[0, 1, 2, 3],
dcn_v2_stages=[-1],
num_stages=4,
- std_senet=False):
+ std_senet=False,
+ freeze_stem_only=False):
"""
Residual Network, see https://arxiv.org/abs/1512.03385
@@ -558,8 +559,9 @@ def __init__(self,
if freeze_at >= 0:
self._freeze_parameters(self.conv1)
- for i in range(min(freeze_at + 1, num_stages)):
- self._freeze_parameters(self.res_layers[i])
+ if not freeze_stem_only:
+ for i in range(min(freeze_at + 1, num_stages)):
+ self._freeze_parameters(self.res_layers[i])
def _freeze_parameters(self, m):
for p in m.parameters():
diff --git a/ppdet/modeling/heads/detr_head.py b/ppdet/modeling/heads/detr_head.py
index 61448e4e0ba..f65a984347c 100644
--- a/ppdet/modeling/heads/detr_head.py
+++ b/ppdet/modeling/heads/detr_head.py
@@ -24,7 +24,7 @@
from ..initializer import linear_init_, constant_
from ..transformers.utils import inverse_sigmoid
-__all__ = ['DETRHead', 'DeformableDETRHead', 'DINOHead']
+__all__ = ['DETRHead', 'DeformableDETRHead', 'DINOHead', 'MaskDINOHead']
class MLP(nn.Layer):
@@ -459,3 +459,75 @@ def forward(self, out_transformer, body_feats, inputs=None):
dn_meta=dn_meta)
else:
return (dec_out_bboxes[-1], dec_out_logits[-1], None)
+
+
+@register
+class MaskDINOHead(nn.Layer):
+ __inject__ = ['loss']
+
+ def __init__(self, loss='DINOLoss'):
+ super(MaskDINOHead, self).__init__()
+ self.loss = loss
+
+ def forward(self, out_transformer, body_feats, inputs=None):
+ (dec_out_logits, dec_out_bboxes, dec_out_masks, enc_out, init_out,
+ dn_meta) = out_transformer
+ if self.training:
+ assert inputs is not None
+ assert 'gt_bbox' in inputs and 'gt_class' in inputs
+ assert 'gt_segm' in inputs
+
+ if dn_meta is not None:
+ dn_out_logits, dec_out_logits = paddle.split(
+ dec_out_logits, dn_meta['dn_num_split'], axis=2)
+ dn_out_bboxes, dec_out_bboxes = paddle.split(
+ dec_out_bboxes, dn_meta['dn_num_split'], axis=2)
+ dn_out_masks, dec_out_masks = paddle.split(
+ dec_out_masks, dn_meta['dn_num_split'], axis=2)
+ if init_out is not None:
+ init_out_logits, init_out_bboxes, init_out_masks = init_out
+ init_out_logits_dn, init_out_logits = paddle.split(
+ init_out_logits, dn_meta['dn_num_split'], axis=1)
+ init_out_bboxes_dn, init_out_bboxes = paddle.split(
+ init_out_bboxes, dn_meta['dn_num_split'], axis=1)
+ init_out_masks_dn, init_out_masks = paddle.split(
+ init_out_masks, dn_meta['dn_num_split'], axis=1)
+
+ dec_out_logits = paddle.concat(
+ [init_out_logits.unsqueeze(0), dec_out_logits])
+ dec_out_bboxes = paddle.concat(
+ [init_out_bboxes.unsqueeze(0), dec_out_bboxes])
+ dec_out_masks = paddle.concat(
+ [init_out_masks.unsqueeze(0), dec_out_masks])
+
+ dn_out_logits = paddle.concat(
+ [init_out_logits_dn.unsqueeze(0), dn_out_logits])
+ dn_out_bboxes = paddle.concat(
+ [init_out_bboxes_dn.unsqueeze(0), dn_out_bboxes])
+ dn_out_masks = paddle.concat(
+ [init_out_masks_dn.unsqueeze(0), dn_out_masks])
+ else:
+ dn_out_bboxes, dn_out_logits = None, None
+ dn_out_masks = None
+
+ enc_out_logits, enc_out_bboxes, enc_out_masks = enc_out
+ out_logits = paddle.concat(
+ [enc_out_logits.unsqueeze(0), dec_out_logits])
+ out_bboxes = paddle.concat(
+ [enc_out_bboxes.unsqueeze(0), dec_out_bboxes])
+ out_masks = paddle.concat(
+ [enc_out_masks.unsqueeze(0), dec_out_masks])
+
+ return self.loss(
+ out_bboxes,
+ out_logits,
+ inputs['gt_bbox'],
+ inputs['gt_class'],
+ masks=out_masks,
+ gt_mask=inputs['gt_segm'],
+ dn_out_logits=dn_out_logits,
+ dn_out_bboxes=dn_out_bboxes,
+ dn_out_masks=dn_out_masks,
+ dn_meta=dn_meta)
+ else:
+ return (dec_out_bboxes[-1], dec_out_logits[-1], dec_out_masks[-1])
diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py
index 16368e81e62..f267b174584 100644
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -1135,6 +1135,7 @@ def _convert_attention_mask(attn_mask, dtype):
"""
return nn.layer.transformer._convert_attention_mask(attn_mask, dtype)
+
@register
class MultiHeadAttention(nn.Layer):
"""
@@ -1296,7 +1297,6 @@ def forward(self, query, key=None, value=None, attn_mask=None):
self.dropout,
training=self.training,
mode="upscale_in_train")
-
out = paddle.matmul(weights, v)
# combine heads
diff --git a/ppdet/modeling/losses/detr_loss.py b/ppdet/modeling/losses/detr_loss.py
index f4291a7a74b..45a2d5e14d7 100644
--- a/ppdet/modeling/losses/detr_loss.py
+++ b/ppdet/modeling/losses/detr_loss.py
@@ -54,8 +54,8 @@ def __init__(self,
use_focal_loss (bool): Use focal loss or not.
"""
super(DETRLoss, self).__init__()
- self.num_classes = num_classes
+ self.num_classes = num_classes
self.matcher = matcher
self.loss_coeff = loss_coeff
self.aux_loss = aux_loss
@@ -76,8 +76,7 @@ def _get_loss_class(self,
postfix=""):
# logits: [b, query, num_classes], gt_class: list[[n, 1]]
name_class = "loss_class" + postfix
- if logits is None:
- return {name_class: paddle.zeros([1])}
+
target_label = paddle.full(logits.shape[:2], bg_index, dtype='int64')
bs, num_query_objects = target_label.shape
if sum(len(a) for a in gt_class) > 0:
@@ -101,8 +100,7 @@ def _get_loss_bbox(self, boxes, gt_bbox, match_indices, num_gts,
# boxes: [b, query, 4], gt_bbox: list[[n, 4]]
name_bbox = "loss_bbox" + postfix
name_giou = "loss_giou" + postfix
- if boxes is None:
- return {name_bbox: paddle.zeros([1]), name_giou: paddle.zeros([1])}
+
loss = dict()
if sum(len(a) for a in gt_bbox) == 0:
loss[name_bbox] = paddle.to_tensor([0.])
@@ -124,8 +122,7 @@ def _get_loss_mask(self, masks, gt_mask, match_indices, num_gts,
# masks: [b, query, h, w], gt_mask: list[[n, H, W]]
name_mask = "loss_mask" + postfix
name_dice = "loss_dice" + postfix
- if masks is None:
- return {name_mask: paddle.zeros([1]), name_dice: paddle.zeros([1])}
+
loss = dict()
if sum(len(a) for a in gt_mask) == 0:
loss[name_mask] = paddle.to_tensor([0.])
@@ -164,20 +161,22 @@ def _get_loss_aux(self,
bg_index,
num_gts,
dn_match_indices=None,
- postfix=""):
- if boxes is None or logits is None:
- return {
- "loss_class_aux" + postfix: paddle.paddle.zeros([1]),
- "loss_bbox_aux" + postfix: paddle.paddle.zeros([1]),
- "loss_giou_aux" + postfix: paddle.paddle.zeros([1])
- }
+ postfix="",
+ masks=None,
+ gt_mask=None):
loss_class = []
- loss_bbox = []
- loss_giou = []
- for aux_boxes, aux_logits in zip(boxes, logits):
+ loss_bbox, loss_giou = [], []
+ loss_mask, loss_dice = [], []
+ for i, (aux_boxes, aux_logits) in enumerate(zip(boxes, logits)):
+ aux_masks = masks[i] if masks is not None else None
if dn_match_indices is None:
- match_indices = self.matcher(aux_boxes, aux_logits, gt_bbox,
- gt_class)
+ match_indices = self.matcher(
+ aux_boxes,
+ aux_logits,
+ gt_bbox,
+ gt_class,
+ masks=aux_masks,
+ gt_mask=gt_mask)
else:
match_indices = dn_match_indices
loss_class.append(
@@ -188,11 +187,19 @@ def _get_loss_aux(self,
num_gts, postfix)
loss_bbox.append(loss_['loss_bbox' + postfix])
loss_giou.append(loss_['loss_giou' + postfix])
+ if masks is not None and gt_mask is not None:
+ loss_ = self._get_loss_mask(aux_masks, gt_mask, match_indices,
+ num_gts, postfix)
+ loss_mask.append(loss_['loss_mask' + postfix])
+ loss_dice.append(loss_['loss_dice' + postfix])
loss = {
"loss_class_aux" + postfix: paddle.add_n(loss_class),
"loss_bbox_aux" + postfix: paddle.add_n(loss_bbox),
"loss_giou_aux" + postfix: paddle.add_n(loss_giou)
}
+ if masks is not None and gt_mask is not None:
+ loss["loss_mask_aux" + postfix] = paddle.add_n(loss_mask)
+ loss["loss_dice_aux" + postfix] = paddle.add_n(loss_dice)
return loss
def _get_index_updates(self, num_query_objects, target, match_indices):
@@ -220,6 +227,44 @@ def _get_src_target_assign(self, src, target, match_indices):
])
return src_assign, target_assign
+ def _get_num_gts(self, targets, dtype="float32"):
+ num_gts = sum(len(a) for a in targets)
+ num_gts = paddle.to_tensor([num_gts], dtype=dtype)
+ if paddle.distributed.get_world_size() > 1:
+ paddle.distributed.all_reduce(num_gts)
+ num_gts /= paddle.distributed.get_world_size()
+ num_gts = paddle.clip(num_gts, min=1.)
+ return num_gts
+
+ def _get_prediction_loss(self,
+ boxes,
+ logits,
+ gt_bbox,
+ gt_class,
+ masks=None,
+ gt_mask=None,
+ postfix="",
+ dn_match_indices=None,
+ num_gts=1):
+ if dn_match_indices is None:
+ match_indices = self.matcher(
+ boxes, logits, gt_bbox, gt_class, masks=masks, gt_mask=gt_mask)
+ else:
+ match_indices = dn_match_indices
+
+ loss = dict()
+ loss.update(
+ self._get_loss_class(logits, gt_class, match_indices,
+ self.num_classes, num_gts, postfix))
+ loss.update(
+ self._get_loss_bbox(boxes, gt_bbox, match_indices, num_gts,
+ postfix))
+ if masks is not None and gt_mask is not None:
+ loss.update(
+ self._get_loss_mask(masks, gt_mask, match_indices, num_gts,
+ postfix))
+ return loss
+
def forward(self,
boxes,
logits,
@@ -231,48 +276,44 @@ def forward(self,
**kwargs):
r"""
Args:
- boxes (Tensor|None): [l, b, query, 4]
- logits (Tensor|None): [l, b, query, num_classes]
+ boxes (Tensor): [l, b, query, 4]
+ logits (Tensor): [l, b, query, num_classes]
gt_bbox (List(Tensor)): list[[n, 4]]
gt_class (List(Tensor)): list[[n, 1]]
- masks (Tensor, optional): [b, query, h, w]
+ masks (Tensor, optional): [l, b, query, h, w]
gt_mask (List(Tensor), optional): list[[n, H, W]]
postfix (str): postfix of loss name
"""
- dn_match_indices = kwargs.get("dn_match_indices", None)
- if dn_match_indices is None and (boxes is not None and
- logits is not None):
- match_indices = self.matcher(boxes[-1].detach(),
- logits[-1].detach(), gt_bbox, gt_class)
- else:
- match_indices = dn_match_indices
- num_gts = sum(len(a) for a in gt_bbox)
- num_gts = paddle.to_tensor([num_gts], dtype="float32")
- if paddle.distributed.get_world_size() > 1:
- paddle.distributed.all_reduce(num_gts)
- num_gts /= paddle.distributed.get_world_size()
- num_gts = paddle.clip(num_gts, min=1.) * kwargs.get("dn_num_group", 1.)
+ dn_match_indices = kwargs.get("dn_match_indices", None)
+ num_gts = kwargs.get("num_gts", None)
+ if num_gts is None:
+ num_gts = self._get_num_gts(gt_class)
- total_loss = dict()
- total_loss.update(
- self._get_loss_class(logits[
- -1] if logits is not None else None, gt_class, match_indices,
- self.num_classes, num_gts, postfix))
- total_loss.update(
- self._get_loss_bbox(boxes[-1] if boxes is not None else None,
- gt_bbox, match_indices, num_gts, postfix))
- if masks is not None and gt_mask is not None:
- total_loss.update(
- self._get_loss_mask(masks if masks is not None else None,
- gt_mask, match_indices, num_gts, postfix))
+ total_loss = self._get_prediction_loss(
+ boxes[-1],
+ logits[-1],
+ gt_bbox,
+ gt_class,
+ masks=masks[-1] if masks is not None else None,
+ gt_mask=gt_mask,
+ postfix=postfix,
+ dn_match_indices=dn_match_indices,
+ num_gts=num_gts)
if self.aux_loss:
total_loss.update(
self._get_loss_aux(
- boxes[:-1] if boxes is not None else None, logits[:-1]
- if logits is not None else None, gt_bbox, gt_class,
- self.num_classes, num_gts, dn_match_indices, postfix))
+ boxes[:-1],
+ logits[:-1],
+ gt_bbox,
+ gt_class,
+ self.num_classes,
+ num_gts,
+ dn_match_indices,
+ postfix,
+ masks=masks[:-1] if masks is not None else None,
+ gt_mask=gt_mask))
return total_loss
@@ -291,8 +332,9 @@ def forward(self,
dn_out_logits=None,
dn_meta=None,
**kwargs):
- total_loss = super(DINOLoss, self).forward(boxes, logits, gt_bbox,
- gt_class)
+ num_gts = self._get_num_gts(gt_class)
+ total_loss = super(DINOLoss, self).forward(
+ boxes, logits, gt_bbox, gt_class, num_gts=num_gts)
if dn_meta is not None:
dn_positive_idx, dn_num_group = \
@@ -300,31 +342,186 @@ def forward(self,
assert len(gt_class) == len(dn_positive_idx)
# denoising match indices
- dn_match_indices = []
- for i in range(len(gt_class)):
- num_gt = len(gt_class[i])
- if num_gt > 0:
- gt_idx = paddle.arange(end=num_gt, dtype="int64")
- gt_idx = gt_idx.unsqueeze(0).tile(
- [dn_num_group, 1]).flatten()
- assert len(gt_idx) == len(dn_positive_idx[i])
- dn_match_indices.append((dn_positive_idx[i], gt_idx))
- else:
- dn_match_indices.append((paddle.zeros(
- [0], dtype="int64"), paddle.zeros(
- [0], dtype="int64")))
+ dn_match_indices = self.get_dn_match_indices(
+ gt_class, dn_positive_idx, dn_num_group)
+
+ # compute denoising training loss
+ num_gts *= dn_num_group
+ dn_loss = super(DINOLoss, self).forward(
+ dn_out_bboxes,
+ dn_out_logits,
+ gt_bbox,
+ gt_class,
+ postfix="_dn",
+ dn_match_indices=dn_match_indices,
+ num_gts=num_gts)
+ total_loss.update(dn_loss)
else:
- dn_match_indices, dn_num_group = None, 1.
+ total_loss.update(
+ {k + '_dn': paddle.to_tensor([0.])
+ for k in total_loss.keys()})
+
+ return total_loss
+
+ @staticmethod
+ def get_dn_match_indices(labels, dn_positive_idx, dn_num_group):
+ dn_match_indices = []
+ for i in range(len(labels)):
+ num_gt = len(labels[i])
+ if num_gt > 0:
+ gt_idx = paddle.arange(end=num_gt, dtype="int64")
+ gt_idx = gt_idx.tile([dn_num_group])
+ assert len(dn_positive_idx[i]) == len(gt_idx)
+ dn_match_indices.append((dn_positive_idx[i], gt_idx))
+ else:
+ dn_match_indices.append((paddle.zeros(
+ [0], dtype="int64"), paddle.zeros(
+ [0], dtype="int64")))
+ return dn_match_indices
+
+
+@register
+class MaskDINOLoss(DETRLoss):
+ __shared__ = ['num_classes', 'use_focal_loss', 'num_sample_points']
+ __inject__ = ['matcher']
+
+ def __init__(self,
+ num_classes=80,
+ matcher='HungarianMatcher',
+ loss_coeff={
+ 'class': 4,
+ 'bbox': 5,
+ 'giou': 2,
+ 'mask': 5,
+ 'dice': 5
+ },
+ aux_loss=True,
+ use_focal_loss=False,
+ num_sample_points=12544,
+ oversample_ratio=3.0,
+ important_sample_ratio=0.75):
+ super(MaskDINOLoss, self).__init__(num_classes, matcher, loss_coeff,
+ aux_loss, use_focal_loss)
+ assert oversample_ratio >= 1
+ assert important_sample_ratio <= 1 and important_sample_ratio >= 0
- # compute denoising training loss
- dn_loss = super(DINOLoss, self).forward(
- dn_out_bboxes,
- dn_out_logits,
+ self.num_sample_points = num_sample_points
+ self.oversample_ratio = oversample_ratio
+ self.important_sample_ratio = important_sample_ratio
+ self.num_oversample_points = int(num_sample_points * oversample_ratio)
+ self.num_important_points = int(num_sample_points *
+ important_sample_ratio)
+ self.num_random_points = num_sample_points - self.num_important_points
+
+ def forward(self,
+ boxes,
+ logits,
+ gt_bbox,
+ gt_class,
+ masks=None,
+ gt_mask=None,
+ postfix="",
+ dn_out_bboxes=None,
+ dn_out_logits=None,
+ dn_out_masks=None,
+ dn_meta=None,
+ **kwargs):
+ num_gts = self._get_num_gts(gt_class)
+ total_loss = super(MaskDINOLoss, self).forward(
+ boxes,
+ logits,
gt_bbox,
gt_class,
- postfix="_dn",
- dn_match_indices=dn_match_indices,
- dn_num_group=dn_num_group)
- total_loss.update(dn_loss)
+ masks=masks,
+ gt_mask=gt_mask,
+ num_gts=num_gts)
+
+ if dn_meta is not None:
+ dn_positive_idx, dn_num_group = \
+ dn_meta["dn_positive_idx"], dn_meta["dn_num_group"]
+ assert len(gt_class) == len(dn_positive_idx)
+
+ # denoising match indices
+ dn_match_indices = DINOLoss.get_dn_match_indices(
+ gt_class, dn_positive_idx, dn_num_group)
+
+ # compute denoising training loss
+ num_gts *= dn_num_group
+ dn_loss = super(MaskDINOLoss, self).forward(
+ dn_out_bboxes,
+ dn_out_logits,
+ gt_bbox,
+ gt_class,
+ masks=dn_out_masks,
+ gt_mask=gt_mask,
+ postfix="_dn",
+ dn_match_indices=dn_match_indices,
+ num_gts=num_gts)
+ total_loss.update(dn_loss)
+ else:
+ total_loss.update(
+ {k + '_dn': paddle.to_tensor([0.])
+ for k in total_loss.keys()})
return total_loss
+
+ def _get_loss_mask(self, masks, gt_mask, match_indices, num_gts,
+ postfix=""):
+ # masks: [b, query, h, w], gt_mask: list[[n, H, W]]
+ name_mask = "loss_mask" + postfix
+ name_dice = "loss_dice" + postfix
+
+ loss = dict()
+ if sum(len(a) for a in gt_mask) == 0:
+ loss[name_mask] = paddle.to_tensor([0.])
+ loss[name_dice] = paddle.to_tensor([0.])
+ return loss
+
+ src_masks, target_masks = self._get_src_target_assign(masks, gt_mask,
+ match_indices)
+ # sample points
+ sample_points = self._get_point_coords_by_uncertainty(src_masks)
+ sample_points = 2.0 * sample_points.unsqueeze(1) - 1.0
+
+ src_masks = F.grid_sample(
+ src_masks.unsqueeze(1), sample_points,
+ align_corners=False).squeeze([1, 2])
+
+ target_masks = F.grid_sample(
+ target_masks.unsqueeze(1), sample_points,
+ align_corners=False).squeeze([1, 2]).detach()
+
+ loss[name_mask] = self.loss_coeff[
+ 'mask'] * F.binary_cross_entropy_with_logits(
+ src_masks, target_masks,
+ reduction='none').mean(1).sum() / num_gts
+ loss[name_dice] = self.loss_coeff['dice'] * self._dice_loss(
+ src_masks, target_masks, num_gts)
+ return loss
+
+ def _get_point_coords_by_uncertainty(self, masks):
+ # Sample points based on their uncertainty.
+ masks = masks.detach()
+ num_masks = masks.shape[0]
+ sample_points = paddle.rand(
+ [num_masks, 1, self.num_oversample_points, 2])
+
+ out_mask = F.grid_sample(
+ masks.unsqueeze(1), 2.0 * sample_points - 1.0,
+ align_corners=False).squeeze([1, 2])
+ out_mask = -paddle.abs(out_mask)
+
+ _, topk_ind = paddle.topk(out_mask, self.num_important_points, axis=1)
+ batch_ind = paddle.arange(end=num_masks, dtype=topk_ind.dtype)
+ batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_important_points])
+ topk_ind = paddle.stack([batch_ind, topk_ind], axis=-1)
+
+ sample_points = paddle.gather_nd(sample_points.squeeze(1), topk_ind)
+ if self.num_random_points > 0:
+ sample_points = paddle.concat(
+ [
+ sample_points,
+ paddle.rand([num_masks, self.num_random_points, 2])
+ ],
+ axis=1)
+ return sample_points
diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py
index b48cc98a71f..af222444ee6 100644
--- a/ppdet/modeling/post_process.py
+++ b/ppdet/modeling/post_process.py
@@ -26,7 +26,7 @@
__all__ = [
'BBoxPostProcess', 'MaskPostProcess', 'JDEBBoxPostProcess',
- 'CenterNetPostProcess', 'DETRBBoxPostProcess', 'SparsePostProcess'
+ 'CenterNetPostProcess', 'DETRPostProcess', 'SparsePostProcess'
]
@@ -443,8 +443,8 @@ def __call__(self, hm, wh, reg, im_shape, scale_factor):
@register
-class DETRBBoxPostProcess(object):
- __shared__ = ['num_classes', 'use_focal_loss']
+class DETRPostProcess(object):
+ __shared__ = ['num_classes', 'use_focal_loss', 'with_mask']
__inject__ = []
def __init__(self,
@@ -452,22 +452,39 @@ def __init__(self,
num_top_queries=100,
dual_queries=False,
dual_groups=0,
- use_focal_loss=False):
- super(DETRBBoxPostProcess, self).__init__()
+ use_focal_loss=False,
+ with_mask=False,
+ mask_threshold=0.5,
+ use_avg_mask_score=False):
+ super(DETRPostProcess, self).__init__()
self.num_classes = num_classes
self.num_top_queries = num_top_queries
self.dual_queries = dual_queries
self.dual_groups = dual_groups
self.use_focal_loss = use_focal_loss
+ self.with_mask = with_mask
+ self.mask_threshold = mask_threshold
+ self.use_avg_mask_score = use_avg_mask_score
- def __call__(self, head_out, im_shape, scale_factor):
+ def _mask_postprocess(self, mask_pred, score_pred, index):
+ mask_score = F.sigmoid(paddle.gather_nd(mask_pred, index))
+ mask_pred = (mask_score > self.mask_threshold).astype(mask_score.dtype)
+ if self.use_avg_mask_score:
+ avg_mask_score = (mask_pred * mask_score).sum([-2, -1]) / (
+ mask_pred.sum([-2, -1]) + 1e-6)
+ score_pred *= avg_mask_score
+
+ return mask_pred[0].astype('int32'), score_pred
+
+ def __call__(self, head_out, im_shape, scale_factor, pad_shape):
"""
Decode the bbox.
Args:
head_out (tuple): bbox_pred, cls_logit and masks of bbox_head output.
- im_shape (Tensor): The shape of the input image.
+ im_shape (Tensor): The shape of the input image without padding.
scale_factor (Tensor): The scale factor of the input image.
+ pad_shape (Tensor): The shape of the input image with padding.
Returns:
bbox_pred (Tensor): The output prediction with shape [N, 6], including
labels, scores and bboxes. The size of bboxes are corresponding
@@ -482,11 +499,13 @@ def __call__(self, head_out, im_shape, scale_factor):
bboxes[:, :int(num_queries // (self.dual_groups + 1)), :]
bbox_pred = bbox_cxcywh_to_xyxy(bboxes)
+ # calculate the original shape of the image
origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
img_h, img_w = paddle.split(origin_shape, 2, axis=-1)
- origin_shape = paddle.concat(
- [img_w, img_h, img_w, img_h], axis=-1).reshape([-1, 1, 4])
- bbox_pred *= origin_shape
+ # calculate the shape of the image with padding
+ out_shape = pad_shape / im_shape * origin_shape
+ out_shape = out_shape.flip(1).tile([1, 2]).unsqueeze(1)
+ bbox_pred *= out_shape
scores = F.sigmoid(logits) if self.use_focal_loss else F.softmax(
logits)[:, :, :-1]
@@ -512,6 +531,25 @@ def __call__(self, head_out, im_shape, scale_factor):
index = paddle.stack([batch_ind, index], axis=-1)
bbox_pred = paddle.gather_nd(bbox_pred, index)
+ mask_pred = None
+ if self.with_mask:
+ assert masks is not None
+ masks = F.interpolate(
+ masks, scale_factor=4, mode="bilinear", align_corners=False)
+ # TODO: Support prediction with bs>1.
+ # remove padding for input image
+ h, w = im_shape.astype('int32')[0]
+ masks = masks[..., :h, :w]
+ # get pred_mask in the original resolution.
+ img_h = img_h[0].astype('int32')
+ img_w = img_w[0].astype('int32')
+ masks = F.interpolate(
+ masks,
+ size=(img_h, img_w),
+ mode="bilinear",
+ align_corners=False)
+ mask_pred, scores = self._mask_postprocess(masks, scores, index)
+
bbox_pred = paddle.concat(
[
labels.unsqueeze(-1).astype('float32'), scores.unsqueeze(-1),
@@ -519,9 +557,9 @@ def __call__(self, head_out, im_shape, scale_factor):
],
axis=-1)
bbox_num = paddle.to_tensor(
- bbox_pred.shape[1], dtype='int32').tile([bbox_pred.shape[0]])
+ self.num_top_queries, dtype='int32').tile([bbox_pred.shape[0]])
bbox_pred = bbox_pred.reshape([-1, 6])
- return bbox_pred, bbox_num
+ return bbox_pred, bbox_num, mask_pred
@register
diff --git a/ppdet/modeling/transformers/__init__.py b/ppdet/modeling/transformers/__init__.py
index 0457e041423..e20bd6203ce 100644
--- a/ppdet/modeling/transformers/__init__.py
+++ b/ppdet/modeling/transformers/__init__.py
@@ -19,6 +19,7 @@
from . import deformable_transformer
from . import dino_transformer
from . import group_detr_transformer
+from . import mask_dino_transformer
from .detr_transformer import *
from .utils import *
@@ -28,3 +29,4 @@
from .dino_transformer import *
from .petr_transformer import *
from .group_detr_transformer import *
+from .mask_dino_transformer import *
diff --git a/ppdet/modeling/transformers/deformable_transformer.py b/ppdet/modeling/transformers/deformable_transformer.py
index 79aefad2972..b46fb298709 100644
--- a/ppdet/modeling/transformers/deformable_transformer.py
+++ b/ppdet/modeling/transformers/deformable_transformer.py
@@ -167,23 +167,24 @@ def __init__(self,
activation="relu",
n_levels=4,
n_points=4,
+ lr_mult=0.1,
weight_attr=None,
bias_attr=None):
super(DeformableTransformerEncoderLayer, self).__init__()
# self attention
self.self_attn = MSDeformableAttention(d_model, n_head, n_levels,
- n_points)
+ n_points, lr_mult)
self.dropout1 = nn.Dropout(dropout)
- self.norm1 = nn.LayerNorm(d_model)
+ self.norm1 = nn.LayerNorm(
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
# ffn
- self.linear1 = nn.Linear(d_model, dim_feedforward, weight_attr,
- bias_attr)
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
self.activation = getattr(F, activation)
self.dropout2 = nn.Dropout(dropout)
- self.linear2 = nn.Linear(dim_feedforward, d_model, weight_attr,
- bias_attr)
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
self.dropout3 = nn.Dropout(dropout)
- self.norm2 = nn.LayerNorm(d_model)
+ self.norm2 = nn.LayerNorm(
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
self._reset_parameters()
def _reset_parameters(self):
@@ -207,10 +208,10 @@ def forward(self,
spatial_shapes,
level_start_index,
src_mask=None,
- pos_embed=None):
+ query_pos_embed=None):
# self attention
src2 = self.self_attn(
- self.with_pos_embed(src, pos_embed), reference_points, src,
+ self.with_pos_embed(src, query_pos_embed), reference_points, src,
spatial_shapes, level_start_index, src_mask)
src = src + self.dropout1(src2)
src = self.norm1(src)
@@ -243,23 +244,22 @@ def get_reference_points(spatial_shapes, valid_ratios, offset=0.5):
return reference_points
def forward(self,
- src,
+ feat,
spatial_shapes,
level_start_index,
- src_mask=None,
- pos_embed=None,
+ feat_mask=None,
+ query_pos_embed=None,
valid_ratios=None):
- output = src
if valid_ratios is None:
valid_ratios = paddle.ones(
- [src.shape[0], spatial_shapes.shape[0], 2])
+ [feat.shape[0], spatial_shapes.shape[0], 2])
reference_points = self.get_reference_points(spatial_shapes,
valid_ratios)
for layer in self.layers:
- output = layer(output, reference_points, spatial_shapes,
- level_start_index, src_mask, pos_embed)
+ feat = layer(feat, reference_points, spatial_shapes,
+ level_start_index, feat_mask, query_pos_embed)
- return output
+ return feat
class DeformableTransformerDecoderLayer(nn.Layer):
@@ -271,6 +271,7 @@ def __init__(self,
activation="relu",
n_levels=4,
n_points=4,
+ lr_mult=0.1,
weight_attr=None,
bias_attr=None):
super(DeformableTransformerDecoderLayer, self).__init__()
@@ -278,23 +279,24 @@ def __init__(self,
# self attention
self.self_attn = MultiHeadAttention(d_model, n_head, dropout=dropout)
self.dropout1 = nn.Dropout(dropout)
- self.norm1 = nn.LayerNorm(d_model)
+ self.norm1 = nn.LayerNorm(
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
# cross attention
self.cross_attn = MSDeformableAttention(d_model, n_head, n_levels,
- n_points)
+ n_points, lr_mult)
self.dropout2 = nn.Dropout(dropout)
- self.norm2 = nn.LayerNorm(d_model)
+ self.norm2 = nn.LayerNorm(
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
# ffn
- self.linear1 = nn.Linear(d_model, dim_feedforward, weight_attr,
- bias_attr)
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
self.activation = getattr(F, activation)
self.dropout3 = nn.Dropout(dropout)
- self.linear2 = nn.Linear(dim_feedforward, d_model, weight_attr,
- bias_attr)
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
self.dropout4 = nn.Dropout(dropout)
- self.norm3 = nn.LayerNorm(d_model)
+ self.norm3 = nn.LayerNorm(
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
self._reset_parameters()
def _reset_parameters(self):
@@ -378,7 +380,7 @@ def __init__(self,
num_queries=300,
position_embed_type='sine',
return_intermediate_dec=True,
- backbone_num_channels=[512, 1024, 2048],
+ in_feats_channel=[512, 1024, 2048],
num_feature_levels=4,
num_encoder_points=4,
num_decoder_points=4,
@@ -390,12 +392,12 @@ def __init__(self,
dropout=0.1,
activation="relu",
lr_mult=0.1,
- weight_attr=None,
- bias_attr=None):
+ pe_temperature=10000,
+ pe_offset=-0.5):
super(DeformableTransformer, self).__init__()
assert position_embed_type in ['sine', 'learned'], \
f'ValueError: position_embed_type not supported {position_embed_type}!'
- assert len(backbone_num_channels) <= num_feature_levels
+ assert len(in_feats_channel) <= num_feature_levels
self.hidden_dim = hidden_dim
self.nhead = nhead
@@ -403,13 +405,13 @@ def __init__(self,
encoder_layer = DeformableTransformerEncoderLayer(
hidden_dim, nhead, dim_feedforward, dropout, activation,
- num_feature_levels, num_encoder_points, weight_attr, bias_attr)
+ num_feature_levels, num_encoder_points, lr_mult)
self.encoder = DeformableTransformerEncoder(encoder_layer,
num_encoder_layers)
decoder_layer = DeformableTransformerDecoderLayer(
hidden_dim, nhead, dim_feedforward, dropout, activation,
- num_feature_levels, num_decoder_points, weight_attr, bias_attr)
+ num_feature_levels, num_decoder_points)
self.decoder = DeformableTransformerDecoder(
decoder_layer, num_decoder_layers, return_intermediate_dec)
@@ -424,18 +426,14 @@ def __init__(self,
bias_attr=ParamAttr(learning_rate=lr_mult))
self.input_proj = nn.LayerList()
- for in_channels in backbone_num_channels:
+ for in_channels in in_feats_channel:
self.input_proj.append(
nn.Sequential(
nn.Conv2D(
- in_channels,
- hidden_dim,
- kernel_size=1,
- weight_attr=weight_attr,
- bias_attr=bias_attr),
+ in_channels, hidden_dim, kernel_size=1),
nn.GroupNorm(32, hidden_dim)))
- in_channels = backbone_num_channels[-1]
- for _ in range(num_feature_levels - len(backbone_num_channels)):
+ in_channels = in_feats_channel[-1]
+ for _ in range(num_feature_levels - len(in_feats_channel)):
self.input_proj.append(
nn.Sequential(
nn.Conv2D(
@@ -443,17 +441,16 @@ def __init__(self,
hidden_dim,
kernel_size=3,
stride=2,
- padding=1,
- weight_attr=weight_attr,
- bias_attr=bias_attr),
+ padding=1),
nn.GroupNorm(32, hidden_dim)))
in_channels = hidden_dim
self.position_embedding = PositionEmbedding(
hidden_dim // 2,
+ temperature=pe_temperature,
normalize=True if position_embed_type == 'sine' else False,
embed_type=position_embed_type,
- offset=-0.5)
+ offset=pe_offset)
self._reset_parameters()
@@ -469,7 +466,7 @@ def _reset_parameters(self):
@classmethod
def from_config(cls, cfg, input_shape):
- return {'backbone_num_channels': [i.channels for i in input_shape], }
+ return {'in_feats_channel': [i.channels for i in input_shape], }
def forward(self, src_feats, src_mask=None, *args, **kwargs):
srcs = []
diff --git a/ppdet/modeling/transformers/detr_transformer.py b/ppdet/modeling/transformers/detr_transformer.py
index ccbdb0a3d2a..efeb3208627 100644
--- a/ppdet/modeling/transformers/detr_transformer.py
+++ b/ppdet/modeling/transformers/detr_transformer.py
@@ -243,6 +243,8 @@ def __init__(self,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
+ pe_temperature=10000,
+ pe_offset=0.,
attn_dropout=None,
act_dropout=None,
normalize_before=False):
@@ -274,8 +276,10 @@ def __init__(self,
self.query_pos_embed = nn.Embedding(num_queries, hidden_dim)
self.position_embedding = PositionEmbedding(
hidden_dim // 2,
+ temperature=pe_temperature,
normalize=True if position_embed_type == 'sine' else False,
- embed_type=position_embed_type)
+ embed_type=position_embed_type,
+ offset=pe_offset)
self._reset_parameters()
diff --git a/ppdet/modeling/transformers/dino_transformer.py b/ppdet/modeling/transformers/dino_transformer.py
index 8050ef94430..89073e821d3 100644
--- a/ppdet/modeling/transformers/dino_transformer.py
+++ b/ppdet/modeling/transformers/dino_transformer.py
@@ -31,125 +31,18 @@
from ..layers import MultiHeadAttention
from .position_encoding import PositionEmbedding
from ..heads.detr_head import MLP
-from .deformable_transformer import MSDeformableAttention
+from .deformable_transformer import (MSDeformableAttention,
+ DeformableTransformerEncoderLayer,
+ DeformableTransformerEncoder)
from ..initializer import (linear_init_, constant_, xavier_uniform_, normal_,
bias_init_with_prob)
from .utils import (_get_clones, get_valid_ratio,
get_contrastive_denoising_training_group,
- get_sine_pos_embed)
+ get_sine_pos_embed, inverse_sigmoid)
__all__ = ['DINOTransformer']
-class DINOTransformerEncoderLayer(nn.Layer):
- def __init__(self,
- d_model=256,
- n_head=8,
- dim_feedforward=1024,
- dropout=0.,
- activation="relu",
- n_levels=4,
- n_points=4,
- weight_attr=None,
- bias_attr=None):
- super(DINOTransformerEncoderLayer, self).__init__()
- # self attention
- self.self_attn = MSDeformableAttention(d_model, n_head, n_levels,
- n_points, 1.0)
- self.dropout1 = nn.Dropout(dropout)
- self.norm1 = nn.LayerNorm(
- d_model,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
- # ffn
- self.linear1 = nn.Linear(d_model, dim_feedforward, weight_attr,
- bias_attr)
- self.activation = getattr(F, activation)
- self.dropout2 = nn.Dropout(dropout)
- self.linear2 = nn.Linear(dim_feedforward, d_model, weight_attr,
- bias_attr)
- self.dropout3 = nn.Dropout(dropout)
- self.norm2 = nn.LayerNorm(
- d_model,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
- self._reset_parameters()
-
- def _reset_parameters(self):
- linear_init_(self.linear1)
- linear_init_(self.linear2)
- xavier_uniform_(self.linear1.weight)
- xavier_uniform_(self.linear2.weight)
-
- def with_pos_embed(self, tensor, pos):
- return tensor if pos is None else tensor + pos
-
- def forward_ffn(self, src):
- src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
- src = src + self.dropout3(src2)
- src = self.norm2(src)
- return src
-
- def forward(self,
- src,
- reference_points,
- spatial_shapes,
- level_start_index,
- src_mask=None,
- query_pos_embed=None):
- # self attention
- src2 = self.self_attn(
- self.with_pos_embed(src, query_pos_embed), reference_points, src,
- spatial_shapes, level_start_index, src_mask)
- src = src + self.dropout1(src2)
- src = self.norm1(src)
- # ffn
- src = self.forward_ffn(src)
-
- return src
-
-
-class DINOTransformerEncoder(nn.Layer):
- def __init__(self, encoder_layer, num_layers):
- super(DINOTransformerEncoder, self).__init__()
- self.layers = _get_clones(encoder_layer, num_layers)
- self.num_layers = num_layers
-
- @staticmethod
- def get_reference_points(spatial_shapes, valid_ratios, offset=0.5):
- valid_ratios = valid_ratios.unsqueeze(1)
- reference_points = []
- for i, (H, W) in enumerate(spatial_shapes):
- ref_y, ref_x = paddle.meshgrid(
- paddle.arange(end=H) + offset, paddle.arange(end=W) + offset)
- ref_y = ref_y.flatten().unsqueeze(0) / (valid_ratios[:, :, i, 1] *
- H)
- ref_x = ref_x.flatten().unsqueeze(0) / (valid_ratios[:, :, i, 0] *
- W)
- reference_points.append(paddle.stack((ref_x, ref_y), axis=-1))
- reference_points = paddle.concat(reference_points, 1).unsqueeze(2)
- reference_points = reference_points * valid_ratios
- return reference_points
-
- def forward(self,
- feat,
- spatial_shapes,
- level_start_index,
- feat_mask=None,
- query_pos_embed=None,
- valid_ratios=None):
- if valid_ratios is None:
- valid_ratios = paddle.ones(
- [feat.shape[0], spatial_shapes.shape[0], 2])
- reference_points = self.get_reference_points(spatial_shapes,
- valid_ratios)
- for layer in self.layers:
- feat = layer(feat, reference_points, spatial_shapes,
- level_start_index, feat_mask, query_pos_embed)
-
- return feat
-
-
class DINOTransformerDecoderLayer(nn.Layer):
def __init__(self,
d_model=256,
@@ -159,6 +52,7 @@ def __init__(self,
activation="relu",
n_levels=4,
n_points=4,
+ lr_mult=1.0,
weight_attr=None,
bias_attr=None):
super(DINOTransformerDecoderLayer, self).__init__()
@@ -167,31 +61,23 @@ def __init__(self,
self.self_attn = MultiHeadAttention(d_model, n_head, dropout=dropout)
self.dropout1 = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(
- d_model,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
# cross attention
self.cross_attn = MSDeformableAttention(d_model, n_head, n_levels,
- n_points, 1.0)
+ n_points, lr_mult)
self.dropout2 = nn.Dropout(dropout)
self.norm2 = nn.LayerNorm(
- d_model,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
# ffn
- self.linear1 = nn.Linear(d_model, dim_feedforward, weight_attr,
- bias_attr)
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
self.activation = getattr(F, activation)
self.dropout3 = nn.Dropout(dropout)
- self.linear2 = nn.Linear(dim_feedforward, d_model, weight_attr,
- bias_attr)
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
self.dropout4 = nn.Dropout(dropout)
self.norm3 = nn.LayerNorm(
- d_model,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ d_model, weight_attr=weight_attr, bias_attr=bias_attr)
self._reset_parameters()
def _reset_parameters(self):
@@ -218,7 +104,10 @@ def forward(self,
# self attention
q = k = self.with_pos_embed(tgt, query_pos_embed)
if attn_mask is not None:
- attn_mask = attn_mask.astype('bool')
+ attn_mask = paddle.where(
+ attn_mask.astype('bool'),
+ paddle.zeros(attn_mask.shape, tgt.dtype),
+ paddle.full(attn_mask.shape, float("-inf"), tgt.dtype))
tgt2 = self.self_attn(q, k, value=tgt, attn_mask=attn_mask)
tgt = tgt + self.dropout1(tgt2)
tgt = self.norm1(tgt)
@@ -243,16 +132,14 @@ def __init__(self,
hidden_dim,
decoder_layer,
num_layers,
- return_intermediate=True):
+ weight_attr=None,
+ bias_attr=None):
super(DINOTransformerDecoder, self).__init__()
self.layers = _get_clones(decoder_layer, num_layers)
self.hidden_dim = hidden_dim
self.num_layers = num_layers
- self.return_intermediate = return_intermediate
self.norm = nn.LayerNorm(
- hidden_dim,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ hidden_dim, weight_attr=weight_attr, bias_attr=bias_attr)
def forward(self,
tgt,
@@ -271,9 +158,10 @@ def forward(self,
output = tgt
intermediate = []
- inter_ref_bboxes_unact = []
+ inter_bboxes = []
+ ref_points = F.sigmoid(ref_points_unact)
for i, layer in enumerate(self.layers):
- reference_points_input = F.sigmoid(ref_points_unact).unsqueeze(
+ reference_points_input = ref_points.detach().unsqueeze(
2) * valid_ratios.tile([1, 1, 2]).unsqueeze(1)
query_pos_embed = get_sine_pos_embed(
reference_points_input[..., 0, :], self.hidden_dim // 2)
@@ -283,19 +171,13 @@ def forward(self,
memory_spatial_shapes, memory_level_start_index,
attn_mask, memory_mask, query_pos_embed)
- inter_ref_bbox_unact = bbox_head[i](output) + ref_points_unact
-
- if self.return_intermediate:
- intermediate.append(self.norm(output))
- inter_ref_bboxes_unact.append(inter_ref_bbox_unact)
+ ref_points = F.sigmoid(bbox_head[i](output) + inverse_sigmoid(
+ ref_points.detach()))
- ref_points_unact = inter_ref_bbox_unact.detach()
+ intermediate.append(self.norm(output))
+ inter_bboxes.append(ref_points)
- if self.return_intermediate:
- return paddle.stack(intermediate), paddle.stack(
- inter_ref_bboxes_unact)
-
- return output, ref_points_unact
+ return paddle.stack(intermediate), paddle.stack(inter_bboxes)
@register
@@ -307,8 +189,7 @@ def __init__(self,
hidden_dim=256,
num_queries=900,
position_embed_type='sine',
- return_intermediate_dec=True,
- backbone_feat_channels=[512, 1024, 2048],
+ in_feats_channel=[512, 1024, 2048],
num_levels=4,
num_encoder_points=4,
num_decoder_points=4,
@@ -318,6 +199,7 @@ def __init__(self,
dim_feedforward=1024,
dropout=0.,
activation="relu",
+ lr_mult=1.0,
pe_temperature=10000,
pe_offset=-0.5,
num_denoising=100,
@@ -328,7 +210,7 @@ def __init__(self,
super(DINOTransformer, self).__init__()
assert position_embed_type in ['sine', 'learned'], \
f'ValueError: position_embed_type not supported {position_embed_type}!'
- assert len(backbone_feat_channels) <= num_levels
+ assert len(in_feats_channel) <= num_levels
self.hidden_dim = hidden_dim
self.nhead = nhead
@@ -338,20 +220,23 @@ def __init__(self,
self.eps = eps
self.num_decoder_layers = num_decoder_layers
+ weight_attr = ParamAttr(regularizer=L2Decay(0.0))
+ bias_attr = ParamAttr(regularizer=L2Decay(0.0))
# backbone feature projection
- self._build_input_proj_layer(backbone_feat_channels)
+ self._build_input_proj_layer(in_feats_channel, weight_attr, bias_attr)
# Transformer module
- encoder_layer = DINOTransformerEncoderLayer(
+ encoder_layer = DeformableTransformerEncoderLayer(
hidden_dim, nhead, dim_feedforward, dropout, activation, num_levels,
- num_encoder_points)
- self.encoder = DINOTransformerEncoder(encoder_layer, num_encoder_layers)
+ num_encoder_points, lr_mult, weight_attr, bias_attr)
+ self.encoder = DeformableTransformerEncoder(encoder_layer,
+ num_encoder_layers)
decoder_layer = DINOTransformerDecoderLayer(
hidden_dim, nhead, dim_feedforward, dropout, activation, num_levels,
- num_decoder_points)
+ num_decoder_points, lr_mult, weight_attr, bias_attr)
self.decoder = DINOTransformerDecoder(hidden_dim, decoder_layer,
- num_decoder_layers,
- return_intermediate_dec)
+ num_decoder_layers, weight_attr,
+ bias_attr)
# denoising part
self.denoising_class_embed = nn.Embedding(
@@ -383,9 +268,7 @@ def __init__(self,
self.enc_output = nn.Sequential(
nn.Linear(hidden_dim, hidden_dim),
nn.LayerNorm(
- hidden_dim,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0))))
+ hidden_dim, weight_attr=weight_attr, bias_attr=bias_attr))
self.enc_score_head = nn.Linear(hidden_dim, num_classes)
self.enc_bbox_head = MLP(hidden_dim, hidden_dim, 4, num_layers=3)
# decoder head
@@ -426,22 +309,25 @@ def _reset_parameters(self):
@classmethod
def from_config(cls, cfg, input_shape):
- return {'backbone_feat_channels': [i.channels for i in input_shape], }
+ return {'in_feats_channel': [i.channels for i in input_shape], }
- def _build_input_proj_layer(self, backbone_feat_channels):
+ def _build_input_proj_layer(self,
+ in_feats_channel,
+ weight_attr=None,
+ bias_attr=None):
self.input_proj = nn.LayerList()
- for in_channels in backbone_feat_channels:
+ for in_channels in in_feats_channel:
self.input_proj.append(
nn.Sequential(
('conv', nn.Conv2D(
- in_channels, self.hidden_dim, kernel_size=1)),
- ('norm', nn.GroupNorm(
- 32,
- self.hidden_dim,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0))))))
- in_channels = backbone_feat_channels[-1]
- for _ in range(self.num_levels - len(backbone_feat_channels)):
+ in_channels, self.hidden_dim, kernel_size=1)), (
+ 'norm', nn.GroupNorm(
+ 32,
+ self.hidden_dim,
+ weight_attr=weight_attr,
+ bias_attr=bias_attr))))
+ in_channels = in_feats_channel[-1]
+ for _ in range(self.num_levels - len(in_feats_channel)):
self.input_proj.append(
nn.Sequential(
('conv', nn.Conv2D(
@@ -452,8 +338,8 @@ def _build_input_proj_layer(self, backbone_feat_channels):
padding=1)), ('norm', nn.GroupNorm(
32,
self.hidden_dim,
- weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
- bias_attr=ParamAttr(regularizer=L2Decay(0.0))))))
+ weight_attr=weight_attr,
+ bias_attr=bias_attr))))
in_channels = self.hidden_dim
def _get_encoder_input(self, feats, pad_mask=None):
@@ -540,7 +426,7 @@ def forward(self, feats, pad_mask=None, gt_meta=None):
denoising_bbox_unact)
# decoder
- inter_feats, inter_ref_bboxes_unact = self.decoder(
+ inter_feats, inter_bboxes = self.decoder(
target, init_ref_points_unact, memory, spatial_shapes,
level_start_index, self.dec_bbox_head, self.query_pos_head,
valid_ratios, attn_mask, mask_flatten)
@@ -555,8 +441,7 @@ def forward(self, feats, pad_mask=None, gt_meta=None):
else:
out_bboxes.append(
F.sigmoid(self.dec_bbox_head[i](inter_feats[i]) +
- inter_ref_bboxes_unact[i - 1]))
-
+ inverse_sigmoid(inter_bboxes[i - 1])))
out_bboxes = paddle.stack(out_bboxes)
out_logits = paddle.stack(out_logits)
@@ -579,11 +464,8 @@ def _get_encoder_output_anchors(self,
valid_H, valid_W = h, w
grid_y, grid_x = paddle.meshgrid(
- paddle.arange(
- end=h, dtype=memory.dtype),
- paddle.arange(
- end=w, dtype=memory.dtype))
- grid_xy = paddle.stack([grid_x, grid_y], -1)
+ paddle.arange(end=h), paddle.arange(end=w))
+ grid_xy = paddle.stack([grid_x, grid_y], -1).astype(memory.dtype)
valid_WH = paddle.stack([valid_W, valid_H], -1).reshape(
[-1, 1, 1, 2]).astype(grid_xy.dtype)
@@ -623,7 +505,7 @@ def _get_decoder_input(self,
_, topk_ind = paddle.topk(
enc_outputs_class.max(-1), self.num_queries, axis=1)
# extract region proposal boxes
- batch_ind = paddle.arange(end=bs, dtype=topk_ind.dtype)
+ batch_ind = paddle.arange(end=bs).astype(topk_ind.dtype)
batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_queries])
topk_ind = paddle.stack([batch_ind, topk_ind], axis=-1)
reference_points_unact = paddle.gather_nd(enc_outputs_coord_unact,
diff --git a/ppdet/modeling/transformers/mask_dino_transformer.py b/ppdet/modeling/transformers/mask_dino_transformer.py
new file mode 100644
index 00000000000..6b292238596
--- /dev/null
+++ b/ppdet/modeling/transformers/mask_dino_transformer.py
@@ -0,0 +1,536 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Modified from Deformable-DETR (https://github.com/fundamentalvision/Deformable-DETR)
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Modified from detrex (https://github.com/IDEA-Research/detrex)
+# Copyright 2022 The IDEA Authors. All rights reserved.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+
+from ppdet.core.workspace import register
+from .position_encoding import PositionEmbedding
+from ..heads.detr_head import MLP
+from .deformable_transformer import (DeformableTransformerEncoderLayer,
+ DeformableTransformerEncoder)
+from .dino_transformer import (DINOTransformerDecoderLayer)
+from ..initializer import (linear_init_, constant_, xavier_uniform_,
+ bias_init_with_prob)
+from .utils import (_get_clones, get_valid_ratio, get_denoising_training_group,
+ get_sine_pos_embed, inverse_sigmoid, mask_to_box_coordinate)
+
+__all__ = ['MaskDINO']
+
+
+class ConvGNBlock(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ groups=1,
+ num_groups=32,
+ bias=False,
+ act=None):
+ super(ConvGNBlock, self).__init__()
+ self.conv = nn.Conv2D(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=(kernel_size - 1) // 2,
+ groups=groups,
+ bias_attr=bias)
+ self.norm = nn.GroupNorm(
+ num_groups,
+ out_channels,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ self.act = getattr(F, act) if act is not None else None
+
+ self._init_weights()
+
+ def _init_weights(self):
+ xavier_uniform_(self.conv.weight)
+
+ def forward(self, x):
+ x = self.norm(self.conv(x))
+ if self.act is not None:
+ x = self.act(x)
+ return x
+
+
+class MaskDINOTransformerDecoder(nn.Layer):
+ def __init__(self, hidden_dim, decoder_layer, num_layers):
+ super(MaskDINOTransformerDecoder, self).__init__()
+ self.layers = _get_clones(decoder_layer, num_layers)
+ self.hidden_dim = hidden_dim
+ self.num_layers = num_layers
+
+ def forward(self,
+ tgt,
+ ref_points_unact,
+ memory,
+ memory_spatial_shapes,
+ memory_level_start_index,
+ bbox_head,
+ query_pos_head,
+ dec_norm,
+ valid_ratios=None,
+ attn_mask=None,
+ memory_mask=None):
+ if valid_ratios is None:
+ valid_ratios = paddle.ones(
+ [memory.shape[0], memory_spatial_shapes.shape[0], 2])
+
+ output = tgt
+ intermediate = []
+ inter_bboxes = []
+ ref_points = F.sigmoid(ref_points_unact)
+ for i, layer in enumerate(self.layers):
+ reference_points_input = ref_points.detach().unsqueeze(
+ 2) * valid_ratios.tile([1, 1, 2]).unsqueeze(1)
+ query_pos_embed = get_sine_pos_embed(
+ reference_points_input[..., 0, :], self.hidden_dim // 2)
+ query_pos_embed = query_pos_head(query_pos_embed)
+
+ output = layer(output, reference_points_input, memory,
+ memory_spatial_shapes, memory_level_start_index,
+ attn_mask, memory_mask, query_pos_embed)
+
+ ref_points = F.sigmoid(
+ bbox_head(output) + inverse_sigmoid(ref_points.detach()))
+
+ intermediate.append(dec_norm(output))
+ inter_bboxes.append(ref_points)
+
+ return paddle.stack(intermediate), paddle.stack(inter_bboxes)
+
+
+@register
+class MaskDINO(nn.Layer):
+ __shared__ = ['num_classes', 'hidden_dim']
+
+ def __init__(self,
+ num_classes=80,
+ hidden_dim=256,
+ num_queries=300,
+ position_embed_type='sine',
+ in_feats_channel=[256, 512, 1024, 2048],
+ num_levels=3,
+ num_encoder_points=4,
+ num_decoder_points=4,
+ nhead=8,
+ num_encoder_layers=6,
+ num_decoder_layers=9,
+ enc_dim_feedforward=1024,
+ dec_dim_feedforward=2048,
+ dropout=0.,
+ activation="relu",
+ lr_mult=1.0,
+ pe_temperature=10000,
+ pe_offset=-0.5,
+ num_denoising=100,
+ label_noise_ratio=0.4,
+ box_noise_scale=0.4,
+ learnt_init_query=False,
+ mask_enhanced=True,
+ eps=1e-2):
+ super(MaskDINO, self).__init__()
+ assert position_embed_type in ['sine', 'learned'], \
+ f'ValueError: position_embed_type not supported {position_embed_type}!'
+        feat0_dim, *in_feats_channel = in_feats_channel  # unpack, don't pop: avoids mutating the shared default list
+ assert len(in_feats_channel) <= num_levels
+
+ self.hidden_dim = hidden_dim
+ self.nhead = nhead
+ self.num_levels = num_levels
+ self.num_classes = num_classes
+ self.num_queries = num_queries
+ self.eps = eps
+ self.num_decoder_layers = num_decoder_layers
+ self.mask_enhanced = mask_enhanced
+
+ weight_attr = ParamAttr(regularizer=L2Decay(0.0))
+ bias_attr = ParamAttr(regularizer=L2Decay(0.0))
+ # backbone feature projection
+ self._build_input_proj_layer(in_feats_channel, weight_attr, bias_attr)
+
+ # Transformer module
+ encoder_layer = DeformableTransformerEncoderLayer(
+ hidden_dim, nhead, enc_dim_feedforward, dropout, activation,
+ num_levels, num_encoder_points, lr_mult, weight_attr, bias_attr)
+ self.encoder = DeformableTransformerEncoder(encoder_layer,
+ num_encoder_layers)
+ decoder_layer = DINOTransformerDecoderLayer(
+ hidden_dim, nhead, dec_dim_feedforward, dropout, activation,
+ num_levels, num_decoder_points, lr_mult, weight_attr, bias_attr)
+ self.decoder = MaskDINOTransformerDecoder(hidden_dim, decoder_layer,
+ num_decoder_layers)
+
+ # denoising part
+ self.denoising_class_embed = nn.Embedding(
+ num_classes,
+ hidden_dim,
+ weight_attr=ParamAttr(initializer=nn.initializer.Normal()))
+ self.num_denoising = num_denoising
+ self.label_noise_ratio = label_noise_ratio
+ self.box_noise_scale = box_noise_scale
+
+ # position embedding
+ self.position_embedding = PositionEmbedding(
+ hidden_dim // 2,
+ temperature=pe_temperature,
+ normalize=True if position_embed_type == 'sine' else False,
+ embed_type=position_embed_type,
+ offset=pe_offset)
+ self.level_embed = nn.Embedding(
+ num_levels,
+ hidden_dim,
+ weight_attr=ParamAttr(initializer=nn.initializer.Normal()))
+ # decoder embedding
+ self.learnt_init_query = learnt_init_query
+ if learnt_init_query:
+ self.tgt_embed = nn.Embedding(num_queries, hidden_dim)
+ self.query_pos_head = MLP(2 * hidden_dim,
+ hidden_dim,
+ hidden_dim,
+ num_layers=2)
+ # mask embedding
+ self.mask_query_head = MLP(hidden_dim,
+ hidden_dim,
+ hidden_dim,
+ num_layers=3)
+
+ # encoder mask head
+ self.enc_mask_lateral = ConvGNBlock(feat0_dim, hidden_dim, 1)
+ self.enc_mask_output = nn.Sequential(
+ ConvGNBlock(
+ hidden_dim, hidden_dim, 3, act=activation),
+ nn.Conv2D(hidden_dim, hidden_dim, 1))
+ # encoder head
+ self.enc_output = nn.Sequential(
+ nn.Linear(hidden_dim, hidden_dim),
+ nn.LayerNorm(
+ hidden_dim, weight_attr=weight_attr, bias_attr=bias_attr))
+ # decoder norm layer
+ self.dec_norm = nn.LayerNorm(
+ hidden_dim, weight_attr=weight_attr, bias_attr=bias_attr)
+ # shared prediction head
+ self.class_head = nn.Linear(hidden_dim, num_classes)
+ self.bbox_head = MLP(hidden_dim, hidden_dim, 4, num_layers=3)
+
+ self._reset_parameters()
+
+ def _reset_parameters(self):
+ # class and bbox head init
+ bias_cls = bias_init_with_prob(0.01)
+ linear_init_(self.class_head)
+ constant_(self.class_head.bias, bias_cls)
+ constant_(self.bbox_head.layers[-1].weight)
+ constant_(self.bbox_head.layers[-1].bias)
+
+ xavier_uniform_(self.enc_mask_output[1].weight)
+ linear_init_(self.enc_output[0])
+ xavier_uniform_(self.enc_output[0].weight)
+ if self.learnt_init_query:
+ xavier_uniform_(self.tgt_embed.weight)
+ xavier_uniform_(self.query_pos_head.layers[0].weight)
+ xavier_uniform_(self.query_pos_head.layers[1].weight)
+ for l in self.input_proj:
+ xavier_uniform_(l[0].weight)
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {'in_feats_channel': [i.channels for i in input_shape], }
+
+ def _build_input_proj_layer(self,
+ in_feats_channel,
+ weight_attr=None,
+ bias_attr=None):
+ self.input_proj = nn.LayerList()
+ for in_channels in in_feats_channel:
+ self.input_proj.append(
+ nn.Sequential(
+ ('conv', nn.Conv2D(
+ in_channels, self.hidden_dim, kernel_size=1)), (
+ 'norm', nn.GroupNorm(
+ 32,
+ self.hidden_dim,
+ weight_attr=weight_attr,
+ bias_attr=bias_attr))))
+ in_channels = in_feats_channel[-1]
+ for _ in range(self.num_levels - len(in_feats_channel)):
+ self.input_proj.append(
+ nn.Sequential(
+ ('conv', nn.Conv2D(
+ in_channels,
+ self.hidden_dim,
+ kernel_size=3,
+ stride=2,
+ padding=1)), ('norm', nn.GroupNorm(
+ 32,
+ self.hidden_dim,
+ weight_attr=weight_attr,
+ bias_attr=bias_attr))))
+ in_channels = self.hidden_dim
+
+ def _get_encoder_input(self, feats, pad_mask=None):
+ # get projection features
+ proj_feats = [self.input_proj[i](feat) for i, feat in enumerate(feats)]
+ if self.num_levels > len(proj_feats):
+ len_srcs = len(proj_feats)
+ for i in range(len_srcs, self.num_levels):
+ if i == len_srcs:
+ proj_feats.append(self.input_proj[i](feats[-1]))
+ else:
+ proj_feats.append(self.input_proj[i](proj_feats[-1]))
+
+ # get encoder inputs
+ feat_flatten = []
+ mask_flatten = []
+ lvl_pos_embed_flatten = []
+ spatial_shapes = []
+ valid_ratios = []
+ for i, feat in enumerate(proj_feats):
+ bs, _, h, w = paddle.shape(feat)
+ spatial_shapes.append(paddle.concat([h, w]))
+ # [b,c,h,w] -> [b,h*w,c]
+ feat_flatten.append(feat.flatten(2).transpose([0, 2, 1]))
+ if pad_mask is not None:
+ mask = F.interpolate(pad_mask.unsqueeze(0), size=(h, w))[0]
+ else:
+ mask = paddle.ones([bs, h, w])
+ valid_ratios.append(get_valid_ratio(mask))
+ # [b, h*w, c]
+ pos_embed = self.position_embedding(mask).flatten(1, 2)
+ lvl_pos_embed = pos_embed + self.level_embed.weight[i]
+ lvl_pos_embed_flatten.append(lvl_pos_embed)
+ if pad_mask is not None:
+ # [b, h*w]
+ mask_flatten.append(mask.flatten(1))
+
+ # [b, l, c]
+ feat_flatten = paddle.concat(feat_flatten, 1)
+ # [b, l]
+ mask_flatten = None if pad_mask is None else paddle.concat(mask_flatten,
+ 1)
+ # [b, l, c]
+ lvl_pos_embed_flatten = paddle.concat(lvl_pos_embed_flatten, 1)
+ # [num_levels, 2]
+ spatial_shapes = paddle.to_tensor(
+ paddle.stack(spatial_shapes).astype('int64'))
+        # [l], start index of each level in the flattened features
+ level_start_index = paddle.concat([
+ paddle.zeros(
+ [1], dtype='int64'), spatial_shapes.prod(1).cumsum(0)[:-1]
+ ])
+ # [b, num_levels, 2]
+ valid_ratios = paddle.stack(valid_ratios, 1)
+ return (feat_flatten, spatial_shapes, level_start_index, mask_flatten,
+ lvl_pos_embed_flatten, valid_ratios)
+
+ def forward(self, feats, pad_mask=None, gt_meta=None):
+        feat0, feats = feats[0], feats[1:]  # slice, don't pop: avoids mutating the caller's list
+ # input projection and embedding
+ (feat_flatten, spatial_shapes, level_start_index, mask_flatten,
+ lvl_pos_embed_flatten,
+ valid_ratios) = self._get_encoder_input(feats, pad_mask)
+
+ # encoder
+ memory = self.encoder(feat_flatten, spatial_shapes, level_start_index,
+ mask_flatten, lvl_pos_embed_flatten, valid_ratios)
+
+ mask_feat = self._get_encoder_mask_feature(feat0, memory,
+ spatial_shapes)
+
+ # prepare denoising training
+ if self.training:
+ denoising_class, denoising_bbox_unact, attn_mask, dn_meta = \
+ get_denoising_training_group(gt_meta,
+ self.num_classes,
+ self.num_queries,
+ self.denoising_class_embed.weight,
+ self.num_denoising,
+ self.label_noise_ratio,
+ self.box_noise_scale)
+ else:
+ denoising_class, denoising_bbox_unact, attn_mask, dn_meta = None, None, None, None
+
+ target, init_ref_points_unact, enc_out, init_out = \
+ self._get_decoder_input(
+ memory, mask_feat, spatial_shapes, mask_flatten, denoising_class,
+ denoising_bbox_unact)
+
+ # decoder
+ inter_feats, inter_bboxes = self.decoder(
+ target, init_ref_points_unact, memory, spatial_shapes,
+ level_start_index, self.bbox_head, self.query_pos_head,
+ self.dec_norm, valid_ratios, attn_mask, mask_flatten)
+
+ out_logits = []
+ out_bboxes = []
+ out_masks = []
+ for i in range(self.num_decoder_layers):
+ if self.training or i == self.num_decoder_layers - 1:
+ logits_, masks_ = self._get_pred_class_and_mask(inter_feats[i],
+ mask_feat)
+ else:
+ continue
+ out_logits.append(logits_)
+ out_masks.append(masks_)
+ if i == 0:
+ out_bboxes.append(
+ F.sigmoid(
+ self.bbox_head(inter_feats[i]) + init_ref_points_unact))
+ else:
+ out_bboxes.append(
+ F.sigmoid(
+ self.bbox_head(inter_feats[i]) + inverse_sigmoid(
+ inter_bboxes[i - 1])))
+ out_bboxes = paddle.stack(out_bboxes)
+ out_logits = paddle.stack(out_logits)
+ out_masks = paddle.stack(out_masks)
+
+ return (out_logits, out_bboxes, out_masks, enc_out, init_out, dn_meta)
+
+ def _get_encoder_mask_feature(self, in_feat, memory, spatial_shapes):
+ memory_feat0 = memory.split(
+ spatial_shapes.prod(1).split(self.num_levels), axis=1)[0]
+ h, w = spatial_shapes[0]
+ memory_feat0 = memory_feat0.reshape(
+ [0, h, w, self.hidden_dim]).transpose([0, 3, 1, 2])
+ out = self.enc_mask_lateral(in_feat) + F.interpolate(
+ memory_feat0,
+ scale_factor=2.0,
+ mode='bilinear',
+ align_corners=False)
+ return self.enc_mask_output(out)
+
+ def _get_encoder_output_anchors(self,
+ memory,
+ spatial_shapes,
+ memory_mask=None,
+ grid_size=0.05):
+ output_anchors = []
+ idx = 0
+ for lvl, (h, w) in enumerate(spatial_shapes):
+ if memory_mask is not None:
+ mask_ = memory_mask[:, idx:idx + h * w].reshape([-1, h, w])
+ valid_H = paddle.sum(mask_[:, :, 0], 1)
+ valid_W = paddle.sum(mask_[:, 0, :], 1)
+ else:
+ valid_H, valid_W = h, w
+
+ grid_y, grid_x = paddle.meshgrid(
+ paddle.arange(end=h), paddle.arange(end=w))
+ grid_xy = paddle.stack([grid_x, grid_y], -1).astype(memory.dtype)
+
+ valid_WH = paddle.stack([valid_W, valid_H], -1).reshape(
+ [-1, 1, 1, 2]).astype(grid_xy.dtype)
+ grid_xy = (grid_xy.unsqueeze(0) + 0.5) / valid_WH
+ wh = paddle.ones_like(grid_xy) * grid_size * (2.0**lvl)
+ output_anchors.append(
+ paddle.concat([grid_xy, wh], -1).reshape([-1, h * w, 4]))
+ idx += h * w
+
+ output_anchors = paddle.concat(output_anchors, 1)
+ valid_mask = ((output_anchors > self.eps) *
+ (output_anchors < 1 - self.eps)).all(-1, keepdim=True)
+ output_anchors = paddle.log(output_anchors / (1 - output_anchors))
+ if memory_mask is not None:
+ valid_mask = (valid_mask * (memory_mask.unsqueeze(-1) > 0)) > 0
+ output_anchors = paddle.where(valid_mask, output_anchors,
+ paddle.to_tensor(float("inf")))
+
+ memory = paddle.where(valid_mask, memory, paddle.to_tensor(0.))
+ output_memory = self.enc_output(memory)
+ return output_memory, output_anchors
+
+ def _get_decoder_input(self,
+ memory,
+ mask_feat,
+ spatial_shapes,
+ memory_mask=None,
+ denoising_class=None,
+ denoising_bbox_unact=None):
+ # prepare input for decoder
+ bs, _, _ = memory.shape
+ output_memory, output_anchors = self._get_encoder_output_anchors(
+ memory, spatial_shapes, memory_mask)
+ enc_logits_unact = self.class_head(output_memory)
+ enc_bboxes_unact = self.bbox_head(output_memory) + output_anchors
+
+ # get topk index
+ _, topk_ind = paddle.topk(
+ enc_logits_unact.max(-1), self.num_queries, axis=1)
+ batch_ind = paddle.arange(end=bs).astype(topk_ind.dtype)
+ batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_queries])
+ topk_ind = paddle.stack([batch_ind, topk_ind], axis=-1)
+
+ # extract content and position query embedding
+ target = paddle.gather_nd(output_memory, topk_ind)
+ reference_points_unact = paddle.gather_nd(enc_bboxes_unact,
+ topk_ind) # unsigmoided.
+ # get encoder output: {logits, bboxes, masks}
+ enc_out_logits, enc_out_masks = self._get_pred_class_and_mask(target,
+ mask_feat)
+ enc_out_bboxes = F.sigmoid(reference_points_unact)
+ enc_out = (enc_out_logits, enc_out_bboxes, enc_out_masks)
+
+ # concat denoising query
+ if self.learnt_init_query:
+ target = self.tgt_embed.weight.unsqueeze(0).tile([bs, 1, 1])
+ else:
+ target = target.detach()
+ if denoising_class is not None:
+ target = paddle.concat([denoising_class, target], 1)
+ if self.mask_enhanced:
+ # use mask-enhanced anchor box initialization
+ reference_points = mask_to_box_coordinate(
+ enc_out_masks > 0, normalize=True, format="xywh")
+ reference_points_unact = inverse_sigmoid(reference_points)
+ if denoising_bbox_unact is not None:
+ reference_points_unact = paddle.concat(
+ [denoising_bbox_unact, reference_points_unact], 1)
+
+        # direct prediction from the matching and denoising parts at the beginning
+ if self.training and denoising_class is not None:
+ init_out_logits, init_out_masks = self._get_pred_class_and_mask(
+ target, mask_feat)
+ init_out_bboxes = F.sigmoid(reference_points_unact)
+ init_out = (init_out_logits, init_out_bboxes, init_out_masks)
+ else:
+ init_out = None
+
+ return target, reference_points_unact.detach(), enc_out, init_out
+
+ def _get_pred_class_and_mask(self, query_embed, mask_feat):
+ out_query = self.dec_norm(query_embed)
+ out_logits = self.class_head(out_query)
+ mask_query_embed = self.mask_query_head(out_query)
+ _, _, h, w = paddle.shape(mask_feat)
+ # [b, q, c] x [b, c, h, w] -> [b, q, h, w]
+ out_mask = paddle.bmm(mask_query_embed, mask_feat.flatten(2)).reshape(
+ [0, 0, h, w])
+ return out_logits, out_mask
diff --git a/ppdet/modeling/transformers/matchers.py b/ppdet/modeling/transformers/matchers.py
index 794d8632803..f163a6eeae9 100644
--- a/ppdet/modeling/transformers/matchers.py
+++ b/ppdet/modeling/transformers/matchers.py
@@ -34,13 +34,19 @@
@register
@serializable
class HungarianMatcher(nn.Layer):
- __shared__ = ['use_focal_loss']
+ __shared__ = ['use_focal_loss', 'with_mask', 'num_sample_points']
def __init__(self,
- matcher_coeff={'class': 1,
- 'bbox': 5,
- 'giou': 2},
+ matcher_coeff={
+ 'class': 1,
+ 'bbox': 5,
+ 'giou': 2,
+ 'mask': 1,
+ 'dice': 1
+ },
use_focal_loss=False,
+ with_mask=False,
+ num_sample_points=12544,
alpha=0.25,
gamma=2.0):
r"""
@@ -50,18 +56,28 @@ def __init__(self,
super(HungarianMatcher, self).__init__()
self.matcher_coeff = matcher_coeff
self.use_focal_loss = use_focal_loss
+ self.with_mask = with_mask
+ self.num_sample_points = num_sample_points
self.alpha = alpha
self.gamma = gamma
self.giou_loss = GIoULoss()
- def forward(self, boxes, logits, gt_bbox, gt_class):
+ def forward(self,
+ boxes,
+ logits,
+ gt_bbox,
+ gt_class,
+ masks=None,
+ gt_mask=None):
r"""
Args:
boxes (Tensor): [b, query, 4]
logits (Tensor): [b, query, num_classes]
gt_bbox (List(Tensor)): list[[n, 4]]
gt_class (List(Tensor)): list[[n, 1]]
+ masks (Tensor|None): [b, query, h, w]
+ gt_mask (List(Tensor)): list[[n, H, W]]
Returns:
A list of size batch_size, containing tuples of (index_i, index_j) where:
@@ -72,18 +88,19 @@ def forward(self, boxes, logits, gt_bbox, gt_class):
"""
bs, num_queries = boxes.shape[:2]
- num_gts = sum(len(a) for a in gt_class)
- if num_gts == 0:
+ num_gts = [len(a) for a in gt_class]
+ if sum(num_gts) == 0:
return [(paddle.to_tensor(
[], dtype=paddle.int64), paddle.to_tensor(
[], dtype=paddle.int64)) for _ in range(bs)]
# We flatten to compute the cost matrices in a batch
# [batch_size * num_queries, num_classes]
+ logits = logits.detach()
out_prob = F.sigmoid(logits.flatten(
0, 1)) if self.use_focal_loss else F.softmax(logits.flatten(0, 1))
# [batch_size * num_queries, 4]
- out_bbox = boxes.flatten(0, 1)
+ out_bbox = boxes.detach().flatten(0, 1)
# Also concat the target labels and boxes
tgt_ids = paddle.concat(gt_class).flatten()
@@ -111,11 +128,53 @@ def forward(self, boxes, logits, gt_bbox, gt_class):
bbox_cxcywh_to_xyxy(tgt_bbox.unsqueeze(0))).squeeze(-1)
# Final cost matrix
- C = self.matcher_coeff['class'] * cost_class + self.matcher_coeff['bbox'] * cost_bbox + \
+ C = self.matcher_coeff['class'] * cost_class + \
+ self.matcher_coeff['bbox'] * cost_bbox + \
self.matcher_coeff['giou'] * cost_giou
+ # Compute the mask cost and dice cost
+ if self.with_mask:
+            assert masks is not None and gt_mask is not None, \
+                'Make sure the input has `mask` and `gt_mask`'
+ # all masks share the same set of points for efficient matching
+ sample_points = paddle.rand([bs, 1, self.num_sample_points, 2])
+ sample_points = 2.0 * sample_points - 1.0
+
+ out_mask = F.grid_sample(
+ masks.detach(), sample_points, align_corners=False).squeeze(-2)
+ out_mask = out_mask.flatten(0, 1)
+
+ tgt_mask = paddle.concat(gt_mask).unsqueeze(1)
+ sample_points = paddle.concat([
+ a.tile([b, 1, 1, 1]) for a, b in zip(sample_points, num_gts)
+ if b > 0
+ ])
+ tgt_mask = F.grid_sample(
+ tgt_mask, sample_points, align_corners=False).squeeze([1, 2])
+
+ with paddle.amp.auto_cast(enable=False):
+ # binary cross entropy cost
+ pos_cost_mask = F.binary_cross_entropy_with_logits(
+ out_mask, paddle.ones_like(out_mask), reduction='none')
+ neg_cost_mask = F.binary_cross_entropy_with_logits(
+ out_mask, paddle.zeros_like(out_mask), reduction='none')
+ cost_mask = paddle.matmul(
+ pos_cost_mask, tgt_mask, transpose_y=True) + paddle.matmul(
+ neg_cost_mask, 1 - tgt_mask, transpose_y=True)
+ cost_mask /= self.num_sample_points
+
+ # dice cost
+ out_mask = F.sigmoid(out_mask)
+ numerator = 2 * paddle.matmul(
+ out_mask, tgt_mask, transpose_y=True)
+ denominator = out_mask.sum(
+ -1, keepdim=True) + tgt_mask.sum(-1).unsqueeze(0)
+ cost_dice = 1 - (numerator + 1) / (denominator + 1)
+
+ C = C + self.matcher_coeff['mask'] * cost_mask + \
+ self.matcher_coeff['dice'] * cost_dice
+
C = C.reshape([bs, num_queries, -1])
C = [a.squeeze(0) for a in C.chunk(bs)]
-
sizes = [a.shape[0] for a in gt_bbox]
indices = [
linear_sum_assignment(c.split(sizes, -1)[i].numpy())
diff --git a/ppdet/modeling/transformers/utils.py b/ppdet/modeling/transformers/utils.py
index c41b069cffb..a40950d9ff5 100644
--- a/ppdet/modeling/transformers/utils.py
+++ b/ppdet/modeling/transformers/utils.py
@@ -63,9 +63,9 @@ def sigmoid_focal_loss(logit, label, normalizer=1.0, alpha=0.25, gamma=2.0):
return loss.mean(1).sum() / normalizer
-def inverse_sigmoid(x, eps=1e-6):
+def inverse_sigmoid(x, eps=1e-5):
x = x.clip(min=0., max=1.)
- return paddle.log(x / (1 - x + eps) + eps)
+ return paddle.log(x.clip(min=eps) / (1 - x).clip(min=eps))
def deformable_attention_core_func(value, value_spatial_shapes,
@@ -122,6 +122,99 @@ def get_valid_ratio(mask):
return paddle.stack([valid_ratio_w, valid_ratio_h], -1)
+def get_denoising_training_group(targets,
+ num_classes,
+ num_queries,
+ class_embed,
+ num_denoising=100,
+ label_noise_ratio=0.5,
+ box_noise_scale=1.0):
+ if num_denoising <= 0:
+ return None, None, None, None
+ num_gts = [len(t) for t in targets["gt_class"]]
+ max_gt_num = max(num_gts)
+ if max_gt_num == 0:
+ return None, None, None, None
+
+ num_group = num_denoising // max_gt_num
+ num_group = 1 if num_group == 0 else num_group
+ # pad gt to max_num of a batch
+ bs = len(targets["gt_class"])
+ input_query_class = paddle.full(
+ [bs, max_gt_num], num_classes, dtype='int32')
+ input_query_bbox = paddle.zeros([bs, max_gt_num, 4])
+ pad_gt_mask = paddle.zeros([bs, max_gt_num])
+ for i in range(bs):
+ num_gt = num_gts[i]
+ if num_gt > 0:
+ input_query_class[i, :num_gt] = targets["gt_class"][i].squeeze(-1)
+ input_query_bbox[i, :num_gt] = targets["gt_bbox"][i]
+ pad_gt_mask[i, :num_gt] = 1
+
+ input_query_class = input_query_class.tile([1, num_group])
+ input_query_bbox = input_query_bbox.tile([1, num_group, 1])
+ pad_gt_mask = pad_gt_mask.tile([1, num_group])
+
+ dn_positive_idx = paddle.nonzero(pad_gt_mask)[:, 1]
+ dn_positive_idx = paddle.split(dn_positive_idx,
+ [n * num_group for n in num_gts])
+ # total denoising queries
+ num_denoising = int(max_gt_num * num_group)
+
+ if label_noise_ratio > 0:
+ input_query_class = input_query_class.flatten()
+ pad_gt_mask = pad_gt_mask.flatten()
+ # half of bbox prob
+ mask = paddle.rand(input_query_class.shape) < (label_noise_ratio * 0.5)
+ chosen_idx = paddle.nonzero(mask * pad_gt_mask).squeeze(-1)
+ # randomly put a new one here
+ new_label = paddle.randint_like(
+ chosen_idx, 0, num_classes, dtype=input_query_class.dtype)
+ input_query_class.scatter_(chosen_idx, new_label)
+ input_query_class.reshape_([bs, num_denoising])
+ pad_gt_mask.reshape_([bs, num_denoising])
+
+ if box_noise_scale > 0:
+ diff = paddle.concat(
+ [input_query_bbox[..., 2:] * 0.5, input_query_bbox[..., 2:]],
+ axis=-1) * box_noise_scale
+ diff *= (paddle.rand(input_query_bbox.shape) * 2.0 - 1.0)
+ input_query_bbox += diff
+ input_query_bbox = inverse_sigmoid(input_query_bbox)
+
+ class_embed = paddle.concat(
+ [class_embed, paddle.zeros([1, class_embed.shape[-1]])])
+ input_query_class = paddle.gather(
+ class_embed, input_query_class.flatten(),
+ axis=0).reshape([bs, num_denoising, -1])
+
+ tgt_size = num_denoising + num_queries
+ attn_mask = paddle.ones([tgt_size, tgt_size]) < 0
+ # match query cannot see the reconstruction
+ attn_mask[num_denoising:, :num_denoising] = True
+ # reconstruct cannot see each other
+ for i in range(num_group):
+ if i == 0:
+ attn_mask[max_gt_num * i:max_gt_num * (i + 1), max_gt_num * (i + 1):
+ num_denoising] = True
+ if i == num_group - 1:
+ attn_mask[max_gt_num * i:max_gt_num * (i + 1), :max_gt_num *
+ i] = True
+ else:
+ attn_mask[max_gt_num * i:max_gt_num * (i + 1), max_gt_num * (i + 1):
+ num_denoising] = True
+ attn_mask[max_gt_num * i:max_gt_num * (i + 1), :max_gt_num *
+ i] = True
+ attn_mask = ~attn_mask
+ dn_meta = {
+ "dn_positive_idx": dn_positive_idx,
+ "dn_num_group": num_group,
+ "dn_num_split": [num_denoising, num_queries]
+ }
+
+ return input_query_class, input_query_bbox, attn_mask, dn_meta
+
+
def get_contrastive_denoising_training_group(targets,
num_classes,
num_queries,
@@ -204,7 +297,7 @@ def get_contrastive_denoising_training_group(targets,
tgt_size = num_denoising + num_queries
attn_mask = paddle.ones([tgt_size, tgt_size]) < 0
- # match query cannot see the reconstruct
+ # match query cannot see the reconstruction
attn_mask[num_denoising:, :num_denoising] = True
# reconstruct cannot see each other
for i in range(num_group):
@@ -263,3 +356,42 @@ def sine_func(x):
pos_res[0], pos_res[1] = pos_res[1], pos_res[0]
pos_res = paddle.concat(pos_res, axis=2)
return pos_res
+
+
+def mask_to_box_coordinate(mask,
+ normalize=False,
+ format="xyxy",
+ dtype="float32"):
+ """
+ Compute the bounding boxes around the provided mask.
+ Args:
+ mask (Tensor:bool): [b, c, h, w]
+
+ Returns:
+ bbox (Tensor): [b, c, 4]
+ """
+ assert mask.ndim == 4
+ assert format in ["xyxy", "xywh"]
+ if mask.sum() == 0:
+ return paddle.zeros([mask.shape[0], mask.shape[1], 4], dtype=dtype)
+
+ h, w = mask.shape[-2:]
+ y, x = paddle.meshgrid(
+ paddle.arange(
+ end=h, dtype=dtype), paddle.arange(
+ end=w, dtype=dtype))
+
+ x_mask = x * mask
+ x_max = x_mask.flatten(-2).max(-1) + 1
+ x_min = paddle.where(mask, x_mask,
+ paddle.to_tensor(1e8)).flatten(-2).min(-1)
+
+ y_mask = y * mask
+ y_max = y_mask.flatten(-2).max(-1) + 1
+ y_min = paddle.where(mask, y_mask,
+ paddle.to_tensor(1e8)).flatten(-2).min(-1)
+ out_bbox = paddle.stack([x_min, y_min, x_max, y_max], axis=-1)
+ if normalize:
+ out_bbox /= paddle.to_tensor([w, h, w, h]).astype(dtype)
+
+ return out_bbox if format == "xyxy" else bbox_xyxy_to_cxcywh(out_bbox)
From c35edbfe0ef8825aa791f9f677c7d75a86947dc3 Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Wed, 8 Mar 2023 12:29:20 +0800
Subject: [PATCH 036/116] [docs]fix dino, deformable-detr docs,
test=document_fix (#7888)
---
configs/deformable_detr/README.md | 6 +++---
configs/dino/README.md | 8 ++++----
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/configs/deformable_detr/README.md b/configs/deformable_detr/README.md
index c995ab9175f..c237c0aac1d 100644
--- a/configs/deformable_detr/README.md
+++ b/configs/deformable_detr/README.md
@@ -8,9 +8,9 @@ Deformable DETR is an object detection model based on DETR. We reproduced the mo
## Model Zoo
-| Backbone | Model | Images/GPU | Inf time (fps) | Box AP | Config | Download |
-|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:|
-| R-50 | Deformable DETR | 2 | --- | 44.5 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) |
+| Backbone | Model | Images/GPU | Epochs | Box AP | Config | Log | Download |
+|:--------:|:---------------:|:----------:|:------:|:------:|:------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------:|
+| R-50 | Deformable DETR | 2 | 50 | 44.5 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/deformable_detr_r50_1x_coco_44.5.log) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) |
**Notes:**
diff --git a/configs/dino/README.md b/configs/dino/README.md
index e7d666f8b0f..7849edbe074 100644
--- a/configs/dino/README.md
+++ b/configs/dino/README.md
@@ -8,10 +8,10 @@
## Model Zoo
-| Backbone | Model | Epochs | Box AP | Config | Download |
-|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:--------------------------------------------------------------------------------:|
-| R-50 | dino_r50_4scale | 12 | 49.1 | [config](./dino_r50_4scale_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams) |
-| R-50 | dino_r50_4scale | 24 | 50.5 | [config](./dino_r50_4scale_2x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_2x_coco.pdparams) |
+| Backbone | Model | Epochs | Box AP | Config | Log | Download |
+|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:-------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
+| R-50 | dino_r50_4scale | 12 | 49.5 | [config](./dino_r50_4scale_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_1x_coco_49.5.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams) |
+| R-50 | dino_r50_4scale | 24 | 50.8 | [config](./dino_r50_4scale_2x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_2x_coco_50.8.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_2x_coco.pdparams) |
**Notes:**
From 1d07733b94cf52886ae323a6a0ad5b9547136cf0 Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Wed, 8 Mar 2023 17:23:31 +0800
Subject: [PATCH 037/116] [dev] fix 'SchemaDict' bug in create dataset (#7893)
---
tools/eval.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/eval.py b/tools/eval.py
index 384a497906b..40cbbecd8b5 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -29,7 +29,7 @@
import paddle
-from ppdet.core.workspace import load_config, merge_config
+from ppdet.core.workspace import create, load_config, merge_config
from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser, merge_args
from ppdet.engine import Trainer, init_parallel_env
@@ -130,7 +130,7 @@ def run(FLAGS, cfg):
json_eval_results(
cfg.metric,
json_directory=FLAGS.output_eval,
- dataset=cfg['EvalDataset'])
+ dataset=create('EvalDataset')())
return
# init parallel environment if nranks > 1
From f4ef20df45edd4bcd0ee5b5d1e9208d94558e47a Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Wed, 8 Mar 2023 18:03:33 +0800
Subject: [PATCH 038/116] [dev] fix use trt dynamic shape to infer (#7889)
---
deploy/python/infer.py | 19 +++++++++----------
deploy/python/utils.py | 10 ++++++++++
2 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index 31e491b1278..b167b04ce9c 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -46,8 +46,6 @@
'PPLCNet', 'DETR', 'CenterTrack'
}
-TUNED_TRT_DYNAMIC_MODELS = {'DETR'}
-
def bench_log(detector, img_list, model_info, batch_size=1, name=None):
mems = {
@@ -445,7 +443,7 @@ def predict_video(self, video_file, camera_id):
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
out_path = os.path.join(self.output_dir, video_out_name)
- fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
index = 1
while (1):
@@ -823,8 +821,7 @@ def load_predictor(model_dir,
cpu_threads=1,
enable_mkldnn=False,
enable_mkldnn_bfloat16=False,
- delete_shuffle_pass=False,
- tuned_trt_shape_file="shape_range_info.pbtxt"):
+ delete_shuffle_pass=False):
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
@@ -891,8 +888,6 @@ def load_predictor(model_dir,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys():
- if arch in TUNED_TRT_DYNAMIC_MODELS:
- config.collect_shape_range_info(tuned_trt_shape_file)
config.enable_tensorrt_engine(
workspace_size=(1 << 25) * batch_size,
max_batch_size=batch_size,
@@ -900,9 +895,13 @@ def load_predictor(model_dir,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=trt_calib_mode)
- if arch in TUNED_TRT_DYNAMIC_MODELS:
- config.enable_tuned_tensorrt_dynamic_shape(tuned_trt_shape_file,
- True)
+ if FLAGS.collect_trt_shape_info:
+ config.collect_shape_range_info(FLAGS.tuned_trt_shape_file)
+ elif os.path.exists(FLAGS.tuned_trt_shape_file):
+ print(f'Use dynamic shape file: '
+ f'{FLAGS.tuned_trt_shape_file} for TRT...')
+ config.enable_tuned_tensorrt_dynamic_shape(
+ FLAGS.tuned_trt_shape_file, True)
if use_dynamic_shape:
min_input_shape = {
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
index d1f7d59f857..7fc8148b3a0 100644
--- a/deploy/python/utils.py
+++ b/deploy/python/utils.py
@@ -201,6 +201,16 @@ def argsparser():
type=str,
default='ios',
help="Combine method matching metric, choose in ['iou', 'ios'].")
+ parser.add_argument(
+ "--collect_trt_shape_info",
+ action='store_true',
+ default=False,
+ help="Whether to collect dynamic shape before using tensorrt.")
+ parser.add_argument(
+ "--tuned_trt_shape_file",
+ type=str,
+ default="shape_range_info.pbtxt",
+ help="Path of a dynamic shape file for tensorrt.")
return parser
From fa1ba1aa9235ed14a65aba65ccc9c2615c9d4ec1 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Thu, 9 Mar 2023 13:54:49 +0800
Subject: [PATCH 039/116] Add vitpose (#7894)
* add vitpose
* fix keypoints/README_en.md document_fix=test
* add vitpose_base_coco_256x192
* vitpose.py: add some annotations to distinguish it from vision_transformer.py
* output_heatmap use gpu
---
configs/keypoint/README.md | 13 +-
configs/keypoint/README_en.md | 13 +-
.../vit_pose/vitpose_base_coco_256x196.yml | 171 ++++++++++
.../vitpose_base_simple_coco_256x192.yml | 164 +++++++++
ppdet/data/source/keypoint_coco.py | 7 +-
ppdet/modeling/architectures/__init__.py | 2 +
.../architectures/keypoint_vitpose.py | 317 +++++++++++++++++
ppdet/modeling/backbones/__init__.py | 3 +-
ppdet/modeling/backbones/vitpose.py | 320 ++++++++++++++++++
ppdet/modeling/heads/__init__.py | 2 +
ppdet/modeling/heads/vitpose_head.py | 278 +++++++++++++++
ppdet/modeling/keypoint_utils.py | 61 ++++
ppdet/optimizer/adamw.py | 2 +-
13 files changed, 1348 insertions(+), 5 deletions(-)
create mode 100644 configs/keypoint/vit_pose/vitpose_base_coco_256x196.yml
create mode 100644 configs/keypoint/vit_pose/vitpose_base_simple_coco_256x192.yml
create mode 100644 ppdet/modeling/architectures/keypoint_vitpose.py
create mode 100644 ppdet/modeling/backbones/vitpose.py
create mode 100644 ppdet/modeling/heads/vitpose_head.py
diff --git a/configs/keypoint/README.md b/configs/keypoint/README.md
index c93932d7360..8b08f092043 100644
--- a/configs/keypoint/README.md
+++ b/configs/keypoint/README.md
@@ -72,8 +72,11 @@ COCO数据集
| LiteHRNet-18 |Top-Down| 384x288 | 69.7 | [lite_hrnet_18_384x288_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/lite_hrnet_18_384x288_coco.pdparams) | [config](./lite_hrnet/lite_hrnet_18_384x288_coco.yml) |
| LiteHRNet-30 | Top-Down|256x192 | 69.4 | [lite_hrnet_30_256x192_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/lite_hrnet_30_256x192_coco.pdparams) | [config](./lite_hrnet/lite_hrnet_30_256x192_coco.yml) |
| LiteHRNet-30 |Top-Down| 384x288 | 72.5 | [lite_hrnet_30_384x288_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/lite_hrnet_30_384x288_coco.pdparams) | [config](./lite_hrnet/lite_hrnet_30_384x288_coco.yml) |
+|Vitpose_base_simple |Top-Down| 256x192 | 77.7 | [vitpose_base_simple_256x192_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/vitpose_base_simple_256x192_coco.pdparams) | [config](./vit_pose/vitpose_base_simple_coco_256x192.yml) |
+|Vitpose_base |Top-Down| 256x192 | 78.2 | [vitpose_base_coco_256x192.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/vitpose_base_coco_256x192.pdparams) | [config](./vit_pose/vitpose_base_coco_256x192.yml) |
-备注: Top-Down模型测试AP结果基于GroundTruth标注框
+备注: 1.Top-Down模型测试AP结果基于GroundTruth标注框
+ 2.vitpose训练用[MAE](https://bj.bcebos.com/v1/paddledet/models/keypoint/mae_pretrain_vit_base.pdparams)做为预训练模型
MPII数据集
| 模型 | 方案| 输入尺寸 | PCKh(Mean) | PCKh(Mean@0.1) | 模型下载 | 配置文件 |
@@ -284,4 +287,12 @@ python deploy/python/det_keypoint_unite_infer.py \
booktitle={CVPR},
year={2021}
}
+
+@inproceedings{
+ xu2022vitpose,
+ title={ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation},
+ author={Yufei Xu and Jing Zhang and Qiming Zhang and Dacheng Tao},
+ booktitle={Advances in Neural Information Processing Systems},
+ year={2022},
+}
```
diff --git a/configs/keypoint/README_en.md b/configs/keypoint/README_en.md
index 15f659645c3..252f31745b4 100644
--- a/configs/keypoint/README_en.md
+++ b/configs/keypoint/README_en.md
@@ -75,8 +75,11 @@ COCO Dataset
| LiteHRNet-18 | 384x288 | 69.7 | [lite_hrnet_18_384x288_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/lite_hrnet_18_384x288_coco.pdparams) | [config](./lite_hrnet/lite_hrnet_18_384x288_coco.yml) |
| LiteHRNet-30 | 256x192 | 69.4 | [lite_hrnet_30_256x192_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/lite_hrnet_30_256x192_coco.pdparams) | [config](./lite_hrnet/lite_hrnet_30_256x192_coco.yml) |
| LiteHRNet-30 | 384x288 | 72.5 | [lite_hrnet_30_384x288_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/lite_hrnet_30_384x288_coco.pdparams) | [config](./lite_hrnet/lite_hrnet_30_384x288_coco.yml) |
+| Vitpose_base_simple | 256x192 | 77.7 | [vitpose_base_simple_256x192_coco.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/vitpose_base_simple_256x192_coco.pdparams) | [config](./vit_pose/vitpose_base_simple_coco_256x192.yml) |
+| Vitpose_base | 256x192 | 78.2 | [vitpose_base_coco_256x192.pdparams](https://bj.bcebos.com/v1/paddledet/models/keypoint/vitpose_base_coco_256x192.pdparams) | [config](./vit_pose/vitpose_base_coco_256x192.yml) |
-Note:The AP results of Top-Down models are based on bounding boxes in GroundTruth.
+Note: 1. The AP results of Top-Down models are based on bounding boxes in GroundTruth.
+      2. Vitpose training uses [MAE](https://bj.bcebos.com/v1/paddledet/models/keypoint/mae_pretrain_vit_base.pdparams) as the pre-training model.
MPII Dataset
| Model | Input Size | PCKh(Mean) | PCKh(Mean@0.1) | Model Download | Config File |
@@ -266,4 +269,12 @@ We provide benchmarks in different runtime environments for your reference when
booktitle={CVPR},
year={2021}
}
+
+@inproceedings{
+ xu2022vitpose,
+ title={ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation},
+ author={Yufei Xu and Jing Zhang and Qiming Zhang and Dacheng Tao},
+ booktitle={Advances in Neural Information Processing Systems},
+ year={2022},
+}
```
diff --git a/configs/keypoint/vit_pose/vitpose_base_coco_256x196.yml b/configs/keypoint/vit_pose/vitpose_base_coco_256x196.yml
new file mode 100644
index 00000000000..3e4934e3aaa
--- /dev/null
+++ b/configs/keypoint/vit_pose/vitpose_base_coco_256x196.yml
@@ -0,0 +1,171 @@
+use_gpu: true
+log_iter: 50
+save_dir: output
+snapshot_epoch: 10
+weights: output/vitpose_base_coco_256x192/model_final
+epoch: 210
+num_joints: &num_joints 17
+pixel_std: &pixel_std 200
+metric: KeyPointTopDownCOCOEval
+num_classes: 1
+train_height: &train_height 256
+train_width: &train_width 192
+trainsize: &trainsize [*train_width, *train_height]
+hmsize: &hmsize [48, 64]
+flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
+
+
+#####model
+architecture: VitPose_TopDown
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/mae_pretrain_vit_base.pdparams
+VitPose_TopDown:
+ backbone: ViT
+ head: TopdownHeatmapSimpleHead
+ post_process: VitPosePostProcess
+ loss: KeyPointMSELoss
+ flip_test: True
+
+ViT:
+ img_size: [256, 192]
+ patch_size: 16
+ embed_dim: 768
+ depth: 12
+ num_heads: 12
+ ratio: 1
+ mlp_ratio: 4
+ qkv_bias: True
+ drop_path_rate: 0.3
+ epsilon: 0.000001
+
+
+TopdownHeatmapSimpleHead:
+ in_channels: 768
+ num_deconv_layers: 2
+ num_deconv_filters: [256,256]
+ num_deconv_kernels: [4,4]
+ out_channels: 17
+ shift_heatmap: False
+ flip_pairs: *flip_perm
+ extra: {final_conv_kernel: 1}
+
+VitPosePostProcess:
+ use_dark: True
+
+KeyPointMSELoss:
+ use_target_weight: true
+ loss_scale: 1.0
+
+####optimizer
+LearningRate:
+ base_lr: 0.0005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [170, 200]
+ - !LinearWarmup
+ start_factor: 0.001
+ steps: 500
+
+OptimizerBuilder:
+ clip_grad_by_norm: 1.0
+ optimizer:
+ type: AdamWDL
+ betas: [0.9, 0.999]
+ weight_decay: 0.1
+ num_layers: 12
+ layer_decay: 0.75
+ filter_bias_and_bn: True
+ skip_decay_names: ['pos_embed','norm']
+ set_param_lr_func: 'layerwise_lr_decay'
+
+
+
+
+#####data
+TrainDataset:
+ !KeypointTopDownCocoDataset
+ image_dir: train2017
+ anno_path: annotations/person_keypoints_train2017.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ trainsize: *trainsize
+ pixel_std: *pixel_std
+ center_scale: 0.4
+
+
+
+
+EvalDataset:
+ !KeypointTopDownCocoDataset
+ image_dir: val2017
+ anno_path: annotations/person_keypoints_val2017.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ trainsize: *trainsize
+ pixel_std: *pixel_std
+ image_thre: 0.0
+ use_gt_bbox: True
+
+TestDataset:
+ !ImageFolder
+ anno_path: dataset/coco/keypoint_imagelist.txt
+
+worker_num: 4
+global_mean: &global_mean [0.485, 0.456, 0.406]
+global_std: &global_std [0.229, 0.224, 0.225]
+TrainReader:
+ sample_transforms:
+ - RandomFlipHalfBodyTransform:
+ scale: 0.5
+ rot: 40
+ num_joints_half_body: 8
+ prob_half_body: 0.3
+ pixel_std: *pixel_std
+ trainsize: *trainsize
+ upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ flip_pairs: *flip_perm
+
+ - TopDownAffine:
+ trainsize: *trainsize
+ use_udp: true
+ - ToHeatmapsTopDown_UDP:
+ hmsize: *hmsize
+ sigma: 2
+
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 64
+ shuffle: True
+ drop_last: True
+
+EvalReader:
+ sample_transforms:
+ - TopDownAffine:
+ trainsize: *trainsize
+ use_udp: true
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 64
+
+TestReader:
+ inputs_def:
+ image_shape: [3, *train_height, *train_width]
+ sample_transforms:
+ - Decode: {}
+ - TopDownEvalAffine:
+ trainsize: *trainsize
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 1
+ fuse_normalize: false
diff --git a/configs/keypoint/vit_pose/vitpose_base_simple_coco_256x192.yml b/configs/keypoint/vit_pose/vitpose_base_simple_coco_256x192.yml
new file mode 100644
index 00000000000..2e34f259397
--- /dev/null
+++ b/configs/keypoint/vit_pose/vitpose_base_simple_coco_256x192.yml
@@ -0,0 +1,164 @@
+use_gpu: true
+log_iter: 50
+save_dir: output
+snapshot_epoch: 10
+weights: output/vitpose_base_simple_coco_256x192/model_final
+epoch: 210
+num_joints: &num_joints 17
+pixel_std: &pixel_std 200
+metric: KeyPointTopDownCOCOEval
+num_classes: 1
+train_height: &train_height 256
+train_width: &train_width 192
+trainsize: &trainsize [*train_width, *train_height]
+hmsize: &hmsize [48, 64]
+flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
+
+
+#####model
+architecture: VitPose_TopDown
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/keypoint/mae_pretrain_vit_base.pdparams
+VitPose_TopDown:
+ backbone: ViT
+ head: TopdownHeatmapSimpleHead
+ post_process: VitPosePostProcess
+ loss: KeyPointMSELoss
+ flip_test: True
+
+ViT:
+ img_size: [256, 192]
+ qkv_bias: True
+ drop_path_rate: 0.3
+ epsilon: 0.000001
+
+
+TopdownHeatmapSimpleHead:
+ in_channels: 768
+ num_deconv_layers: 0
+ num_deconv_filters: []
+ num_deconv_kernels: []
+ upsample: 4
+ shift_heatmap: False
+ flip_pairs: *flip_perm
+ extra: {final_conv_kernel: 3}
+
+VitPosePostProcess:
+ use_dark: True
+
+KeyPointMSELoss:
+ use_target_weight: true
+ loss_scale: 1.0
+
+####optimizer
+LearningRate:
+ base_lr: 0.0005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [170, 200]
+ - !LinearWarmup
+ start_factor: 0.001
+ steps: 500
+
+OptimizerBuilder:
+ clip_grad_by_norm: 1.0
+ optimizer:
+ type: AdamWDL
+ betas: [0.9, 0.999]
+ weight_decay: 0.1
+ num_layers: 12
+ layer_decay: 0.75
+ filter_bias_and_bn: True
+ skip_decay_names: ['pos_embed','norm']
+ set_param_lr_func: 'layerwise_lr_decay'
+
+
+
+
+#####data
+TrainDataset:
+ !KeypointTopDownCocoDataset
+ image_dir: train2017
+ anno_path: annotations/person_keypoints_train2017.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ trainsize: *trainsize
+ pixel_std: *pixel_std
+ center_scale: 0.4
+
+
+
+EvalDataset:
+ !KeypointTopDownCocoDataset
+ image_dir: val2017
+ anno_path: annotations/person_keypoints_val2017.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ trainsize: *trainsize
+ pixel_std: *pixel_std
+ image_thre: 0.0
+ use_gt_bbox: True
+
+TestDataset:
+ !ImageFolder
+ anno_path: dataset/coco/keypoint_imagelist.txt
+
+worker_num: 4
+global_mean: &global_mean [0.485, 0.456, 0.406]
+global_std: &global_std [0.229, 0.224, 0.225]
+TrainReader:
+ sample_transforms:
+ - RandomFlipHalfBodyTransform:
+ scale: 0.5
+ rot: 40
+ num_joints_half_body: 8
+ prob_half_body: 0.3
+ pixel_std: *pixel_std
+ trainsize: *trainsize
+ upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ flip_pairs: *flip_perm
+
+ - TopDownAffine:
+ trainsize: *trainsize
+ use_udp: true
+ - ToHeatmapsTopDown_UDP:
+ hmsize: *hmsize
+ sigma: 2
+
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 64
+ shuffle: True
+ drop_last: True
+
+EvalReader:
+ sample_transforms:
+ - TopDownAffine:
+ trainsize: *trainsize
+ use_udp: true
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 64
+
+TestReader:
+ inputs_def:
+ image_shape: [3, *train_height, *train_width]
+ sample_transforms:
+ - Decode: {}
+ - TopDownEvalAffine:
+ trainsize: *trainsize
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 1
+ fuse_normalize: false
diff --git a/ppdet/data/source/keypoint_coco.py b/ppdet/data/source/keypoint_coco.py
index 11ecea53840..6e072dc6e88 100644
--- a/ppdet/data/source/keypoint_coco.py
+++ b/ppdet/data/source/keypoint_coco.py
@@ -491,7 +491,8 @@ def __init__(self,
bbox_file=None,
use_gt_bbox=True,
pixel_std=200,
- image_thre=0.0):
+ image_thre=0.0,
+ center_scale=None):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform)
@@ -500,6 +501,7 @@ def __init__(self,
self.trainsize = trainsize
self.pixel_std = pixel_std
self.image_thre = image_thre
+ self.center_scale = center_scale
self.dataset_name = 'coco'
def parse_dataset(self):
@@ -574,6 +576,9 @@ def _box2cs(self, box):
center[1] = y + h * 0.5
aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]
+ if self.center_scale is not None and np.random.rand() < 0.3:
+ center += self.center_scale * (np.random.rand(2) - 0.5) * [w, h]
+
if w > aspect_ratio * h:
h = w * 1.0 / aspect_ratio
elif w < aspect_ratio * h:
diff --git a/ppdet/modeling/architectures/__init__.py b/ppdet/modeling/architectures/__init__.py
index 8899e5c0b4c..4c6c5ed0ac1 100644
--- a/ppdet/modeling/architectures/__init__.py
+++ b/ppdet/modeling/architectures/__init__.py
@@ -25,6 +25,7 @@
from . import s2anet
from . import keypoint_hrhrnet
from . import keypoint_hrnet
+from . import keypoint_vitpose
from . import jde
from . import deepsort
from . import fairmot
@@ -55,6 +56,7 @@
from .s2anet import *
from .keypoint_hrhrnet import *
from .keypoint_hrnet import *
+from .keypoint_vitpose import *
from .jde import *
from .deepsort import *
from .fairmot import *
diff --git a/ppdet/modeling/architectures/keypoint_vitpose.py b/ppdet/modeling/architectures/keypoint_vitpose.py
new file mode 100644
index 00000000000..b00226a8307
--- /dev/null
+++ b/ppdet/modeling/architectures/keypoint_vitpose.py
@@ -0,0 +1,317 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import numpy as np
+import math
+import cv2
+from ppdet.core.workspace import register, create, serializable
+from .meta_arch import BaseArch
+from ..keypoint_utils import transform_preds
+from .. import layers as L
+
+__all__ = ['VitPose_TopDown', 'VitPosePostProcess']
+
+
+@register
+class VitPose_TopDown(BaseArch):
+ __category__ = 'architecture'
+ __inject__ = ['loss']
+
+ def __init__(self, backbone, head, loss, post_process, flip_test):
+ """
+ VitPose network, see https://arxiv.org/pdf/2204.12484v2.pdf
+
+ Args:
+ backbone (nn.Layer): backbone instance
+ post_process (object): `HRNetPostProcess` instance
+
+ """
+ super(VitPose_TopDown, self).__init__()
+ self.backbone = backbone
+ self.head = head
+ self.loss = loss
+ self.post_process = post_process
+ self.flip_test = flip_test
+
+ @classmethod
+ def from_config(cls, cfg, *args, **kwargs):
+ # backbone
+ backbone = create(cfg['backbone'])
+ #head
+ head = create(cfg['head'])
+ #post_process
+ post_process = create(cfg['post_process'])
+
+ return {
+ 'backbone': backbone,
+ 'head': head,
+ 'post_process': post_process
+ }
+
+ def _forward_train(self):
+
+ feats = self.backbone.forward_features(self.inputs['image'])
+ vitpost_output = self.head(feats)
+ return self.loss(vitpost_output, self.inputs)
+
+ def _forward_test(self):
+
+ feats = self.backbone.forward_features(self.inputs['image'])
+ output_heatmap = self.head(feats)
+
+ if self.flip_test:
+ img_flipped = self.inputs['image'].flip(3)
+ features_flipped = self.backbone.forward_features(img_flipped)
+ output_flipped_heatmap = self.head.inference_model(features_flipped,
+ self.flip_test)
+
+ output_heatmap = (output_heatmap + output_flipped_heatmap) * 0.5
+
+ imshape = (self.inputs['im_shape'].numpy()
+ )[:, ::-1] if 'im_shape' in self.inputs else None
+ center = self.inputs['center'].numpy(
+ ) if 'center' in self.inputs else np.round(imshape / 2.)
+ scale = self.inputs['scale'].numpy(
+ ) if 'scale' in self.inputs else imshape / 200.
+
+ result = self.post_process(output_heatmap.cpu().numpy(), center, scale)
+
+ return result
+
+ def get_loss(self):
+ return self._forward_train()
+
+ def get_pred(self):
+ res_lst = self._forward_test()
+ outputs = {'keypoint': res_lst}
+ return outputs
+
+
+@register
+@serializable
+class VitPosePostProcess(object):
+ def __init__(self, use_dark=False):
+ self.use_dark = use_dark
+
+ def get_max_preds(self, heatmaps):
+ '''get predictions from score maps
+
+ Args:
+ heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
+
+ Returns:
+ preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
+ maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
+ '''
+ assert isinstance(heatmaps,
+ np.ndarray), 'heatmaps should be numpy.ndarray'
+ assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
+
+ batch_size = heatmaps.shape[0]
+ num_joints = heatmaps.shape[1]
+ width = heatmaps.shape[3]
+ heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
+ idx = np.argmax(heatmaps_reshaped, 2)
+ maxvals = np.amax(heatmaps_reshaped, 2)
+
+ maxvals = maxvals.reshape((batch_size, num_joints, 1))
+ idx = idx.reshape((batch_size, num_joints, 1))
+
+ preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
+
+ preds[:, :, 0] = (preds[:, :, 0]) % width
+ preds[:, :, 1] = np.floor((preds[:, :, 1]) // width)
+
+ pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
+ pred_mask = pred_mask.astype(np.float32)
+
+ preds *= pred_mask
+
+ return preds, maxvals
+
+ def post_datk_udp(self, coords, batch_heatmaps, kernel=3):
+ """DARK post-processing. Implemented by udp. Paper ref: Huang et al. The
+ Devil is in the Details: Delving into Unbiased Data Processing for Human
+ Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
+ Representation for Human Pose Estimation (CVPR 2020).
+
+ Note:
+ - batch size: B
+ - num keypoints: K
+ - num persons: N
+ - height of heatmaps: H
+ - width of heatmaps: W
+
+ B=1 for bottom_up paradigm where all persons share the same heatmap.
+ B=N for top_down paradigm where each person has its own heatmaps.
+
+ Args:
+ coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
+ batch_heatmaps (np.ndarray[B, K, H, W]): batch_heatmaps
+ kernel (int): Gaussian kernel size (K) for modulation.
+
+ Returns:
+ np.ndarray([N, K, 2]): Refined coordinates.
+ """
+ if not isinstance(batch_heatmaps, np.ndarray):
+ batch_heatmaps = batch_heatmaps.cpu().numpy()
+ B, K, H, W = batch_heatmaps.shape
+ N = coords.shape[0]
+ assert (B == 1 or B == N)
+ for heatmaps in batch_heatmaps:
+ for heatmap in heatmaps:
+ cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
+ np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
+ np.log(batch_heatmaps, batch_heatmaps)
+
+ batch_heatmaps_pad = np.pad(batch_heatmaps, ((0, 0), (0, 0), (1, 1),
+ (1, 1)),
+ mode='edge').flatten()
+
+ index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
+ index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
+ index = index.astype(int).reshape(-1, 1)
+ i_ = batch_heatmaps_pad[index]
+ ix1 = batch_heatmaps_pad[index + 1]
+ iy1 = batch_heatmaps_pad[index + W + 2]
+ ix1y1 = batch_heatmaps_pad[index + W + 3]
+ ix1_y1_ = batch_heatmaps_pad[index - W - 3]
+ ix1_ = batch_heatmaps_pad[index - 1]
+ iy1_ = batch_heatmaps_pad[index - 2 - W]
+
+ dx = 0.5 * (ix1 - ix1_)
+ dy = 0.5 * (iy1 - iy1_)
+ derivative = np.concatenate([dx, dy], axis=1)
+ derivative = derivative.reshape(N, K, 2, 1)
+ dxx = ix1 - 2 * i_ + ix1_
+ dyy = iy1 - 2 * i_ + iy1_
+ dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
+ hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
+ hessian = hessian.reshape(N, K, 2, 2)
+ hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
+ coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
+ return coords
+
+ def transform_preds_udp(self,
+ coords,
+ center,
+ scale,
+ output_size,
+ use_udp=True):
+ """Get final keypoint predictions from heatmaps and apply scaling and
+ translation to map them back to the image.
+
+ Note:
+ num_keypoints: K
+
+ Args:
+ coords (np.ndarray[K, ndims]):
+
+ * If ndims=2, coords are predicted keypoint locations.
+ * If ndims=4, coords are composed of (x, y, scores, tags)
+ * If ndims=5, coords are composed of (x, y, scores, tags,
+ flipped_tags)
+
+ center (np.ndarray[2, ]): Center of the bounding box (x, y).
+ scale (np.ndarray[2, ]): Scale of the bounding box
+ wrt [width, height].
+ output_size (np.ndarray[2, ] | list(2,)): Size of the
+ destination heatmaps.
+ use_udp (bool): Use unbiased data processing
+
+ Returns:
+ np.ndarray: Predicted coordinates in the images.
+ """
+
+ assert coords.shape[1] in (2, 4, 5)
+ assert len(center) == 2
+ assert len(scale) == 2
+ assert len(output_size) == 2
+
+ # Recover the scale which is normalized by a factor of 200.
+ scale = scale * 200.0
+
+ if use_udp:
+ scale_x = scale[0] / (output_size[0] - 1.0)
+ scale_y = scale[1] / (output_size[1] - 1.0)
+ else:
+ scale_x = scale[0] / output_size[0]
+ scale_y = scale[1] / output_size[1]
+
+ target_coords = np.ones_like(coords)
+ target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[
+ 0] * 0.5
+ target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[
+ 1] * 0.5
+
+ return target_coords
+
+ def get_final_preds(self, heatmaps, center, scale, kernelsize=11):
+ """the highest heatvalue location with a quarter offset in the
+ direction from the highest response to the second highest response.
+
+ Args:
+ heatmaps (numpy.ndarray): The predicted heatmaps
+ center (numpy.ndarray): The boxes center
+ scale (numpy.ndarray): The scale factor
+
+ Returns:
+ preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
+ maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
+ """
+ coords, maxvals = self.get_max_preds(heatmaps)
+
+ N, K, H, W = heatmaps.shape
+
+ if self.use_dark:
+ coords = self.post_datk_udp(coords, heatmaps, kernelsize)
+ preds = coords.copy()
+ # Transform back to the image
+ for i in range(N):
+ preds[i] = self.transform_preds_udp(preds[i], center[i],
+ scale[i], [W, H])
+ else:
+ for n in range(coords.shape[0]):
+ for p in range(coords.shape[1]):
+ hm = heatmaps[n][p]
+ px = int(math.floor(coords[n][p][0] + 0.5))
+ py = int(math.floor(coords[n][p][1] + 0.5))
+ if 1 < px < W - 1 and 1 < py < H - 1:
+ diff = np.array([
+ hm[py][px + 1] - hm[py][px - 1],
+ hm[py + 1][px] - hm[py - 1][px]
+ ])
+ coords[n][p] += np.sign(diff) * .25
+ preds = coords.copy()
+
+ # Transform back
+ for i in range(coords.shape[0]):
+ preds[i] = transform_preds(coords[i], center[i], scale[i],
+ [W, H])
+
+ return preds, maxvals
+
+ def __call__(self, output, center, scale):
+ preds, maxvals = self.get_final_preds(output, center, scale)
+ outputs = [[
+ np.concatenate(
+ (preds, maxvals), axis=-1), np.mean(
+ maxvals, axis=1)
+ ]]
+ return outputs
\ No newline at end of file
diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py
index f8b183e27bd..a20189c9487 100644
--- a/ppdet/modeling/backbones/__init__.py
+++ b/ppdet/modeling/backbones/__init__.py
@@ -62,4 +62,5 @@
from .mobileone import *
from .trans_encoder import *
from .focalnet import *
-from .vit_mae import *
+from .vitpose import *
+from .vit_mae import *
\ No newline at end of file
diff --git a/ppdet/modeling/backbones/vitpose.py b/ppdet/modeling/backbones/vitpose.py
new file mode 100644
index 00000000000..23e00be1e76
--- /dev/null
+++ b/ppdet/modeling/backbones/vitpose.py
@@ -0,0 +1,320 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
+# reference: https://arxiv.org/abs/2010.11929
+
+from collections.abc import Callable
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+from paddle.nn.initializer import TruncatedNormal, Constant, Normal
+from ppdet.core.workspace import register, serializable
+
+trunc_normal_ = TruncatedNormal(std=.02)
+
+
+def to_2tuple(x):
+ if isinstance(x, (list, tuple)):
+ return x
+ return tuple([x] * 2)
+
+
+def drop_path(x, drop_prob=0., training=False):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+ See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
+ """
+ if drop_prob == 0. or not training:
+ return x
+ keep_prob = paddle.to_tensor(1.0 - drop_prob).astype(x.dtype)
+ shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
+ random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype)
+ random_tensor = paddle.floor(random_tensor) # binarize
+ output = x.divide(keep_prob) * random_tensor
+ return output
+
+
+class DropPath(nn.Layer):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+ """
+
+ def __init__(self, drop_prob=None):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+
+class Identity(nn.Layer):
+ def __init__(self):
+ super(Identity, self).__init__()
+
+ def forward(self, input):
+ return input
+
+
+class Mlp(nn.Layer):
+ def __init__(self,
+ in_features,
+ hidden_features=None,
+ out_features=None,
+ act_layer=nn.GELU,
+ drop=0.):
+ super().__init__()
+ out_features = out_features or in_features
+ hidden_features = hidden_features or in_features
+ self.fc1 = nn.Linear(in_features, hidden_features)
+ self.act = act_layer()
+ self.fc2 = nn.Linear(hidden_features, out_features)
+ self.drop = nn.Dropout(drop)
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+
+class Attention(nn.Layer):
+ def __init__(self,
+ dim,
+ num_heads=8,
+ qkv_bias=False,
+ qk_scale=None,
+ attn_drop=0.,
+ proj_drop=0.):
+ super().__init__()
+ self.num_heads = num_heads
+ head_dim = dim // num_heads
+ self.scale = qk_scale or head_dim**-0.5
+
+ self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
+
+ self.attn_drop = nn.Dropout(attn_drop)
+ self.proj = nn.Linear(dim, dim)
+ self.proj_drop = nn.Dropout(proj_drop)
+
+ def forward(self, x):
+
+ N, C = x.shape[1:]
+ qkv = self.qkv(x).reshape((-1, N, 3, self.num_heads, C //
+ self.num_heads)).transpose((2, 0, 3, 1, 4))
+
+ q, k, v = qkv[0], qkv[1], qkv[2]
+
+ attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale
+ attn = nn.functional.softmax(attn, axis=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((-1, N, C))
+ x = self.proj(x)
+
+ x = self.proj_drop(x)
+ return x
+
+
+class Block(nn.Layer):
+ def __init__(self,
+ dim,
+ num_heads,
+ mlp_ratio=4.,
+ qkv_bias=False,
+ qk_scale=None,
+ drop=0.,
+ attn_drop=0.,
+ drop_path=0.,
+ act_layer=nn.GELU,
+ norm_layer='nn.LayerNorm',
+ epsilon=1e-5):
+ super().__init__()
+ if isinstance(norm_layer, str):
+ self.norm1 = eval(norm_layer)(dim, epsilon=epsilon)
+ elif isinstance(norm_layer, Callable):
+ self.norm1 = norm_layer(dim)
+ else:
+ raise TypeError(
+ "The norm_layer must be str or paddle.nn.layer.Layer class")
+ self.attn = Attention(
+ dim,
+ num_heads=num_heads,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ attn_drop=attn_drop,
+ proj_drop=drop)
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+ if isinstance(norm_layer, str):
+ self.norm2 = eval(norm_layer)(dim, epsilon=epsilon)
+ elif isinstance(norm_layer, Callable):
+ self.norm2 = norm_layer(dim)
+ else:
+ raise TypeError(
+ "The norm_layer must be str or paddle.nn.layer.Layer class")
+ mlp_hidden_dim = int(dim * mlp_ratio)
+ self.mlp = Mlp(in_features=dim,
+ hidden_features=mlp_hidden_dim,
+ act_layer=act_layer,
+ drop=drop)
+
+ def forward(self, x):
+ x = x + self.drop_path(self.attn(self.norm1(x)))
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+
+ return x
+
+
+class PatchEmbed(nn.Layer):
+ """ Image to Patch Embedding
+ """
+
+ def __init__(self,
+ img_size=224,
+ patch_size=16,
+ in_chans=3,
+ embed_dim=768,
+ ratio=1):
+ super().__init__()
+ img_size = to_2tuple(img_size)
+ patch_size = to_2tuple(patch_size)
+
+ num_patches = (img_size[1] // patch_size[1]) * (
+ img_size[0] // patch_size[0]) * (ratio**2)
+ self.img_size = img_size
+ self.patch_size = patch_size
+ self.num_patches = num_patches
+
+ self.proj = nn.Conv2D(
+ in_chans,
+ embed_dim,
+ kernel_size=patch_size,
+ stride=(patch_size[0] // ratio),
+ padding=(4 + 2 * (ratio // 2 - 1), 4 + 2 * (ratio // 2 - 1)))
+
+ def forward(self, x):
+ B, C, H, W = x.shape
+ assert H == self.img_size[0] and W == self.img_size[1], \
+ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
+
+ x = self.proj(x)
+ return x
+
+
+@register
+@serializable
+class ViT(nn.Layer):
+ """ Vision Transformer with support for patch input
+
+ This module is different from ppdet's VisionTransformer (from ppdet/modeling/backbones/vision_transformer.py);
+ the main differences are:
+ 1. PatchEmbed.proj here sets padding=(4 + 2 * (ratio // 2 - 1), 4 + 2 * (ratio // 2 - 1)),
+ while VisionTransformer does not.
+ 2. The Attention module here uses a standard qkv, whereas VisionTransformer provides more options.
+ 3. The MLP module here applies Dropout once, while VisionTransformer applies it twice.
+ 4. VisionTransformer provides an fpn layer, but this module does not.
+
+ """
+
+ def __init__(self,
+ img_size=224,
+ patch_size=16,
+ in_chans=3,
+ embed_dim=768,
+ depth=12,
+ num_heads=12,
+ mlp_ratio=4,
+ qkv_bias=False,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.,
+ norm_layer='nn.LayerNorm',
+ epsilon=1e-5,
+ ratio=1,
+ pretrained=None,
+ **kwargs):
+ super().__init__()
+
+ self.pretrained = pretrained
+ self.num_features = self.embed_dim = embed_dim
+
+ self.patch_embed = PatchEmbed(
+ img_size=img_size,
+ patch_size=patch_size,
+ in_chans=in_chans,
+ embed_dim=embed_dim,
+ ratio=ratio)
+ num_patches = self.patch_embed.num_patches
+
+ self.pos_embed = self.create_parameter(
+ shape=(1, num_patches + 1, embed_dim),
+ default_initializer=trunc_normal_)
+ self.add_parameter("pos_embed", self.pos_embed)
+
+ dpr = np.linspace(0, drop_path_rate, depth, dtype='float32')
+
+ self.blocks = nn.LayerList([
+ Block(
+ dim=embed_dim,
+ num_heads=num_heads,
+ mlp_ratio=mlp_ratio,
+ qkv_bias=qkv_bias,
+ qk_scale=qk_scale,
+ drop=drop_rate,
+ attn_drop=attn_drop_rate,
+ drop_path=dpr[i],
+ norm_layer=norm_layer,
+ epsilon=epsilon) for i in range(depth)
+ ])
+
+ self.last_norm = eval(norm_layer)(embed_dim, epsilon=epsilon)
+ trunc_normal_(self.pos_embed)
+ self._init_weights()
+
+ def _init_weights(self):
+ pretrained = self.pretrained
+
+ if pretrained:
+
+ if 'http' in pretrained: #URL
+ path = paddle.utils.download.get_weights_path_from_url(
+ pretrained)
+ else: #model in local path
+ path = pretrained
+
+ load_state_dict = paddle.load(path)
+ self.set_state_dict(load_state_dict)
+ print("Load load_state_dict:", path)
+
+ def forward_features(self, x):
+
+ B = paddle.shape(x)[0]
+ x = self.patch_embed(x)
+ B, D, Hp, Wp = x.shape
+ x = x.flatten(2).transpose([0, 2, 1])
+ x = x + self.pos_embed[:, 1:] + self.pos_embed[:, :1]
+
+ for blk in self.blocks:
+ x = blk(x)
+
+ x = self.last_norm(x)
+ xp = paddle.reshape(
+ paddle.transpose(
+ x, perm=[0, 2, 1]), shape=[B, -1, Hp, Wp])
+
+ return xp
diff --git a/ppdet/modeling/heads/__init__.py b/ppdet/modeling/heads/__init__.py
index 07df124cd3a..44a9fa85d19 100644
--- a/ppdet/modeling/heads/__init__.py
+++ b/ppdet/modeling/heads/__init__.py
@@ -39,6 +39,7 @@
from . import ppyoloe_contrast_head
from . import centertrack_head
from . import sparse_roi_head
+from . import vitpose_head
from .bbox_head import *
from .mask_head import *
@@ -68,3 +69,4 @@
from .centertrack_head import *
from .sparse_roi_head import *
from .petr_head import *
+from .vitpose_head import *
\ No newline at end of file
diff --git a/ppdet/modeling/heads/vitpose_head.py b/ppdet/modeling/heads/vitpose_head.py
new file mode 100644
index 00000000000..43908ed57b1
--- /dev/null
+++ b/ppdet/modeling/heads/vitpose_head.py
@@ -0,0 +1,278 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from ppdet.core.workspace import register
+from ppdet.modeling.keypoint_utils import resize, flip_back
+from paddle.nn.initializer import TruncatedNormal, Constant, Normal
+from ppdet.modeling.layers import ConvTranspose2d, BatchNorm2d
+
+trunc_normal_ = TruncatedNormal(std=.02)
+normal_ = Normal(std=0.001)
+zeros_ = Constant(value=0.)
+ones_ = Constant(value=1.)
+
+__all__ = ['TopdownHeatmapSimpleHead']
+
+
+@register
+class TopdownHeatmapSimpleHead(nn.Layer):
+ def __init__(self,
+ in_channels=768,
+ out_channels=17,
+ num_deconv_layers=3,
+ num_deconv_filters=(256, 256, 256),
+ num_deconv_kernels=(4, 4, 4),
+ extra=None,
+ in_index=0,
+ input_transform=None,
+ align_corners=False,
+ upsample=0,
+ flip_pairs=None,
+ shift_heatmap=False,
+ target_type='GaussianHeatmap'):
+ super(TopdownHeatmapSimpleHead, self).__init__()
+
+ self.in_channels = in_channels
+ self.upsample = upsample
+ self.flip_pairs = flip_pairs
+ self.shift_heatmap = shift_heatmap
+ self.target_type = target_type
+
+ self._init_inputs(in_channels, in_index, input_transform)
+ self.in_index = in_index
+ self.align_corners = align_corners
+
+ if extra is not None and not isinstance(extra, dict):
+ raise TypeError('extra should be dict or None.')
+
+ if num_deconv_layers > 0:
+ self.deconv_layers = self._make_deconv_layer(
+ num_deconv_layers,
+ num_deconv_filters,
+ num_deconv_kernels, )
+ elif num_deconv_layers == 0:
+ self.deconv_layers = nn.Identity()
+ else:
+ raise ValueError(
+ f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
+
+ identity_final_layer = False
+ if extra is not None and 'final_conv_kernel' in extra:
+ assert extra['final_conv_kernel'] in [0, 1, 3]
+ if extra['final_conv_kernel'] == 3:
+ padding = 1
+ elif extra['final_conv_kernel'] == 1:
+ padding = 0
+ else:
+ # 0 for Identity mapping.
+ identity_final_layer = True
+ kernel_size = extra['final_conv_kernel']
+ else:
+ kernel_size = 1
+ padding = 0
+
+ if identity_final_layer:
+ self.final_layer = nn.Identity()
+ else:
+ conv_channels = num_deconv_filters[
+ -1] if num_deconv_layers > 0 else self.in_channels
+
+ layers = []
+ if extra is not None:
+ num_conv_layers = extra.get('num_conv_layers', 0)
+ num_conv_kernels = extra.get('num_conv_kernels',
+ [1] * num_conv_layers)
+
+ for i in range(num_conv_layers):
+ layers.append(
+ nn.Conv2D(
+ in_channels=conv_channels,
+ out_channels=conv_channels,
+ kernel_size=num_conv_kernels[i],
+ stride=1,
+ padding=(num_conv_kernels[i] - 1) // 2))
+ layers.append(nn.BatchNorm2D(conv_channels))
+ layers.append(nn.ReLU())
+
+ layers.append(
+ nn.Conv2D(
+ in_channels=conv_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=(padding, padding)))
+
+ if len(layers) > 1:
+ self.final_layer = nn.Sequential(*layers)
+ else:
+ self.final_layer = layers[0]
+
+ self.init_weights()
+
+ @staticmethod
+ def _get_deconv_cfg(deconv_kernel):
+ """Get configurations for deconv layers."""
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+ else:
+ raise ValueError(f'Not supported num_kernels ({deconv_kernel}).')
+
+ return deconv_kernel, padding, output_padding
+
+ def _init_inputs(self, in_channels, in_index, input_transform):
+ """Check and initialize input transforms.
+ """
+
+ if input_transform is not None:
+ assert input_transform in ['resize_concat', 'multiple_select']
+ self.input_transform = input_transform
+ self.in_index = in_index
+ if input_transform is not None:
+ assert isinstance(in_channels, (list, tuple))
+ assert isinstance(in_index, (list, tuple))
+ assert len(in_channels) == len(in_index)
+ if input_transform == 'resize_concat':
+ self.in_channels = sum(in_channels)
+ else:
+ self.in_channels = in_channels
+ else:
+ assert isinstance(in_channels, int)
+ assert isinstance(in_index, int)
+ self.in_channels = in_channels
+
+ def _transform_inputs(self, inputs):
+ """Transform inputs for decoder.
+ """
+ if not isinstance(inputs, list):
+ if not isinstance(inputs, list):
+
+ if self.upsample > 0:
+ inputs = resize(
+ input=F.relu(inputs),
+ scale_factor=self.upsample,
+ mode='bilinear',
+ align_corners=self.align_corners)
+ return inputs
+
+ if self.input_transform == 'resize_concat':
+ inputs = [inputs[i] for i in self.in_index]
+ upsampled_inputs = [
+ resize(
+ input=x,
+ size=inputs[0].shape[2:],
+ mode='bilinear',
+ align_corners=self.align_corners) for x in inputs
+ ]
+ inputs = paddle.concat(upsampled_inputs, dim=1)
+ elif self.input_transform == 'multiple_select':
+ inputs = [inputs[i] for i in self.in_index]
+ else:
+ inputs = inputs[self.in_index]
+
+ return inputs
+
+ def forward(self, x):
+ """Forward function."""
+ x = self._transform_inputs(x)
+ x = self.deconv_layers(x)
+ x = self.final_layer(x)
+
+ return x
+
+ def inference_model(self, x, flip_pairs=None):
+ """Inference function.
+
+ Returns:
+ output_heatmap (np.ndarray): Output heatmaps.
+
+ Args:
+ x (Tensor[N,K,H,W]): Input features.
+ flip_pairs (None | list[tuple]):
+ Pairs of keypoints which are mirrored.
+ """
+ output = self.forward(x)
+
+ if flip_pairs is not None:
+ output_heatmap = flip_back(
+ output, self.flip_pairs, target_type=self.target_type)
+ # feature is not aligned, shift flipped heatmap for higher accuracy
+ if self.shift_heatmap:
+ output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1]
+ else:
+ output_heatmap = output
+ return output_heatmap
+
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
+ """Make deconv layers."""
+ if num_layers != len(num_filters):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_filters({len(num_filters)})'
+ raise ValueError(error_msg)
+ if num_layers != len(num_kernels):
+ error_msg = f'num_layers({num_layers}) ' \
+ f'!= length of num_kernels({len(num_kernels)})'
+ raise ValueError(error_msg)
+
+ layers = []
+ for i in range(num_layers):
+ kernel, padding, output_padding = \
+ self._get_deconv_cfg(num_kernels[i])
+
+ planes = num_filters[i]
+ layers.append(
+ ConvTranspose2d(
+ in_channels=self.in_channels,
+ out_channels=planes,
+ kernel_size=kernel,
+ stride=2,
+ padding=padding,
+ output_padding=output_padding,
+ bias=False))
+ layers.append(nn.BatchNorm2D(planes))
+ layers.append(nn.ReLU())
+ self.in_channels = planes
+
+ return nn.Sequential(*layers)
+
+ def init_weights(self):
+ """Initialize model weights."""
+ if not isinstance(self.deconv_layers, nn.Identity):
+
+ for m in self.deconv_layers:
+ if isinstance(m, nn.BatchNorm2D):
+ ones_(m.weight)
+ ones_(m.bias)
+ if not isinstance(self.final_layer, nn.Conv2D):
+
+ for m in self.final_layer:
+ if isinstance(m, nn.Conv2D):
+ normal_(m.weight)
+ zeros_(m.bias)
+ elif isinstance(m, nn.BatchNorm2D):
+ ones_(m.weight)
+ ones_(m.bias)
+ else:
+ normal_(self.final_layer.weight)
+ zeros_(self.final_layer.bias)
diff --git a/ppdet/modeling/keypoint_utils.py b/ppdet/modeling/keypoint_utils.py
index d5cbeb3ba68..377f1d75c94 100644
--- a/ppdet/modeling/keypoint_utils.py
+++ b/ppdet/modeling/keypoint_utils.py
@@ -17,6 +17,7 @@
import cv2
import numpy as np
+import paddle.nn.functional as F
def get_affine_mat_kernel(h, w, s, inv=False):
@@ -340,3 +341,63 @@ def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
keep = keep[:keep_cnt]
return keep
+
+
+def resize(input,
+ size=None,
+ scale_factor=None,
+ mode='nearest',
+ align_corners=None,
+ warning=True):
+ if warning:
+ if size is not None and align_corners:
+ input_h, input_w = tuple(int(x) for x in input.shape[2:])
+ output_h, output_w = tuple(int(x) for x in size)
+ if output_h > input_h or output_w > output_h:
+ if ((output_h > 1 and output_w > 1 and input_h > 1 and
+ input_w > 1) and (output_h - 1) % (input_h - 1) and
+ (output_w - 1) % (input_w - 1)):
+ warnings.warn(
+ f'When align_corners={align_corners}, '
+ 'the output would more aligned if '
+ f'input size {(input_h, input_w)} is `x+1` and '
+ f'out size {(output_h, output_w)} is `nx+1`')
+
+ return F.interpolate(input, size, scale_factor, mode, align_corners)
+
+
+def flip_back(output_flipped, flip_pairs, target_type='GaussianHeatmap'):
+ """Flip the flipped heatmaps back to the original form.
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+ - heatmap height: H
+ - heatmap width: W
+ Args:
+ output_flipped (np.ndarray[N, K, H, W]): The output heatmaps obtained
+ from the flipped images.
+ flip_pairs (list[tuple()): Pairs of keypoints which are mirrored
+ (for example, left ear -- right ear).
+ target_type (str): GaussianHeatmap or CombinedTarget
+ Returns:
+ np.ndarray: heatmaps that flipped back to the original image
+ """
+ assert len(output_flipped.shape) == 4, \
+ 'output_flipped should be [batch_size, num_keypoints, height, width]'
+ shape_ori = output_flipped.shape
+ channels = 1
+ if target_type.lower() == 'CombinedTarget'.lower():
+ channels = 3
+ output_flipped[:, 1::3, ...] = -output_flipped[:, 1::3, ...]
+ output_flipped = output_flipped.reshape((shape_ori[0], -1, channels,
+ shape_ori[2], shape_ori[3]))
+ output_flipped_back = output_flipped.clone()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ output_flipped_back[:, left, ...] = output_flipped[:, right, ...]
+ output_flipped_back[:, right, ...] = output_flipped[:, left, ...]
+ output_flipped_back = output_flipped_back.reshape(shape_ori)
+ # Flip horizontally
+ output_flipped_back = output_flipped_back[..., ::-1]
+ return output_flipped_back
diff --git a/ppdet/optimizer/adamw.py b/ppdet/optimizer/adamw.py
index 6ecf676d632..12ab619a336 100644
--- a/ppdet/optimizer/adamw.py
+++ b/ppdet/optimizer/adamw.py
@@ -50,7 +50,7 @@ def layerwise_lr_decay(decay_rate, name_dict, n_layers, param):
layer = int(static_name[idx:].split('.')[1])
ratio = decay_rate**(n_layers - layer)
- elif 'cls_token' in static_name or 'patch_embed' in static_name:
+ elif 'cls_token' in static_name or 'patch_embed' in static_name or 'pos_embed' in static_name:
ratio = decay_rate**(n_layers + 1)
if IS_PADDLE_LATER_2_4:
From 67df6a1ea877e970e69e941c9317de697bf14ed1 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Thu, 9 Mar 2023 14:35:37 +0800
Subject: [PATCH 040/116] tood model fix assigner_bug (#7896)
---
ppdet/modeling/heads/tood_head.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ppdet/modeling/heads/tood_head.py b/ppdet/modeling/heads/tood_head.py
index 81b2edd7b72..f463ef2397b 100644
--- a/ppdet/modeling/heads/tood_head.py
+++ b/ppdet/modeling/heads/tood_head.py
@@ -293,7 +293,7 @@ def get_loss(self, head_outs, gt_meta):
pad_gt_mask = gt_meta['pad_gt_mask']
# label assignment
if gt_meta['epoch_id'] < self.static_assigner_epoch:
- assigned_labels, assigned_bboxes, assigned_scores, _ = self.static_assigner(
+ assigned_labels, assigned_bboxes, assigned_scores = self.static_assigner(
anchors,
num_anchors_list,
gt_labels,
@@ -302,7 +302,7 @@ def get_loss(self, head_outs, gt_meta):
bg_index=self.num_classes)
alpha_l = 0.25
else:
- assigned_labels, assigned_bboxes, assigned_scores, _ = self.assigner(
+ assigned_labels, assigned_bboxes, assigned_scores = self.assigner(
pred_scores.detach(),
pred_bboxes.detach() * stride_tensor,
bbox_center(anchors),
From 63f0721b2d34034a1be6a0104896e1a75c7bad94 Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Thu, 9 Mar 2023 20:57:30 +0800
Subject: [PATCH 041/116] modify the deformable_attention_core_func code to fit
paddle-trt (#7900)
---
ppdet/modeling/transformers/utils.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/ppdet/modeling/transformers/utils.py b/ppdet/modeling/transformers/utils.py
index a40950d9ff5..b19233fdeec 100644
--- a/ppdet/modeling/transformers/utils.py
+++ b/ppdet/modeling/transformers/utils.py
@@ -74,8 +74,8 @@ def deformable_attention_core_func(value, value_spatial_shapes,
"""
Args:
value (Tensor): [bs, value_length, n_head, c]
- value_spatial_shapes (Tensor): [n_levels, 2]
- value_level_start_index (Tensor): [n_levels]
+ value_spatial_shapes (Tensor|List): [n_levels, 2]
+ value_level_start_index (Tensor|List): [n_levels]
sampling_locations (Tensor): [bs, query_length, n_head, n_levels, n_points, 2]
attention_weights (Tensor): [bs, query_length, n_head, n_levels, n_points]
@@ -85,8 +85,8 @@ def deformable_attention_core_func(value, value_spatial_shapes,
bs, _, n_head, c = value.shape
_, Len_q, _, n_levels, n_points, _ = sampling_locations.shape
- value_list = value.split(
- value_spatial_shapes.prod(1).split(n_levels), axis=1)
+ split_shape = [h * w for h, w in value_spatial_shapes]
+ value_list = value.split(split_shape, axis=1)
sampling_grids = 2 * sampling_locations - 1
sampling_value_list = []
for level, (h, w) in enumerate(value_spatial_shapes):
From de851c1317da4fbe521b4fe96e3f8a318d111c7c Mon Sep 17 00:00:00 2001
From: thinkthinking <61035602+thinkthinking@users.noreply.github.com>
Date: Fri, 10 Mar 2023 15:17:50 +0800
Subject: [PATCH 042/116] [Docs]update frontpage readme.cn (#7910)
---
README_cn.md | 110 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 88 insertions(+), 22 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index 15b0896cbb7..e61123e5cdd 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -14,6 +14,39 @@
+## 💌目录
+- [💌目录](#目录)
+- [🌈简介](#简介)
+- [📣最新进展](#最新进展)
+- [👫开源社区](#开源社区)
+- [✨主要特性](#主要特性)
+ - [🧩模块化设计](#模块化设计)
+ - [📱丰富的模型库](#丰富的模型库)
+ - [🎗️产业特色模型|产业工具](#️产业特色模型产业工具)
+ - [💡🏆产业级部署实践](#产业级部署实践)
+- [🍱安装](#安装)
+- [🔥教程](#教程)
+- [🔑FAQ](#faq)
+- [🧩模块组件](#模块组件)
+- [📱模型库](#模型库)
+- [⚖️模型性能对比](#️模型性能对比)
+ - [🖥️服务器端模型性能对比](#️服务器端模型性能对比)
+ - [⌚️移动端模型性能对比](#️移动端模型性能对比)
+- [🎗️产业特色模型|产业工具](#️产业特色模型产业工具-1)
+ - [💎PP-YOLOE 高精度目标检测模型](#pp-yoloe-高精度目标检测模型)
+ - [💎PP-YOLOE-R 高性能旋转框检测模型](#pp-yoloe-r-高性能旋转框检测模型)
+ - [💎PP-YOLOE-SOD 高精度小目标检测模型](#pp-yoloe-sod-高精度小目标检测模型)
+ - [💫PP-PicoDet 超轻量实时目标检测模型](#pp-picodet-超轻量实时目标检测模型)
+ - [📡PP-Tracking 实时多目标跟踪系统](#pp-tracking-实时多目标跟踪系统)
+ - [⛷️PP-TinyPose 人体骨骼关键点识别](#️pp-tinypose-人体骨骼关键点识别)
+ - [🏃🏻PP-Human 实时行人分析工具](#pp-human-实时行人分析工具)
+ - [🏎️PP-Vehicle 实时车辆分析工具](#️pp-vehicle-实时车辆分析工具)
+- [💡产业实践范例](#产业实践范例)
+- [🏆企业应用案例](#企业应用案例)
+- [📝许可证书](#许可证书)
+- [📌引用](#引用)
+
+
## 🌈简介
PaddleDetection是一个基于PaddlePaddle的目标检测端到端开发套件,在提供丰富的模型组件和测试基准的同时,注重端到端的产业落地应用,通过打造产业级特色模型|工具、建设产业应用范例等手段,帮助开发者实现数据准备、模型选型、模型训练、模型部署的全流程打通,快速进行落地应用。
@@ -42,29 +75,41 @@ PaddleDetection是一个基于PaddlePaddle的目标检测端到端开发套件
-## ✨主要特性
-
-#### 🧩模块化设计
-PaddleDetection将检测模型解耦成不同的模块组件,通过自定义模块组件组合,用户可以便捷高效地完成检测模型的搭建。`传送门`:[🧩模块组件](#模块组件)。
-
-#### 📱丰富的模型库
-PaddleDetection支持大量的最新主流的算法基准以及预训练模型,涵盖2D/3D目标检测、实例分割、人脸检测、关键点检测、多目标跟踪、半监督学习等方向。`传送门`:[📱模型库](#模型库)、[⚖️模型性能对比](#️模型性能对比)。
-
-#### 🎗️产业特色模型|产业工具
-PaddleDetection打造产业级特色模型以及分析工具:PP-YOLOE+、PP-PicoDet、PP-TinyPose、PP-HumanV2、PP-Vehicle等,针对通用、高频垂类应用场景提供深度优化解决方案以及高度集成的分析工具,降低开发者的试错、选择成本,针对业务场景快速应用落地。`传送门`:[🎗️产业特色模型|产业工具](#️产业特色模型产业工具-1)。
-
-#### 💡🏆产业级部署实践
-PaddleDetection整理工业、农业、林业、交通、医疗、金融、能源电力等AI应用范例,打通数据标注-模型训练-模型调优-预测部署全流程,持续降低目标检测技术产业落地门槛。`传送门`:[💡产业实践范例](#产业实践范例)、[🏆企业应用案例](#企业应用案例)。
-
-
-
-
-
-
-
## 📣最新进展
-PaddleDetection 2.6版本发布! [点击查看版本更新介绍](https://github.com/PaddlePaddle/PaddleDetection/releases/tag/v2.6.0)
+**🔥PaddleDetection v2.6版本更新解读**
+
+
+

+
+
+
+ - `v2.6版本版本更新解读文章传送门`:[《PaddleDetection v2.6发布:目标小?数据缺?标注累?泛化差?PP新员逐一应对!》](https://mp.weixin.qq.com/s/rPwprZeHEpmGOe5wxrmO5g)
+ - `v2.6版本重点更新体验传送门`:
+ - [PP-YOLOE+:高精度通用目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+ - [PP-YOLOE-R:旋转框检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rotate/ppyoloe_r)
+ - [PP-YOLOE-SOD:小目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
+ - [PP-YOLOE-DOD:密集检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe/application)
+ - [PP-YOLOE+_t:超轻量通用目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+ - [PP-YOLOE+少样本学习方案](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/few-shot)
+ - [PP-YOLOE+半监督学习方案](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/semi_det/baseline)
+ - [PP-YOLOE+模型蒸馏方案](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe/distill)
+ - [PP-Human:行人分析工具箱,推理提速、多路视频流支持](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/deploy/pipeline)
+ - [PP-Vehicle:车辆分析工具箱,新增逆行、压线分析、推理提速、多路视频流支持](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/deploy/pipeline)
+ - [半监督检测算法合集](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/semi_det)
+ - [少样本学习算法合集](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/configs/few-shot)
+ - [模型蒸馏算法合集](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe/distill)
+ - [YOLO新增模型YOLOv8、YOLOv6-3.0](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs)
+ - [目标检测算法新增DINO](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/dino)
+ - [目标检测算法新增YOLOF](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolof)
+ - [新增ViTDet系列检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/vitdet)
+ - [新增目标检测算法CenterTrack](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mot/centertrack)
+ - [新增旋转框检测算法FCOSR](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rotate/fcosr)
+ - [新增实例分割算法QueryInst](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/queryinst)
+ - [新增3D关键点检测算法Metro3d](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/pose3d)
+ - [新增检测热力图可视化能力](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/docs/tutorials/GradCAM_cn.md)
+ - [Roadmap of PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/issues/7892)
+ - [飞桨黑客松第四期-PaddleDetection任务专区](https://github.com/PaddlePaddle/PaddleDetection/issues/7906)
## 👫开源社区
@@ -88,7 +133,8 @@ PaddleDetection 2.6版本发布! [点击查看版本更新介绍](https://github
- **🎈社区近期活动**
- **👀YOLO系列专题**
- - `文章传送门`:[YOLOv8来啦!YOLO内卷期模型怎么选?9+款AI硬件如何快速部署?深度解析](https://mp.weixin.qq.com/s/rPwprZeHEpmGOe5wxrmO5g)
+
+ - `文章传送门`:[YOLOv8来啦!YOLO内卷期模型怎么选?9+款AI硬件如何快速部署?深度解析](https://mp.weixin.qq.com/s/SLITj5k120d_fQc7jEO8Vw)
- `代码传送门`:[PaddleYOLO全系列](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/docs/feature_models/PaddleYOLO_MODEL.md)
@@ -126,6 +172,26 @@ PaddleDetection 2.6版本发布! [点击查看版本更新介绍](https://github
- **🏅️社区贡献**
- `活动链接传送门`:[Yes, PP-YOLOE! 基于PP-YOLOE的算法开发](https://github.com/PaddlePaddle/PaddleDetection/issues/7345)
+## ✨主要特性
+
+#### 🧩模块化设计
+PaddleDetection将检测模型解耦成不同的模块组件,通过自定义模块组件组合,用户可以便捷高效地完成检测模型的搭建。`传送门`:[🧩模块组件](#模块组件)。
+
+#### 📱丰富的模型库
+PaddleDetection支持大量的最新主流的算法基准以及预训练模型,涵盖2D/3D目标检测、实例分割、人脸检测、关键点检测、多目标跟踪、半监督学习等方向。`传送门`:[📱模型库](#模型库)、[⚖️模型性能对比](#️模型性能对比)。
+
+#### 🎗️产业特色模型|产业工具
+PaddleDetection打造产业级特色模型以及分析工具:PP-YOLOE+、PP-PicoDet、PP-TinyPose、PP-HumanV2、PP-Vehicle等,针对通用、高频垂类应用场景提供深度优化解决方案以及高度集成的分析工具,降低开发者的试错、选择成本,针对业务场景快速应用落地。`传送门`:[🎗️产业特色模型|产业工具](#️产业特色模型产业工具-1)。
+
+#### 💡🏆产业级部署实践
+PaddleDetection整理工业、农业、林业、交通、医疗、金融、能源电力等AI应用范例,打通数据标注-模型训练-模型调优-预测部署全流程,持续降低目标检测技术产业落地门槛。`传送门`:[💡产业实践范例](#产业实践范例)、[🏆企业应用案例](#企业应用案例)。
+
+
+
+
+
+
+
## 🍱安装
From e3f860d466e480868c3f27474d51eecb9ced4526 Mon Sep 17 00:00:00 2001
From: Zhao-Yian <77494834+Zhao-Yian@users.noreply.github.com>
Date: Fri, 10 Mar 2023 17:50:43 +0800
Subject: [PATCH 043/116] [doc] add README for group detr (#7881)
* [doc] add README for group detr
---
configs/group_detr/README.md | 49 ++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
create mode 100644 configs/group_detr/README.md
diff --git a/configs/group_detr/README.md b/configs/group_detr/README.md
new file mode 100644
index 00000000000..df78c48d3f2
--- /dev/null
+++ b/configs/group_detr/README.md
@@ -0,0 +1,49 @@
+# Group DETR: Fast DETR training with group-wise one-to-many assignment
+# Group DETR v2: Strong object detector with encoder-decoder pretraining
+
+## Introduction
+
+[Group DETR](https://arxiv.org/pdf/2207.13085.pdf) is an object detection model based on DETR. We reproduced the model of the paper.
+
+[Group DETR v2](https://arxiv.org/pdf/2211.03594.pdf) is a strong object detection model based on DINO and Group DETR. We reproduced the model of the paper.
+
+## Model Zoo
+
+| Backbone | Model | Epochs | Resolution |Box AP | Config | Download |
+|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:--------------------------------------------------------------------------------:|:------:|
+| R-50 | dino_r50_4scale | 12 | (800, 1333) | 49.6 | [config](./group_dino_r50_4scale_1x_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/group_dino_r50_4scale_1x_coco.pdparams) |
+| Vit-huge | dino_vit_huge_4scale | 12 | (1184, 2000) | 63.3 | [config](./group_dino_vit_huge_4scale_1x_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/group_dino_vit_huge_4scale_1x_coco.pdparams) |
+
+**Notes:**
+
+- Group DETR is trained on COCO train2017 dataset and evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
+- Group DETRv2 requires a ViT-Huge encoder pre-trained and fine-tuned on ImageNet-1K in a self-supervised manner, a detector pre-trained on Objects365, and finally it is fine-tuned on COCO train2017. Group DETRv2 is also evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
+- Group DETR and Group DETRv2 are both trained with 4 GPUs.
+
+GPU multi-card training
+```bash
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/group_detr/group_dino_r50_4scale_1x_coco.yml --fleet --eval
+```
+
+```bash
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/group_detr/group_dino_vit_huge_4scale_1x_coco.yml --fleet --eval
+```
+
+## Citations
+```
+@article{chen2022group,
+ title={Group DETR: Fast DETR training with group-wise one-to-many assignment},
+ author={Chen, Qiang and Chen, Xiaokang and Wang, Jian and Feng, Haocheng and Han, Junyu and Ding, Errui and Zeng, Gang and Wang, Jingdong},
+ journal={arXiv preprint arXiv:2207.13085},
+ volume={1},
+ number={2},
+ year={2022}
+}
+
+@article{chen2022group,
+ title={Group DETR v2: Strong object detector with encoder-decoder pretraining},
+ author={Chen, Qiang and Wang, Jian and Han, Chuchu and Zhang, Shan and Li, Zexian and Chen, Xiaokang and Chen, Jiahui and Wang, Xiaodi and Han, Shuming and Zhang, Gang and others},
+ journal={arXiv preprint arXiv:2211.03594},
+ year={2022}
+}
+```
From 172bca70a05e8b72dcbae0d2c58a5c94b7c39dbd Mon Sep 17 00:00:00 2001
From: thinkthinking <61035602+thinkthinking@users.noreply.github.com>
Date: Fri, 10 Mar 2023 23:58:02 +0800
Subject: [PATCH 044/116] Update README_cn.md
---
README_cn.md | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index e61123e5cdd..a7c587f8ad0 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -84,7 +84,8 @@ PaddleDetection是一个基于PaddlePaddle的目标检测端到端开发套件
- - `v2.6版本版本更新解读文章传送门`:[《PaddleDetection v2.6发布:目标小?数据缺?标注累?泛化差?PP新员逐一应对!》](https://mp.weixin.qq.com/s/rPwprZeHEpmGOe5wxrmO5g)
+ - `v2.6版本版本更新解读文章传送门`:
+ - [《PaddleDetection v2.6发布:目标小?数据缺?标注累?泛化差?PP新员逐一应对!》](https://mp.weixin.qq.com/s/SLITj5k120d_fQc7jEO8Vw)
- `v2.6版本重点更新体验传送门`:
- [PP-YOLOE+:高精度通用目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
- [PP-YOLOE-R:旋转框检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rotate/ppyoloe_r)
@@ -134,7 +135,7 @@ PaddleDetection是一个基于PaddlePaddle的目标检测端到端开发套件
- **👀YOLO系列专题**
- - `文章传送门`:[YOLOv8来啦!YOLO内卷期模型怎么选?9+款AI硬件如何快速部署?深度解析](https://mp.weixin.qq.com/s/SLITj5k120d_fQc7jEO8Vw)
+ - `文章传送门`:[YOLOv8来啦!YOLO内卷期模型怎么选?9+款AI硬件如何快速部署?深度解析](https://mp.weixin.qq.com/s/rPwprZeHEpmGOe5wxrmO5g)
- `代码传送门`:[PaddleYOLO全系列](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/docs/feature_models/PaddleYOLO_MODEL.md)
From d9553e34e17585b577ed4560cb19958333292c87 Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Mon, 13 Mar 2023 12:25:45 +0800
Subject: [PATCH 045/116] [Doc] update ppyoloe+ tiny models doc (#7920)
---
configs/ppyoloe/README.md | 4 ++--
configs/ppyoloe/README_cn.md | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/configs/ppyoloe/README.md b/configs/ppyoloe/README.md
index f8f79344e35..12a2b2d9f4c 100644
--- a/configs/ppyoloe/README.md
+++ b/configs/ppyoloe/README.md
@@ -52,10 +52,10 @@ PP-YOLOE is composed of following methods:
| Model | Epoch | GPU number | images/GPU | backbone | input shape | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | download | config |
|:--------:|:-----:|:----------:|:----------:|:----------:|:-----------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------------:| :------: |:--------:|
-| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
+| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.9 | 56.6 | 4.85 | 19.15 | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
| PP-YOLOE+_t-aux(640)-relu | 300 | 8 | 8 | cspresnet-t | 640 | 36.4 | 53.0 | 3.60 | 12.17 | 476.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml) |
| PP-YOLOE+_t-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 33.3 | 48.5 | 4.85 | 4.80 | 729.9 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml) |
-| PP-YOLOE+_t-aux(320)-relu | 300 | 8 | 8 | cspresnet-t | 320 | 29.5 | 43.7 | 3.60 | 3.04 | 984.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml) |
+| PP-YOLOE+_t-aux(320)-relu | 300 | 8 | 8 | cspresnet-t | 320 | 30.1 | 44.7 | 3.60 | 3.04 | 984.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml) |
### Comprehensive Metrics
diff --git a/configs/ppyoloe/README_cn.md b/configs/ppyoloe/README_cn.md
index fcd0624a0ae..99467daedd7 100644
--- a/configs/ppyoloe/README_cn.md
+++ b/configs/ppyoloe/README_cn.md
@@ -52,10 +52,10 @@ PP-YOLOE由以下方法组成
| 模型 | Epoch | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval
0.5:0.95 | Box APtest
0.5:0.95 | Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
|:----------:|:-----:|:--------:|:-----------:|:---------:|:--------:|:--------------------------:|:---------------------------:|:---------:|:--------:|:---------------------:| :------: |:--------:|
-| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.7 | 56.4 | 4.85 | 19.15 | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
+| PP-YOLOE+_t-aux(640) | 300 | 8 | 8 | cspresnet-t | 640 | 39.9 | 56.6 | 4.85 | 19.15 | 344.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_300e_coco.yml) |
| PP-YOLOE+_t-aux(640)-relu | 300 | 8 | 8 | cspresnet-t | 640 | 36.4 | 53.0 | 3.60 | 12.17 | 476.2 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_300e_coco.yml) |
| PP-YOLOE+_t-aux(320) | 300 | 8 | 8 | cspresnet-t | 320 | 33.3 | 48.5 | 4.85 | 4.80 | 729.9 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_320_300e_coco.yml) |
-| PP-YOLOE+_t-aux(320)-relu | 300 | 8 | 8 | cspresnet-t | 320 | 29.5 | 43.7 | 3.60 | 3.04 | 984.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml) |
+| PP-YOLOE+_t-aux(320)-relu | 300 | 8 | 8 | cspresnet-t | 320 | 30.1 | 44.7 | 3.60 | 3.04 | 984.8 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.pdparams) | [config](./ppyoloe_plus_crn_t_auxhead_relu_320_300e_coco.yml) |
### 综合指标
From 8d0e52a1c34a9c9c39f489ab77fa58c68b6a126d Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Mon, 13 Mar 2023 12:57:04 +0800
Subject: [PATCH 046/116] fix bbox decode in detrpostprocess (#7916)
---
ppdet/modeling/post_process.py | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py
index af222444ee6..65e8d0b8bcf 100644
--- a/ppdet/modeling/post_process.py
+++ b/ppdet/modeling/post_process.py
@@ -455,8 +455,11 @@ def __init__(self,
use_focal_loss=False,
with_mask=False,
mask_threshold=0.5,
- use_avg_mask_score=False):
+ use_avg_mask_score=False,
+ bbox_decode_type='origin'):
super(DETRPostProcess, self).__init__()
+ assert bbox_decode_type in ['origin', 'pad']
+
self.num_classes = num_classes
self.num_top_queries = num_top_queries
self.dual_queries = dual_queries
@@ -465,6 +468,7 @@ def __init__(self,
self.with_mask = with_mask
self.mask_threshold = mask_threshold
self.use_avg_mask_score = use_avg_mask_score
+ self.bbox_decode_type = bbox_decode_type
def _mask_postprocess(self, mask_pred, score_pred, index):
mask_score = F.sigmoid(paddle.gather_nd(mask_pred, index))
@@ -478,7 +482,7 @@ def _mask_postprocess(self, mask_pred, score_pred, index):
def __call__(self, head_out, im_shape, scale_factor, pad_shape):
"""
- Decode the bbox.
+ Decode the bbox and mask.
Args:
head_out (tuple): bbox_pred, cls_logit and masks of bbox_head output.
@@ -502,9 +506,15 @@ def __call__(self, head_out, im_shape, scale_factor, pad_shape):
# calculate the original shape of the image
origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
img_h, img_w = paddle.split(origin_shape, 2, axis=-1)
- # calculate the shape of the image with padding
- out_shape = pad_shape / im_shape * origin_shape
- out_shape = out_shape.flip(1).tile([1, 2]).unsqueeze(1)
+ if self.bbox_decode_type == 'pad':
+ # calculate the shape of the image with padding
+ out_shape = pad_shape / im_shape * origin_shape
+ out_shape = out_shape.flip(1).tile([1, 2]).unsqueeze(1)
+ elif self.bbox_decode_type == 'origin':
+ out_shape = origin_shape.flip(1).tile([1, 2]).unsqueeze(1)
+ else:
+ raise Exception(
+ f'Wrong `bbox_decode_type`: {self.bbox_decode_type}.')
bbox_pred *= out_shape
scores = F.sigmoid(logits) if self.use_focal_loss else F.softmax(
From 5dc90ee87e58eb0ac9dc613c2a0fe0a1982530b2 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Mon, 13 Mar 2023 15:48:25 +0800
Subject: [PATCH 047/116] fit no tracker info error (#7926)
---
deploy/pipeline/pipeline.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/deploy/pipeline/pipeline.py b/deploy/pipeline/pipeline.py
index 3407f479e82..5a578daa295 100644
--- a/deploy/pipeline/pipeline.py
+++ b/deploy/pipeline/pipeline.py
@@ -534,7 +534,8 @@ def run(self, input, thread_idx=0):
else:
self.predict_image(input)
self.pipe_timer.info()
- self.mot_predictor.det_times.tracking_info(average=True)
+ if hasattr(self, 'mot_predictor'):
+ self.mot_predictor.det_times.tracking_info(average=True)
def predict_image(self, input):
# det
From 08a0d758857174a3ea02697c2c2f7eef99217b48 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Mon, 13 Mar 2023 20:14:11 +0800
Subject: [PATCH 048/116] fix video_vis args and web service args (#7931)
---
deploy/pipeline/pipeline.py | 6 +++---
deploy/serving/python/web_service.py | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/deploy/pipeline/pipeline.py b/deploy/pipeline/pipeline.py
index 5a578daa295..76aebe341ab 100644
--- a/deploy/pipeline/pipeline.py
+++ b/deploy/pipeline/pipeline.py
@@ -805,9 +805,9 @@ def predict_video(self, video_file, thread_idx=0):
self.pipe_timer.total_time.end()
if self.cfg['visual']:
_, _, fps = self.pipe_timer.get_total_time()
- im = self.visualize_video(frame_rgb, mot_res, frame_id,
- fps, entrance, records,
- center_traj) # visualize
+ im = self.visualize_video(
+ frame_rgb, mot_res, self.collector, frame_id, fps,
+ entrance, records, center_traj) # visualize
if len(self.pushurl) > 0:
pushstream.pipe.stdin.write(im.tobytes())
else:
diff --git a/deploy/serving/python/web_service.py b/deploy/serving/python/web_service.py
index 08be7d2c619..53791517a86 100644
--- a/deploy/serving/python/web_service.py
+++ b/deploy/serving/python/web_service.py
@@ -62,7 +62,6 @@ def parse_args(self, argv=None):
assert args.config is not None, \
"Please specify --config=configure_file_path."
args.service_config = self._parse_opt(args.opt, args.config)
- print("args config:", args.service_config)
args.model_config = PredictConfig(args.model_dir)
return args
@@ -254,6 +253,7 @@ def get_model_vars(model_dir, service_config):
GLOBAL_VAR['fetch_vars'] = fetch_vars
GLOBAL_VAR['preprocess_ops'] = FLAGS.model_config.preprocess_infos
GLOBAL_VAR['model_config'] = FLAGS.model_config
+ print(FLAGS)
# define the service
uci_service = DetectorService(name="ppdet")
uci_service.prepare_pipeline_config(yml_dict=FLAGS.service_config)
From a0b35c74761b07ab06441c7868487673cbe371da Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Tue, 14 Mar 2023 01:29:56 +0800
Subject: [PATCH 049/116] add focalnet ppyoloe+ model (#7923)
---
.../ppyoloe_convnext_tiny_36e_coco.yml | 2 +-
configs/focalnet/README.md | 19 ++++++
.../ppyoloe_plus_focalnet_tiny_36e_coco.yml | 61 +++++++++++++++++++
configs/swin/README.md | 2 +-
.../swin/ppyoloe_plus_swin_tiny_36e_coco.yml | 2 +-
5 files changed, 83 insertions(+), 3 deletions(-)
create mode 100644 configs/focalnet/README.md
create mode 100644 configs/focalnet/ppyoloe_plus_focalnet_tiny_36e_coco.yml
diff --git a/configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml b/configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml
index 360a368ec08..c4f27865520 100644
--- a/configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml
+++ b/configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml
@@ -29,7 +29,7 @@ ConvNeXt:
PPYOLOEHead:
static_assigner_epoch: 12
nms:
- nms_top_k: 10000
+ nms_top_k: 1000
keep_top_k: 300
score_threshold: 0.01
nms_threshold: 0.7
diff --git a/configs/focalnet/README.md b/configs/focalnet/README.md
new file mode 100644
index 00000000000..be7c0fd4224
--- /dev/null
+++ b/configs/focalnet/README.md
@@ -0,0 +1,19 @@
+# FocalNet (Focal Modulation Networks)
+
+## 模型库
+### FocalNet on COCO
+
+| 网络 | 输入尺寸 | 图片数/GPU | 学习率策略 | mAPval
0.5:0.95 | mAPval
0.5 | 下载链接 | 配置文件 |
+| :--------- | :---- | :-------: | :------: | :---------------------: | :----------------: | :-------: |:------: |
+| PP-YOLOE+ FocalNet-tiny | 640 | 8 | 36e | - | 46.6 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_focalnet_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_plus_focalnet_tiny_36e_coco.yml) |
+
+
+## Citations
+```
+@misc{yang2022focal,
+ title={Focal Modulation Networks},
+ author={Jianwei Yang and Chunyuan Li and Xiyang Dai and Jianfeng Gao},
+ journal={Advances in Neural Information Processing Systems (NeurIPS)},
+ year={2022}
+}
+```
diff --git a/configs/focalnet/ppyoloe_plus_focalnet_tiny_36e_coco.yml b/configs/focalnet/ppyoloe_plus_focalnet_tiny_36e_coco.yml
new file mode 100644
index 00000000000..4369d74b466
--- /dev/null
+++ b/configs/focalnet/ppyoloe_plus_focalnet_tiny_36e_coco.yml
@@ -0,0 +1,61 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_crn.yml',
+ '../ppyoloe/_base_/ppyoloe_plus_reader.yml',
+]
+depth_mult: 0.33 # s version
+width_mult: 0.50
+
+log_iter: 100
+snapshot_epoch: 4
+weights: output/ppyoloe_plus_focalnet_tiny_36e_coco/model_final
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_tiny_lrf_pretrained.pdparams
+
+
+architecture: PPYOLOE
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+ema_black_list: ['proj_conv.weight']
+custom_black_list: ['reduce_mean']
+
+PPYOLOE:
+ backbone: FocalNet
+ neck: CustomCSPPAN
+ yolo_head: PPYOLOEHead
+ post_process: ~
+
+FocalNet:
+ arch: 'focalnet_T_224_1k_lrf'
+ out_indices: [1, 2, 3]
+
+PPYOLOEHead:
+ static_assigner_epoch: 12
+ nms:
+ nms_top_k: 1000
+ keep_top_k: 300
+ score_threshold: 0.01
+ nms_threshold: 0.7
+
+
+TrainReader:
+ batch_size: 8
+
+
+epoch: 36
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [36]
+ - !LinearWarmup
+ start_factor: 0.1
+ steps: 1000
+
+OptimizerBuilder:
+ regularizer: false
+ optimizer:
+ type: AdamW
+ weight_decay: 0.0005
diff --git a/configs/swin/README.md b/configs/swin/README.md
index 617ee67d3ff..6cef97b0fb0 100644
--- a/configs/swin/README.md
+++ b/configs/swin/README.md
@@ -5,7 +5,7 @@
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| swin_T_224 | Faster R-CNN | 2 | 36e | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
-| swin_T_224 | PP-YOLOE+ | 8 | 36e | ---- | 43.6 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_swin_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_plus_swin_tiny_36e_coco.yml) |
+| swin_T_224 | PP-YOLOE+ | 8 | 36e | ---- | 44.7 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_swin_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_plus_swin_tiny_36e_coco.yml) |
## Citations
diff --git a/configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml b/configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml
index a5403d86e84..95d265dc390 100644
--- a/configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml
+++ b/configs/swin/ppyoloe_plus_swin_tiny_36e_coco.yml
@@ -36,7 +36,7 @@ SwinTransformer:
PPYOLOEHead:
static_assigner_epoch: 12
nms:
- nms_top_k: 10000
+ nms_top_k: 1000
keep_top_k: 300
score_threshold: 0.01
nms_threshold: 0.7
From fdbfbec6417d937ac7bc1557db37ff749922a752 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Fri, 17 Mar 2023 18:55:48 +0800
Subject: [PATCH 050/116] [TIPC] add dino_r50_4scale_1x_coco (#7941)
---
..._r50_4scale_1x_coco_train_infer_python.txt | 60 +++++++++++++++++++
1 file changed, 60 insertions(+)
create mode 100644 test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
diff --git a/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
new file mode 100644
index 00000000000..6d77fd46e27
--- /dev/null
+++ b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
@@ -0,0 +1,60 @@
+===========================train_params===========================
+model_name:dino_r50_4scale_1x_coco
+python:python3.7
+gpu_list:0|0,1
+use_gpu:True
+auto_cast:null
+epoch:lite_train_lite_infer=1|lite_train_whole_infer=1|whole_train_whole_infer=300
+save_dir:null
+TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_train_whole_infer=2
+pretrain_weights:https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+trained_model_name:model_final.pdparams
+train_infer_img_dir:./dataset/coco/test2017/
+filename:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml -o
+pact_train:tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --slim_config _template_pact -o
+fpgm_train:tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --slim_config _template_fpgm -o
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/dino/dino_r50_4scale_1x_coco.yml -o
+null:null
+##
+===========================infer_params===========================
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams
+norm_export:tools/export_model.py -c configs/dino/dino_r50_4scale_1x_coco.yml -o
+pact_export:tools/export_model.py -c configs/dino/dino_r50_4scale_1x_coco.yml --slim_config _template_pact -o
+fpgm_export:tools/export_model.py -c configs/dino/dino_r50_4scale_1x_coco.yml --slim_config _template_fpgm -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/dino/dino_r50_4scale_1x_coco.yml --slim_config configs/slim/post_quant/yolov3_darknet53_ptq.yml -o
+##
+infer_mode:norm|kl_quant
+infer_quant:False|True
+inference:./deploy/python/infer.py
+--device:gpu|cpu
+--enable_mkldnn:False
+--cpu_threads:4
+--batch_size:1|2
+--use_tensorrt:null
+--run_mode:paddle
+--model_dir:
+--image_dir:./dataset/coco/test2017/
+--save_log_path:null
+--run_benchmark:False
+--trt_max_shape:1600
+===========================train_benchmark_params==========================
+batch_size:4
+fp_items:fp32|fp16
+epoch:1
+repeat:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:null
+===========================infer_benchmark_params===========================
+numpy_infer_input:3x640x640_2.npy
\ No newline at end of file
From 2f343f5aa332b75a15c38f024cf90ac83c413b0b Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Tue, 21 Mar 2023 15:04:02 +0800
Subject: [PATCH 051/116] fix ema_filter_no_grad (#7974)
---
ppdet/optimizer/ema.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/ppdet/optimizer/ema.py b/ppdet/optimizer/ema.py
index 9cd9dca6379..70d006b8fe3 100644
--- a/ppdet/optimizer/ema.py
+++ b/ppdet/optimizer/ema.py
@@ -60,6 +60,12 @@ def __init__(self,
self.cycle_epoch = cycle_epoch
self.ema_black_list = self._match_ema_black_list(
model.state_dict().keys(), ema_black_list)
+ bn_states_names = get_bn_running_state_names(model)
+ if ema_filter_no_grad:
+ for n, p in model.named_parameters():
+ if p.stop_gradient and n not in bn_states_names:
+ self.ema_black_list.add(n)
+
self.state_dict = dict()
for k, v in model.state_dict().items():
if k in self.ema_black_list:
@@ -67,12 +73,6 @@ def __init__(self,
else:
self.state_dict[k] = paddle.zeros_like(v)
- bn_states_names = get_bn_running_state_names(model)
- if ema_filter_no_grad:
- for n, p in model.named_parameters():
- if p.stop_gradient == True and n not in bn_states_names:
- self.ema_black_list.append(n)
-
self._model_state = {
k: weakref.ref(p)
for k, p in model.state_dict().items()
From 4565c59302d27e9ad05c4374ff31180b81ee52a2 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Tue, 21 Mar 2023 16:03:42 +0800
Subject: [PATCH 052/116] [TIPC] fix deformable_detr repeat (#7975)
---
.../deformable_detr_r50_1x_coco_train_infer_python.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt b/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt
index f4b23d2d7dd..a2b9ae5c4a0 100644
--- a/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt
@@ -53,6 +53,6 @@ inference:./deploy/python/infer.py
batch_size:2
fp_items:fp32|fp16
epoch:1
-repeat:1
+repeat:2
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
\ No newline at end of file
From abcaa20112a13d3647e538abce55013b4273e91a Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Thu, 23 Mar 2023 10:17:22 +0800
Subject: [PATCH 053/116] Tipc (#7981)
* [TIPC] fix deformable_detr repeat
* [TIPC] fix dino batchsize
---
.../configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
index 6d77fd46e27..87117a4cc1f 100644
--- a/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
@@ -50,7 +50,7 @@ inference:./deploy/python/infer.py
--run_benchmark:False
--trt_max_shape:1600
===========================train_benchmark_params==========================
-batch_size:4
+batch_size:2
fp_items:fp32|fp16
epoch:1
repeat:2
From 2b2db894ec5351c858950b92545420c609ac164f Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Fri, 24 Mar 2023 19:36:08 +0800
Subject: [PATCH 054/116] Ppyolo sod qat (#7990)
* add ppyoloe_sod_qat
* ppyoloe sod qat
* add ppyoloe_sod qat demo
* fix ppyoloe sod qat yml
* add ppyoloe-sod ppyoloe_tiny qat
* add ppyoloe_sod_qat readme
---
deploy/auto_compression/README.md | 5 +-
..._crn_l_80e_sliced_visdrone_640_025_qat.yml | 34 ++
...n_l_80e_sliced_visdrone_640_025_reader.yml | 25 +
...yoloe_plus_crn_t_auxhead_300e_coco_qat.yml | 32 ++
.../ppyoloe_plus_sod_crn_l_qat_dis.yaml | 33 ++
.../auto_compression/paddle_inference_eval.py | 499 ++++++++++++++++++
6 files changed, 627 insertions(+), 1 deletion(-)
create mode 100644 deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_qat.yml
create mode 100644 deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_reader.yml
create mode 100644 deploy/auto_compression/configs/ppyoloe_plus_crn_t_auxhead_300e_coco_qat.yml
create mode 100644 deploy/auto_compression/configs/ppyoloe_plus_sod_crn_l_qat_dis.yaml
create mode 100644 deploy/auto_compression/paddle_inference_eval.py
diff --git a/deploy/auto_compression/README.md b/deploy/auto_compression/README.md
index 8611e0689bd..20a0cbacdc3 100644
--- a/deploy/auto_compression/README.md
+++ b/deploy/auto_compression/README.md
@@ -44,9 +44,12 @@
| 模型 | Base mAP | 离线量化mAP | ACT量化mAP | TRT-FP32 | TRT-FP16 | TRT-INT8 | 配置文件 | 量化模型 |
| :-------- |:-------- |:--------: | :---------------------: | :----------------: | :----------------: | :---------------: | :----------------------: | :---------------------: |
| PP-YOLOE-l | 50.9 | - | 50.6 | 11.2ms | 7.7ms | **6.7ms** | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/deploy/auto_compression/configs/ppyoloe_l_qat_dis.yaml) | [Quant Model](https://bj.bcebos.com/v1/paddle-slim-models/act/ppyoloe_crn_l_300e_coco_quant.tar) |
+| PP-YOLOE-SOD | 38.5 | - | 37.6 | - | - | - | [config](./configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_qat.yml) | [Quant Model](https://bj.bcebos.com/v1/paddle-slim-models/act/ppyoloe_sod_visdrone.tar) |
-- mAP的指标均在COCO val2017数据集中评测得到,IoU=0.5:0.95。
+
+- PP-YOLOE-l mAP的指标在COCO val2017数据集中评测得到,IoU=0.5:0.95。
- PP-YOLOE-l模型在Tesla V100的GPU环境下测试,并且开启TensorRT,batch_size=1,包含NMS,测试脚本是[benchmark demo](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/deploy/python)。
+- PP-YOLOE-SOD 的指标在VisDrone-DET数据集切图后的COCO格式[数据集](https://bj.bcebos.com/v1/paddledet/data/smalldet/visdrone_sliced.zip)中评测得到,IoU=0.5:0.95。定义文件[ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml](../../configs/smalldet/ppyoloe_crn_l_80e_sliced_visdrone_640_025.yml)
### PP-PicoDet
diff --git a/deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_qat.yml b/deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_qat.yml
new file mode 100644
index 00000000000..84132455cad
--- /dev/null
+++ b/deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_qat.yml
@@ -0,0 +1,34 @@
+
+Global:
+ reader_config: configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_reader.yml
+ input_list: ['image', 'scale_factor']
+ arch: YOLO
+ include_nms: True
+ Evaluation: True
+ model_dir: ../../output_inference/ppyoloe_crn_l_80e_sliced_visdrone_640_025
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+QuantAware:
+ onnx_format: True
+ use_pact: False
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+
+TrainConfig:
+ train_iter: 8000
+ eval_iter: 500
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 6000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
diff --git a/deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_reader.yml b/deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_reader.yml
new file mode 100644
index 00000000000..0869a4a10ac
--- /dev/null
+++ b/deploy/auto_compression/configs/ppyoloe_crn_l_80e_sliced_visdrone_640_025_reader.yml
@@ -0,0 +1,25 @@
+metric: COCO
+num_classes: 10
+
+# Dataset configuration
+TrainDataset:
+ !COCODataSet
+ image_dir: train_images_640_025
+ anno_path: train_640_025.json
+ dataset_dir: dataset/visdrone_sliced
+
+EvalDataset:
+ !COCODataSet
+ image_dir: val_images_640_025
+ anno_path: val_640_025.json
+ dataset_dir: dataset/visdrone_sliced
+worker_num: 0
+
+# preprocess reader in test
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+ #- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+ - Permute: {}
+ batch_size: 16
diff --git a/deploy/auto_compression/configs/ppyoloe_plus_crn_t_auxhead_300e_coco_qat.yml b/deploy/auto_compression/configs/ppyoloe_plus_crn_t_auxhead_300e_coco_qat.yml
new file mode 100644
index 00000000000..7f8c48ea482
--- /dev/null
+++ b/deploy/auto_compression/configs/ppyoloe_plus_crn_t_auxhead_300e_coco_qat.yml
@@ -0,0 +1,32 @@
+
+Global:
+ reader_config: configs/ppyoloe_plus_reader.yml
+ include_nms: True
+ Evaluation: True
+ model_dir: ../../output_inference/ppyoloe_plus_crn_t_auxhead_300e_coco/
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+QuantAware:
+ onnx_format: True
+ use_pact: False
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+
+TrainConfig:
+ train_iter: 8000
+ eval_iter: 1000
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 6000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
diff --git a/deploy/auto_compression/configs/ppyoloe_plus_sod_crn_l_qat_dis.yaml b/deploy/auto_compression/configs/ppyoloe_plus_sod_crn_l_qat_dis.yaml
new file mode 100644
index 00000000000..793afa1a0eb
--- /dev/null
+++ b/deploy/auto_compression/configs/ppyoloe_plus_sod_crn_l_qat_dis.yaml
@@ -0,0 +1,33 @@
+
+Global:
+ reader_config: configs/ppyoloe_plus_reader.yml
+ include_nms: True
+ Evaluation: True
+ model_dir: ../../output_inference/ppyoloe_plus_sod_crn_l_80e_coco
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+QuantAware:
+ onnx_format: True
+ use_pact: true
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+
+TrainConfig:
+ train_iter: 1
+ eval_iter: 1
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 6000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
+
diff --git a/deploy/auto_compression/paddle_inference_eval.py b/deploy/auto_compression/paddle_inference_eval.py
new file mode 100644
index 00000000000..053ee35e752
--- /dev/null
+++ b/deploy/auto_compression/paddle_inference_eval.py
@@ -0,0 +1,499 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import time
+import sys
+import cv2
+import numpy as np
+
+import paddle
+from paddle.inference import Config
+from paddle.inference import create_predictor
+from ppdet.core.workspace import load_config, create
+from ppdet.metrics import COCOMetric
+
+from post_process import PPYOLOEPostProcess
+
+
+def argsparser():
+ """
+ argsparser func
+ """
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_path", type=str, help="inference model filepath")
+ parser.add_argument(
+ "--image_file",
+ type=str,
+ default=None,
+ help="image path, if set image_file, it will not eval coco.")
+ parser.add_argument(
+ "--reader_config",
+ type=str,
+ default=None,
+        help="path of dataset and reader config.")
+ parser.add_argument(
+ "--benchmark",
+ type=bool,
+ default=False,
+ help="Whether run benchmark or not.")
+ parser.add_argument(
+ "--use_trt",
+ type=bool,
+ default=False,
+ help="Whether use TensorRT or not.")
+ parser.add_argument(
+ "--precision",
+ type=str,
+ default="paddle",
+ help="mode of running(fp32/fp16/int8)")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="GPU",
+ help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU",
+ )
+ parser.add_argument(
+ "--use_dynamic_shape",
+ type=bool,
+ default=True,
+ help="Whether use dynamic shape or not.")
+ parser.add_argument(
+ "--use_mkldnn",
+ type=bool,
+ default=False,
+ help="Whether use mkldnn or not.")
+ parser.add_argument(
+ "--cpu_threads", type=int, default=10, help="Num of cpu threads.")
+ parser.add_argument("--img_shape", type=int, default=640, help="input_size")
+ parser.add_argument(
+ '--include_nms',
+ type=bool,
+ default=True,
+ help="Whether include nms or not.")
+
+ return parser
+
+
+CLASS_LABEL = [
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+ 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+ 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+ 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+ 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+ 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+ 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+ 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+ 'hair drier', 'toothbrush'
+]
+
+
+def generate_scale(im, target_shape, keep_ratio=True):
+ """
+ Args:
+ im (np.ndarray): image (np.ndarray)
+ Returns:
+ im_scale_x: the resize ratio of X
+ im_scale_y: the resize ratio of Y
+ """
+ origin_shape = im.shape[:2]
+ if keep_ratio:
+ im_size_min = np.min(origin_shape)
+ im_size_max = np.max(origin_shape)
+ target_size_min = np.min(target_shape)
+ target_size_max = np.max(target_shape)
+ im_scale = float(target_size_min) / float(im_size_min)
+ if np.round(im_scale * im_size_max) > target_size_max:
+ im_scale = float(target_size_max) / float(im_size_max)
+ im_scale_x = im_scale
+ im_scale_y = im_scale
+ else:
+ resize_h, resize_w = target_shape
+ im_scale_y = resize_h / float(origin_shape[0])
+ im_scale_x = resize_w / float(origin_shape[1])
+ return im_scale_y, im_scale_x
+
+
+def image_preprocess(img_path, target_shape):
+ """
+ image_preprocess func
+ """
+ img = cv2.imread(img_path)
+ im_scale_y, im_scale_x = generate_scale(img, target_shape, keep_ratio=False)
+ img = cv2.resize(
+ img, (target_shape[0], target_shape[0]),
+ interpolation=cv2.INTER_LANCZOS4)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ img = np.transpose(img, [2, 0, 1]) / 255
+ img = np.expand_dims(img, 0)
+ img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+ img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+ img -= img_mean
+ img /= img_std
+ scale_factor = np.array([[im_scale_y, im_scale_x]])
+ return img.astype(np.float32), scale_factor.astype(np.float32)
+
+
+def get_color_map_list(num_classes):
+ """
+ get_color_map_list func
+ """
+ color_map = num_classes * [0, 0, 0]
+ for i in range(0, num_classes):
+ j = 0
+ lab = i
+ while lab:
+ color_map[i * 3] |= ((lab >> 0) & 1) << (7 - j)
+ color_map[i * 3 + 1] |= ((lab >> 1) & 1) << (7 - j)
+ color_map[i * 3 + 2] |= ((lab >> 2) & 1) << (7 - j)
+ j += 1
+ lab >>= 3
+ color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+ return color_map
+
+
+def draw_box(image_file, results, class_label, threshold=0.5):
+ """
+ draw_box func
+ """
+ srcimg = cv2.imread(image_file, 1)
+ for i in range(len(results)):
+ color_list = get_color_map_list(len(class_label))
+ clsid2color = {}
+ classid, conf = int(results[i, 0]), results[i, 1]
+ if conf < threshold:
+ continue
+ xmin, ymin, xmax, ymax = int(results[i, 2]), int(results[i, 3]), int(
+ results[i, 4]), int(results[i, 5])
+
+ if classid not in clsid2color:
+ clsid2color[classid] = color_list[classid]
+ color = tuple(clsid2color[classid])
+
+ cv2.rectangle(srcimg, (xmin, ymin), (xmax, ymax), color, thickness=2)
+ print(class_label[classid] + ": " + str(round(conf, 3)))
+ cv2.putText(
+ srcimg,
+ class_label[classid] + ":" + str(round(conf, 3)),
+ (xmin, ymin - 10),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.8,
+ (0, 255, 0),
+ thickness=2, )
+ return srcimg
+
+
+def load_predictor(
+ model_dir,
+ precision="fp32",
+ use_trt=False,
+ use_mkldnn=False,
+ batch_size=1,
+ device="CPU",
+ min_subgraph_size=3,
+ use_dynamic_shape=False,
+ trt_min_shape=1,
+ trt_max_shape=1280,
+ trt_opt_shape=640,
+ cpu_threads=1, ):
+ """set AnalysisConfig, generate AnalysisPredictor
+ Args:
+ model_dir (str): root path of __model__ and __params__
+ precision (str): mode of running(fp32/fp16/int8)
+ use_trt (bool): whether use TensorRT or not.
+ use_mkldnn (bool): whether use MKLDNN or not in CPU.
+ device (str): Choose the device you want to run, it can be: CPU/GPU, default is CPU
+ use_dynamic_shape (bool): use dynamic shape or not
+ trt_min_shape (int): min shape for dynamic shape in trt
+ trt_max_shape (int): max shape for dynamic shape in trt
+ trt_opt_shape (int): opt shape for dynamic shape in trt
+ Returns:
+ predictor (PaddlePredictor): AnalysisPredictor
+ Raises:
+ ValueError: predict by TensorRT need device == 'GPU'.
+ """
+ rerun_flag = False
+ if device != "GPU" and use_trt:
+ raise ValueError(
+ "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}".
+ format(precision, device))
+ config = Config(
+ os.path.join(model_dir, "model.pdmodel"),
+ os.path.join(model_dir, "model.pdiparams"))
+ if device == "GPU":
+ # initial GPU memory(M), device ID
+ config.enable_use_gpu(200, 0)
+ # optimize graph and fuse op
+ config.switch_ir_optim(True)
+ else:
+ config.disable_gpu()
+ config.set_cpu_math_library_num_threads(cpu_threads)
+ config.switch_ir_optim()
+ if use_mkldnn:
+ config.enable_mkldnn()
+ if precision == "int8":
+ config.enable_mkldnn_int8(
+ {"conv2d", "depthwise_conv2d", "transpose2", "pool2d"})
+
+ precision_map = {
+ "int8": Config.Precision.Int8,
+ "fp32": Config.Precision.Float32,
+ "fp16": Config.Precision.Half,
+ }
+ if precision in precision_map.keys() and use_trt:
+ config.enable_tensorrt_engine(
+ workspace_size=(1 << 25) * batch_size,
+ max_batch_size=batch_size,
+ min_subgraph_size=min_subgraph_size,
+ precision_mode=precision_map[precision],
+ use_static=True,
+ use_calib_mode=False, )
+
+ if use_dynamic_shape:
+ dynamic_shape_file = os.path.join(FLAGS.model_path,
+ "dynamic_shape.txt")
+ if os.path.exists(dynamic_shape_file):
+ config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file,
+ True)
+ print("trt set dynamic shape done!")
+ else:
+ config.collect_shape_range_info(dynamic_shape_file)
+ print("Start collect dynamic shape...")
+ rerun_flag = True
+
+ # enable shared memory
+ config.enable_memory_optim()
+ predictor = create_predictor(config)
+ return predictor, rerun_flag
+
+
+def get_current_memory_mb():
+ """
+    It is used to obtain the memory usage of the CPU and GPU while the program runs.
+    Note that this function is itself time-consuming.
+ """
+ try:
+ pkg.require('pynvml')
+ except:
+ from pip._internal import main
+ main(['install', 'pynvml'])
+ try:
+ pkg.require('psutil')
+ except:
+ from pip._internal import main
+ main(['install', 'psutil'])
+ try:
+ pkg.require('GPUtil')
+ except:
+ from pip._internal import main
+ main(['install', 'GPUtil'])
+ import pynvml
+ import psutil
+ import GPUtil
+
+ gpu_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", 0))
+
+ pid = os.getpid()
+ p = psutil.Process(pid)
+ info = p.memory_full_info()
+ cpu_mem = info.uss / 1024.0 / 1024.0
+ gpu_mem = 0
+ gpu_percent = 0
+ gpus = GPUtil.getGPUs()
+ if gpu_id is not None and len(gpus) > 0:
+ gpu_percent = gpus[gpu_id].load
+ pynvml.nvmlInit()
+ handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+ meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
+ gpu_mem = meminfo.used / 1024.0 / 1024.0
+ return round(cpu_mem, 4), round(gpu_mem, 4)
+
+
+def predict_image(predictor,
+ image_file,
+ image_shape=[640, 640],
+ warmup=1,
+ repeats=1,
+ threshold=0.5):
+ """
+ predict image main func
+ """
+ img, scale_factor = image_preprocess(image_file, image_shape)
+ inputs = {}
+ inputs["image"] = img
+ if FLAGS.include_nms:
+ inputs['scale_factor'] = scale_factor
+ input_names = predictor.get_input_names()
+ for i, _ in enumerate(input_names):
+ input_tensor = predictor.get_input_handle(input_names[i])
+ input_tensor.copy_from_cpu(inputs[input_names[i]])
+
+ for i in range(warmup):
+ predictor.run()
+
+ np_boxes, np_boxes_num = None, None
+ cpu_mems, gpu_mems = 0, 0
+ predict_time = 0.0
+ time_min = float("inf")
+ time_max = float("-inf")
+ for i in range(repeats):
+ start_time = time.time()
+ predictor.run()
+ output_names = predictor.get_output_names()
+ boxes_tensor = predictor.get_output_handle(output_names[0])
+ np_boxes = boxes_tensor.copy_to_cpu()
+ if FLAGS.include_nms:
+ boxes_num = predictor.get_output_handle(output_names[1])
+ np_boxes_num = boxes_num.copy_to_cpu()
+ end_time = time.time()
+ timed = end_time - start_time
+ time_min = min(time_min, timed)
+ time_max = max(time_max, timed)
+ predict_time += timed
+ cpu_mem, gpu_mem = get_current_memory_mb()
+ cpu_mems += cpu_mem
+ gpu_mems += gpu_mem
+
+ time_avg = predict_time / repeats
+ print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
+ cpu_mems / repeats, gpu_mems / repeats))
+ print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
+ round(time_min * 1000, 2),
+ round(time_max * 1000, 1), round(time_avg * 1000, 1)))
+ if not FLAGS.include_nms:
+ postprocess = PPYOLOEPostProcess(score_threshold=0.3, nms_threshold=0.6)
+ res = postprocess(np_boxes, scale_factor)
+ else:
+ res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
+ res_img = draw_box(
+ image_file, res["bbox"], CLASS_LABEL, threshold=threshold)
+ cv2.imwrite("result.jpg", res_img)
+
+
+def eval(predictor, val_loader, metric, rerun_flag=False):
+ """
+ eval main func
+ """
+ cpu_mems, gpu_mems = 0, 0
+ predict_time = 0.0
+ time_min = float("inf")
+ time_max = float("-inf")
+ sample_nums = len(val_loader)
+ input_names = predictor.get_input_names()
+ output_names = predictor.get_output_names()
+ boxes_tensor = predictor.get_output_handle(output_names[0])
+ if FLAGS.include_nms:
+ boxes_num = predictor.get_output_handle(output_names[1])
+ for batch_id, data in enumerate(val_loader):
+ data_all = {k: np.array(v) for k, v in data.items()}
+ for i, _ in enumerate(input_names):
+ input_tensor = predictor.get_input_handle(input_names[i])
+ input_tensor.copy_from_cpu(data_all[input_names[i]])
+ start_time = time.time()
+ predictor.run()
+ np_boxes = boxes_tensor.copy_to_cpu()
+ if FLAGS.include_nms:
+ np_boxes_num = boxes_num.copy_to_cpu()
+ if rerun_flag:
+ return
+ end_time = time.time()
+ timed = end_time - start_time
+ time_min = min(time_min, timed)
+ time_max = max(time_max, timed)
+ predict_time += timed
+ cpu_mem, gpu_mem = get_current_memory_mb()
+ cpu_mems += cpu_mem
+ gpu_mems += gpu_mem
+ if not FLAGS.include_nms:
+ postprocess = PPYOLOEPostProcess(
+ score_threshold=0.3, nms_threshold=0.6)
+ res = postprocess(np_boxes, data_all['scale_factor'])
+ else:
+ res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
+ metric.update(data_all, res)
+ if batch_id % 100 == 0:
+ print("Eval iter:", batch_id)
+ sys.stdout.flush()
+ metric.accumulate()
+ metric.log()
+ map_res = metric.get_results()
+ metric.reset()
+ time_avg = predict_time / sample_nums
+ print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
+ cpu_mems / sample_nums, gpu_mems / sample_nums))
+ print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
+ round(time_min * 1000, 2),
+ round(time_max * 1000, 1), round(time_avg * 1000, 1)))
+ print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0]))
+ sys.stdout.flush()
+
+
+def main():
+ """
+ main func
+ """
+ predictor, rerun_flag = load_predictor(
+ FLAGS.model_path,
+ device=FLAGS.device,
+ use_trt=FLAGS.use_trt,
+ use_mkldnn=FLAGS.use_mkldnn,
+ precision=FLAGS.precision,
+ use_dynamic_shape=FLAGS.use_dynamic_shape,
+ cpu_threads=FLAGS.cpu_threads)
+
+ if FLAGS.image_file:
+ warmup, repeats = 1, 1
+ if FLAGS.benchmark:
+ warmup, repeats = 50, 100
+ predict_image(
+ predictor,
+ FLAGS.image_file,
+ image_shape=[FLAGS.img_shape, FLAGS.img_shape],
+ warmup=warmup,
+ repeats=repeats)
+ else:
+ reader_cfg = load_config(FLAGS.reader_config)
+
+ dataset = reader_cfg["EvalDataset"]
+ global val_loader
+ val_loader = create("EvalReader")(reader_cfg["EvalDataset"],
+ reader_cfg["worker_num"],
+ return_list=True)
+ clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
+ anno_file = dataset.get_anno()
+ metric = COCOMetric(
+ anno_file=anno_file, clsid2catid=clsid2catid, IouType="bbox")
+ eval(predictor, val_loader, metric, rerun_flag=rerun_flag)
+
+ if rerun_flag:
+ print(
+ "***** Collect dynamic shape done, Please rerun the program to get correct results. *****"
+ )
+
+
+if __name__ == "__main__":
+ paddle.enable_static()
+ parser = argsparser()
+ FLAGS = parser.parse_args()
+
+ # DataLoader need run on cpu
+ paddle.set_device("cpu")
+
+ main()
From 5f14efd5be5b9dff069000388a7998e3a9029cd6 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Mon, 27 Mar 2023 14:48:56 +0800
Subject: [PATCH 055/116] [TIPC] * benchmark use cocomini dataset (#7995)
* benchmark set run 5m finish
* static train pop data['image_file']
---
ppdet/engine/trainer.py | 3 +++
test_tipc/benchmark_train.sh | 4 ++--
.../deformable_detr_r50_1x_coco_train_infer_python.txt | 2 +-
.../dino/dino_r50_4scale_1x_coco_train_infer_python.txt | 2 +-
.../fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt | 2 +-
.../mask_rcnn_r50_fpn_1x_coco_train_infer_python.txt | 2 +-
.../picodet/picodet_s_320_coco_lcnet_train_infer_python.txt | 2 +-
.../ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt | 4 +++-
.../yolov3_darknet53_270e_coco_train_infer_python.txt | 2 +-
test_tipc/prepare.sh | 6 +++---
test_tipc/test_train_inference_python.sh | 2 +-
11 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 0378e00ecb5..730b99f2828 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -487,6 +487,9 @@ def train(self, validate=False):
profiler.add_profiler_step(profiler_options)
self._compose_callback.on_step_begin(self.status)
data['epoch_id'] = epoch_id
+ if self.cfg.get('to_static',
+ False) and 'image_file' in data.keys():
+ data.pop('image_file')
if self.use_amp:
if isinstance(
diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index bb2324f00c5..b4dced75acf 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -243,7 +243,7 @@ for batch_size in ${batch_size_list[*]}; do
--run_mode ${run_mode} \
--fp_item ${precision} \
--keyword ips: \
- --skip_steps 2 \
+ --skip_steps 4 \
--device_num ${device_num} \
--speed_unit images/s \
--convergence_key loss: "
@@ -279,7 +279,7 @@ for batch_size in ${batch_size_list[*]}; do
--run_mode ${run_mode} \
--fp_item ${precision} \
--keyword ips: \
- --skip_steps 2 \
+ --skip_steps 4 \
--device_num ${device_num} \
--speed_unit images/s \
--convergence_key loss: "
diff --git a/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt b/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt
index a2b9ae5c4a0..f4b23d2d7dd 100644
--- a/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/deformable_detr/deformable_detr_r50_1x_coco_train_infer_python.txt
@@ -53,6 +53,6 @@ inference:./deploy/python/infer.py
batch_size:2
fp_items:fp32|fp16
epoch:1
-repeat:2
+repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
\ No newline at end of file
diff --git a/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
index 87117a4cc1f..ee694fdd7ab 100644
--- a/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
@@ -53,7 +53,7 @@ inference:./deploy/python/infer.py
batch_size:2
fp_items:fp32|fp16
epoch:1
-repeat:2
+repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
diff --git a/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt b/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt
index f95e1a04032..325148db5f6 100644
--- a/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/fcos/fcos_r50_fpn_1x_coco_train_infer_python.txt
@@ -53,7 +53,7 @@ inference:./deploy/python/infer.py
batch_size:2|8
fp_items:fp32|fp16
epoch:1
-repeat:3
+repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
diff --git a/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_infer_python.txt b/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_infer_python.txt
index db6d2b00a3b..f03b573c0fb 100644
--- a/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_infer_python.txt
@@ -53,7 +53,7 @@ inference:./deploy/python/infer.py
batch_size:2|4
fp_items:fp32|fp16
epoch:1
-repeat:2
+repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
diff --git a/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt b/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt
index 57e7e3c3cb9..cb10d21b343 100644
--- a/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt
+++ b/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_infer_python.txt
@@ -53,7 +53,7 @@ null:null
batch_size:64
fp_items:fp32|fp16
epoch:1
-repeat:25
+repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
diff --git a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
index 19fa1673b0f..8464d70a774 100644
--- a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
+++ b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_infer_python.txt
@@ -53,8 +53,10 @@ inference:./deploy/python/infer.py
batch_size:8
fp_items:fp32|fp16
epoch:1
-repeat:12
+repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
numpy_infer_input:3x640x640_2.npy
+===========================to_static_train_benchmark_params===========================
+to_static_train:--to_static
diff --git a/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_infer_python.txt b/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_infer_python.txt
index 7c0b3aa5b8e..9b3553e03fd 100644
--- a/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_infer_python.txt
+++ b/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_infer_python.txt
@@ -53,7 +53,7 @@ null:null
batch_size:8
fp_items:fp32|fp16
epoch:1
-repeat:3
+repeat:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:null
===========================infer_benchmark_params===========================
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 5d3d890f880..989966849e9 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -99,9 +99,9 @@ elif [ ${MODE} = "benchmark_train" ];then
cd ../../
else
# prepare lite benchmark coco data
- wget -nc -P ./dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.tar --no-check-certificate
- cd ./dataset/coco/ && tar -xf coco_benchmark.tar
- mv -u coco_benchmark/* ./
+ wget -nc -P ./dataset/coco/ https://bj.bcebos.com/v1/paddledet/data/cocomini.zip --no-check-certificate
+ cd ./dataset/coco/ && unzip cocomini.zip
+ mv -u cocomini/* ./
ls ./
cd ../../
# prepare lite benchmark mot data
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index d5c09ccf09d..072fb89cf85 100644
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -311,7 +311,7 @@ else
fi
# run train
train_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}.log"
- eval "${cmd} > ${train_log_path} 2>&1"
+ eval "timeout 5m ${cmd} > ${train_log_path} 2>&1"
last_status=$?
cat ${train_log_path}
status_check $last_status "${cmd}" "${status_log}" "${model_name}" "${train_log_path}"
From a08a3f2db0b4fe5134337744a94a1b32c3b41b69 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Thu, 30 Mar 2023 15:05:43 +0800
Subject: [PATCH 056/116] auto compression README.md add
paddle_inference_eval.py test=document_fix (#8012)
---
deploy/auto_compression/README.md | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/deploy/auto_compression/README.md b/deploy/auto_compression/README.md
index 20a0cbacdc3..7b50430693b 100644
--- a/deploy/auto_compression/README.md
+++ b/deploy/auto_compression/README.md
@@ -150,8 +150,15 @@ export CUDA_VISIBLE_DEVICES=0
python eval.py --config_path=./configs/ppyoloe_l_qat_dis.yaml
```
+使用paddle inference并使用trt int8得到模型的mAP:
+```
+export CUDA_VISIBLE_DEVICES=0
+python paddle_inference_eval.py --model_path ./output/ --reader_config configs/ppyoloe_reader.yml --precision int8 --use_trt=True
+```
+
**注意**:
- 要测试的模型路径可以在配置文件中`model_dir`字段下进行修改。
+- --precision 默认为paddle,如果使用trt,需要设置--use_trt=True,同时--precision 可设置为fp32/fp16/int8
## 4.预测部署
From a18c811349c89069650b807ad08418b7f537c075 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Thu, 30 Mar 2023 17:09:49 +0800
Subject: [PATCH 057/116] support deformable_detr amp (#8011)
---
ppdet/modeling/transformers/deformable_transformer.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ppdet/modeling/transformers/deformable_transformer.py b/ppdet/modeling/transformers/deformable_transformer.py
index b46fb298709..fcb5a0aab1c 100644
--- a/ppdet/modeling/transformers/deformable_transformer.py
+++ b/ppdet/modeling/transformers/deformable_transformer.py
@@ -450,7 +450,8 @@ def __init__(self,
temperature=pe_temperature,
normalize=True if position_embed_type == 'sine' else False,
embed_type=position_embed_type,
- offset=pe_offset)
+ offset=pe_offset,
+ eps=1e-4)
self._reset_parameters()
From 1e211871aa95d8772f8b1990ffc4f3c84bb79a17 Mon Sep 17 00:00:00 2001
From: wjm <897383984@qq.com>
Date: Sat, 1 Apr 2023 15:05:35 +0800
Subject: [PATCH 058/116] Support ARSL(CVPR2023) for semi-supervised object
detection (#7980)
* add SSOD_asrl
* modify trainer name
* add modelzoo
* add config
* add config
* add config
* modify cfg name
* modify cfg
* modify cfg
* modify checkpoint
* modify cfg
* add voc and lsj
* add voc and lsj
* del export
* modify
* modify
* refine codes
* fix fcos_head get_loss
* add export
* fix bug
* add export infer
* change
* retry
* fix eval infer
---------
Co-authored-by: nemonameless
---
configs/semi_det/README.md | 22 +-
.../semi_det/_base_/coco_detection_voc.yml | 31 +
configs/semi_det/_base_/voc2coco.py | 213 +++++
configs/semi_det/arsl/README.md | 48 ++
.../arsl/_base_/arsl_fcos_r50_fpn.yml | 56 ++
.../semi_det/arsl/_base_/arsl_fcos_reader.yml | 55 ++
.../semi_det/arsl/_base_/optimizer_360k.yml | 29 +
.../semi_det/arsl/_base_/optimizer_90k.yml | 30 +
.../arsl/arsl_fcos_r50_fpn_coco_full.yml | 12 +
.../arsl/arsl_fcos_r50_fpn_coco_semi001.yml | 12 +
.../arsl/arsl_fcos_r50_fpn_coco_semi005.yml | 12 +
.../arsl/arsl_fcos_r50_fpn_coco_semi010.yml | 12 +
.../arsl_fcos_r50_fpn_coco_semi010_lsj.yml | 47 ++
ppdet/engine/trainer.py | 10 +-
ppdet/engine/trainer_ssod.py | 401 ++++++++-
ppdet/modeling/architectures/__init__.py | 2 +-
ppdet/modeling/architectures/fcos.py | 130 ++-
ppdet/modeling/heads/fcos_head.py | 155 +++-
ppdet/modeling/losses/fcos_loss.py | 759 +++++++++++++++++-
ppdet/utils/checkpoint.py | 182 +++--
tools/eval.py | 19 +-
tools/export_model.py | 18 +-
tools/infer.py | 15 +-
tools/train.py | 6 +-
24 files changed, 2159 insertions(+), 117 deletions(-)
create mode 100644 configs/semi_det/_base_/coco_detection_voc.yml
create mode 100644 configs/semi_det/_base_/voc2coco.py
create mode 100644 configs/semi_det/arsl/README.md
create mode 100644 configs/semi_det/arsl/_base_/arsl_fcos_r50_fpn.yml
create mode 100644 configs/semi_det/arsl/_base_/arsl_fcos_reader.yml
create mode 100644 configs/semi_det/arsl/_base_/optimizer_360k.yml
create mode 100644 configs/semi_det/arsl/_base_/optimizer_90k.yml
create mode 100644 configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_full.yml
create mode 100644 configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi001.yml
create mode 100644 configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi005.yml
create mode 100644 configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml
create mode 100644 configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010_lsj.yml
diff --git a/configs/semi_det/README.md b/configs/semi_det/README.md
index 996a1decfec..5256026a344 100644
--- a/configs/semi_det/README.md
+++ b/configs/semi_det/README.md
@@ -7,6 +7,7 @@
- [模型库](#模型库)
- [Baseline](#Baseline)
- [DenseTeacher](#DenseTeacher)
+ - [ARSL](#ARSL)
- [半监督数据集准备](#半监督数据集准备)
- [半监督检测配置](#半监督检测配置)
- [训练集配置](#训练集配置)
@@ -23,7 +24,7 @@
- [引用](#引用)
## 简介
-半监督目标检测(Semi DET)是**同时使用有标注数据和无标注数据**进行训练的目标检测,既可以极大地节省标注成本,也可以充分利用无标注数据进一步提高检测精度。PaddleDetection团队复现了[DenseTeacher](denseteacher)半监督检测算法,用户可以下载使用。
+半监督目标检测(Semi DET)是**同时使用有标注数据和无标注数据**进行训练的目标检测,既可以极大地节省标注成本,也可以充分利用无标注数据进一步提高检测精度。PaddleDetection团队提供了[DenseTeacher](denseteacher/)和[ARSL](arsl/)等最前沿的半监督检测算法,用户可以下载使用。
## 模型库
@@ -41,6 +42,25 @@
| DenseTeacher-FCOS(LSJ)| 10% | [sup_config](./baseline/fcos_r50_fpn_2x_coco_sup010.yml) | 24 (17424) | 26.3 | **37.1(LSJ)** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi010_lsj.pdparams) | [config](denseteacher/denseteacher_fcos_r50_fpn_coco_semi010_lsj.yml) |
| DenseTeacher-FCOS |100%(full)| [sup_config](./../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.ymll) | 24 (175896) | 42.6 | **44.2** | 24 (175896)| [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_full.pdparams) | [config](denseteacher/denseteacher_fcos_r50_fpn_coco_full.yml) |
+| 模型 | 监督数据比例 | Sup Baseline | Sup Epochs (Iters) | Sup mAPval
0.5:0.95 | Semi mAPval
0.5:0.95 | Semi Epochs (Iters) | 模型下载 | 配置文件 |
+| :------------: | :---------: | :---------------------: | :---------------------: |:---------------------------: |:----------------------------: | :------------------: |:--------: |:----------: |
+| DenseTeacher-PPYOLOE+_s | 5% | [sup_config](./baseline/ppyoloe_plus_crn_s_80e_coco_sup005.yml) | 80 (14480) | 32.8 | **34.0** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_s_coco_semi005.pdparams) | [config](denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi005.yml) |
+| DenseTeacher-PPYOLOE+_s | 10% | [sup_config](./baseline/ppyoloe_plus_crn_s_80e_coco_sup010.yml) | 80 (14480) | 35.3 | **37.5** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_s_coco_semi010.pdparams) | [config](denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi010.yml) |
+| DenseTeacher-PPYOLOE+_l | 5% | [sup_config](./baseline/ppyoloe_plus_crn_s_80e_coco_sup005.yml) | 80 (14480) | 42.9 | **45.4** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_l_coco_semi005.pdparams) | [config](denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi005.yml) |
+| DenseTeacher-PPYOLOE+_l | 10% | [sup_config](./baseline/ppyoloe_plus_crn_l_80e_coco_sup010.yml) | 80 (14480) | 45.7 | **47.4** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_l_coco_semi010.pdparams) | [config](denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi010.yml) |
+
+
+### [ARSL](arsl)
+
+| 模型 | COCO监督数据比例 | Semi mAPval
0.5:0.95 | Semi Epochs (Iters) | 模型下载 | 配置文件 |
+| :------------: | :---------:|:----------------------------: | :------------------: |:--------: |:----------: |
+| ARSL-FCOS | 1% | **22.8** | 240 (87120) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi001.pdparams) | [config](arsl/arsl_fcos_r50_fpn_coco_semi001.yml) |
+| ARSL-FCOS | 5% | **33.1** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi005.pdparams) | [config](arsl/arsl_fcos_r50_fpn_coco_semi005.yml ) |
+| ARSL-FCOS | 10% | **36.9** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi010.pdparams) | [config](arsl/arsl_fcos_r50_fpn_coco_semi010.yml ) |
+| ARSL-FCOS | 10% | **38.5(LSJ)** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi010_lsj.pdparams) | [config](arsl/arsl_fcos_r50_fpn_coco_semi010_lsj.yml ) |
+| ARSL-FCOS | full(100%) | **45.1** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_full.pdparams) | [config](arsl/arsl_fcos_r50_fpn_coco_full.yml ) |
+
+
## 半监督数据集准备
diff --git a/configs/semi_det/_base_/coco_detection_voc.yml b/configs/semi_det/_base_/coco_detection_voc.yml
new file mode 100644
index 00000000000..8548081cf9e
--- /dev/null
+++ b/configs/semi_det/_base_/coco_detection_voc.yml
@@ -0,0 +1,31 @@
+metric: COCO
+num_classes: 20
+# before training, change VOC to COCO format by 'python voc2coco.py'
+# partial labeled COCO, use `SemiCOCODataSet` rather than `COCODataSet`
+TrainDataset:
+ !SemiCOCODataSet
+ image_dir: VOC2007/JPEGImages
+ anno_path: PseudoAnnotations/VOC2007_trainval.json
+ dataset_dir: dataset/voc/VOCdevkit
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+# partial unlabeled COCO, use `SemiCOCODataSet` rather than `COCODataSet`
+UnsupTrainDataset:
+ !SemiCOCODataSet
+ image_dir: VOC2012/JPEGImages
+ anno_path: PseudoAnnotations/VOC2012_trainval.json
+ dataset_dir: dataset/voc/VOCdevkit
+ data_fields: ['image']
+ supervised: False
+
+EvalDataset:
+ !COCODataSet
+ image_dir: VOC2007/JPEGImages
+ anno_path: PseudoAnnotations/VOC2007_test.json
+ dataset_dir: dataset/voc/VOCdevkit/
+ allow_empty: true
+
+TestDataset:
+ !ImageFolder
+ anno_path: PseudoAnnotations/VOC2007_test.json # also support txt (like VOC's label_list.txt)
+ dataset_dir: dataset/voc/VOCdevkit/ # if set, anno_path will be 'dataset_dir/anno_path'
diff --git a/configs/semi_det/_base_/voc2coco.py b/configs/semi_det/_base_/voc2coco.py
new file mode 100644
index 00000000000..87bfe809de9
--- /dev/null
+++ b/configs/semi_det/_base_/voc2coco.py
@@ -0,0 +1,213 @@
+# convert VOC xml to COCO format json
+import xml.etree.ElementTree as ET
+import os
+import json
+import argparse
+
+
+# create and init coco json, img set, and class set
+def init_json():
+ # create coco json
+ coco = dict()
+ coco['images'] = []
+ coco['type'] = 'instances'
+ coco['annotations'] = []
+ coco['categories'] = []
+ # voc classes
+ voc_class = [
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
+ 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
+ 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
+ ]
+ # init json categories
+ image_set = set()
+ class_set = dict()
+ for cat_id, cat_name in enumerate(voc_class):
+ cat_item = dict()
+ cat_item['supercategory'] = 'none'
+ cat_item['id'] = cat_id
+ cat_item['name'] = cat_name
+ coco['categories'].append(cat_item)
+ class_set[cat_name] = cat_id
+ return coco, class_set, image_set
+
+
+def getImgItem(file_name, size, img_id):
+ if file_name is None:
+ raise Exception('Could not find filename tag in xml file.')
+ if size['width'] is None:
+ raise Exception('Could not find width tag in xml file.')
+ if size['height'] is None:
+ raise Exception('Could not find height tag in xml file.')
+ image_item = dict()
+ image_item['id'] = img_id
+ image_item['file_name'] = file_name
+ image_item['width'] = size['width']
+ image_item['height'] = size['height']
+ return image_item
+
+
+def getAnnoItem(object_name, image_id, ann_id, category_id, bbox):
+ annotation_item = dict()
+ annotation_item['segmentation'] = []
+ seg = []
+ # bbox[] is x,y,w,h
+ # left_top
+ seg.append(bbox[0])
+ seg.append(bbox[1])
+ # left_bottom
+ seg.append(bbox[0])
+ seg.append(bbox[1] + bbox[3])
+ # right_bottom
+ seg.append(bbox[0] + bbox[2])
+ seg.append(bbox[1] + bbox[3])
+ # right_top
+ seg.append(bbox[0] + bbox[2])
+ seg.append(bbox[1])
+
+ annotation_item['segmentation'].append(seg)
+
+ annotation_item['area'] = bbox[2] * bbox[3]
+ annotation_item['iscrowd'] = 0
+ annotation_item['ignore'] = 0
+ annotation_item['image_id'] = image_id
+ annotation_item['bbox'] = bbox
+ annotation_item['category_id'] = category_id
+ annotation_item['id'] = ann_id
+ return annotation_item
+
+
+def convert_voc_to_coco(txt_path, json_path, xml_path):
+
+ # create and init coco json, img set, and class set
+ coco_json, class_set, image_set = init_json()
+
+ ### collect img and ann info into coco json
+ # read img_name in txt, e.g., 000005 for voc2007, 2008_000002 for voc2012
+ img_txt = open(txt_path, 'r')
+ img_line = img_txt.readline().strip()
+
+ # loop xml
+ img_id = 0
+ ann_id = 0
+ while img_line:
+ print('img_id:', img_id)
+
+ # find corresponding xml
+ xml_name = img_line.split('Annotations/', 1)[1]
+ xml_file = os.path.join(xml_path, xml_name)
+ if not os.path.exists(xml_file):
+ print('{} is not exists.'.format(xml_name))
+ img_line = img_txt.readline().strip()
+ continue
+
+ # decode xml
+ tree = ET.parse(xml_file)
+ root = tree.getroot()
+ if root.tag != 'annotation':
+ raise Exception(
+ 'xml {} root element should be annotation, rather than {}'.
+ format(xml_name, root.tag))
+
+ # init img and ann info
+ bndbox = dict()
+ size = dict()
+ size['width'] = None
+ size['height'] = None
+ size['depth'] = None
+
+ # filename
+ fileNameNode = root.find('filename')
+ file_name = fileNameNode.text
+
+ # img size
+ sizeNode = root.find('size')
+ if not sizeNode:
+ raise Exception('xml {} structure broken at size tag.'.format(
+ xml_name))
+ for subNode in sizeNode:
+ size[subNode.tag] = int(subNode.text)
+
+ # add img into json
+ if file_name not in image_set:
+ img_id += 1
+ format_img_id = int("%04d" % img_id)
+ # print('line 120. format_img_id:', format_img_id)
+ image_item = getImgItem(file_name, size, img_id)
+ image_set.add(file_name)
+ coco_json['images'].append(image_item)
+ else:
+ raise Exception(' xml {} duplicated image: {}'.format(xml_name,
+ file_name))
+
+ ### add objAnn into json
+ objectAnns = root.findall('object')
+ for objectAnn in objectAnns:
+ bndbox['xmin'] = None
+ bndbox['xmax'] = None
+ bndbox['ymin'] = None
+ bndbox['ymax'] = None
+
+ #add obj category
+ object_name = objectAnn.find('name').text
+ if object_name not in class_set:
+ raise Exception('xml {} Unrecognized category: {}'.format(
+ xml_name, object_name))
+ else:
+ current_category_id = class_set[object_name]
+
+ #add obj bbox ann
+ objectBboxNode = objectAnn.find('bndbox')
+ for coordinate in objectBboxNode:
+ if bndbox[coordinate.tag] is not None:
+ raise Exception('xml {} structure corrupted at bndbox tag.'.
+ format(xml_name))
+ bndbox[coordinate.tag] = int(float(coordinate.text))
+ bbox = []
+ # x
+ bbox.append(bndbox['xmin'])
+ # y
+ bbox.append(bndbox['ymin'])
+ # w
+ bbox.append(bndbox['xmax'] - bndbox['xmin'])
+ # h
+ bbox.append(bndbox['ymax'] - bndbox['ymin'])
+ ann_id += 1
+ ann_item = getAnnoItem(object_name, img_id, ann_id,
+ current_category_id, bbox)
+ coco_json['annotations'].append(ann_item)
+
+ img_line = img_txt.readline().strip()
+
+ print('Saving json.')
+ json.dump(coco_json, open(json_path, 'w'))
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--type', type=str, default='VOC2007_test', help="data type")
+ parser.add_argument(
+ '--base_path',
+ type=str,
+ default='dataset/voc/VOCdevkit',
+ help="base VOC path.")
+ args = parser.parse_args()
+
+ # image info path
+ txt_name = args.type + '.txt'
+ json_name = args.type + '.json'
+ txt_path = os.path.join(args.base_path, 'PseudoAnnotations', txt_name)
+ json_path = os.path.join(args.base_path, 'PseudoAnnotations', json_name)
+
+ # xml path
+ xml_path = os.path.join(args.base_path,
+ args.type.split('_')[0], 'Annotations')
+
+ print('txt_path:', txt_path)
+ print('json_path:', json_path)
+ print('xml_path:', xml_path)
+
+ print('Converting {} to COCO json.'.format(args.type))
+ convert_voc_to_coco(txt_path, json_path, xml_path)
+ print('Finished.')
diff --git a/configs/semi_det/arsl/README.md b/configs/semi_det/arsl/README.md
new file mode 100644
index 00000000000..aee750ecd7b
--- /dev/null
+++ b/configs/semi_det/arsl/README.md
@@ -0,0 +1,48 @@
+简体中文 | [English](README_en.md)
+
+# Ambiguity-Resistant Semi-Supervised Learning for Dense Object Detection (ARSL)
+
+## ARSL-FCOS 模型库
+
+| 模型 | COCO监督数据比例 | Semi mAPval
0.5:0.95 | Semi Epochs (Iters) | 模型下载 | 配置文件 |
+| :------------: | :---------:|:----------------------------: | :------------------: |:--------: |:----------: |
+| ARSL-FCOS | 1% | **22.8** | 240 (87120) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi001.pdparams) | [config](./arsl_fcos_r50_fpn_coco_semi001.yml) |
+| ARSL-FCOS | 5% | **33.1** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi005.pdparams) | [config](./arsl_fcos_r50_fpn_coco_semi005.yml ) |
+| ARSL-FCOS | 10% | **36.9** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi010.pdparams) | [config](./arsl_fcos_r50_fpn_coco_semi010.yml ) |
+| ARSL-FCOS | 10% | **38.5(LSJ)** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_semi010_lsj.pdparams) | [config](./arsl_fcos_r50_fpn_coco_semi010_lsj.yml ) |
+| ARSL-FCOS | full(100%) | **45.1** | 240 (174240) | [download](https://paddledet.bj.bcebos.com/models/arsl_fcos_r50_fpn_coco_full.pdparams) | [config](./arsl_fcos_r50_fpn_coco_full.yml ) |
+
+
+
+## 使用说明
+
+仅训练时必须使用半监督检测的配置文件去训练,评估、预测、部署也可以按基础检测器的配置文件去执行。
+
+### 训练
+
+```bash
+# 单卡训练 (不推荐,需按线性比例相应地调整学习率)
+CUDA_VISIBLE_DEVICES=0 python tools/train.py -c configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml --eval
+
+# 多卡训练
+python -m paddle.distributed.launch --log_dir=arsl_fcos_r50_fpn_coco_semi010/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml --eval
+```
+
+### 评估
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml -o weights=output/arsl_fcos_r50_fpn_coco_semi010/model_final.pdparams
+```
+
+### 预测
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml -o weights=output/arsl_fcos_r50_fpn_coco_semi010/model_final.pdparams --infer_img=demo/000000014439.jpg
+```
+
+
+## 引用
+
+```
+
+```
diff --git a/configs/semi_det/arsl/_base_/arsl_fcos_r50_fpn.yml b/configs/semi_det/arsl/_base_/arsl_fcos_r50_fpn.yml
new file mode 100644
index 00000000000..95733bc871a
--- /dev/null
+++ b/configs/semi_det/arsl/_base_/arsl_fcos_r50_fpn.yml
@@ -0,0 +1,56 @@
+architecture: ARSL_FCOS
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+ARSL_FCOS:
+ backbone: ResNet
+ neck: FPN
+ fcos_head: FCOSHead_ARSL
+ fcos_cr_loss: FCOSLossCR
+
+
+ResNet:
+ depth: 50
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [1,2,3]
+ num_stages: 4
+
+FPN:
+ out_channel: 256
+ spatial_scales: [0.125, 0.0625, 0.03125]
+ extra_stage: 2
+ has_extra_convs: true
+ use_c5: false
+
+FCOSHead_ARSL:
+ fcos_feat:
+ name: FCOSFeat
+ feat_in: 256
+ feat_out: 256
+ num_convs: 4
+ norm_type: "gn"
+ use_dcn: false
+ fpn_stride: [8, 16, 32, 64, 128]
+ prior_prob: 0.01
+ norm_reg_targets: True
+ centerness_on_reg: True
+ fcos_loss:
+ name: FCOSLossMILC
+ loss_alpha: 0.25
+ loss_gamma: 2.0
+ iou_loss_type: "giou"
+ reg_weights: 1.0
+ nms:
+ name: MultiClassNMS
+ nms_top_k: 1000
+ keep_top_k: 100
+ score_threshold: 0.025
+ nms_threshold: 0.6
+
+
+FCOSLossCR:
+ iou_loss_type: "giou"
+ cls_weight: 2.0
+ reg_weight: 2.0
+ iou_weight: 0.5
+ hard_neg_mining_flag: true
diff --git a/configs/semi_det/arsl/_base_/arsl_fcos_reader.yml b/configs/semi_det/arsl/_base_/arsl_fcos_reader.yml
new file mode 100644
index 00000000000..30dddffcb97
--- /dev/null
+++ b/configs/semi_det/arsl/_base_/arsl_fcos_reader.yml
@@ -0,0 +1,55 @@
+worker_num: 2
+SemiTrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 1}
+ - RandomFlip: {}
+ weak_aug:
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ strong_aug:
+ - StrongAugImage: {transforms: [
+ RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1},
+ RandomErasingCrop: {},
+ RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]},
+ RandomGrayscale: {prob: 0.2},
+ ]}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ sup_batch_transforms:
+ - Permute: {}
+ - PadBatch: {pad_to_stride: 32}
+ - Gt2FCOSTarget:
+ object_sizes_boundary: [64, 128, 256, 512]
+ center_sampling_radius: 1.5
+ downsample_ratios: [8, 16, 32, 64, 128]
+ num_shift: 0. # default 0.5
+ multiply_strides_reg_targets: False
+ norm_reg_targets: True
+ unsup_batch_transforms:
+ - Permute: {}
+ - PadBatch: {pad_to_stride: 32}
+ sup_batch_size: 2
+ unsup_batch_size: 2
+ shuffle: True
+ drop_last: True
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 1
+
+
+TestReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ - Permute: {}
+ batch_transforms:
+ - PadBatch: {pad_to_stride: 32}
+ batch_size: 1
diff --git a/configs/semi_det/arsl/_base_/optimizer_360k.yml b/configs/semi_det/arsl/_base_/optimizer_360k.yml
new file mode 100644
index 00000000000..99072de55ee
--- /dev/null
+++ b/configs/semi_det/arsl/_base_/optimizer_360k.yml
@@ -0,0 +1,29 @@
+epoch: 120 # employ iter to control schedule
+LearningRate:
+ base_lr: 0.02 # 0.02 for 8*(4+4) batch
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [3000] # do not decay lr
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 1000
+
+max_iter: 360000 # 360k for 32 batch, 720k for 16 batch
+epoch_iter: 1000 # set epoch_iter for saving checkpoint and eval
+optimize_rate: 1
+SEMISUPNET:
+  BBOX_THRESHOLD: 0.5 # not used
+ TEACHER_UPDATE_ITER: 1
+ BURN_UP_STEP: 30000
+ EMA_KEEP_RATE: 0.9996
+ UNSUP_LOSS_WEIGHT: 1.0 # detailed weights for cls and loc task can be seen in cr_loss
+ PSEUDO_WARM_UP_STEPS: 2000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/semi_det/arsl/_base_/optimizer_90k.yml b/configs/semi_det/arsl/_base_/optimizer_90k.yml
new file mode 100644
index 00000000000..623d7f33e1f
--- /dev/null
+++ b/configs/semi_det/arsl/_base_/optimizer_90k.yml
@@ -0,0 +1,30 @@
+epoch: 30 # employ iter to control schedule
+LearningRate:
+ base_lr: 0.02 # 0.02 for 8*(4+4) batch
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones: [300] # do not decay lr
+ - !LinearWarmup
+ start_factor: 0.3333333333333333
+ steps: 1000
+
+max_iter: 90000 # 90k for 32 batch, 180k for 16 batch
+epoch_iter: 1000 # set epoch_iter for saving checkpoint and eval
+# update student params according to loss_grad every X iter.
+optimize_rate: 1
+SEMISUPNET:
+ BBOX_THRESHOLD: 0.5 # not used
+ TEACHER_UPDATE_ITER: 1
+ BURN_UP_STEP: 9000
+ EMA_KEEP_RATE: 0.9996
+ UNSUP_LOSS_WEIGHT: 1.0 # detailed weights for cls and loc task can be seen in cr_loss
+ PSEUDO_WARM_UP_STEPS: 2000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0001
+ type: L2
diff --git a/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_full.yml b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_full.yml
new file mode 100644
index 00000000000..a868aaf7778
--- /dev/null
+++ b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_full.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+ '../_base_/coco_detection_full.yml',
+ '../../runtime.yml',
+ '_base_/arsl_fcos_r50_fpn.yml',
+ '_base_/optimizer_360k.yml',
+ '_base_/arsl_fcos_reader.yml',
+]
+
+weights: output/fcos_r50_fpn_arsl_360k_coco_full/model_final
+
+#semi detector type
+ssod_method: ARSL
diff --git a/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi001.yml b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi001.yml
new file mode 100644
index 00000000000..136483e5432
--- /dev/null
+++ b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi001.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+ '../_base_/coco_detection_percent_1.yml',
+ '../../runtime.yml',
+ '_base_/arsl_fcos_r50_fpn.yml',
+ '_base_/optimizer_90k.yml',
+ '_base_/arsl_fcos_reader.yml',
+]
+
+weights: output/arsl_fcos_r50_fpn_coco_semi001/model_final
+
+#semi detector type
+ssod_method: ARSL
diff --git a/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi005.yml b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi005.yml
new file mode 100644
index 00000000000..7c2a779470a
--- /dev/null
+++ b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi005.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+ '../_base_/coco_detection_percent_5.yml',
+ '../../runtime.yml',
+ '_base_/arsl_fcos_r50_fpn.yml',
+ '_base_/optimizer_90k.yml',
+ '_base_/arsl_fcos_reader.yml',
+]
+
+weights: output/arsl_fcos_r50_fpn_coco_semi005/model_final
+
+#semi detector type
+ssod_method: ARSL
diff --git a/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml
new file mode 100644
index 00000000000..7abfa59d66a
--- /dev/null
+++ b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+ '../_base_/coco_detection_percent_10.yml',
+ '../../runtime.yml',
+ '_base_/arsl_fcos_r50_fpn.yml',
+ '_base_/optimizer_360k.yml',
+ '_base_/arsl_fcos_reader.yml',
+]
+
+weights: output/arsl_fcos_r50_fpn_coco_semi010/model_final
+
+#semi detector type
+ssod_method: ARSL
diff --git a/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010_lsj.yml b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010_lsj.yml
new file mode 100644
index 00000000000..258a7a8e4a6
--- /dev/null
+++ b/configs/semi_det/arsl/arsl_fcos_r50_fpn_coco_semi010_lsj.yml
@@ -0,0 +1,47 @@
+_BASE_: [
+ '../_base_/coco_detection_percent_10.yml',
+ '../../runtime.yml',
+ '_base_/arsl_fcos_r50_fpn.yml',
+ '_base_/optimizer_360k.yml',
+ '_base_/arsl_fcos_reader.yml',
+]
+
+weights: output/arsl_fcos_r50_fpn_coco_semi010/model_final
+
+#semi detector type
+ssod_method: ARSL
+
+worker_num: 2
+SemiTrainReader:
+ sample_transforms:
+ - Decode: {}
+ # large-scale jittering
+ - RandomResize: {target_size: [[400, 1333], [1200, 1333]], keep_ratio: True, interp: 1, random_range: True}
+ - RandomFlip: {}
+ weak_aug:
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ strong_aug:
+ - StrongAugImage: {transforms: [
+ RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1},
+ RandomErasingCrop: {},
+ RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]},
+ RandomGrayscale: {prob: 0.2},
+ ]}
+ - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+ sup_batch_transforms:
+ - Permute: {}
+ - PadBatch: {pad_to_stride: 32}
+ - Gt2FCOSTarget:
+ object_sizes_boundary: [64, 128, 256, 512]
+ center_sampling_radius: 1.5
+ downsample_ratios: [8, 16, 32, 64, 128]
+ num_shift: 0. # default 0.5
+ multiply_strides_reg_targets: False
+ norm_reg_targets: True
+ unsup_batch_transforms:
+ - Permute: {}
+ - PadBatch: {pad_to_stride: 32}
+ sup_batch_size: 2
+ unsup_batch_size: 2
+ shuffle: True
+ drop_last: True
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 730b99f2828..55890a979ec 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -394,11 +394,11 @@ def register_metrics(self, metrics):
"metrics shoule be instances of subclass of Metric"
self._metrics.extend(metrics)
- def load_weights(self, weights):
+ def load_weights(self, weights, ARSL_eval=False):
if self.is_loaded_weights:
return
self.start_epoch = 0
- load_pretrain_weight(self.model, weights)
+ load_pretrain_weight(self.model, weights, ARSL_eval)
logger.debug("Load weights {} to start training".format(weights))
def load_weights_sde(self, det_weights, reid_weights):
@@ -985,8 +985,10 @@ def setup_metrics_for_loader():
for step_id, data in enumerate(tqdm(loader)):
self.status['step_id'] = step_id
# forward
- outs = self.model(data)
-
+ if hasattr(self.model, 'modelTeacher'):
+ outs = self.model.modelTeacher(data)
+ else:
+ outs = self.model(data)
for _m in metrics:
_m.update(data, outs)
diff --git a/ppdet/engine/trainer_ssod.py b/ppdet/engine/trainer_ssod.py
index ef2409b09b4..ac39c9a97d8 100644
--- a/ppdet/engine/trainer_ssod.py
+++ b/ppdet/engine/trainer_ssod.py
@@ -16,6 +16,7 @@
from __future__ import division
from __future__ import print_function
+import os
import copy
import time
import typing
@@ -26,18 +27,20 @@
import paddle.distributed as dist
from paddle.distributed import fleet
from ppdet.optimizer import ModelEMA, SimpleModelEMA
-
from ppdet.core.workspace import create
-from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
+from ppdet.utils.checkpoint import load_weight, load_pretrain_weight, save_model
import ppdet.utils.stats as stats
from ppdet.utils import profiler
from ppdet.modeling.ssod.utils import align_weak_strong_shape
from .trainer import Trainer
-
from ppdet.utils.logger import setup_logger
+from paddle.static import InputSpec
+from ppdet.engine.export_utils import _dump_infer_config, _prune_input_spec
+MOT_ARCH = ['JDE', 'FairMOT', 'DeepSORT', 'ByteTrack', 'CenterTrack']
+
logger = setup_logger('ppdet.engine')
-__all__ = ['Trainer_DenseTeacher']
+__all__ = ['Trainer_DenseTeacher', 'Trainer_ARSL']
class Trainer_DenseTeacher(Trainer):
@@ -199,11 +202,6 @@ def train(self, validate=False):
self.status['data_time'] = stats.SmoothedValue(
self.cfg.log_iter, fmt='{avg:.4f}')
self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
-
- if self.cfg.get('print_flops', False):
- flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
- self.dataset, self.cfg.worker_num)
- self._flops(flops_loader)
profiler_options = self.cfg.get('profiler_options', None)
self._compose_callback.on_train_begin(self.status)
@@ -466,6 +464,365 @@ def _eval_with_loader(self, loader):
self.status['sample_num'] = sample_num
self.status['cost_time'] = time.time() - tic
+ # accumulate metric to log out
+ for metric in self._metrics:
+ metric.accumulate()
+ metric.log()
+ self._compose_callback.on_epoch_end(self.status)
+ self._reset_metrics()
+
+
+class Trainer_ARSL(Trainer):
+ def __init__(self, cfg, mode='train'):
+ self.cfg = cfg
+ assert mode.lower() in ['train', 'eval', 'test'], \
+ "mode should be 'train', 'eval' or 'test'"
+ self.mode = mode.lower()
+ self.optimizer = None
+ self.is_loaded_weights = False
+ capital_mode = self.mode.capitalize()
+ self.use_ema = False
+ self.dataset = self.cfg['{}Dataset'.format(capital_mode)] = create(
+ '{}Dataset'.format(capital_mode))()
+ if self.mode == 'train':
+ self.dataset_unlabel = self.cfg['UnsupTrainDataset'] = create(
+ 'UnsupTrainDataset')
+ self.loader = create('SemiTrainReader')(
+ self.dataset, self.dataset_unlabel, cfg.worker_num)
+
+ # build model
+ if 'model' not in self.cfg:
+ self.student_model = create(cfg.architecture)
+ self.teacher_model = create(cfg.architecture)
+ self.model = EnsembleTSModel(self.teacher_model, self.student_model)
+ else:
+ self.model = self.cfg.model
+ self.is_loaded_weights = True
+ # save path for burn-in model
+ self.base_path = cfg.get('weights')
+ self.base_path = os.path.dirname(self.base_path)
+
+ # EvalDataset build with BatchSampler to evaluate in single device
+ # TODO: multi-device evaluate
+ if self.mode == 'eval':
+ self._eval_batch_sampler = paddle.io.BatchSampler(
+ self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
+ self.loader = create('{}Reader'.format(self.mode.capitalize()))(
+ self.dataset, cfg.worker_num, self._eval_batch_sampler)
+ # TestDataset build after user set images, skip loader creation here
+
+ self.start_epoch = 0
+ self.end_epoch = 0 if 'epoch' not in cfg else cfg.epoch
+ self.epoch_iter = self.cfg.epoch_iter # set fixed iter in each epoch to control checkpoint
+
+ # build optimizer in train mode
+ if self.mode == 'train':
+ steps_per_epoch = self.epoch_iter
+ self.lr = create('LearningRate')(steps_per_epoch)
+ self.optimizer = create('OptimizerBuilder')(self.lr,
+ self.model.modelStudent)
+
+ self._nranks = dist.get_world_size()
+ self._local_rank = dist.get_rank()
+
+ self.status = {}
+
+ # initial default callbacks
+ self._init_callbacks()
+
+ # initial default metrics
+ self._init_metrics()
+ self._reset_metrics()
+ self.iter = 0
+
+ def resume_weights(self, weights):
+ # support Distill resume weights
+ if hasattr(self.model, 'student_model'):
+ self.start_epoch = load_weight(self.model.student_model, weights,
+ self.optimizer)
+ else:
+ self.start_epoch = load_weight(self.model, weights, self.optimizer)
+ logger.debug("Resume weights of epoch {}".format(self.start_epoch))
+
+ def train(self, validate=False):
+ assert self.mode == 'train', "Model not in 'train' mode"
+ Init_mark = False
+
+ # if validation in training is enabled, metrics should be re-init
+ if validate:
+ self._init_metrics(validate=validate)
+ self._reset_metrics()
+
+ if self.cfg.get('fleet', False):
+ self.model.modelStudent = fleet.distributed_model(
+ self.model.modelStudent)
+ self.optimizer = fleet.distributed_optimizer(self.optimizer)
+ elif self._nranks > 1:
+ find_unused_parameters = self.cfg[
+ 'find_unused_parameters'] if 'find_unused_parameters' in self.cfg else False
+ self.model.modelStudent = paddle.DataParallel(
+ self.model.modelStudent,
+ find_unused_parameters=find_unused_parameters)
+
+ # set fixed iter in each epoch to control checkpoint
+ self.status.update({
+ 'epoch_id': self.start_epoch,
+ 'step_id': 0,
+ 'steps_per_epoch': self.epoch_iter
+ })
+ print('338 Len of DataLoader: {}'.format(len(self.loader)))
+
+ self.status['batch_time'] = stats.SmoothedValue(
+ self.cfg.log_iter, fmt='{avg:.4f}')
+ self.status['data_time'] = stats.SmoothedValue(
+ self.cfg.log_iter, fmt='{avg:.4f}')
+ self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
+
+ self._compose_callback.on_train_begin(self.status)
+
+ epoch_id = self.start_epoch
+ self.iter = self.start_epoch * self.epoch_iter
+ # use iter rather than epoch to control training schedule
+ while self.iter < self.cfg.max_iter:
+ # epoch loop
+ self.status['mode'] = 'train'
+ self.status['epoch_id'] = epoch_id
+ self._compose_callback.on_epoch_begin(self.status)
+ self.loader.dataset_label.set_epoch(epoch_id)
+ self.loader.dataset_unlabel.set_epoch(epoch_id)
+ paddle.device.cuda.empty_cache() # clear GPU memory
+ # set model status
+ self.model.modelStudent.train()
+ self.model.modelTeacher.eval()
+ iter_tic = time.time()
+
+            # iter loop in each epoch
+ for step_id in range(self.epoch_iter):
+ data = next(self.loader)
+ self.status['data_time'].update(time.time() - iter_tic)
+ self.status['step_id'] = step_id
+ # profiler.add_profiler_step(profiler_options)
+ self._compose_callback.on_step_begin(self.status)
+
+ # model forward and calculate loss
+ loss_dict = self.run_step_full_semisup(data)
+
+ if (step_id + 1) % self.cfg.optimize_rate == 0:
+ self.optimizer.step()
+ self.optimizer.clear_grad()
+ curr_lr = self.optimizer.get_lr()
+ self.lr.step()
+
+ # update log status
+ self.status['learning_rate'] = curr_lr
+ if self._nranks < 2 or self._local_rank == 0:
+ self.status['training_staus'].update(loss_dict)
+ self.status['batch_time'].update(time.time() - iter_tic)
+ self._compose_callback.on_step_end(self.status)
+ self.iter += 1
+ iter_tic = time.time()
+
+ self._compose_callback.on_epoch_end(self.status)
+
+ if validate and (self._nranks < 2 or self._local_rank == 0) \
+ and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 \
+ or epoch_id == self.end_epoch - 1):
+ if not hasattr(self, '_eval_loader'):
+ # build evaluation dataset and loader
+ self._eval_dataset = self.cfg.EvalDataset
+ self._eval_batch_sampler = \
+ paddle.io.BatchSampler(
+ self._eval_dataset,
+ batch_size=self.cfg.EvalReader['batch_size'])
+ self._eval_loader = create('EvalReader')(
+ self._eval_dataset,
+ self.cfg.worker_num,
+ batch_sampler=self._eval_batch_sampler)
+ if validate and Init_mark == False:
+ Init_mark = True
+ self._init_metrics(validate=validate)
+ self._reset_metrics()
+ with paddle.no_grad():
+ self.status['save_best_model'] = True
+ # before burn-in stage, eval student. after burn-in stage, eval teacher
+ if self.iter <= self.cfg.SEMISUPNET['BURN_UP_STEP']:
+ print("start eval student model")
+ self._eval_with_loader(
+ self._eval_loader, mode="student")
+ else:
+ print("start eval teacher model")
+ self._eval_with_loader(
+ self._eval_loader, mode="teacher")
+
+ epoch_id += 1
+
+ self._compose_callback.on_train_end(self.status)
+
+ def merge_data(self, data1, data2):
+ data = copy.deepcopy(data1)
+ for k, v in data1.items():
+ if type(v) is paddle.Tensor:
+ data[k] = paddle.concat(x=[data[k], data2[k]], axis=0)
+ elif type(v) is list:
+ data[k].extend(data2[k])
+ return data
+
+ def run_step_full_semisup(self, data):
+ label_data_k, label_data_q, unlabel_data_k, unlabel_data_q = data
+ data_merge = self.merge_data(label_data_k, label_data_q)
+ loss_sup_dict = self.model.modelStudent(data_merge, branch="supervised")
+ loss_dict = {}
+ for key in loss_sup_dict.keys():
+ if key[:4] == "loss":
+ loss_dict[key] = loss_sup_dict[key] * 1
+ losses_sup = paddle.add_n(list(loss_dict.values()))
+ # norm loss when using gradient accumulation
+ losses_sup = losses_sup / self.cfg.optimize_rate
+ losses_sup.backward()
+
+ for key in loss_sup_dict.keys():
+ loss_dict[key + "_pseudo"] = paddle.to_tensor([0])
+ loss_dict["loss_tot"] = losses_sup
+ """
+ semi-supervised training after burn-in stage
+ """
+ if self.iter >= self.cfg.SEMISUPNET['BURN_UP_STEP']:
+ # init teacher model with burn-up weight
+ if self.iter == self.cfg.SEMISUPNET['BURN_UP_STEP']:
+ print(
+ 'Starting semi-supervised learning and load the teacher model.'
+ )
+ self._update_teacher_model(keep_rate=0.00)
+ # save burn-in model
+ if dist.get_world_size() < 2 or dist.get_rank() == 0:
+ print('saving burn-in model.')
+ save_name = 'burnIn'
+ epoch_id = self.iter // self.epoch_iter
+ save_model(self.model, self.optimizer, self.base_path,
+ save_name, epoch_id)
+ # Update teacher model with EMA
+ elif (self.iter + 1) % self.cfg.optimize_rate == 0:
+ self._update_teacher_model(
+ keep_rate=self.cfg.SEMISUPNET['EMA_KEEP_RATE'])
+
+ #warm-up weight for pseudo loss
+ pseudo_weight = self.cfg.SEMISUPNET['UNSUP_LOSS_WEIGHT']
+ pseudo_warmup_iter = self.cfg.SEMISUPNET['PSEUDO_WARM_UP_STEPS']
+ temp = self.iter - self.cfg.SEMISUPNET['BURN_UP_STEP']
+ if temp <= pseudo_warmup_iter:
+ pseudo_weight *= (temp / pseudo_warmup_iter)
+
+ # get teacher predictions on weak-augmented unlabeled data
+ with paddle.no_grad():
+ teacher_pred = self.model.modelTeacher(
+ unlabel_data_k, branch='semi_supervised')
+
+ # calculate unsupervised loss on strong-augmented unlabeled data
+ loss_unsup_dict = self.model.modelStudent(
+ unlabel_data_q,
+ branch="semi_supervised",
+ teacher_prediction=teacher_pred, )
+
+ for key in loss_unsup_dict.keys():
+ if key[-6:] == "pseudo":
+ loss_unsup_dict[key] = loss_unsup_dict[key] * pseudo_weight
+ losses_unsup = paddle.add_n(list(loss_unsup_dict.values()))
+ # norm loss when using gradient accumulation
+ losses_unsup = losses_unsup / self.cfg.optimize_rate
+ losses_unsup.backward()
+
+ loss_dict.update(loss_unsup_dict)
+ loss_dict["loss_tot"] += losses_unsup
+ return loss_dict
+
+ def export(self, output_dir='output_inference'):
+ self.model.eval()
+ model_name = os.path.splitext(os.path.split(self.cfg.filename)[-1])[0]
+ save_dir = os.path.join(output_dir, model_name)
+ if not os.path.exists(save_dir):
+ os.makedirs(save_dir)
+ image_shape = None
+ if self.cfg.architecture in MOT_ARCH:
+ test_reader_name = 'TestMOTReader'
+ else:
+ test_reader_name = 'TestReader'
+ if 'inputs_def' in self.cfg[test_reader_name]:
+ inputs_def = self.cfg[test_reader_name]['inputs_def']
+ image_shape = inputs_def.get('image_shape', None)
+ # set image_shape=[3, -1, -1] as default
+ if image_shape is None:
+ image_shape = [3, -1, -1]
+
+ self.model.modelTeacher.eval()
+ if hasattr(self.model.modelTeacher, 'deploy'):
+ self.model.modelTeacher.deploy = True
+
+ # Save infer cfg
+ _dump_infer_config(self.cfg,
+ os.path.join(save_dir, 'infer_cfg.yml'), image_shape,
+ self.model.modelTeacher)
+
+ input_spec = [{
+ "image": InputSpec(
+ shape=[None] + image_shape, name='image'),
+ "im_shape": InputSpec(
+ shape=[None, 2], name='im_shape'),
+ "scale_factor": InputSpec(
+ shape=[None, 2], name='scale_factor')
+ }]
+ if self.cfg.architecture == 'DeepSORT':
+ input_spec[0].update({
+ "crops": InputSpec(
+ shape=[None, 3, 192, 64], name='crops')
+ })
+
+ static_model = paddle.jit.to_static(
+ self.model.modelTeacher, input_spec=input_spec)
+ # NOTE: dy2st do not pruned program, but jit.save will prune program
+ # input spec, prune input spec here and save with pruned input spec
+ pruned_input_spec = _prune_input_spec(input_spec,
+ static_model.forward.main_program,
+ static_model.forward.outputs)
+
+ # dy2st and save model
+ if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
+ paddle.jit.save(
+ static_model,
+ os.path.join(save_dir, 'model'),
+ input_spec=pruned_input_spec)
+ else:
+ self.cfg.slim.save_quantized_model(
+ self.model.modelTeacher,
+ os.path.join(save_dir, 'model'),
+ input_spec=pruned_input_spec)
+ logger.info("Export model and saved in {}".format(save_dir))
+
+ def _eval_with_loader(self, loader, mode="teacher"):
+ sample_num = 0
+ tic = time.time()
+ self._compose_callback.on_epoch_begin(self.status)
+ self.status['mode'] = 'eval'
+ # self.model.eval()
+ self.model.modelTeacher.eval()
+ self.model.modelStudent.eval()
+ for step_id, data in enumerate(loader):
+ self.status['step_id'] = step_id
+ self._compose_callback.on_step_begin(self.status)
+ if mode == "teacher":
+ outs = self.model.modelTeacher(data)
+ else:
+ outs = self.model.modelStudent(data)
+
+ # update metrics
+ for metric in self._metrics:
+ metric.update(data, outs)
+
+ sample_num += data['im_id'].numpy().shape[0]
+ self._compose_callback.on_step_end(self.status)
+
+ self.status['sample_num'] = sample_num
+ self.status['cost_time'] = time.time() - tic
+
# accumulate metric to log out
for metric in self._metrics:
metric.accumulate()
@@ -473,3 +830,29 @@ def _eval_with_loader(self, loader):
self._compose_callback.on_epoch_end(self.status)
# reset metric states for metric may performed multiple times
self._reset_metrics()
+
+ def evaluate(self):
+ with paddle.no_grad():
+ self._eval_with_loader(self.loader)
+
+ @paddle.no_grad()
+ def _update_teacher_model(self, keep_rate=0.996):
+ student_model_dict = copy.deepcopy(self.model.modelStudent.state_dict())
+ new_teacher_dict = dict()
+ for key, value in self.model.modelTeacher.state_dict().items():
+ if key in student_model_dict.keys():
+ v = student_model_dict[key] * (1 - keep_rate
+ ) + value * keep_rate
+ v.stop_gradient = True
+ new_teacher_dict[key] = v
+ else:
+ raise Exception("{} is not found in student model".format(key))
+
+ self.model.modelTeacher.set_dict(new_teacher_dict)
+
+
+class EnsembleTSModel(nn.Layer):
+ def __init__(self, modelTeacher, modelStudent):
+ super(EnsembleTSModel, self).__init__()
+ self.modelTeacher = modelTeacher
+ self.modelStudent = modelStudent
diff --git a/ppdet/modeling/architectures/__init__.py b/ppdet/modeling/architectures/__init__.py
index 4c6c5ed0ac1..eb5ff75c2f9 100644
--- a/ppdet/modeling/architectures/__init__.py
+++ b/ppdet/modeling/architectures/__init__.py
@@ -74,4 +74,4 @@
from .pose3d_metro import *
from .centertrack import *
from .queryinst import *
-from .keypoint_petr import *
+from .keypoint_petr import *
\ No newline at end of file
diff --git a/ppdet/modeling/architectures/fcos.py b/ppdet/modeling/architectures/fcos.py
index efebb6efb8a..8c338cabf72 100644
--- a/ppdet/modeling/architectures/fcos.py
+++ b/ppdet/modeling/architectures/fcos.py
@@ -16,10 +16,11 @@
from __future__ import division
from __future__ import print_function
+import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
-__all__ = ['FCOS']
+__all__ = ['FCOS', 'ARSL_FCOS']
@register
@@ -31,7 +32,7 @@ class FCOS(BaseArch):
backbone (object): backbone instance
neck (object): 'FPN' instance
fcos_head (object): 'FCOSHead' instance
- ssod_loss (object): 'SSODFCOSLoss' instance, only used for semi-det(ssod)
+ ssod_loss (object): 'SSODFCOSLoss' instance, only used for semi-det(ssod) by DenseTeacher
"""
__category__ = 'architecture'
@@ -94,3 +95,128 @@ def get_ssod_loss(self, student_head_outs, teacher_head_outs, train_cfg):
ssod_losses = self.ssod_loss(student_head_outs, teacher_head_outs,
train_cfg)
return ssod_losses
+
+
+@register
+class ARSL_FCOS(BaseArch):
+ """
+    FCOS ARSL network, see https://arxiv.org/abs/2303.14960
+
+ Args:
+ backbone (object): backbone instance
+ neck (object): 'FPN' instance
+ fcos_head (object): 'FCOSHead_ARSL' instance
+ fcos_cr_loss (object): 'FCOSLossCR' instance, only used for semi-det(ssod) by ARSL
+ """
+
+ __category__ = 'architecture'
+ __inject__ = ['fcos_cr_loss']
+
+ def __init__(self,
+ backbone,
+ neck,
+ fcos_head='FCOSHead_ARSL',
+ fcos_cr_loss='FCOSLossCR'):
+ super(ARSL_FCOS, self).__init__()
+ self.backbone = backbone
+ self.neck = neck
+ self.fcos_head = fcos_head
+ self.fcos_cr_loss = fcos_cr_loss
+
+ @classmethod
+ def from_config(cls, cfg, *args, **kwargs):
+ backbone = create(cfg['backbone'])
+
+ kwargs = {'input_shape': backbone.out_shape}
+ neck = create(cfg['neck'], **kwargs)
+
+ kwargs = {'input_shape': neck.out_shape}
+ fcos_head = create(cfg['fcos_head'], **kwargs)
+
+ # consistency regularization loss
+ fcos_cr_loss = create(cfg['fcos_cr_loss'])
+
+ return {
+ 'backbone': backbone,
+ 'neck': neck,
+ 'fcos_head': fcos_head,
+ 'fcos_cr_loss': fcos_cr_loss,
+ }
+
+ def forward(self, inputs, branch="supervised", teacher_prediction=None):
+ assert branch in ['supervised', 'semi_supervised'], \
+ print('In ARSL, type must be supervised or semi_supervised.')
+
+ if self.data_format == 'NHWC':
+ image = inputs['image']
+ inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
+ self.inputs = inputs
+
+ if self.training:
+ if branch == "supervised":
+ out = self.get_loss()
+ else:
+ out = self.get_pseudo_loss(teacher_prediction)
+ else:
+ # norm test
+ if branch == "supervised":
+ out = self.get_pred()
+ # predict pseudo labels
+ else:
+ out = self.get_pseudo_pred()
+ return out
+
+ # model forward
+ def model_forward(self):
+ body_feats = self.backbone(self.inputs)
+ fpn_feats = self.neck(body_feats)
+ fcos_head_outs = self.fcos_head(fpn_feats)
+ return fcos_head_outs
+
+ # supervised loss for labeled data
+ def get_loss(self):
+ loss = {}
+ tag_labels, tag_bboxes, tag_centerness = [], [], []
+ for i in range(len(self.fcos_head.fpn_stride)):
+ # labels, reg_target, centerness
+ k_lbl = 'labels{}'.format(i)
+ if k_lbl in self.inputs:
+ tag_labels.append(self.inputs[k_lbl])
+ k_box = 'reg_target{}'.format(i)
+ if k_box in self.inputs:
+ tag_bboxes.append(self.inputs[k_box])
+ k_ctn = 'centerness{}'.format(i)
+ if k_ctn in self.inputs:
+ tag_centerness.append(self.inputs[k_ctn])
+ fcos_head_outs = self.model_forward()
+ loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
+ tag_bboxes, tag_centerness)
+ loss.update(loss_fcos)
+ return loss
+
+ # unsupervised loss for unlabeled data
+ def get_pseudo_loss(self, teacher_prediction):
+ loss = {}
+ fcos_head_outs = self.model_forward()
+ unsup_loss = self.fcos_cr_loss(fcos_head_outs, teacher_prediction)
+ for k in unsup_loss.keys():
+ loss[k + '_pseudo'] = unsup_loss[k]
+ return loss
+
+ # get detection results for test, decode and rescale the results to original size
+ def get_pred(self):
+ fcos_head_outs = self.model_forward()
+ scale_factor = self.inputs['scale_factor']
+ bbox_pred, bbox_num = self.fcos_head.post_process(fcos_head_outs,
+ scale_factor)
+ output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
+ return output
+
+ # generate pseudo labels to guide student
+ def get_pseudo_pred(self):
+ fcos_head_outs = self.model_forward()
+ pred_cls, pred_loc, pred_iou = fcos_head_outs[1:] # 0 is locations
+ for lvl, _ in enumerate(pred_loc):
+ pred_loc[lvl] = pred_loc[lvl] / self.fcos_head.fpn_stride[lvl]
+
+ return [pred_cls, pred_loc, pred_iou, self.fcos_head.fpn_stride]
diff --git a/ppdet/modeling/heads/fcos_head.py b/ppdet/modeling/heads/fcos_head.py
index d6dab8c8d85..89c933fe531 100644
--- a/ppdet/modeling/heads/fcos_head.py
+++ b/ppdet/modeling/heads/fcos_head.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -26,7 +26,7 @@
from ppdet.core.workspace import register
from ppdet.modeling.layers import ConvNormLayer, MultiClassNMS
-__all__ = ['FCOSFeat', 'FCOSHead']
+__all__ = ['FCOSFeat', 'FCOSHead', 'FCOSHead_ARSL']
class ScaleReg(nn.Layer):
@@ -263,10 +263,23 @@ def forward(self, fpn_feats, targets=None):
centerness_list.append(centerness)
if targets is not None:
- self.is_teacher = targets.get('is_teacher', False)
+ self.is_teacher = targets.get('ARSL_teacher', False)
if self.is_teacher:
return [cls_logits_list, bboxes_reg_list, centerness_list]
+ if targets is not None:
+ self.is_student = targets.get('ARSL_student', False)
+ if self.is_student:
+ return [cls_logits_list, bboxes_reg_list, centerness_list]
+
+ if targets is not None:
+ self.is_teacher = targets.get('is_teacher', False)
+ if self.is_teacher:
+ return [
+ locations_list, cls_logits_list, bboxes_reg_list,
+ centerness_list
+ ]
+
if self.training and targets is not None:
get_data = targets.get('get_data', False)
if get_data:
@@ -361,3 +374,139 @@ def post_process(self, fcos_head_outs, scale_factor):
pred_scores = pred_scores.transpose([0, 2, 1])
bbox_pred, bbox_num, _ = self.nms(pred_bboxes, pred_scores)
return bbox_pred, bbox_num
+
+
+@register
+class FCOSHead_ARSL(FCOSHead):
+ """
+ FCOSHead of ARSL for semi-det(ssod)
+ Args:
+ fcos_feat (object): Instance of 'FCOSFeat'
+ num_classes (int): Number of classes
+ fpn_stride (list): The stride of each FPN Layer
+ prior_prob (float): Used to set the bias init for the class prediction layer
+ fcos_loss (object): Instance of 'FCOSLoss'
+ norm_reg_targets (bool): Normalization the regression target if true
+        centerness_on_reg (bool): The prediction of centerness on regression or classification branch
+ nms (object): Instance of 'MultiClassNMS'
+ trt (bool): Whether to use trt in nms of deploy
+ """
+ __inject__ = ['fcos_feat', 'fcos_loss', 'nms']
+ __shared__ = ['num_classes', 'trt']
+
+ def __init__(self,
+ num_classes=80,
+ fcos_feat='FCOSFeat',
+ fpn_stride=[8, 16, 32, 64, 128],
+ prior_prob=0.01,
+ multiply_strides_reg_targets=False,
+ norm_reg_targets=True,
+ centerness_on_reg=True,
+ num_shift=0.5,
+ sqrt_score=False,
+ fcos_loss='FCOSLossMILC',
+ nms='MultiClassNMS',
+ trt=False):
+ super(FCOSHead_ARSL, self).__init__()
+ self.fcos_feat = fcos_feat
+ self.num_classes = num_classes
+ self.fpn_stride = fpn_stride
+ self.prior_prob = prior_prob
+ self.fcos_loss = fcos_loss
+ self.norm_reg_targets = norm_reg_targets
+ self.centerness_on_reg = centerness_on_reg
+ self.multiply_strides_reg_targets = multiply_strides_reg_targets
+ self.num_shift = num_shift
+ self.nms = nms
+ if isinstance(self.nms, MultiClassNMS) and trt:
+ self.nms.trt = trt
+ self.sqrt_score = sqrt_score
+
+ conv_cls_name = "fcos_head_cls"
+ bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
+ self.fcos_head_cls = self.add_sublayer(
+ conv_cls_name,
+ nn.Conv2D(
+ in_channels=256,
+ out_channels=self.num_classes,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ weight_attr=ParamAttr(initializer=Normal(
+ mean=0., std=0.01)),
+ bias_attr=ParamAttr(
+ initializer=Constant(value=bias_init_value))))
+
+ conv_reg_name = "fcos_head_reg"
+ self.fcos_head_reg = self.add_sublayer(
+ conv_reg_name,
+ nn.Conv2D(
+ in_channels=256,
+ out_channels=4,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ weight_attr=ParamAttr(initializer=Normal(
+ mean=0., std=0.01)),
+ bias_attr=ParamAttr(initializer=Constant(value=0))))
+
+ conv_centerness_name = "fcos_head_centerness"
+ self.fcos_head_centerness = self.add_sublayer(
+ conv_centerness_name,
+ nn.Conv2D(
+ in_channels=256,
+ out_channels=1,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ weight_attr=ParamAttr(initializer=Normal(
+ mean=0., std=0.01)),
+ bias_attr=ParamAttr(initializer=Constant(value=0))))
+
+ self.scales_regs = []
+ for i in range(len(self.fpn_stride)):
+ lvl = int(math.log(int(self.fpn_stride[i]), 2))
+ feat_name = 'p{}_feat'.format(lvl)
+ scale_reg = self.add_sublayer(feat_name, ScaleReg())
+ self.scales_regs.append(scale_reg)
+
+ def forward(self, fpn_feats, targets=None):
+ assert len(fpn_feats) == len(
+ self.fpn_stride
+ ), "The size of fpn_feats is not equal to size of fpn_stride"
+ cls_logits_list = []
+ bboxes_reg_list = []
+ centerness_list = []
+ for scale_reg, fpn_stride, fpn_feat in zip(self.scales_regs,
+ self.fpn_stride, fpn_feats):
+ fcos_cls_feat, fcos_reg_feat = self.fcos_feat(fpn_feat)
+ cls_logits = self.fcos_head_cls(fcos_cls_feat)
+ bbox_reg = scale_reg(self.fcos_head_reg(fcos_reg_feat))
+ if self.centerness_on_reg:
+ centerness = self.fcos_head_centerness(fcos_reg_feat)
+ else:
+ centerness = self.fcos_head_centerness(fcos_cls_feat)
+ if self.norm_reg_targets:
+ bbox_reg = F.relu(bbox_reg)
+ if not self.training:
+ bbox_reg = bbox_reg * fpn_stride
+ else:
+ bbox_reg = paddle.exp(bbox_reg)
+ cls_logits_list.append(cls_logits)
+ bboxes_reg_list.append(bbox_reg)
+ centerness_list.append(centerness)
+
+ if not self.training:
+ locations_list = []
+ for fpn_stride, feature in zip(self.fpn_stride, fpn_feats):
+ location = self._compute_locations_by_level(fpn_stride, feature)
+ locations_list.append(location)
+
+ return locations_list, cls_logits_list, bboxes_reg_list, centerness_list
+ else:
+ return cls_logits_list, bboxes_reg_list, centerness_list
+
+ def get_loss(self, fcos_head_outs, tag_labels, tag_bboxes, tag_centerness):
+ cls_logits, bboxes_reg, centerness = fcos_head_outs
+ return self.fcos_loss(cls_logits, bboxes_reg, centerness, tag_labels,
+ tag_bboxes, tag_centerness)
diff --git a/ppdet/modeling/losses/fcos_loss.py b/ppdet/modeling/losses/fcos_loss.py
index b3eac7b4ecb..e9bbc27aaf2 100644
--- a/ppdet/modeling/losses/fcos_loss.py
+++ b/ppdet/modeling/losses/fcos_loss.py
@@ -21,8 +21,9 @@
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling import ops
+from functools import partial
-__all__ = ['FCOSLoss']
+__all__ = ['FCOSLoss', 'FCOSLossMILC', 'FCOSLossCR']
def flatten_tensor(inputs, channel_first=False):
@@ -261,3 +262,759 @@ def forward(self, cls_logits, bboxes_reg, centerness, tag_labels,
"loss_quality": paddle.sum(quality_loss),
}
return loss_all
+
+
+@register
+class FCOSLossMILC(FCOSLoss):
+ """
+ FCOSLossMILC for ARSL in semi-det(ssod)
+ Args:
+ loss_alpha (float): alpha in focal loss
+ loss_gamma (float): gamma in focal loss
+ iou_loss_type (str): location loss type, IoU/GIoU/LINEAR_IoU
+ reg_weights (float): weight for location loss
+ """
+
+ def __init__(self,
+ loss_alpha=0.25,
+ loss_gamma=2.0,
+ iou_loss_type="giou",
+ reg_weights=1.0):
+ super(FCOSLossMILC, self).__init__()
+ self.loss_alpha = loss_alpha
+ self.loss_gamma = loss_gamma
+ self.iou_loss_type = iou_loss_type
+ self.reg_weights = reg_weights
+
+ def iou_loss(self, pred, targets, weights=None, avg_factor=None):
+ """
+ Calculate the loss for location prediction
+ Args:
+ pred (Tensor): bounding boxes prediction
+ targets (Tensor): targets for positive samples
+ weights (Tensor): weights for each positive samples
+ Return:
+ loss (Tensor): location loss
+ """
+ plw = pred[:, 0]
+ pth = pred[:, 1]
+ prw = pred[:, 2]
+ pbh = pred[:, 3]
+
+ tlw = targets[:, 0]
+ tth = targets[:, 1]
+ trw = targets[:, 2]
+ tbh = targets[:, 3]
+ tlw.stop_gradient = True
+ trw.stop_gradient = True
+ tth.stop_gradient = True
+ tbh.stop_gradient = True
+
+ ilw = paddle.minimum(plw, tlw)
+ irw = paddle.minimum(prw, trw)
+ ith = paddle.minimum(pth, tth)
+ ibh = paddle.minimum(pbh, tbh)
+
+ clw = paddle.maximum(plw, tlw)
+ crw = paddle.maximum(prw, trw)
+ cth = paddle.maximum(pth, tth)
+ cbh = paddle.maximum(pbh, tbh)
+
+ area_predict = (plw + prw) * (pth + pbh)
+ area_target = (tlw + trw) * (tth + tbh)
+ area_inter = (ilw + irw) * (ith + ibh)
+ ious = (area_inter + 1.0) / (
+ area_predict + area_target - area_inter + 1.0)
+ ious = ious
+
+ if self.iou_loss_type.lower() == "linear_iou":
+ loss = 1.0 - ious
+ elif self.iou_loss_type.lower() == "giou":
+ area_uniou = area_predict + area_target - area_inter
+ area_circum = (clw + crw) * (cth + cbh) + 1e-7
+ giou = ious - (area_circum - area_uniou) / area_circum
+ loss = 1.0 - giou
+ elif self.iou_loss_type.lower() == "iou":
+ loss = 0.0 - paddle.log(ious)
+ else:
+ raise KeyError
+ if weights is not None:
+ loss = loss * weights
+ loss = paddle.sum(loss)
+ if avg_factor is not None:
+ loss = loss / avg_factor
+ return loss
+
+    # temp function: calculate iou between bbox and target
+ def _bbox_overlap_align(self, pred, targets):
+ assert pred.shape[0] == targets.shape[0], \
+ 'the pred should be aligned with target.'
+
+ plw = pred[:, 0]
+ pth = pred[:, 1]
+ prw = pred[:, 2]
+ pbh = pred[:, 3]
+
+ tlw = targets[:, 0]
+ tth = targets[:, 1]
+ trw = targets[:, 2]
+ tbh = targets[:, 3]
+
+ ilw = paddle.minimum(plw, tlw)
+ irw = paddle.minimum(prw, trw)
+ ith = paddle.minimum(pth, tth)
+ ibh = paddle.minimum(pbh, tbh)
+
+ area_predict = (plw + prw) * (pth + pbh)
+ area_target = (tlw + trw) * (tth + tbh)
+ area_inter = (ilw + irw) * (ith + ibh)
+ ious = (area_inter + 1.0) / (
+ area_predict + area_target - area_inter + 1.0)
+
+ return ious
+
+ def iou_based_soft_label_loss(self,
+ pred,
+ target,
+ alpha=0.75,
+ gamma=2.0,
+ iou_weighted=False,
+ implicit_iou=None,
+ avg_factor=None):
+ assert pred.shape == target.shape
+ pred = F.sigmoid(pred)
+ target = target.cast(pred.dtype)
+
+ if implicit_iou is not None:
+ pred = pred * implicit_iou
+
+ if iou_weighted:
+ focal_weight = (pred - target).abs().pow(gamma) * target * (target > 0.0).cast('float32') + \
+ alpha * (pred - target).abs().pow(gamma) * \
+ (target <= 0.0).cast('float32')
+ else:
+ focal_weight = (pred - target).abs().pow(gamma) * (target > 0.0).cast('float32') + \
+ alpha * (pred - target).abs().pow(gamma) * \
+ (target <= 0.0).cast('float32')
+
+ # focal loss
+ loss = F.binary_cross_entropy(
+ pred, target, reduction='none') * focal_weight
+ if avg_factor is not None:
+ loss = loss / avg_factor
+ return loss
+
+ def forward(self, cls_logits, bboxes_reg, centerness, tag_labels,
+ tag_bboxes, tag_center):
+ """
+ Calculate the loss for classification, location and centerness
+ Args:
+ cls_logits (list): list of Tensor, which is predicted
+ score for all anchor points with shape [N, M, C]
+ bboxes_reg (list): list of Tensor, which is predicted
+ offsets for all anchor points with shape [N, M, 4]
+ centerness (list): list of Tensor, which is predicted
+ centerness for all anchor points with shape [N, M, 1]
+ tag_labels (list): list of Tensor, which is category
+ targets for each anchor point
+ tag_bboxes (list): list of Tensor, which is bounding
+ boxes targets for positive samples
+ tag_center (list): list of Tensor, which is centerness
+ targets for positive samples
+ Return:
+ loss (dict): loss composed by classification loss, bounding box
+ """
+ cls_logits_flatten_list = []
+ bboxes_reg_flatten_list = []
+ centerness_flatten_list = []
+ tag_labels_flatten_list = []
+ tag_bboxes_flatten_list = []
+ tag_center_flatten_list = []
+ num_lvl = len(cls_logits)
+ for lvl in range(num_lvl):
+ cls_logits_flatten_list.append(
+ flatten_tensor(cls_logits[lvl], True))
+ bboxes_reg_flatten_list.append(
+ flatten_tensor(bboxes_reg[lvl], True))
+ centerness_flatten_list.append(
+ flatten_tensor(centerness[lvl], True))
+
+ tag_labels_flatten_list.append(
+ flatten_tensor(tag_labels[lvl], False))
+ tag_bboxes_flatten_list.append(
+ flatten_tensor(tag_bboxes[lvl], False))
+ tag_center_flatten_list.append(
+ flatten_tensor(tag_center[lvl], False))
+
+ cls_logits_flatten = paddle.concat(cls_logits_flatten_list, axis=0)
+ bboxes_reg_flatten = paddle.concat(bboxes_reg_flatten_list, axis=0)
+ centerness_flatten = paddle.concat(centerness_flatten_list, axis=0)
+
+ tag_labels_flatten = paddle.concat(tag_labels_flatten_list, axis=0)
+ tag_bboxes_flatten = paddle.concat(tag_bboxes_flatten_list, axis=0)
+ tag_center_flatten = paddle.concat(tag_center_flatten_list, axis=0)
+ tag_labels_flatten.stop_gradient = True
+ tag_bboxes_flatten.stop_gradient = True
+ tag_center_flatten.stop_gradient = True
+
+ # find positive index
+ mask_positive_bool = tag_labels_flatten > 0
+ mask_positive_bool.stop_gradient = True
+ mask_positive_float = paddle.cast(mask_positive_bool, dtype="float32")
+ mask_positive_float.stop_gradient = True
+
+ num_positive_fp32 = paddle.sum(mask_positive_float)
+ num_positive_fp32.stop_gradient = True
+ num_positive_int32 = paddle.cast(num_positive_fp32, dtype="int32")
+ num_positive_int32 = num_positive_int32 * 0 + 1
+ num_positive_int32.stop_gradient = True
+
+ # centerness target is used as reg weight
+ normalize_sum = paddle.sum(tag_center_flatten * mask_positive_float)
+ normalize_sum.stop_gradient = True
+
+ # 1. IoU-Based soft label loss
+ # calculate iou
+ with paddle.no_grad():
+ pos_ind = paddle.nonzero(
+ tag_labels_flatten.reshape([-1]) > 0).reshape([-1])
+ pos_pred = bboxes_reg_flatten[pos_ind]
+ pos_target = tag_bboxes_flatten[pos_ind]
+ bbox_iou = self._bbox_overlap_align(pos_pred, pos_target)
+ # pos labels
+ pos_labels = tag_labels_flatten[pos_ind].squeeze(1)
+ cls_target = paddle.zeros(cls_logits_flatten.shape)
+ cls_target[pos_ind, pos_labels - 1] = bbox_iou
+ cls_loss = self.iou_based_soft_label_loss(
+ cls_logits_flatten,
+ cls_target,
+ implicit_iou=F.sigmoid(centerness_flatten),
+ avg_factor=num_positive_fp32)
+
+ # 2. bboxes_reg: giou_loss
+ mask_positive_float = paddle.squeeze(mask_positive_float, axis=-1)
+ tag_center_flatten = paddle.squeeze(tag_center_flatten, axis=-1)
+ reg_loss = self._iou_loss(
+ bboxes_reg_flatten,
+ tag_bboxes_flatten,
+ mask_positive_float,
+ weights=tag_center_flatten)
+ reg_loss = reg_loss * mask_positive_float / normalize_sum
+
+ # 3. iou loss
+ pos_iou_pred = paddle.squeeze(centerness_flatten, axis=-1)[pos_ind]
+ loss_iou = ops.sigmoid_cross_entropy_with_logits(pos_iou_pred, bbox_iou)
+ loss_iou = loss_iou / num_positive_fp32 * 0.5
+
+ loss_all = {
+ "loss_cls": paddle.sum(cls_loss),
+ "loss_box": paddle.sum(reg_loss),
+ 'loss_iou': paddle.sum(loss_iou),
+ }
+
+ return loss_all
+
+
+# Concat multi-level feature maps by image
+def levels_to_images(mlvl_tensor):
+ batch_size = mlvl_tensor[0].shape[0]
+ batch_list = [[] for _ in range(batch_size)]
+ channels = mlvl_tensor[0].shape[1]
+ for t in mlvl_tensor:
+ t = t.transpose([0, 2, 3, 1])
+ t = t.reshape([batch_size, -1, channels])
+ for img in range(batch_size):
+ batch_list[img].append(t[img])
+ return [paddle.concat(item, axis=0) for item in batch_list]
+
+
+def multi_apply(func, *args, **kwargs):
+ """Apply function to a list of arguments.
+
+ Note:
+ This function applies the ``func`` to multiple inputs and
+ map the multiple outputs of the ``func`` into different
+ list. Each list contains the same type of outputs corresponding
+ to different inputs.
+
+ Args:
+ func (Function): A function that will be applied to a list of
+ arguments
+
+ Returns:
+ tuple(list): A tuple containing multiple list, each list contains \
+ a kind of returned results by the function
+ """
+ pfunc = partial(func, **kwargs) if kwargs else func
+ map_results = map(pfunc, *args)
+ return tuple(map(list, zip(*map_results)))
+
+
+@register
+class FCOSLossCR(FCOSLossMILC):
+ """
+ FCOSLoss of Consistency Regularization
+ """
+
+ def __init__(self,
+ iou_loss_type="giou",
+ cls_weight=2.0,
+ reg_weight=2.0,
+ iou_weight=0.5,
+ hard_neg_mining_flag=True):
+ super(FCOSLossCR, self).__init__()
+ self.iou_loss_type = iou_loss_type
+ self.cls_weight = cls_weight
+ self.reg_weight = reg_weight
+ self.iou_weight = iou_weight
+ self.hard_neg_mining_flag = hard_neg_mining_flag
+
+ def iou_loss(self, pred, targets, weights=None, avg_factor=None):
+ """
+ Calculate the loss for location prediction
+ Args:
+ pred (Tensor): bounding boxes prediction
+ targets (Tensor): targets for positive samples
+ weights (Tensor): weights for each positive samples
+ Return:
+ loss (Tensor): location loss
+ """
+ plw = pred[:, 0]
+ pth = pred[:, 1]
+ prw = pred[:, 2]
+ pbh = pred[:, 3]
+
+ tlw = targets[:, 0]
+ tth = targets[:, 1]
+ trw = targets[:, 2]
+ tbh = targets[:, 3]
+ tlw.stop_gradient = True
+ trw.stop_gradient = True
+ tth.stop_gradient = True
+ tbh.stop_gradient = True
+
+ ilw = paddle.minimum(plw, tlw)
+ irw = paddle.minimum(prw, trw)
+ ith = paddle.minimum(pth, tth)
+ ibh = paddle.minimum(pbh, tbh)
+
+ clw = paddle.maximum(plw, tlw)
+ crw = paddle.maximum(prw, trw)
+ cth = paddle.maximum(pth, tth)
+ cbh = paddle.maximum(pbh, tbh)
+
+ area_predict = (plw + prw) * (pth + pbh)
+ area_target = (tlw + trw) * (tth + tbh)
+ area_inter = (ilw + irw) * (ith + ibh)
+ ious = (area_inter + 1.0) / (
+ area_predict + area_target - area_inter + 1.0)
+ ious = ious
+
+ if self.iou_loss_type.lower() == "linear_iou":
+ loss = 1.0 - ious
+ elif self.iou_loss_type.lower() == "giou":
+ area_uniou = area_predict + area_target - area_inter
+ area_circum = (clw + crw) * (cth + cbh) + 1e-7
+ giou = ious - (area_circum - area_uniou) / area_circum
+ loss = 1.0 - giou
+ elif self.iou_loss_type.lower() == "iou":
+ loss = 0.0 - paddle.log(ious)
+ else:
+ raise KeyError
+ if weights is not None:
+ loss = loss * weights
+ loss = paddle.sum(loss)
+ if avg_factor is not None:
+ loss = loss / avg_factor
+ return loss
+
+    # calculate iou between bbox and target
+ def bbox_overlap_align(self, pred, targets):
+ assert pred.shape[0] == targets.shape[0], \
+ 'the pred should be aligned with target.'
+
+ plw = pred[:, 0]
+ pth = pred[:, 1]
+ prw = pred[:, 2]
+ pbh = pred[:, 3]
+
+ tlw = targets[:, 0]
+ tth = targets[:, 1]
+ trw = targets[:, 2]
+ tbh = targets[:, 3]
+
+ ilw = paddle.minimum(plw, tlw)
+ irw = paddle.minimum(prw, trw)
+ ith = paddle.minimum(pth, tth)
+ ibh = paddle.minimum(pbh, tbh)
+
+ area_predict = (plw + prw) * (pth + pbh)
+ area_target = (tlw + trw) * (tth + tbh)
+ area_inter = (ilw + irw) * (ith + ibh)
+ ious = (area_inter + 1.0) / (
+ area_predict + area_target - area_inter + 1.0)
+ return ious
+
+    # cls loss: iou-based soft label with joint iou
+ def quality_focal_loss(self,
+ stu_cls,
+ targets,
+ quality=None,
+ weights=None,
+ alpha=0.75,
+ gamma=2.0,
+ avg_factor='sum'):
+ stu_cls = F.sigmoid(stu_cls)
+ if quality is not None:
+ stu_cls = stu_cls * F.sigmoid(quality)
+
+ focal_weight = (stu_cls - targets).abs().pow(gamma) * (targets > 0.0).cast('float32') + \
+ alpha * (stu_cls - targets).abs().pow(gamma) * \
+ (targets <= 0.0).cast('float32')
+
+ loss = F.binary_cross_entropy(
+ stu_cls, targets, reduction='none') * focal_weight
+
+ if weights is not None:
+ loss = loss * weights.reshape([-1, 1])
+ loss = paddle.sum(loss)
+ if avg_factor is not None:
+ loss = loss / avg_factor
+ return loss
+
+ # generate points according to feature maps
+ def compute_locations_by_level(self, fpn_stride, h, w):
+ """
+ Compute locations of anchor points of each FPN layer
+ Return:
+ Anchor points locations of current FPN feature map
+ """
+ shift_x = paddle.arange(0, w * fpn_stride, fpn_stride)
+ shift_y = paddle.arange(0, h * fpn_stride, fpn_stride)
+ shift_x = paddle.unsqueeze(shift_x, axis=0)
+ shift_y = paddle.unsqueeze(shift_y, axis=1)
+ shift_x = paddle.expand(shift_x, shape=[h, w])
+ shift_y = paddle.expand(shift_y, shape=[h, w])
+ shift_x = paddle.reshape(shift_x, shape=[-1])
+ shift_y = paddle.reshape(shift_y, shape=[-1])
+ location = paddle.stack(
+ [shift_x, shift_y], axis=-1) + float(fpn_stride) / 2
+ return location
+
+ # decode bbox from ltrb to x1y1x2y2
+ def decode_bbox(self, ltrb, points):
+ assert ltrb.shape[0] == points.shape[0], \
+ "When decoding bbox in one image, the num of loc should be same with points."
+ bbox_decoding = paddle.stack(
+ [
+ points[:, 0] - ltrb[:, 0], points[:, 1] - ltrb[:, 1],
+ points[:, 0] + ltrb[:, 2], points[:, 1] + ltrb[:, 3]
+ ],
+ axis=1)
+ return bbox_decoding
+
+ # encode bbox from x1y1x2y2 to ltrb
+ def encode_bbox(self, bbox, points):
+ assert bbox.shape[0] == points.shape[0], \
+ "When encoding bbox in one image, the num of bbox should be same with points."
+ bbox_encoding = paddle.stack(
+ [
+ points[:, 0] - bbox[:, 0], points[:, 1] - bbox[:, 1],
+ bbox[:, 2] - points[:, 0], bbox[:, 3] - points[:, 1]
+ ],
+ axis=1)
+ return bbox_encoding
+
+ def calcualate_iou(self, gt_bbox, predict_bbox):
+ # bbox area
+ gt_area = (gt_bbox[:, 2] - gt_bbox[:, 0]) * \
+ (gt_bbox[:, 3] - gt_bbox[:, 1])
+ predict_area = (predict_bbox[:, 2] - predict_bbox[:, 0]) * \
+ (predict_bbox[:, 3] - predict_bbox[:, 1])
+        # overlap area
+ lt = paddle.fmax(gt_bbox[:, None, :2], predict_bbox[None, :, :2])
+ rb = paddle.fmin(gt_bbox[:, None, 2:], predict_bbox[None, :, 2:])
+ wh = paddle.clip(rb - lt, min=0)
+ overlap = wh[..., 0] * wh[..., 1]
+ # iou
+ iou = overlap / (gt_area[:, None] + predict_area[None, :] - overlap)
+ return iou
+
+ # select potential positives from hard negatives
+ def hard_neg_mining(self,
+ cls_score,
+ loc_ltrb,
+ quality,
+ pos_ind,
+ hard_neg_ind,
+ loc_mask,
+ loc_targets,
+ iou_thresh=0.6):
+ # get points locations and strides
+ points_list = []
+ strides_list = []
+ scale_list = []
+ scale = [0, 1, 2, 3, 4]
+ for fpn_scale, fpn_stride, HW in zip(scale, self.fpn_stride,
+ self.lvl_hw):
+ h, w = HW
+ lvl_points = self.compute_locations_by_level(fpn_stride, h, w)
+ points_list.append(lvl_points)
+ lvl_strides = paddle.full([h * w, 1], fpn_stride)
+ strides_list.append(lvl_strides)
+ lvl_scales = paddle.full([h * w, 1], fpn_scale)
+ scale_list.append(lvl_scales)
+ points = paddle.concat(points_list, axis=0)
+ strides = paddle.concat(strides_list, axis=0)
+ scales = paddle.concat(scale_list, axis=0)
+
+ # cls scores
+ cls_vals = F.sigmoid(cls_score) * F.sigmoid(quality)
+ max_vals = paddle.max(cls_vals, axis=-1)
+ class_ind = paddle.argmax(cls_vals, axis=-1)
+
+ ### calculate iou between positive and hard negative
+ # decode pos bbox
+ pos_cls = max_vals[pos_ind]
+ pos_loc = loc_ltrb[pos_ind].reshape([-1, 4])
+ pos_strides = strides[pos_ind]
+ pos_points = points[pos_ind].reshape([-1, 2])
+ pos_loc = pos_loc * pos_strides
+ pos_bbox = self.decode_bbox(pos_loc, pos_points)
+ pos_scales = scales[pos_ind]
+ # decode hard negative bbox
+ hard_neg_loc = loc_ltrb[hard_neg_ind].reshape([-1, 4])
+ hard_neg_strides = strides[hard_neg_ind]
+ hard_neg_points = points[hard_neg_ind].reshape([-1, 2])
+ hard_neg_loc = hard_neg_loc * hard_neg_strides
+ hard_neg_bbox = self.decode_bbox(hard_neg_loc, hard_neg_points)
+ hard_neg_scales = scales[hard_neg_ind]
+ # iou between pos bbox and hard negative bbox
+ hard_neg_pos_iou = self.calcualate_iou(hard_neg_bbox, pos_bbox)
+
+ ### select potential positives from hard negatives
+ # scale flag
+ scale_temp = paddle.abs(
+ pos_scales.reshape([-1])[None, :] - hard_neg_scales.reshape([-1])
+ [:, None])
+ scale_flag = (scale_temp <= 1.)
+ # iou flag
+ iou_flag = (hard_neg_pos_iou >= iou_thresh)
+ # same class flag
+ pos_class = class_ind[pos_ind]
+ hard_neg_class = class_ind[hard_neg_ind]
+ class_flag = pos_class[None, :] - hard_neg_class[:, None]
+ class_flag = (class_flag == 0)
+ # hard negative point inside positive bbox flag
+ ltrb_temp = paddle.stack(
+ [
+ hard_neg_points[:, None, 0] - pos_bbox[None, :, 0],
+ hard_neg_points[:, None, 1] - pos_bbox[None, :, 1],
+ pos_bbox[None, :, 2] - hard_neg_points[:, None, 0],
+ pos_bbox[None, :, 3] - hard_neg_points[:, None, 1]
+ ],
+ axis=-1)
+ inside_flag = ltrb_temp.min(axis=-1) > 0
+ # reset iou
+ valid_flag = (iou_flag & class_flag & inside_flag & scale_flag)
+ invalid_iou = paddle.zeros_like(hard_neg_pos_iou)
+ hard_neg_pos_iou = paddle.where(valid_flag, hard_neg_pos_iou,
+ invalid_iou)
+ pos_hard_neg_max_iou = hard_neg_pos_iou.max(axis=-1)
+        # select potential pos
+ potential_pos_ind = (pos_hard_neg_max_iou > 0.)
+ num_potential_pos = paddle.nonzero(potential_pos_ind).shape[0]
+ if num_potential_pos == 0:
+ return None
+
+        ### calculate loc target: aggregate all matching bboxes as the bbox targets of potential pos
+ # prepare data
+ potential_points = hard_neg_points[potential_pos_ind].reshape([-1, 2])
+ potential_strides = hard_neg_strides[potential_pos_ind]
+ potential_valid_flag = valid_flag[potential_pos_ind]
+ potential_pos_ind = hard_neg_ind[potential_pos_ind]
+
+ # get cls and box of matching positives
+ pos_cls = max_vals[pos_ind]
+ expand_pos_bbox = paddle.expand(
+ pos_bbox,
+ shape=[num_potential_pos, pos_bbox.shape[0], pos_bbox.shape[1]])
+ expand_pos_cls = paddle.expand(
+ pos_cls, shape=[num_potential_pos, pos_cls.shape[0]])
+ invalid_cls = paddle.zeros_like(expand_pos_cls)
+ expand_pos_cls = paddle.where(potential_valid_flag, expand_pos_cls,
+ invalid_cls)
+ expand_pos_cls = paddle.unsqueeze(expand_pos_cls, axis=-1)
+ # aggregate box based on cls_score
+ agg_bbox = (expand_pos_bbox * expand_pos_cls).sum(axis=1) \
+ / expand_pos_cls.sum(axis=1)
+ agg_ltrb = self.encode_bbox(agg_bbox, potential_points)
+ agg_ltrb = agg_ltrb / potential_strides
+
+ # loc target for all pos
+ loc_targets[potential_pos_ind] = agg_ltrb
+ loc_mask[potential_pos_ind] = 1.
+
+ return loc_mask, loc_targets
+
+ # get training targets
+ def get_targets_per_img(self, tea_cls, tea_loc, tea_iou, stu_cls, stu_loc,
+ stu_iou):
+
+ ### sample selection
+        # prepare data
+ tea_cls_scores = F.sigmoid(tea_cls) * F.sigmoid(tea_iou)
+ class_ind = paddle.argmax(tea_cls_scores, axis=-1)
+ max_vals = paddle.max(tea_cls_scores, axis=-1)
+ cls_mask = paddle.zeros_like(
+ max_vals
+ ) # set cls valid mask: pos is 1, hard_negative and negative are 0.
+ num_pos, num_hard_neg = 0, 0
+
+ # mean-std selection
+ # using nonzero to turn index from bool to int, because the index will be used to compose two-dim index in following.
+ # using squeeze rather than reshape to avoid errors when no score is larger than thresh.
+ candidate_ind = paddle.nonzero(max_vals >= 0.1).squeeze(axis=-1)
+ num_candidate = candidate_ind.shape[0]
+ if num_candidate > 0:
+ # pos thresh = mean + std to select pos samples
+ candidate_score = max_vals[candidate_ind]
+ candidate_score_mean = candidate_score.mean()
+ candidate_score_std = candidate_score.std()
+ pos_thresh = (candidate_score_mean + candidate_score_std).clip(
+ max=0.4)
+ # select pos
+ pos_ind = paddle.nonzero(max_vals >= pos_thresh).squeeze(axis=-1)
+ num_pos = pos_ind.shape[0]
+ # select hard negatives as potential pos
+ hard_neg_ind = (max_vals >= 0.1) & (max_vals < pos_thresh)
+ hard_neg_ind = paddle.nonzero(hard_neg_ind).squeeze(axis=-1)
+ num_hard_neg = hard_neg_ind.shape[0]
+ # if not positive, directly select top-10 as pos.
+ if (num_pos == 0):
+ num_pos = 10
+ _, pos_ind = paddle.topk(max_vals, k=num_pos)
+ cls_mask[pos_ind] = 1.
+
+ ### Consistency Regularization Training targets
+ # cls targets
+ pos_class_ind = class_ind[pos_ind]
+ cls_targets = paddle.zeros_like(tea_cls)
+ cls_targets[pos_ind, pos_class_ind] = tea_cls_scores[pos_ind,
+ pos_class_ind]
+ # hard negative cls target
+ if num_hard_neg != 0:
+ cls_targets[hard_neg_ind] = tea_cls_scores[hard_neg_ind]
+ # loc targets
+ loc_targets = paddle.zeros_like(tea_loc)
+ loc_targets[pos_ind] = tea_loc[pos_ind]
+ # iou targets
+ iou_targets = paddle.zeros(
+ shape=[tea_iou.shape[0]], dtype=tea_iou.dtype)
+ iou_targets[pos_ind] = F.sigmoid(
+ paddle.squeeze(
+ tea_iou, axis=-1)[pos_ind])
+
+ loc_mask = cls_mask.clone()
+ # select potential positive from hard negatives for loc_task training
+ if (num_hard_neg > 0) and self.hard_neg_mining_flag:
+ results = self.hard_neg_mining(tea_cls, tea_loc, tea_iou, pos_ind,
+ hard_neg_ind, loc_mask, loc_targets)
+ if results is not None:
+ loc_mask, loc_targets = results
+ loc_pos_ind = paddle.nonzero(loc_mask > 0.).squeeze(axis=-1)
+ iou_targets[loc_pos_ind] = F.sigmoid(
+ paddle.squeeze(
+ tea_iou, axis=-1)[loc_pos_ind])
+
+ return cls_mask, loc_mask, \
+ cls_targets, loc_targets, iou_targets
+
+ def forward(self, student_prediction, teacher_prediction):
+ stu_cls_lvl, stu_loc_lvl, stu_iou_lvl = student_prediction
+ tea_cls_lvl, tea_loc_lvl, tea_iou_lvl, self.fpn_stride = teacher_prediction
+
+ # H and W of level (used for aggregating targets)
+ self.lvl_hw = []
+ for t in tea_cls_lvl:
+ _, _, H, W = t.shape
+ self.lvl_hw.append([H, W])
+
+ # levels to images
+ stu_cls_img = levels_to_images(stu_cls_lvl)
+ stu_loc_img = levels_to_images(stu_loc_lvl)
+ stu_iou_img = levels_to_images(stu_iou_lvl)
+ tea_cls_img = levels_to_images(tea_cls_lvl)
+ tea_loc_img = levels_to_images(tea_loc_lvl)
+ tea_iou_img = levels_to_images(tea_iou_lvl)
+
+ with paddle.no_grad():
+ cls_mask, loc_mask, \
+ cls_targets, loc_targets, iou_targets = multi_apply(
+ self.get_targets_per_img,
+ tea_cls_img,
+ tea_loc_img,
+ tea_iou_img,
+ stu_cls_img,
+ stu_loc_img,
+ stu_iou_img
+ )
+
+        # flatten prediction
+ stu_cls = paddle.concat(stu_cls_img, axis=0)
+ stu_loc = paddle.concat(stu_loc_img, axis=0)
+ stu_iou = paddle.concat(stu_iou_img, axis=0)
+ # flatten targets
+ cls_mask = paddle.concat(cls_mask, axis=0)
+ loc_mask = paddle.concat(loc_mask, axis=0)
+ cls_targets = paddle.concat(cls_targets, axis=0)
+ loc_targets = paddle.concat(loc_targets, axis=0)
+ iou_targets = paddle.concat(iou_targets, axis=0)
+
+ ### Training Weights and avg factor
+ # find positives
+ cls_pos_ind = paddle.nonzero(cls_mask > 0.).squeeze(axis=-1)
+ loc_pos_ind = paddle.nonzero(loc_mask > 0.).squeeze(axis=-1)
+ # cls weight
+ cls_sample_weights = paddle.ones([cls_targets.shape[0]])
+ cls_avg_factor = paddle.max(cls_targets[cls_pos_ind],
+ axis=-1).sum().item()
+ # loc weight
+ loc_sample_weights = paddle.max(cls_targets[loc_pos_ind], axis=-1)
+ loc_avg_factor = loc_sample_weights.sum().item()
+ # iou weight
+ iou_sample_weights = paddle.ones([loc_pos_ind.shape[0]])
+ iou_avg_factor = loc_pos_ind.shape[0]
+
+ ### unsupervised loss
+ # cls loss
+ loss_cls = self.quality_focal_loss(
+ stu_cls,
+ cls_targets,
+ quality=stu_iou,
+ weights=cls_sample_weights,
+ avg_factor=cls_avg_factor) * self.cls_weight
+ # iou loss
+ pos_stu_iou = paddle.squeeze(stu_iou, axis=-1)[loc_pos_ind]
+ pos_iou_targets = iou_targets[loc_pos_ind]
+ loss_iou = F.binary_cross_entropy(
+ F.sigmoid(pos_stu_iou), pos_iou_targets,
+ reduction='none') * iou_sample_weights
+ loss_iou = loss_iou.sum() / iou_avg_factor * self.iou_weight
+ # box loss
+ pos_stu_loc = stu_loc[loc_pos_ind]
+ pos_loc_targets = loc_targets[loc_pos_ind]
+
+ loss_box = self.iou_loss(
+ pos_stu_loc,
+ pos_loc_targets,
+ weights=loc_sample_weights,
+ avg_factor=loc_avg_factor)
+ loss_box = loss_box * self.reg_weight
+
+ loss_all = {
+ "loss_cls": loss_cls,
+ "loss_box": loss_box,
+ "loss_iou": loss_iou,
+ }
+ return loss_all
diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py
index f57ef0227c6..ed0433764ba 100644
--- a/ppdet/utils/checkpoint.py
+++ b/ppdet/utils/checkpoint.py
@@ -17,9 +17,7 @@
from __future__ import print_function
from __future__ import unicode_literals
-import errno
import os
-import time
import numpy as np
import paddle
import paddle.nn as nn
@@ -40,21 +38,6 @@ def is_url(path):
or path.startswith('ppdet://')
-def _get_unique_endpoints(trainer_endpoints):
- # Sorting is to avoid different environmental variables for each card
- trainer_endpoints.sort()
- ips = set()
- unique_endpoints = set()
- for endpoint in trainer_endpoints:
- ip = endpoint.split(":")[0]
- if ip in ips:
- continue
- ips.add(ip)
- unique_endpoints.add(endpoint)
- logger.info("unique_endpoints {}".format(unique_endpoints))
- return unique_endpoints
-
-
def _strip_postfix(path):
path, ext = os.path.splitext(path)
assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
@@ -92,28 +75,35 @@ def load_weight(model, weight, optimizer=None, ema=None, exchange=True):
ema_state_dict = None
param_state_dict = paddle.load(pdparam_path)
- model_dict = model.state_dict()
- model_weight = {}
- incorrect_keys = 0
+ if hasattr(model, 'modelTeacher') and hasattr(model, 'modelStudent'):
+ print('Loading pretrain weights for Teacher-Student framework.')
+ print('Loading pretrain weights for Student model.')
+ student_model_dict = model.modelStudent.state_dict()
+ student_param_state_dict = match_state_dict(
+ student_model_dict, param_state_dict, mode='student')
+ model.modelStudent.set_dict(student_param_state_dict)
+ print('Loading pretrain weights for Teacher model.')
+ teacher_model_dict = model.modelTeacher.state_dict()
- for key, value in model_dict.items():
- if key in param_state_dict.keys():
- if isinstance(param_state_dict[key], np.ndarray):
- param_state_dict[key] = paddle.to_tensor(param_state_dict[key])
- if value.dtype == param_state_dict[key].dtype:
+ teacher_param_state_dict = match_state_dict(
+ teacher_model_dict, param_state_dict, mode='teacher')
+ model.modelTeacher.set_dict(teacher_param_state_dict)
+
+ else:
+ model_dict = model.state_dict()
+ model_weight = {}
+ incorrect_keys = 0
+ for key in model_dict.keys():
+ if key in param_state_dict.keys():
model_weight[key] = param_state_dict[key]
else:
- model_weight[key] = param_state_dict[key].astype(value.dtype)
- else:
- logger.info('Unmatched key: {}'.format(key))
- incorrect_keys += 1
-
- assert incorrect_keys == 0, "Load weight {} incorrectly, \
- {} keys unmatched, please check again.".format(weight,
- incorrect_keys)
- logger.info('Finish resuming model weights: {}'.format(pdparam_path))
-
- model.set_dict(model_weight)
+ logger.info('Unmatched key: {}'.format(key))
+ incorrect_keys += 1
+ assert incorrect_keys == 0, "Load weight {} incorrectly, \
+ {} keys unmatched, please check again.".format(weight,
+ incorrect_keys)
+ logger.info('Finish resuming model weights: {}'.format(pdparam_path))
+ model.set_dict(model_weight)
last_epoch = 0
if optimizer is not None and os.path.exists(path + '.pdopt'):
@@ -134,7 +124,7 @@ def load_weight(model, weight, optimizer=None, ema=None, exchange=True):
return last_epoch
-def match_state_dict(model_state_dict, weight_state_dict):
+def match_state_dict(model_state_dict, weight_state_dict, mode='default'):
"""
Match between the model state dict and pretrained weight state dict.
Return the matched state dict.
@@ -152,33 +142,47 @@ def match_state_dict(model_state_dict, weight_state_dict):
model_keys = sorted(model_state_dict.keys())
weight_keys = sorted(weight_state_dict.keys())
+ def teacher_match(a, b):
+ # skip student params
+ if b.startswith('modelStudent'):
+ return False
+ return a == b or a.endswith("." + b) or b.endswith("." + a)
+
+ def student_match(a, b):
+ # skip teacher params
+ if b.startswith('modelTeacher'):
+ return False
+ return a == b or a.endswith("." + b) or b.endswith("." + a)
+
def match(a, b):
- if b.startswith('backbone.res5'):
- # In Faster RCNN, res5 pretrained weights have prefix of backbone,
- # however, the corresponding model weights have difficult prefix,
- # bbox_head.
+ if a.startswith('backbone.res5'):
b = b[9:]
return a == b or a.endswith("." + b)
+ if mode == 'student':
+ match_op = student_match
+ elif mode == 'teacher':
+ match_op = teacher_match
+ else:
+ match_op = match
+
match_matrix = np.zeros([len(model_keys), len(weight_keys)])
for i, m_k in enumerate(model_keys):
for j, w_k in enumerate(weight_keys):
- if match(m_k, w_k):
+ if match_op(m_k, w_k):
match_matrix[i, j] = len(w_k)
max_id = match_matrix.argmax(1)
max_len = match_matrix.max(1)
max_id[max_len == 0] = -1
-
- load_id = set(max_id)
- load_id.discard(-1)
not_load_weight_name = []
- for idx in range(len(weight_keys)):
- if idx not in load_id:
- not_load_weight_name.append(weight_keys[idx])
+ for match_idx in range(len(max_id)):
+ if max_id[match_idx] == -1:
+ not_load_weight_name.append(model_keys[match_idx])
if len(not_load_weight_name) > 0:
- logger.info('{} in pretrained weight is not used in the model, '
- 'and its will not be loaded'.format(not_load_weight_name))
+ logger.info('{} in model is not matched with pretrained weights, '
+ 'and its will be trained from scratch'.format(
+ not_load_weight_name))
matched_keys = {}
result_state_dict = {}
for model_id, weight_id in enumerate(max_id):
@@ -208,7 +212,7 @@ def match(a, b):
return result_state_dict
-def load_pretrain_weight(model, pretrain_weight):
+def load_pretrain_weight(model, pretrain_weight, ARSL_eval=False):
if is_url(pretrain_weight):
pretrain_weight = get_weights_path(pretrain_weight)
@@ -219,21 +223,48 @@ def load_pretrain_weight(model, pretrain_weight):
"If you don't want to load pretrain model, "
"please delete `pretrain_weights` field in "
"config file.".format(path))
+ teacher_student_flag = False
+ if not ARSL_eval:
+ if hasattr(model, 'modelTeacher') and hasattr(model, 'modelStudent'):
+ print('Loading pretrain weights for Teacher-Student framework.')
+ print(
+ 'Assert Teacher model has the same structure with Student model.'
+ )
+ model_dict = model.modelStudent.state_dict()
+ teacher_student_flag = True
+ else:
+ model_dict = model.state_dict()
+
+ weights_path = path + '.pdparams'
+ param_state_dict = paddle.load(weights_path)
+ param_state_dict = match_state_dict(model_dict, param_state_dict)
+ for k, v in param_state_dict.items():
+ if isinstance(v, np.ndarray):
+ v = paddle.to_tensor(v)
+ if model_dict[k].dtype != v.dtype:
+ param_state_dict[k] = v.astype(model_dict[k].dtype)
+
+ if teacher_student_flag:
+ model.modelStudent.set_dict(param_state_dict)
+ model.modelTeacher.set_dict(param_state_dict)
+ else:
+ model.set_dict(param_state_dict)
+ logger.info('Finish loading model weights: {}'.format(weights_path))
- model_dict = model.state_dict()
-
- weights_path = path + '.pdparams'
- param_state_dict = paddle.load(weights_path)
- param_state_dict = match_state_dict(model_dict, param_state_dict)
-
- for k, v in param_state_dict.items():
- if isinstance(v, np.ndarray):
- v = paddle.to_tensor(v)
- if model_dict[k].dtype != v.dtype:
- param_state_dict[k] = v.astype(model_dict[k].dtype)
+ else:
+ weights_path = path + '.pdparams'
+ param_state_dict = paddle.load(weights_path)
+ student_model_dict = model.modelStudent.state_dict()
+ student_param_state_dict = match_state_dict(
+ student_model_dict, param_state_dict, mode='student')
+ model.modelStudent.set_dict(student_param_state_dict)
+ print('Loading pretrain weights for Teacher model.')
+ teacher_model_dict = model.modelTeacher.state_dict()
- model.set_dict(param_state_dict)
- logger.info('Finish loading model weights: {}'.format(weights_path))
+ teacher_param_state_dict = match_state_dict(
+ teacher_model_dict, param_state_dict, mode='teacher')
+ model.modelTeacher.set_dict(teacher_param_state_dict)
+ logger.info('Finish loading model weights: {}'.format(weights_path))
def save_model(model,
@@ -256,21 +287,24 @@ def save_model(model,
"""
if paddle.distributed.get_rank() != 0:
return
- assert isinstance(model, dict), ("model is not a instance of dict, "
- "please call model.state_dict() to get.")
if not os.path.exists(save_dir):
os.makedirs(save_dir)
save_path = os.path.join(save_dir, save_name)
# save model
- if ema_model is None:
- paddle.save(model, save_path + ".pdparams")
+ if isinstance(model, nn.Layer):
+ paddle.save(model.state_dict(), save_path + ".pdparams")
else:
- assert isinstance(ema_model,
- dict), ("ema_model is not a instance of dict, "
- "please call model.state_dict() to get.")
- # Exchange model and ema_model to save
- paddle.save(ema_model, save_path + ".pdparams")
- paddle.save(model, save_path + ".pdema")
+ assert isinstance(model,
+ dict), 'model is not a instance of nn.layer or dict'
+ if ema_model is None:
+ paddle.save(model, save_path + ".pdparams")
+ else:
+ assert isinstance(ema_model,
+ dict), ("ema_model is not a instance of dict, "
+ "please call model.state_dict() to get.")
+ # Exchange model and ema_model to save
+ paddle.save(ema_model, save_path + ".pdparams")
+ paddle.save(model, save_path + ".pdema")
# save optimizer
state_dict = optimizer.state_dict()
state_dict['last_epoch'] = last_epoch
diff --git a/tools/eval.py b/tools/eval.py
index 40cbbecd8b5..fc34686f05d 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -32,7 +32,7 @@
from ppdet.core.workspace import create, load_config, merge_config
from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser, merge_args
-from ppdet.engine import Trainer, init_parallel_env
+from ppdet.engine import Trainer, Trainer_ARSL, init_parallel_env
from ppdet.metrics.coco_utils import json_eval_results
from ppdet.slim import build_slim_model
@@ -135,12 +135,17 @@ def run(FLAGS, cfg):
# init parallel environment if nranks > 1
init_parallel_env()
-
- # build trainer
- trainer = Trainer(cfg, mode='eval')
-
- # load weights
- trainer.load_weights(cfg.weights)
+ ssod_method = cfg.get('ssod_method', None)
+ if ssod_method == 'ARSL':
+ # build ARSL_trainer
+ trainer = Trainer_ARSL(cfg, mode='eval')
+ # load ARSL_weights
+ trainer.load_weights(cfg.weights, ARSL_eval=True)
+ else:
+ # build trainer
+ trainer = Trainer(cfg, mode='eval')
+        # load weights
+ trainer.load_weights(cfg.weights)
# training
if FLAGS.slice_infer:
diff --git a/tools/export_model.py b/tools/export_model.py
index 20cfcfaa572..f4ffcb50038 100644
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -32,6 +32,7 @@
from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.cli import ArgsParser
from ppdet.engine import Trainer
+from ppdet.engine.trainer_ssod import Trainer_ARSL
from ppdet.slim import build_slim_model
from ppdet.utils.logger import setup_logger
@@ -60,14 +61,19 @@ def parse_args():
def run(FLAGS, cfg):
+ ssod_method = cfg.get('ssod_method', None)
+ if ssod_method is not None and ssod_method == 'ARSL':
+ trainer = Trainer_ARSL(cfg, mode='test')
+ trainer.load_weights(cfg.weights, ARSL_eval=True)
# build detector
- trainer = Trainer(cfg, mode='test')
-
- # load weights
- if cfg.architecture in ['DeepSORT', 'ByteTrack']:
- trainer.load_weights_sde(cfg.det_weights, cfg.reid_weights)
else:
- trainer.load_weights(cfg.weights)
+ trainer = Trainer(cfg, mode='test')
+
+ # load weights
+ if cfg.architecture in ['DeepSORT', 'ByteTrack']:
+ trainer.load_weights_sde(cfg.det_weights, cfg.reid_weights)
+ else:
+ trainer.load_weights(cfg.weights)
# export model
trainer.export(FLAGS.output_dir)
diff --git a/tools/infer.py b/tools/infer.py
index 65fb3b7253c..9d99237a128 100755
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -31,7 +31,7 @@
import paddle
from ppdet.core.workspace import load_config, merge_config
-from ppdet.engine import Trainer
+from ppdet.engine import Trainer, Trainer_ARSL
from ppdet.utils.check import check_gpu, check_npu, check_xpu, check_mlu, check_version, check_config
from ppdet.utils.cli import ArgsParser, merge_args
from ppdet.slim import build_slim_model
@@ -156,12 +156,13 @@ def get_test_images(infer_dir, infer_img):
def run(FLAGS, cfg):
- # build trainer
- trainer = Trainer(cfg, mode='test')
-
- # load weights
- trainer.load_weights(cfg.weights)
-
+ ssod_method = cfg.get('ssod_method', None)
+ if ssod_method == 'ARSL':
+ trainer = Trainer_ARSL(cfg, mode='test')
+ trainer.load_weights(cfg.weights, ARSL_eval=True)
+ else:
+ trainer = Trainer(cfg, mode='test')
+ trainer.load_weights(cfg.weights)
# get inference images
images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
diff --git a/tools/train.py b/tools/train.py
index ec846519e99..3aa0a21a7bd 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -32,7 +32,7 @@
from ppdet.core.workspace import load_config, merge_config
from ppdet.engine import Trainer, TrainerCot, init_parallel_env, set_random_seed, init_fleet_env
-from ppdet.engine.trainer_ssod import Trainer_DenseTeacher
+from ppdet.engine.trainer_ssod import Trainer_DenseTeacher, Trainer_ARSL
from ppdet.slim import build_slim_model
@@ -132,9 +132,11 @@ def run(FLAGS, cfg):
if ssod_method is not None:
if ssod_method == 'DenseTeacher':
trainer = Trainer_DenseTeacher(cfg, mode='train')
+ elif ssod_method == 'ARSL':
+ trainer = Trainer_ARSL(cfg, mode='train')
else:
raise ValueError(
- "Semi-Supervised Object Detection only support DenseTeacher now."
+ "Semi-Supervised Object Detection only support DenseTeacher and ARSL now."
)
elif cfg.get('use_cot', False):
trainer = TrainerCot(cfg, mode='train')
From 315006020635a3c68202bf2c7777513291d5316d Mon Sep 17 00:00:00 2001
From: DefTruth <31974251+DefTruth@users.noreply.github.com>
Date: Tue, 4 Apr 2023 16:38:30 +0800
Subject: [PATCH 059/116] [FastDeploy] support ppdet with fastdeploy on many
hardwares (#7950)
* [FastDeploy] Add FastDeploy CPU/GPU deploy sources
* [FastDeploy] Add FastDeploy CPU/GPU deploy sources
* [FastDeploy] Add FastDeploy CPU/GPU deploy sources
* [FastDeploy] Add FastDeploy CPU/GPU deploy sources
* [FastDeploy] Add FastDeploy CPU/GPU deploy sources
* [Bug Fix] fixed python paddle_trt backend settings
* [Docs] Update ppdet docs for fastdeploy
* [Docs] fixed typos
* [Docs] Update ppdet-fastdeploy android docs
* Update fastdeploy pptinypose docs
* fixed cmake
---
deploy/fastdeploy/README.md | 93 ++++++++
deploy/fastdeploy/amlogic/a311d/README.md | 20 ++
.../amlogic/a311d/cpp/CMakeLists.txt | 27 +++
deploy/fastdeploy/amlogic/a311d/cpp/README.md | 77 +++++++
deploy/fastdeploy/amlogic/a311d/cpp/infer.cc | 65 ++++++
.../amlogic/a311d/cpp/run_with_adb.sh | 47 ++++
deploy/fastdeploy/ascend/README.md | 87 ++++++++
deploy/fastdeploy/ascend/cpp/CMakeLists.txt | 11 +
deploy/fastdeploy/ascend/cpp/README.md | 52 +++++
deploy/fastdeploy/ascend/cpp/infer.cc | 62 ++++++
deploy/fastdeploy/ascend/python/README.md | 44 ++++
deploy/fastdeploy/ascend/python/infer.py | 46 ++++
deploy/fastdeploy/cpu-gpu/README.md | 97 +++++++++
deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt | 13 ++
deploy/fastdeploy/cpu-gpu/cpp/README.md | 142 ++++++++++++
.../cpp/det_keypoint_unite/CMakeLists.txt | 11 +
.../cpu-gpu/cpp/det_keypoint_unite/README.md | 74 +++++++
.../det_keypoint_unite_infer.cc | 205 ++++++++++++++++++
deploy/fastdeploy/cpu-gpu/cpp/infer.cc | 134 ++++++++++++
.../cpu-gpu/cpp/pptinypose_infer.cc | 149 +++++++++++++
deploy/fastdeploy/cpu-gpu/python/README.md | 126 +++++++++++
.../python/det_keypoint_unite/README.md | 70 ++++++
.../det_keypoint_unite_infer.py | 101 +++++++++
deploy/fastdeploy/cpu-gpu/python/infer.py | 74 +++++++
.../cpu-gpu/python/pptinypose_infer.py | 67 ++++++
deploy/fastdeploy/kunlunxin/README.md | 105 +++++++++
.../fastdeploy/kunlunxin/cpp/CMakeLists.txt | 14 ++
deploy/fastdeploy/kunlunxin/cpp/README.md | 127 +++++++++++
.../cpp/det_keypoint_unite/CMakeLists.txt | 11 +
.../cpp/det_keypoint_unite/README.md | 70 ++++++
.../det_keypoint_unite_infer.cc | 84 +++++++
deploy/fastdeploy/kunlunxin/cpp/infer.cc | 60 +++++
.../kunlunxin/cpp/pptinypose_infer.cc | 65 ++++++
deploy/fastdeploy/kunlunxin/python/README.md | 117 ++++++++++
.../python/det_keypoint_unite/README.md | 65 ++++++
.../det_keypoint_unite_infer.py | 67 ++++++
deploy/fastdeploy/kunlunxin/python/infer.py | 45 ++++
.../kunlunxin/python/pptinypose_infer.py | 42 ++++
deploy/fastdeploy/quantize/README.md | 64 ++++++
deploy/fastdeploy/rockchip/rknpu2/README.md | 121 +++++++++++
.../rockchip/rknpu2/cpp/CMakeLists.txt | 11 +
.../fastdeploy/rockchip/rknpu2/cpp/README.md | 47 ++++
.../fastdeploy/rockchip/rknpu2/cpp/infer.cc | 96 ++++++++
.../rockchip/rknpu2/python/README.md | 41 ++++
.../rockchip/rknpu2/python/infer.py | 68 ++++++
deploy/fastdeploy/rockchip/rv1126/README.md | 17 ++
.../rockchip/rv1126/cpp/CMakeLists.txt | 27 +++
.../fastdeploy/rockchip/rv1126/cpp/README.md | 64 ++++++
.../fastdeploy/rockchip/rv1126/cpp/infer.cc | 66 ++++++
.../rockchip/rv1126/cpp/run_with_adb.sh | 47 ++++
deploy/fastdeploy/serving/README.md | 111 ++++++++++
.../serving/models/postprocess/1/model.py | 110 ++++++++++
.../serving/models/postprocess/config.pbtxt | 30 +++
.../models/postprocess/mask_config.pbtxt | 34 +++
.../serving/models/ppdet/1/README.md | 3 +
.../models/ppdet/faster_rcnn_config.pbtxt | 80 +++++++
.../models/ppdet/mask_rcnn_config.pbtxt | 88 ++++++++
.../serving/models/ppdet/ppyolo_config.pbtxt | 80 +++++++
.../serving/models/ppdet/ppyoloe_config.pbtxt | 72 ++++++
.../serving/models/preprocess/1/model.py | 114 ++++++++++
.../serving/models/preprocess/config.pbtxt | 35 +++
.../serving/models/runtime/1/README.md | 5 +
.../runtime/faster_rcnn_runtime_config.pbtxt | 58 +++++
.../runtime/mask_rcnn_runtime_config.pbtxt | 63 ++++++
.../runtime/ppyolo_runtime_config.pbtxt | 58 +++++
.../runtime/ppyoloe_runtime_config.pbtxt | 55 +++++
.../serving/paddledet_grpc_client.py | 109 ++++++++++
deploy/fastdeploy/sophgo/README.md | 108 +++++++++
deploy/fastdeploy/sophgo/cpp/CMakeLists.txt | 14 ++
deploy/fastdeploy/sophgo/cpp/README.md | 57 +++++
deploy/fastdeploy/sophgo/cpp/infer.cc | 60 +++++
deploy/fastdeploy/sophgo/python/README.md | 30 +++
deploy/fastdeploy/sophgo/python/infer.py | 59 +++++
73 files changed, 4868 insertions(+)
create mode 100644 deploy/fastdeploy/README.md
create mode 100644 deploy/fastdeploy/amlogic/a311d/README.md
create mode 100755 deploy/fastdeploy/amlogic/a311d/cpp/CMakeLists.txt
create mode 100755 deploy/fastdeploy/amlogic/a311d/cpp/README.md
create mode 100755 deploy/fastdeploy/amlogic/a311d/cpp/infer.cc
create mode 100755 deploy/fastdeploy/amlogic/a311d/cpp/run_with_adb.sh
create mode 100644 deploy/fastdeploy/ascend/README.md
create mode 100644 deploy/fastdeploy/ascend/cpp/CMakeLists.txt
create mode 100644 deploy/fastdeploy/ascend/cpp/README.md
create mode 100644 deploy/fastdeploy/ascend/cpp/infer.cc
create mode 100644 deploy/fastdeploy/ascend/python/README.md
create mode 100755 deploy/fastdeploy/ascend/python/infer.py
create mode 100644 deploy/fastdeploy/cpu-gpu/README.md
create mode 100644 deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt
create mode 100644 deploy/fastdeploy/cpu-gpu/cpp/README.md
create mode 100644 deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/CMakeLists.txt
create mode 100644 deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/README.md
create mode 100755 deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
create mode 100644 deploy/fastdeploy/cpu-gpu/cpp/infer.cc
create mode 100644 deploy/fastdeploy/cpu-gpu/cpp/pptinypose_infer.cc
create mode 100644 deploy/fastdeploy/cpu-gpu/python/README.md
create mode 100644 deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/README.md
create mode 100755 deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/det_keypoint_unite_infer.py
create mode 100644 deploy/fastdeploy/cpu-gpu/python/infer.py
create mode 100644 deploy/fastdeploy/cpu-gpu/python/pptinypose_infer.py
create mode 100644 deploy/fastdeploy/kunlunxin/README.md
create mode 100644 deploy/fastdeploy/kunlunxin/cpp/CMakeLists.txt
create mode 100644 deploy/fastdeploy/kunlunxin/cpp/README.md
create mode 100644 deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/CMakeLists.txt
create mode 100644 deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/README.md
create mode 100755 deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
create mode 100644 deploy/fastdeploy/kunlunxin/cpp/infer.cc
create mode 100644 deploy/fastdeploy/kunlunxin/cpp/pptinypose_infer.cc
create mode 100644 deploy/fastdeploy/kunlunxin/python/README.md
create mode 100644 deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/README.md
create mode 100755 deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/det_keypoint_unite_infer.py
create mode 100755 deploy/fastdeploy/kunlunxin/python/infer.py
create mode 100644 deploy/fastdeploy/kunlunxin/python/pptinypose_infer.py
create mode 100644 deploy/fastdeploy/quantize/README.md
create mode 100644 deploy/fastdeploy/rockchip/rknpu2/README.md
create mode 100644 deploy/fastdeploy/rockchip/rknpu2/cpp/CMakeLists.txt
create mode 100644 deploy/fastdeploy/rockchip/rknpu2/cpp/README.md
create mode 100644 deploy/fastdeploy/rockchip/rknpu2/cpp/infer.cc
create mode 100644 deploy/fastdeploy/rockchip/rknpu2/python/README.md
create mode 100644 deploy/fastdeploy/rockchip/rknpu2/python/infer.py
create mode 100644 deploy/fastdeploy/rockchip/rv1126/README.md
create mode 100755 deploy/fastdeploy/rockchip/rv1126/cpp/CMakeLists.txt
create mode 100644 deploy/fastdeploy/rockchip/rv1126/cpp/README.md
create mode 100644 deploy/fastdeploy/rockchip/rv1126/cpp/infer.cc
create mode 100755 deploy/fastdeploy/rockchip/rv1126/cpp/run_with_adb.sh
create mode 100644 deploy/fastdeploy/serving/README.md
create mode 100644 deploy/fastdeploy/serving/models/postprocess/1/model.py
create mode 100644 deploy/fastdeploy/serving/models/postprocess/config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/postprocess/mask_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/ppdet/1/README.md
create mode 100644 deploy/fastdeploy/serving/models/ppdet/faster_rcnn_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/ppdet/mask_rcnn_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/ppdet/ppyolo_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/ppdet/ppyoloe_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/preprocess/1/model.py
create mode 100644 deploy/fastdeploy/serving/models/preprocess/config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/runtime/1/README.md
create mode 100644 deploy/fastdeploy/serving/models/runtime/faster_rcnn_runtime_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/runtime/mask_rcnn_runtime_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/runtime/ppyolo_runtime_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/models/runtime/ppyoloe_runtime_config.pbtxt
create mode 100644 deploy/fastdeploy/serving/paddledet_grpc_client.py
create mode 100644 deploy/fastdeploy/sophgo/README.md
create mode 100644 deploy/fastdeploy/sophgo/cpp/CMakeLists.txt
create mode 100644 deploy/fastdeploy/sophgo/cpp/README.md
create mode 100644 deploy/fastdeploy/sophgo/cpp/infer.cc
create mode 100644 deploy/fastdeploy/sophgo/python/README.md
create mode 100644 deploy/fastdeploy/sophgo/python/infer.py
diff --git a/deploy/fastdeploy/README.md b/deploy/fastdeploy/README.md
new file mode 100644
index 00000000000..10979ffc6b5
--- /dev/null
+++ b/deploy/fastdeploy/README.md
@@ -0,0 +1,93 @@
+# PaddleDetection高性能全场景模型部署方案—FastDeploy
+
+## 目录
+- [FastDeploy介绍](#FastDeploy介绍)
+- [PaddleDetection模型部署](#PaddleDetection模型部署)
+- [常见问题](#常见问题)
+
+## 1. FastDeploy介绍
+
+
+**[⚡️FastDeploy](https://github.com/PaddlePaddle/FastDeploy)**是一款**全场景**、**易用灵活**、**极致高效**的AI推理部署工具,支持**云边端**部署。使用FastDeploy可以简单高效的在X86 CPU、NVIDIA GPU、飞腾CPU、ARM CPU、Intel GPU、昆仑、昇腾、瑞芯微、晶晨、算能等10+款硬件上对PaddleDetection模型进行快速部署,并且支持Paddle Inference、Paddle Lite、TensorRT、OpenVINO、ONNXRuntime、RKNPU2、SOPHGO等多种推理后端。
+
+
+
+

+
+
+
+## 2. PaddleDetection模型部署
+
+
+### 2.1 硬件支持列表
+
+|硬件类型|该硬件是否支持|使用指南|Python|C++|
+|:---:|:---:|:---:|:---:|:---:|
+|X86 CPU|✅|[链接](./cpu-gpu)|✅|✅|
+|NVIDIA GPU|✅|[链接](./cpu-gpu)|✅|✅|
+|飞腾CPU|✅|[链接](./cpu-gpu)|✅|✅|
+|ARM CPU|✅|[链接](./cpu-gpu)|✅|✅|
+|Intel GPU(集成显卡)|✅|[链接](./cpu-gpu)|✅|✅|
+|Intel GPU(独立显卡)|✅|[链接](./cpu-gpu)|✅|✅|
+|昆仑|✅|[链接](./kunlunxin)|✅|✅|
+|昇腾|✅|[链接](./ascend)|✅|✅|
+|瑞芯微|✅|[链接](./rockchip)|✅|✅|
+|晶晨|✅|[链接](./amlogic)|-|✅|
+|算能|✅|[链接](./sophgo)|✅|✅|
+
+### 2.2. 详细使用文档
+- X86 CPU
+ - [部署模型准备](./cpu-gpu)
+ - [Python部署示例](./cpu-gpu/python/)
+ - [C++部署示例](./cpu-gpu/cpp/)
+- NVIDIA GPU
+ - [部署模型准备](./cpu-gpu)
+ - [Python部署示例](./cpu-gpu/python/)
+ - [C++部署示例](./cpu-gpu/cpp/)
+- 飞腾CPU
+ - [部署模型准备](./cpu-gpu)
+ - [Python部署示例](./cpu-gpu/python/)
+ - [C++部署示例](./cpu-gpu/cpp/)
+- ARM CPU
+ - [部署模型准备](./cpu-gpu)
+ - [Python部署示例](./cpu-gpu/python/)
+ - [C++部署示例](./cpu-gpu/cpp/)
+- Intel GPU
+ - [部署模型准备](./cpu-gpu)
+ - [Python部署示例](./cpu-gpu/python/)
+ - [C++部署示例](./cpu-gpu/cpp/)
+- 昆仑 XPU
+ - [部署模型准备](./kunlunxin)
+ - [Python部署示例](./kunlunxin/python/)
+ - [C++部署示例](./kunlunxin/cpp/)
+- 昇腾 Ascend
+ - [部署模型准备](./ascend)
+ - [Python部署示例](./ascend/python/)
+ - [C++部署示例](./ascend/cpp/)
+- 瑞芯微 Rockchip
+ - [部署模型准备](./rockchip/)
+ - [Python部署示例](./rockchip/rknpu2/)
+ - [C++部署示例](./rockchip/rknpu2/)
+- 晶晨 Amlogic
+ - [部署模型准备](./amlogic/a311d/)
+ - [C++部署示例](./amlogic/a311d/cpp/)
+- 算能 Sophgo
+ - [部署模型准备](./sophgo/)
+ - [Python部署示例](./sophgo/python/)
+ - [C++部署示例](./sophgo/cpp/)
+
+### 2.3 更多部署方式
+
+- [Android ARM CPU部署](https://github.com/PaddlePaddle/FastDeploy/tree/develop/java/android#Detection)
+- [服务化Serving部署](./serving)
+- [web部署](./web)
+- [模型自动化压缩工具](./quantize)
+
+
+## 3. 常见问题
+
+
+遇到问题可查看常见问题集合,搜索FastDeploy issue,*或给FastDeploy提交[issue](https://github.com/PaddlePaddle/FastDeploy/issues)*:
+
+[常见问题集合](https://github.com/PaddlePaddle/FastDeploy/tree/develop/docs/cn/faq)
+[FastDeploy issues](https://github.com/PaddlePaddle/FastDeploy/issues)
diff --git a/deploy/fastdeploy/amlogic/a311d/README.md b/deploy/fastdeploy/amlogic/a311d/README.md
new file mode 100644
index 00000000000..4af0750220a
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/README.md
@@ -0,0 +1,20 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection 检测模型在晶晨NPU上的部署方案—FastDeploy
+
+目前 FastDeploy 已经支持基于 Paddle Lite 部署 PP-YOLOE 量化模型到 A311D 上。
+
+## 1. 说明
+
+晶晨A311D是一款先进的AI应用处理器。PaddleDetection支持通过FastDeploy在A311D上基于Paddle-Lite部署相关检测模型。**注意**:需要注意的是,芯原(verisilicon)作为 IP 设计厂商,本身并不提供实体SoC产品,而是授权其 IP 给芯片厂商,如:晶晨(Amlogic),瑞芯微(Rockchip)等。因此本文是适用于被芯原授权了 NPU IP 的芯片产品。只要芯片产品没有大幅修改芯原的底层库,则该芯片就可以使用本文档作为 Paddle Lite 推理部署的参考和教程。在本文中,晶晨 SoC 中的 NPU 和 瑞芯微 SoC 中的 NPU 统称为芯原 NPU。目前支持如下芯片的部署:
+- Amlogic A311D
+- Amlogic C308X
+- Amlogic S905D3
+
+模型的量化和量化模型的下载请参考:[模型量化](../quantize/README.md)
+
+## 2. 详细的部署示例
+
+在 A311D 上只支持 C++ 的部署。
+
+- [C++部署](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/CMakeLists.txt b/deploy/fastdeploy/amlogic/a311d/cpp/CMakeLists.txt
new file mode 100755
index 00000000000..af493f6b67d
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/CMakeLists.txt
@@ -0,0 +1,27 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+include_directories(${FastDeploy_INCLUDE_DIRS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
+
+install(TARGETS infer_demo DESTINATION ./)
+
+install(DIRECTORY models DESTINATION ./)
+install(DIRECTORY images DESTINATION ./)
+
+file(GLOB_RECURSE FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/lib*.so*)
+file(GLOB_RECURSE ALL_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/lib*.so*)
+list(APPEND ALL_LIBS ${FASTDEPLOY_LIBS})
+install(PROGRAMS ${ALL_LIBS} DESTINATION lib)
+
+file(GLOB ADB_TOOLS run_with_adb.sh)
+install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/README.md b/deploy/fastdeploy/amlogic/a311d/cpp/README.md
new file mode 100755
index 00000000000..830c47e6d6d
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/README.md
@@ -0,0 +1,77 @@
+[English](README.md) | 简体中文
+# PaddleDetection A311D 量化模型 C++ 部署示例
+
+本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-YOLOE 量化模型在 A311D 上的部署推理加速。
+
+## 1. 部署环境准备
+软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 晶晨 A311d 编译文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. 部署模型准备
+1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。
+2. 用户可以先使用 PaddleDetection 自行导出 Float32 模型,注意导出模型模型时设置参数:use_shared_conv=False,更多细节请参考:[PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe)
+3. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的检测模型仍然需要FP32模型文件夹下的 infer_cfg.yml 文件,自行量化的模型文件夹内不包含此 yaml 文件,用户从 FP32 模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。)
+4. 模型需要异构计算,异构计算文件可以参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。
+
+更多量化相关信息可查阅[模型量化](../../../quantize/README.md)
+
+## 3. 在 A311D 上部署量化后的 PP-YOLOE 检测模型
+请按照以下步骤完成在 A311D 上部署 PP-YOLOE 量化模型:
+
+1. 交叉编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md)
+
+2. 将编译后的库拷贝到当前目录,可使用如下命令:
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+cp -r FastDeploy/build/fastdeploy-timvx/ PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp
+```
+
+3. 在当前路径下载部署所需的模型和示例图片:
+```bash
+cd PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp
+mkdir models && mkdir images
+wget https://bj.bcebos.com/fastdeploy/models/ppyoloe_noshare_qat.tar.gz
+tar -xvf ppyoloe_noshare_qat.tar.gz
+cp -r ppyoloe_noshare_qat models
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+cp -r 000000014439.jpg images
+```
+
+4. 编译部署示例,可使用如下命令:
+```bash
+cd PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp
+mkdir build && cd build
+cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=arm64 ..
+make -j8
+make install
+# 成功编译之后,会生成 install 文件夹,里面有一个运行 demo 和部署所需的库
+```
+
+5. 基于 adb 工具部署 PP-YOLOE 检测模型到晶晨 A311D
+```bash
+# 进入 install 目录
+cd PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp/build/install/
+# 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID
+bash run_with_adb.sh infer_demo ppyoloe_noshare_qat 000000014439.jpg $DEVICE_ID
+```
+
+部署成功后运行结果如下:
+
+
+
+需要特别注意的是,在 A311D 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md)
+
+## 4. 更多指南
+- [PaddleDetection C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [Python部署](../python)
+
+## 5. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/infer.cc b/deploy/fastdeploy/amlogic/a311d/cpp/infer.cc
new file mode 100755
index 00000000000..c7b81f9f98e
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/infer.cc
@@ -0,0 +1,65 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto subgraph_file = model_dir + sep + "subgraph.txt";
+ fastdeploy::vision::EnableFlyCV();
+ fastdeploy::RuntimeOption option;
+ option.UseTimVX();
+ option.SetLiteSubgraphPartitionPath(subgraph_file);
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ assert(model.Initialized());
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout << "Usage: infer_demo path/to/quant_model "
+ "path/to/image "
+ "e.g ./infer_demo ./PPYOLOE_L_quant ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+
+ std::string model_dir = argv[1];
+ std::string test_image = argv[2];
+ InitAndInfer(model_dir, test_image);
+ return 0;
+}
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/run_with_adb.sh b/deploy/fastdeploy/amlogic/a311d/cpp/run_with_adb.sh
new file mode 100755
index 00000000000..dd7d7b47d2c
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/run_with_adb.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+HOST_SPACE=${PWD}
+echo ${HOST_SPACE}
+WORK_SPACE=/data/local/tmp/test
+
+# The first parameter represents the demo name
+DEMO_NAME=image_classification_demo
+if [ -n "$1" ]; then
+ DEMO_NAME=$1
+fi
+
+# The second parameter represents the model name
+MODEL_NAME=mobilenet_v1_fp32_224
+if [ -n "$2" ]; then
+ MODEL_NAME=$2
+fi
+
+# The third parameter indicates the name of the image to be tested
+IMAGE_NAME=0001.jpg
+if [ -n "$3" ]; then
+ IMAGE_NAME=$3
+fi
+
+# The fourth parameter represents the ID of the device
+ADB_DEVICE_NAME=
+if [ -n "$4" ]; then
+ ADB_DEVICE_NAME="-s $4"
+fi
+
+# Set the environment variables required during the running process
+EXPORT_ENVIRONMENT_VARIABLES="export GLOG_v=5; export SUBGRAPH_ONLINE_MODE=true; export RKNPU_LOGLEVEL=5; export RKNN_LOG_LEVEL=5; ulimit -c unlimited; export VIV_VX_ENABLE_GRAPH_TRANSFORM=-pcq:1; export VIV_VX_SET_PER_CHANNEL_ENTROPY=100; export TIMVX_BATCHNORM_FUSION_MAX_ALLOWED_QUANT_SCALE_DEVIATION=300000; export VSI_NN_LOG_LEVEL=5;"
+
+EXPORT_ENVIRONMENT_VARIABLES="${EXPORT_ENVIRONMENT_VARIABLES}export LD_LIBRARY_PATH=${WORK_SPACE}/lib:\$LD_LIBRARY_PATH;"
+
+# Please install adb, and DON'T run this in the docker.
+set -e
+adb $ADB_DEVICE_NAME shell "rm -rf $WORK_SPACE"
+adb $ADB_DEVICE_NAME shell "mkdir -p $WORK_SPACE"
+
+# Upload the demo, librarys, model and test images to the device
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/lib $WORK_SPACE
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/${DEMO_NAME} $WORK_SPACE
+adb $ADB_DEVICE_NAME push models $WORK_SPACE
+adb $ADB_DEVICE_NAME push images $WORK_SPACE
+
+# Execute the deployment demo
+adb $ADB_DEVICE_NAME shell "cd $WORK_SPACE; ${EXPORT_ENVIRONMENT_VARIABLES} chmod +x ./${DEMO_NAME}; ./${DEMO_NAME} ./models/${MODEL_NAME} ./images/$IMAGE_NAME"
diff --git a/deploy/fastdeploy/ascend/README.md b/deploy/fastdeploy/ascend/README.md
new file mode 100644
index 00000000000..b3874224d80
--- /dev/null
+++ b/deploy/fastdeploy/ascend/README.md
@@ -0,0 +1,87 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection检测模型在华为昇腾上的部署方案—FastDeploy
+
+## 1. 说明
+PaddleDetection支持利用FastDeploy在华为昇腾上快速部署检测模型
+
+## 2. 使用预导出的模型列表
+为了方便开发者的测试,下面提供了PaddleDetection导出的各系列模型,开发者可直接下载使用。其中精度指标来源于PaddleDetection中对各模型的介绍,详情各参考PaddleDetection中的说明。
+
+| 模型 | 参数大小 | 精度 | 备注 |
+|:---------------------------------------------------------------- |:----- |:----- | :------ |
+| [picodet_l_320_coco_lcnet](https://bj.bcebos.com/paddlehub/fastdeploy/picodet_l_320_coco_lcnet.tgz) |23MB | Box AP 42.6% |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz) |200MB | Box AP 51.4% |
+| [ppyoloe_plus_crn_m_80e_coco](https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz) |83.3MB | Box AP 49.8% |
+| [ppyolo_r50vd_dcn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolo_r50vd_dcn_1x_coco.tgz) | 180MB | Box AP 44.8% | 暂不支持TensorRT |
+| [ppyolov2_r101vd_dcn_365e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolov2_r101vd_dcn_365e_coco.tgz) | 282MB | Box AP 49.7% | 暂不支持TensorRT |
+| [yolov3_darknet53_270e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov3_darknet53_270e_coco.tgz) |237MB | Box AP 39.1% | |
+| [yolox_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolox_s_300e_coco.tgz) | 35MB | Box AP 40.4% | |
+| [faster_rcnn_r50_vd_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz) | 160MB | Box AP 40.8%| 暂不支持TensorRT |
+| [mask_rcnn_r50_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/mask_rcnn_r50_1x_coco.tgz) | 128M | Box AP 37.4%, Mask AP 32.8%| 暂不支持TensorRT、ORT |
+| [ssd_mobilenet_v1_300_120e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_mobilenet_v1_300_120e_voc.tgz) | 24.9M | Box AP 73.8%| 暂不支持TensorRT、ORT |
+| [ssd_vgg16_300_240e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_vgg16_300_240e_voc.tgz) | 106.5M | Box AP 77.8%| 暂不支持TensorRT、ORT |
+| [ssdlite_mobilenet_v1_300_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ssdlite_mobilenet_v1_300_coco.tgz) | 29.1M | | 暂不支持TensorRT、ORT |
+| [rtmdet_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_l_300e_coco.tgz) | 224M | Box AP 51.2%| |
+| [rtmdet_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_s_300e_coco.tgz) | 42M | Box AP 44.5%| |
+| [yolov5_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_l_300e_coco.tgz) | 183M | Box AP 48.9%| |
+| [yolov5_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_s_300e_coco.tgz) | 31M | Box AP 37.6%| |
+| [yolov6_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_l_300e_coco.tgz) | 229M | Box AP 51.0%| |
+| [yolov6_s_400e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_s_400e_coco.tgz) | 68M | Box AP 43.4%| |
+| [yolov7_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_l_300e_coco.tgz) | 145M | Box AP 51.0%| |
+| [yolov7_x_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_x_300e_coco.tgz) | 277M | Box AP 53.0%| |
+| [cascade_rcnn_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_fpn_1x_coco.tgz) | 271M | Box AP 41.1%| 暂不支持TensorRT、ORT |
+| [cascade_rcnn_r50_vd_fpn_ssld_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.tgz) | 271M | Box AP 45.0%| 暂不支持TensorRT、ORT |
+| [faster_rcnn_enhance_3x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_enhance_3x_coco.tgz) | 119M | Box AP 41.5%| 暂不支持TensorRT、ORT |
+| [fcos_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/fcos_r50_fpn_1x_coco.tgz) | 129M | Box AP 39.6%| 暂不支持TensorRT |
+| [gfl_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/gfl_r50_fpn_1x_coco.tgz) | 128M | Box AP 41.0%| 暂不支持TensorRT |
+| [ppyoloe_crn_l_80e_sliced_visdrone_640_025](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_80e_sliced_visdrone_640_025.tgz) | 200M | Box AP 31.9%| |
+| [retinanet_r101_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r101_fpn_2x_coco.tgz) | 210M | Box AP 40.6%| 暂不支持TensorRT、ORT |
+| [retinanet_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r50_fpn_1x_coco.tgz) | 136M | Box AP 37.5%| 暂不支持TensorRT、ORT |
+| [tood_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/tood_r50_fpn_1x_coco.tgz) | 130M | Box AP 42.5%| 暂不支持TensorRT、ORT |
+| [ttfnet_darknet53_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ttfnet_darknet53_1x_coco.tgz) | 178M | Box AP 33.5%| 暂不支持TensorRT、ORT |
+| [yolov8_x_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_x_500e_coco.tgz) | 265M | Box AP 53.8%
+| [yolov8_l_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_l_500e_coco.tgz) | 173M | Box AP 52.8%
+| [yolov8_m_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_m_500e_coco.tgz) | 99M | Box AP 50.2%
+| [yolov8_s_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_s_500e_coco.tgz) | 43M | Box AP 44.9%
+| [yolov8_n_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_n_500e_coco.tgz) | 13M | Box AP 37.3%
+
+
+## 3. 自行导出PaddleDetection部署模型
+### 3.1 模型版本
+支持[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)大于等于2.4版本的PaddleDetection模型部署。目前FastDeploy测试过成功部署的模型:
+
+- [PP-YOLOE(含PP-YOLOE+)系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+- [PicoDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/picodet)
+- [PP-YOLO系列模型(含v2)](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyolo)
+- [YOLOv3系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolov3)
+- [YOLOX系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolox)
+- [FasterRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/faster_rcnn)
+- [MaskRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mask_rcnn)
+- [SSD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ssd)
+- [YOLOv5系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov5)
+- [YOLOv6系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov6)
+- [YOLOv7系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov7)
+- [YOLOv8系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov8)
+- [RTMDet系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/rtmdet)
+- [CascadeRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/cascade_rcnn)
+- [PSSDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rcnn_enhance)
+- [RetinaNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/retinanet)
+- [PPYOLOESOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
+- [FCOS系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/fcos)
+- [TTFNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ttfnet)
+- [TOOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/tood)
+- [GFL系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/gfl)
+
+### 3.2 模型导出
+PaddleDetection模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/deploy/EXPORT_MODEL.md),**注意**:PaddleDetection导出的模型包含`model.pdmodel`、`model.pdiparams`和`infer_cfg.yml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息
+
+### 3.3 导出须知
+如果您是自行导出PaddleDetection推理模型,请注意以下问题:
+- 在导出模型时不要进行NMS的去除操作,正常导出即可
+- 如果用于跑原生TensorRT后端(非Paddle Inference后端),不要添加--trt参数
+- 导出模型时,不要添加`fuse_normalize=True`参数
+
+## 4. 详细的部署示例
+- [Python部署](python)
+- [C++部署](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/ascend/cpp/CMakeLists.txt b/deploy/fastdeploy/ascend/cpp/CMakeLists.txt
new file mode 100644
index 00000000000..776d832f915
--- /dev/null
+++ b/deploy/fastdeploy/ascend/cpp/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/ascend/cpp/README.md b/deploy/fastdeploy/ascend/cpp/README.md
new file mode 100644
index 00000000000..62029d903c3
--- /dev/null
+++ b/deploy/fastdeploy/ascend/cpp/README.md
@@ -0,0 +1,52 @@
+[English](README.md) | 简体中文
+# PaddleDetection Ascend C++部署示例
+
+本目录下提供`infer.cc`快速完成PPYOLOE在华为昇腾上部署的示例。
+
+## 1. 部署环境准备
+在部署前,需自行编译基于华为昇腾NPU的预测库,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+## 3. 运行部署示例
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试。
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/ascend/cpp
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+mkdir build
+cd build
+# 使用编译完成的FastDeploy库编译infer_demo
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend
+make -j
+
+# 下载模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# 华为昇腾推理
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+
+## 4. 更多指南
+- [PaddleDetection C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [Python部署](../python)
+
+## 5. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/ascend/cpp/infer.cc b/deploy/fastdeploy/ascend/cpp/infer.cc
new file mode 100644
index 00000000000..c7394921b3e
--- /dev/null
+++ b/deploy/fastdeploy/ascend/cpp/infer.cc
@@ -0,0 +1,62 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void AscendInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseAscend();
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option);
+
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+ "e.g ./infer_model ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+
+ AscendInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/ascend/python/README.md b/deploy/fastdeploy/ascend/python/README.md
new file mode 100644
index 00000000000..2b88148a591
--- /dev/null
+++ b/deploy/fastdeploy/ascend/python/README.md
@@ -0,0 +1,44 @@
+[English](README.md) | 简体中文
+# PaddleDetection Ascend Python部署示例
+
+本目录下提供`infer.py`快速完成PPYOLOE在华为昇腾上部署的示例。
+
+## 1. 部署环境准备
+在部署前,需自行编译基于华为昇腾NPU的FastDeploy python wheel包并安装,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+## 3. 运行部署示例
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/ascend/python
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# 华为昇腾推理
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+## 4. 更多指南
+- [PaddleDetection Python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [C++部署](../cpp)
+
+## 5. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/ascend/python/infer.py b/deploy/fastdeploy/ascend/python/infer.py
new file mode 100755
index 00000000000..46cb50129dd
--- /dev/null
+++ b/deploy/fastdeploy/ascend/python/infer.py
@@ -0,0 +1,46 @@
+import cv2
+import os
+
+import fastdeploy as fd
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir", required=True, help="Path of PaddleDetection model.")
+ parser.add_argument(
+ "--image_file", type=str, required=True, help="Path of test image file.")
+ return parser.parse_args()
+
+args = parse_arguments()
+
+runtime_option = fd.RuntimeOption()
+runtime_option.use_ascend()
+
+if args.model_dir is None:
+ model_dir = fd.download_model(name='ppyoloe_crn_l_300e_coco')
+else:
+ model_dir = args.model_dir
+
+model_file = os.path.join(model_dir, "model.pdmodel")
+params_file = os.path.join(model_dir, "model.pdiparams")
+config_file = os.path.join(model_dir, "infer_cfg.yml")
+
+# setting for runtime
+model = fd.vision.detection.PPYOLOE(
+ model_file, params_file, config_file, runtime_option=runtime_option)
+
+# predict
+if args.image_file is None:
+ image_file = fd.utils.get_detection_test_image()
+else:
+ image_file = args.image_file
+im = cv2.imread(image_file)
+result = model.predict(im)
+print(result)
+
+# visualize
+vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/cpu-gpu/README.md b/deploy/fastdeploy/cpu-gpu/README.md
new file mode 100644
index 00000000000..8485fb37afc
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/README.md
@@ -0,0 +1,97 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection检测模型在CPU-GPU上的部署方案—FastDeploy
+
+## 1. 说明
+PaddleDetection支持利用FastDeploy在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上快速部署检测模型
+
+## 2. 使用预导出的模型列表
+为了方便开发者的测试,下面提供了PaddleDetection导出的各系列模型,开发者可直接下载使用。其中精度指标来源于PaddleDetection中对各模型的介绍,详情各参考PaddleDetection中的说明。
+
+### 2.1 目标检测及实例分割模型
+| 模型 | 参数大小 | 精度 | 备注 |
+|:---------------------------------------------------------------- |:----- |:----- | :------ |
+| [picodet_l_320_coco_lcnet](https://bj.bcebos.com/paddlehub/fastdeploy/picodet_l_320_coco_lcnet.tgz) |23MB | Box AP 42.6% |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz) |200MB | Box AP 51.4% |
+| [ppyoloe_plus_crn_m_80e_coco](https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz) |83.3MB | Box AP 49.8% |
+| [ppyolo_r50vd_dcn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolo_r50vd_dcn_1x_coco.tgz) | 180MB | Box AP 44.8% | 暂不支持TensorRT |
+| [ppyolov2_r101vd_dcn_365e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolov2_r101vd_dcn_365e_coco.tgz) | 282MB | Box AP 49.7% | 暂不支持TensorRT |
+| [yolov3_darknet53_270e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov3_darknet53_270e_coco.tgz) |237MB | Box AP 39.1% | |
+| [yolox_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolox_s_300e_coco.tgz) | 35MB | Box AP 40.4% | |
+| [faster_rcnn_r50_vd_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz) | 160MB | Box AP 40.8%| 暂不支持TensorRT |
+| [mask_rcnn_r50_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/mask_rcnn_r50_1x_coco.tgz) | 128M | Box AP 37.4%, Mask AP 32.8%| 暂不支持TensorRT、ORT |
+| [ssd_mobilenet_v1_300_120e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_mobilenet_v1_300_120e_voc.tgz) | 24.9M | Box AP 73.8%| 暂不支持TensorRT、ORT |
+| [ssd_vgg16_300_240e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_vgg16_300_240e_voc.tgz) | 106.5M | Box AP 77.8%| 暂不支持TensorRT、ORT |
+| [ssdlite_mobilenet_v1_300_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ssdlite_mobilenet_v1_300_coco.tgz) | 29.1M | | 暂不支持TensorRT、ORT |
+| [rtmdet_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_l_300e_coco.tgz) | 224M | Box AP 51.2%| |
+| [rtmdet_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_s_300e_coco.tgz) | 42M | Box AP 44.5%| |
+| [yolov5_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_l_300e_coco.tgz) | 183M | Box AP 48.9%| |
+| [yolov5_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_s_300e_coco.tgz) | 31M | Box AP 37.6%| |
+| [yolov6_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_l_300e_coco.tgz) | 229M | Box AP 51.0%| |
+| [yolov6_s_400e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_s_400e_coco.tgz) | 68M | Box AP 43.4%| |
+| [yolov7_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_l_300e_coco.tgz) | 145M | Box AP 51.0%| |
+| [yolov7_x_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_x_300e_coco.tgz) | 277M | Box AP 53.0%| |
+| [cascade_rcnn_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_fpn_1x_coco.tgz) | 271M | Box AP 41.1%| 暂不支持TensorRT、ORT |
+| [cascade_rcnn_r50_vd_fpn_ssld_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.tgz) | 271M | Box AP 45.0%| 暂不支持TensorRT、ORT |
+| [faster_rcnn_enhance_3x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_enhance_3x_coco.tgz) | 119M | Box AP 41.5%| 暂不支持TensorRT、ORT |
+| [fcos_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/fcos_r50_fpn_1x_coco.tgz) | 129M | Box AP 39.6%| 暂不支持TensorRT |
+| [gfl_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/gfl_r50_fpn_1x_coco.tgz) | 128M | Box AP 41.0%| 暂不支持TensorRT |
+| [ppyoloe_crn_l_80e_sliced_visdrone_640_025](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_80e_sliced_visdrone_640_025.tgz) | 200M | Box AP 31.9%| |
+| [retinanet_r101_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r101_fpn_2x_coco.tgz) | 210M | Box AP 40.6%| 暂不支持TensorRT、ORT |
+| [retinanet_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r50_fpn_1x_coco.tgz) | 136M | Box AP 37.5%| 暂不支持TensorRT、ORT |
+| [tood_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/tood_r50_fpn_1x_coco.tgz) | 130M | Box AP 42.5%| 暂不支持TensorRT、ORT |
+| [ttfnet_darknet53_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ttfnet_darknet53_1x_coco.tgz) | 178M | Box AP 33.5%| 暂不支持TensorRT、ORT |
+| [yolov8_x_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_x_500e_coco.tgz) | 265M | Box AP 53.8%
+| [yolov8_l_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_l_500e_coco.tgz) | 173M | Box AP 52.8%
+| [yolov8_m_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_m_500e_coco.tgz) | 99M | Box AP 50.2%
+| [yolov8_s_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_s_500e_coco.tgz) | 43M | Box AP 44.9%
+| [yolov8_n_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_n_500e_coco.tgz) | 13M | Box AP 37.3%
+
+### 2.2 关键点检测模型
+| 模型 | 说明 | 模型格式 | 版本 |
+| :--- | :--- | :------- | :--- |
+| [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | 单人关键点检测模型 | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | 单人关键点检测模型 | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-192x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_192x192_infer.tgz) + [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | 单人关键点检测串联配置 | Paddle |[Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-320x320](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz) + [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | 多人关键点检测串联配置 | Paddle |[Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+
+
+## 3. 自行导出PaddleDetection部署模型
+### 3.1 模型版本
+支持[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)大于等于2.4版本的PaddleDetection模型部署。目前FastDeploy测试过成功部署的模型:
+
+- [PP-YOLOE(含PP-YOLOE+)系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+- [PicoDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/picodet)
+- [PP-YOLO系列模型(含v2)](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyolo)
+- [YOLOv3系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolov3)
+- [YOLOX系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolox)
+- [FasterRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/faster_rcnn)
+- [MaskRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mask_rcnn)
+- [SSD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ssd)
+- [YOLOv5系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov5)
+- [YOLOv6系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov6)
+- [YOLOv7系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov7)
+- [YOLOv8系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov8)
+- [RTMDet系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/rtmdet)
+- [CascadeRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/cascade_rcnn)
+- [PSSDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rcnn_enhance)
+- [RetinaNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/retinanet)
+- [PPYOLOESOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
+- [FCOS系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/fcos)
+- [TTFNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ttfnet)
+- [TOOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/tood)
+- [GFL系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/gfl)
+- [PP-PicoDet + PP-TinyPose系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose/README.md)
+
+### 3.2 模型导出
+PaddleDetection模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/deploy/EXPORT_MODEL.md),**注意**:PaddleDetection导出的模型包含`model.pdmodel`、`model.pdiparams`和`infer_cfg.yml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息
+
+### 3.3 导出须知
+如果您是自行导出PaddleDetection推理模型,请注意以下问题:
+- 在导出模型时不要进行NMS的去除操作,正常导出即可
+- 如果用于跑原生TensorRT后端(非Paddle Inference后端),不要添加--trt参数
+- 导出模型时,不要添加`fuse_normalize=True`参数
+
+## 4. 详细的部署示例
+- [Python部署](python)
+- [C++部署](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt b/deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt
new file mode 100644
index 00000000000..d2f10da4f95
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt
@@ -0,0 +1,13 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+add_executable(infer_tinypose_demo ${PROJECT_SOURCE_DIR}/pptinypose_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+target_link_libraries(infer_tinypose_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/README.md b/deploy/fastdeploy/cpu-gpu/cpp/README.md
new file mode 100644
index 00000000000..01d68dd14aa
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/README.md
@@ -0,0 +1,142 @@
+[English](README.md) | 简体中文
+# PaddleDetection CPU-GPU C++部署示例
+
+本目录下提供`infer.cc`快速完成PPYOLOE模型包括PPYOLOE在CPU/GPU,以及GPU上通过Paddle-TensorRT加速部署的示例。
+
+## 1. 说明
+PaddleDetection支持利用FastDeploy在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上快速部署PaddleDetection模型。FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`所有类名的构造函数和预测函数在参数上完全一致。所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+## 2. 部署环境准备
+在部署前,需确认软硬件环境,同时下载预编译部署库,参考[FastDeploy安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装)安装FastDeploy预编译库。
+
+## 3. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+## 4. 运行部署示例
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+### 4.1 目标检测示例
+```bash
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-gpu-x.x.x.tgz
+
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/cpp
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 编译部署示例
+mkdir build && cd build
+mv ../fastdeploy-linux-x64-gpu-x.x.x .
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-gpu-x.x.x
+make -j
+
+# 下载PPYOLOE模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# 运行部署示例
+# CPU推理
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0
+# GPU推理
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
+# GPU上Paddle-TensorRT推理(注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待)
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 2
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+### 4.2 关键点检测示例
+```bash
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-gpu-x.x.x.tgz
+
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/cpp
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 编译部署示例
+mkdir build && cd build
+mv ../fastdeploy-linux-x64-gpu-x.x.x .
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-gpu-x.x.x
+make -j
+
+# 下载PP-TinyPose模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# 运行部署示例
+# CPU推理
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg 0
+# GPU推理
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg 1
+# GPU上Paddle-TensorRT推理(注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待)
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg 2
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+关于如何进行多人关键点检测,请参考[PPTinyPose Pipeline示例](./det_keypoint_unite/)
+
+- 注意,以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: [如何在Windows中使用FastDeploy C++ SDK](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md)
+- 关于如何通过FastDeploy使用更多不同的推理后端,以及如何使用不同的硬件,请参考文档:[如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 5. PaddleDetection C++接口
+FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`所有类名的构造函数和预测函数在参数上完全一致。所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+### 5.1 目标检测及实例分割模型
+```c++
+fastdeploy::vision::detection::PicoDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SOLOv2(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOE(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLO(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::YOLOv3(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOX(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FasterRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::MaskRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SSD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv5(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv6(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv7(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv8(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::CascadeRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PSSDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::RetinaNet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOESOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FCOS(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::TOOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::GFL(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+### 5.2 关键点检测模型
+```C++
+fastdeploy::vision::keypointdetection::PPTinyPose(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+PaddleDetection模型加载和初始化,其中model_file, params_file为导出的Paddle部署模型格式, config_file为PaddleDetection同时导出的部署配置yaml文件
+
+## 6. 更多指南
+- [PaddleDetection C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [Python部署](../python)
+
+## 7. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
+
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/CMakeLists.txt b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/CMakeLists.txt
new file mode 100644
index 00000000000..71cbaa0fde1
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/det_keypoint_unite_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/README.md b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/README.md
new file mode 100644
index 00000000000..66520a4e525
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/README.md
@@ -0,0 +1,74 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) CPU-GPU C++部署示例
+
+本目录下提供`det_keypoint_unite_infer.cc`快速完成多人模型配置 PP-PicoDet + PP-TinyPose 在CPU/GPU,以及GPU上通过TensorRT加速部署的`单图多人关键点检测`示例。执行如下脚本即可完成。**注意**: PP-TinyPose单模型独立部署,请参考[PP-TinyPose 单模型](../README.md)
+
+## 1. 部署环境准备
+在部署前,需确认软硬件环境,同时下载预编译部署库,参考[FastDeploy安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装)安装FastDeploy预编译库。
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../../README.md)或者[自行导出PaddleDetection部署模型](../../README.md)。
+
+## 3. 运行部署示例
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+mkdir build && cd build
+mv ../fastdeploy-linux-x64-x.x.x .
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# 下载PP-TinyPose和PP-PicoDet模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+
+# CPU推理
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg 0
+# GPU推理
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg 1
+# GPU上Paddle-TensorRT推理(注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待)
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg 2
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+- 注意,以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: [如何在Windows中使用FastDeploy C++ SDK](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md)
+- 关于如何通过FastDeploy使用更多不同的推理后端,以及如何使用不同的硬件,请参考文档:[如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. PP-TinyPose 模型串联 C++ 接口
+
+```c++
+fastdeploy::pipeline::PPTinyPose(
+ fastdeploy::vision::detection::PicoDet* det_model,
+ fastdeploy::vision::keypointdetection::PPTinyPose* pptinypose_model)
+```
+
+PPTinyPose Pipeline模型加载和初始化。det_model表示初始化后的检测模型,pptinypose_model表示初始化后的关键点检测模型。
+
+
+## 5. 更多指南
+- [PaddleDetection C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../../)
+- [Python部署](../../python/det_keypoint_unite/)
+
+## 6. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
new file mode 100755
index 00000000000..1b8b13120c6
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
@@ -0,0 +1,205 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#include "fastdeploy/pipeline.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+ auto pipeline =fastdeploy::pipeline::PPTinyPose(&det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+void GpuInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file, option);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+ auto pipeline =
+ fastdeploy::pipeline::PPTinyPose(
+ &det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+void TrtInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+
+ auto det_option = fastdeploy::RuntimeOption();
+ det_option.UseGpu();
+ det_option.UsePaddleInferBackend();
+ // If use original Tensorrt, not Paddle-TensorRT,
+ // please try `option.UseTrtBackend()`
+ det_option.paddle_infer_option.enable_trt = true;
+ det_option.paddle_infer_option.collect_trt_shape = true;
+ det_option.trt_option.SetShape("image", {1, 3, 320, 320}, {1, 3, 320, 320},
+ {1, 3, 320, 320});
+ det_option.trt_option.SetShape("scale_factor", {1, 2}, {1, 2}, {1, 2});
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file, det_option);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_option = fastdeploy::RuntimeOption();
+
+ tinypose_option.UseGpu();
+ tinypose_option.UsePaddleInferBackend();
+ // If use original Tensorrt, not Paddle-TensorRT,
+ // please try `option.UseTrtBackend()`
+ tinypose_option.paddle_infer_option.enable_trt = true;
+ tinypose_option.paddle_infer_option.collect_trt_shape = true;
+ tinypose_option.trt_option.SetShape("image", {1, 3, 256, 192}, {1, 3, 256, 192},
+ {1, 3, 256, 192});
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file,
+ tinypose_option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+ auto pipeline =
+ fastdeploy::pipeline::PPTinyPose(
+ &det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 5) {
+ std::cout << "Usage: infer_demo path/to/detection_model_dir "
+ "path/to/pptinypose_model_dir path/to/image run_option, "
+ "e.g ./infer_model ./picodet_model_dir ./pptinypose_model_dir "
+ "./test.jpeg 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+ "with gpu; 2: run with gpu and use tensorrt backend;"
+ << std::endl;
+ return -1;
+ }
+
+ if (std::atoi(argv[4]) == 0) {
+ CpuInfer(argv[1], argv[2], argv[3]);
+ } else if (std::atoi(argv[4]) == 1) {
+ GpuInfer(argv[1], argv[2], argv[3]);
+ } else if (std::atoi(argv[4]) == 2) {
+ TrtInfer(argv[1], argv[2], argv[3]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/infer.cc b/deploy/fastdeploy/cpu-gpu/cpp/infer.cc
new file mode 100644
index 00000000000..88042f5e083
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/infer.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseCpu();
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+void GpuInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+void TrtInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+ option.UsePaddleInferBackend();
+ // If use original Tensorrt, not Paddle-TensorRT,
+ // please try `option.UseTrtBackend()`
+ option.paddle_infer_option.enable_trt = true;
+ option.paddle_infer_option.collect_trt_shape = true;
+ option.trt_option.SetShape("image", {1, 3, 640, 640}, {1, 3, 640, 640},
+ {1, 3, 640, 640});
+ option.trt_option.SetShape("scale_factor", {1, 2}, {1, 2}, {1, 2});
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 4) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+ "e.g ./infer_demo ./ppyoloe_model_dir ./test.jpeg 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+ "with gpu; 2: run with gpu and use tensorrt backend"
+ << std::endl;
+ return -1;
+ }
+
+ if (std::atoi(argv[3]) == 0) {
+ CpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 1) {
+ GpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 2) {
+ TrtInfer(argv[1], argv[2]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/pptinypose_infer.cc b/deploy/fastdeploy/cpu-gpu/cpp/pptinypose_infer.cc
new file mode 100644
index 00000000000..0a56334f7f4
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/pptinypose_infer.cc
@@ -0,0 +1,149 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseCpu();
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+void GpuInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+void TrtInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_option = fastdeploy::RuntimeOption();
+ tinypose_option.UseGpu();
+ tinypose_option.UsePaddleInferBackend();
+ // If use original Tensorrt, not Paddle-TensorRT,
+ // please try `option.UseTrtBackend()`
+ tinypose_option.paddle_infer_option.enable_trt = true;
+ tinypose_option.paddle_infer_option.collect_trt_shape = true;
+ tinypose_option.trt_option.SetShape("image", {1, 3, 256, 192}, {1, 3, 256, 192},
+ {1, 3, 256, 192});
+
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file,
+ tinypose_option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 4) {
+ std::cout << "Usage: infer_demo path/to/pptinypose_model_dir path/to/image "
+ "run_option, "
+ "e.g ./infer_demo ./pptinypose_model_dir ./test.jpeg 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+ "with gpu; 2: run with gpu and use tensorrt backend;"
+ << std::endl;
+ return -1;
+ }
+
+ if (std::atoi(argv[3]) == 0) {
+ CpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 1) {
+ GpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 2) {
+ TrtInfer(argv[1], argv[2]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/cpu-gpu/python/README.md b/deploy/fastdeploy/cpu-gpu/python/README.md
new file mode 100644
index 00000000000..acadce22a8f
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/README.md
@@ -0,0 +1,126 @@
+[English](README.md) | 简体中文
+# PaddleDetection CPU-GPU Python部署示例
+
+本目录下提供`infer.py`,快速完成PPYOLOE模型在CPU/GPU,以及GPU上通过Paddle-TensorRT加速部署的示例。
+
+## 1. 说明
+PaddleDetection支持利用FastDeploy在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上快速部署PaddleDetection模型。FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`等。这些模型所有类名的构造函数和预测函数在参数上完全一致。所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+## 2. 部署环境准备
+在部署前,需确认软硬件环境,同时下载预编译部署库,参考[FastDeploy安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装)安装FastDeploy预编译库。
+
+## 3. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+## 4. 运行部署示例
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+### 4.1 目标检测示例
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/python
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载PPYOLOE模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# 运行部署示例
+# CPU推理
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg --device cpu
+# GPU推理
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg --device gpu
+# GPU上Paddle-TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待)
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg --device gpu --use_trt True
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+### 4.2 关键点检测示例
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/python
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载PP-TinyPose模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# 运行部署示例
+# CPU推理
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg --device cpu
+# GPU推理
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg --device gpu
+# GPU上Paddle-TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待)
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg --device gpu --use_trt True
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+关于如何进行多人关键点检测,请参考[PPTinyPose Pipeline示例](./det_keypoint_unite/)
+
+## 5. 部署示例选项说明
+
+|参数|含义|默认值|
+|---|---|---|
+|--model_dir|指定模型文件夹所在的路径|None|
+|--image_file|指定测试图片所在的路径|None|
+|--device|指定即将运行的硬件类型,支持的值为`[cpu, gpu]`,当设置为cpu时,可运行在x86 cpu/arm cpu等cpu上|cpu|
+|--use_trt|是否使用trt,该项只在device为gpu时有效|False|
+
+## 6. PaddleDetection Python接口
+FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`等。这些模型所有类名的构造函数和预测函数在参数上完全一致。所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+### 6.1 目标检测及实例分割模型
+```python
+fastdeploy.vision.detection.PPYOLOE(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PicoDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOX(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.YOLOv3(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLO(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FasterRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.MaskRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.SSD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv5(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv6(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv7(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RTMDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.CascadeRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PSSDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RetinaNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLOESOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FCOS(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TTFNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TOOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.GFL(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+### 6.2 关键点检测模型
+```python
+fd.vision.keypointdetection.PPTinyPose(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+
+PaddleDetection模型加载和初始化,其中model_file, params_file为导出的Paddle部署模型格式, config_file为PaddleDetection同时导出的部署配置yaml文件
+
+## 7. 更多指南
+- [PaddleDetection Python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [C++部署](../cpp)
+
+## 8. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/README.md b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/README.md
new file mode 100644
index 00000000000..257188b6ece
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/README.md
@@ -0,0 +1,70 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) CPU-GPU Python部署示例
+
+本目录下提供`det_keypoint_unite_infer.py`,快速完成多人模型配置 PP-PicoDet + PP-TinyPose 在CPU/GPU,以及GPU上通过TensorRT加速部署的`单图多人关键点检测`示例。执行如下脚本即可完成。**注意**: PP-TinyPose单模型独立部署,请参考[PP-TinyPose 单模型](../README.md)
+
+## 1. 部署环境准备
+在部署前,需确认软硬件环境,同时下载预编译部署库,参考[FastDeploy安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装)安装FastDeploy预编译库。
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../../README.md)或者[自行导出PaddleDetection部署模型](../../README.md)。
+
+## 3. 运行部署示例
+
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载PP-TinyPose模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+# CPU推理
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg --device cpu
+# GPU推理
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg --device gpu
+# GPU上Paddle-TensorRT推理(注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待)
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg --device gpu --use_trt True
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+- 关于如何通过FastDeploy使用更多不同的推理后端,以及如何使用不同的硬件,请参考文档:[如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. 部署示例选项说明
+
+|参数|含义|默认值|
+|---|---|---|
+|--tinypose_model_dir|指定关键点模型文件夹所在的路径|None|
+|--det_model_dir|指定目标模型文件夹所在的路径|None|
+|--image_file|指定测试图片所在的路径|None|
+|--device|指定即将运行的硬件类型,支持的值为`[cpu, gpu]`,当设置为cpu时,可运行在x86 cpu/arm cpu等cpu上|cpu|
+|--use_trt|是否使用trt,该项只在device为gpu时有效|False|
+
+## 5. PPTinyPose 模型串联 Python接口
+
+```python
+fd.pipeline.PPTinyPose(det_model=None, pptinypose_model=None)
+```
+
+PPTinyPose Pipeline 模型加载和初始化,其中det_model是使用`fd.vision.detection.PicoDet`初始化的检测模型,pptinypose_model是使用`fd.vision.keypointdetection.PPTinyPose`初始化的关键点检测模型。
+
+## 6. 更多指南
+- [PaddleDetection Python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../../)
+- [C++部署](../../cpp/)
+
+## 7. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/det_keypoint_unite_infer.py b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/det_keypoint_unite_infer.py
new file mode 100755
index 00000000000..6873ed867b9
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/det_keypoint_unite_infer.py
@@ -0,0 +1,101 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--tinypose_model_dir",
+ required=True,
+ help="path of paddletinypose model directory")
+ parser.add_argument(
+ "--det_model_dir", help="path of paddledetection model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default='cpu',
+ help="type of inference device, support 'cpu' or 'gpu'.")
+ parser.add_argument(
+ "--use_trt",
+ type=ast.literal_eval,
+ default=False,
+ help="wether to use tensorrt.")
+ return parser.parse_args()
+
+
+def build_picodet_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+ # If use original Tensorrt, not Paddle-TensorRT,
+ # please try `option.use_trt_backend()`
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 320, 320], [1, 3, 320, 320],
+ [1, 3, 320, 320])
+ option.trt_option.set_shape("scale_factor", [1, 2], [1, 2], [1, 2])
+ return option
+
+
+def build_tinypose_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+ # If use original Tensorrt, not Paddle-TensorRT,
+ # please try `option.use_trt_backend()`
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 256, 192], [1, 3, 256, 192],
+ [1, 3, 256, 192])
+ return option
+
+
+args = parse_arguments()
+picodet_model_file = os.path.join(args.det_model_dir, "model.pdmodel")
+picodet_params_file = os.path.join(args.det_model_dir, "model.pdiparams")
+picodet_config_file = os.path.join(args.det_model_dir, "infer_cfg.yml")
+
+# setup runtime
+runtime_option = build_picodet_option(args)
+det_model = fd.vision.detection.PicoDet(
+ picodet_model_file,
+ picodet_params_file,
+ picodet_config_file,
+ runtime_option=runtime_option)
+
+tinypose_model_file = os.path.join(args.tinypose_model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.tinypose_model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.tinypose_model_dir, "infer_cfg.yml")
+# setup runtime
+runtime_option = build_tinypose_option(args)
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+
+# predict
+im = cv2.imread(args.image_file)
+pipeline = fd.pipeline.PPTinyPose(det_model, tinypose_model)
+pipeline.detection_model_score_threshold = 0.5
+pipeline_result = pipeline.predict(im)
+print("Paddle TinyPose Result:\n", pipeline_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, pipeline_result, conf_threshold=0.2)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/cpu-gpu/python/infer.py b/deploy/fastdeploy/cpu-gpu/python/infer.py
new file mode 100644
index 00000000000..bf1a96dda64
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/infer.py
@@ -0,0 +1,74 @@
+import cv2
+import os
+
+import fastdeploy as fd
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir", required=True, help="Path of PaddleDetection model.")
+ parser.add_argument(
+ "--image_file", type=str, required=True, help="Path of test image file.")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default='cpu',
+ help="Type of inference device, support, 'cpu' or 'gpu'.")
+ parser.add_argument(
+ "--use_trt",
+ type=ast.literal_eval,
+ default=False,
+        help="Whether to use TensorRT.")
+ return parser.parse_args()
+
+
+def build_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+ # If use original Tensorrt, not Paddle-TensorRT,
+ # please try `option.use_trt_backend()`
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 640, 640], [1, 3, 640, 640],
+ [1, 3, 640, 640])
+ option.trt_option.set_shape("scale_factor", [1, 2], [1, 2], [1, 2])
+ return option
+
+
+args = parse_arguments()
+
+if args.model_dir is None:
+ model_dir = fd.download_model(name='ppyoloe_crn_l_300e_coco')
+else:
+ model_dir = args.model_dir
+
+model_file = os.path.join(model_dir, "model.pdmodel")
+params_file = os.path.join(model_dir, "model.pdiparams")
+config_file = os.path.join(model_dir, "infer_cfg.yml")
+
+# setting for runtime
+runtime_option = build_option(args)
+model = fd.vision.detection.PPYOLOE(
+ model_file, params_file, config_file, runtime_option=runtime_option)
+
+# predict
+if args.image_file is None:
+ image_file = fd.utils.get_detection_test_image()
+else:
+ image_file = args.image_file
+im = cv2.imread(image_file)
+result = model.predict(im)
+print(result)
+
+# visualize
+vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/cpu-gpu/python/pptinypose_infer.py b/deploy/fastdeploy/cpu-gpu/python/pptinypose_infer.py
new file mode 100644
index 00000000000..a3115f82be2
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/pptinypose_infer.py
@@ -0,0 +1,67 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir",
+ required=True,
+ help="path of PP-TinyPose model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default='cpu',
+ help="type of inference device, support 'cpu', or 'gpu'.")
+ parser.add_argument(
+ "--use_trt",
+ type=ast.literal_eval,
+ default=False,
+        help="whether to use TensorRT.")
+ return parser.parse_args()
+
+
+def build_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+ # If use original Tensorrt, not Paddle-TensorRT,
+ # please try `option.use_trt_backend()`
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 256, 192], [1, 3, 256, 192],
+ [1, 3, 256, 192])
+ return option
+
+
+args = parse_arguments()
+
+tinypose_model_file = os.path.join(args.model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.model_dir, "infer_cfg.yml")
+# setup runtime
+runtime_option = build_option(args)
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+# predict
+im = cv2.imread(args.image_file)
+tinypose_result = tinypose_model.predict(im)
+print("Paddle TinyPose Result:\n", tinypose_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, tinypose_result, conf_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/kunlunxin/README.md b/deploy/fastdeploy/kunlunxin/README.md
new file mode 100644
index 00000000000..c264df0d614
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/README.md
@@ -0,0 +1,105 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection 在昆仑芯上的部署方案-FastDeploy
+
+## 1. 说明
+PaddleDetection支持利用FastDeploy在昆仑芯片上部署检测模型。
+
+支持如下芯片的部署
+- 昆仑 818-100(推理芯片)
+- 昆仑 818-300(训练芯片)
+
+支持如下芯片的设备
+- K100/K200 昆仑 AI 加速卡
+- R200 昆仑芯 AI 加速卡
+
+## 2. 使用预导出的模型列表
+
+为了方便开发者的测试,下面提供了PaddleDetection导出的各系列模型,开发者可直接下载使用。其中精度指标来源于PaddleDetection中对各模型的介绍,详情请参考PaddleDetection中的说明。
+
+### 2.1 目标检测及实例分割模型
+| 模型 | 参数大小 | 精度 | 备注 |
+|:---------------------------------------------------------------- |:----- |:----- | :------ |
+| [picodet_l_320_coco_lcnet](https://bj.bcebos.com/paddlehub/fastdeploy/picodet_l_320_coco_lcnet.tgz) |23MB | Box AP 42.6% |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz) |200MB | Box AP 51.4% |
+| [ppyoloe_plus_crn_m_80e_coco](https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz) |83.3MB | Box AP 49.8% |
+| [ppyolo_r50vd_dcn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolo_r50vd_dcn_1x_coco.tgz) | 180MB | Box AP 44.8% | 暂不支持TensorRT |
+| [ppyolov2_r101vd_dcn_365e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolov2_r101vd_dcn_365e_coco.tgz) | 282MB | Box AP 49.7% | 暂不支持TensorRT |
+| [yolov3_darknet53_270e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov3_darknet53_270e_coco.tgz) |237MB | Box AP 39.1% | |
+| [yolox_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolox_s_300e_coco.tgz) | 35MB | Box AP 40.4% | |
+| [faster_rcnn_r50_vd_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz) | 160MB | Box AP 40.8%| 暂不支持TensorRT |
+| [mask_rcnn_r50_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/mask_rcnn_r50_1x_coco.tgz) | 128M | Box AP 37.4%, Mask AP 32.8%| 暂不支持TensorRT、ORT |
+| [ssd_mobilenet_v1_300_120e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_mobilenet_v1_300_120e_voc.tgz) | 24.9M | Box AP 73.8%| 暂不支持TensorRT、ORT |
+| [ssd_vgg16_300_240e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_vgg16_300_240e_voc.tgz) | 106.5M | Box AP 77.8%| 暂不支持TensorRT、ORT |
+| [ssdlite_mobilenet_v1_300_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ssdlite_mobilenet_v1_300_coco.tgz) | 29.1M | | 暂不支持TensorRT、ORT |
+| [rtmdet_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_l_300e_coco.tgz) | 224M | Box AP 51.2%| |
+| [rtmdet_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_s_300e_coco.tgz) | 42M | Box AP 44.5%| |
+| [yolov5_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_l_300e_coco.tgz) | 183M | Box AP 48.9%| |
+| [yolov5_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_s_300e_coco.tgz) | 31M | Box AP 37.6%| |
+| [yolov6_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_l_300e_coco.tgz) | 229M | Box AP 51.0%| |
+| [yolov6_s_400e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_s_400e_coco.tgz) | 68M | Box AP 43.4%| |
+| [yolov7_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_l_300e_coco.tgz) | 145M | Box AP 51.0%| |
+| [yolov7_x_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_x_300e_coco.tgz) | 277M | Box AP 53.0%| |
+| [cascade_rcnn_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_fpn_1x_coco.tgz) | 271M | Box AP 41.1%| 暂不支持TensorRT、ORT |
+| [cascade_rcnn_r50_vd_fpn_ssld_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.tgz) | 271M | Box AP 45.0%| 暂不支持TensorRT、ORT |
+| [faster_rcnn_enhance_3x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_enhance_3x_coco.tgz) | 119M | Box AP 41.5%| 暂不支持TensorRT、ORT |
+| [fcos_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/fcos_r50_fpn_1x_coco.tgz) | 129M | Box AP 39.6%| 暂不支持TensorRT |
+| [gfl_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/gfl_r50_fpn_1x_coco.tgz) | 128M | Box AP 41.0%| 暂不支持TensorRT |
+| [ppyoloe_crn_l_80e_sliced_visdrone_640_025](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_80e_sliced_visdrone_640_025.tgz) | 200M | Box AP 31.9%| |
+| [retinanet_r101_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r101_fpn_2x_coco.tgz) | 210M | Box AP 40.6%| 暂不支持TensorRT、ORT |
+| [retinanet_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r50_fpn_1x_coco.tgz) | 136M | Box AP 37.5%| 暂不支持TensorRT、ORT |
+| [tood_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/tood_r50_fpn_1x_coco.tgz) | 130M | Box AP 42.5%| 暂不支持TensorRT、ORT |
+| [ttfnet_darknet53_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ttfnet_darknet53_1x_coco.tgz) | 178M | Box AP 33.5%| 暂不支持TensorRT、ORT |
+| [yolov8_x_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_x_500e_coco.tgz) | 265M | Box AP 53.8%
+| [yolov8_l_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_l_500e_coco.tgz) | 173M | Box AP 52.8%
+| [yolov8_m_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_m_500e_coco.tgz) | 99M | Box AP 50.2%
+| [yolov8_s_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_s_500e_coco.tgz) | 43M | Box AP 44.9%
+| [yolov8_n_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_n_500e_coco.tgz) | 13M | Box AP 37.3%
+
+### 2.2 关键点检测模型
+| 模型 | 说明 | 模型格式 | 版本 |
+| :--- | :--- | :------- | :--- |
+| [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | 单人关键点检测模型 | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | 单人关键点检测模型 | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-192x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_192x192_infer.tgz) + [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | 单人关键点检测串联配置 | Paddle |[Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-320x320](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz) + [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | 多人关键点检测串联配置 | Paddle |[Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+
+## 3. 自行导出PaddleDetection部署模型
+### 3.1 模型版本
+支持[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)大于等于2.4版本的PaddleDetection模型部署。目前FastDeploy测试过成功部署的模型:
+
+- [PP-YOLOE(含PP-YOLOE+)系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+- [PicoDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/picodet)
+- [PP-YOLO系列模型(含v2)](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyolo)
+- [YOLOv3系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolov3)
+- [YOLOX系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolox)
+- [FasterRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/faster_rcnn)
+- [MaskRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mask_rcnn)
+- [SSD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ssd)
+- [YOLOv5系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov5)
+- [YOLOv6系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov6)
+- [YOLOv7系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov7)
+- [YOLOv8系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov8)
+- [RTMDet系列模型](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/rtmdet)
+- [CascadeRCNN系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/cascade_rcnn)
+- [PSSDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rcnn_enhance)
+- [RetinaNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/retinanet)
+- [PPYOLOESOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
+- [FCOS系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/fcos)
+- [TTFNet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ttfnet)
+- [TOOD系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/tood)
+- [GFL系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/gfl)
+- [PP-PicoDet + PP-TinyPose系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose/README.md)
+
+### 3.2 模型导出
+PaddleDetection模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/deploy/EXPORT_MODEL.md),**注意**:PaddleDetection导出的模型包含`model.pdmodel`、`model.pdiparams`和`infer_cfg.yml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息
+
+### 3.3 导出须知
+如果您是自行导出PaddleDetection推理模型,请注意以下问题:
+- 在导出模型时不要进行NMS的去除操作,正常导出即可
+- 如果用于跑原生TensorRT后端(非Paddle Inference后端),不要添加--trt参数
+- 导出模型时,不要添加`fuse_normalize=True`参数
+
+## 4. 详细的部署示例
+- [Python部署](python)
+- [C++部署](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/cpp/CMakeLists.txt b/deploy/fastdeploy/kunlunxin/cpp/CMakeLists.txt
new file mode 100644
index 00000000000..41ac6602fed
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/CMakeLists.txt
@@ -0,0 +1,14 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+add_executable(infer_tinypose_demo ${PROJECT_SOURCE_DIR}/pptinypose_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+target_link_libraries(infer_tinypose_demo ${FASTDEPLOY_LIBS})
+
diff --git a/deploy/fastdeploy/kunlunxin/cpp/README.md b/deploy/fastdeploy/kunlunxin/cpp/README.md
new file mode 100644
index 00000000000..af6453f41a3
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/README.md
@@ -0,0 +1,127 @@
+[English](README.md) | 简体中文
+# PaddleDetection 昆仑芯 XPU C++部署示例
+
+本目录下提供`infer.cc`快速完成PPYOLOE等模型在昆仑芯 XPU上加速部署的示例。
+
+## 1. 说明
+PaddleDetection支持利用FastDeploy在昆仑芯 XPU上快速部署PaddleDetection模型。FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`。所有类名的构造函数和预测函数在参数上完全一致,所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+## 2. 部署环境准备
+在部署前,需自行编译基于昆仑芯XPU的预测库,参考文档[昆仑芯XPU部署环境编译安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 3. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+## 4. 运行部署示例
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+### 4.1 目标检测示例
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/cpp
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 编译部署示例
+mkdir build
+cd build
+# 使用编译完成的FastDeploy库编译infer_demo
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-kunlunxin
+make -j
+
+# 下载PPYOLOE模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# 运行部署示例
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+### 4.2 关键点检测示例
+```bash
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-gpu-x.x.x.tgz
+
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/cpp
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 编译部署示例
+mkdir build && cd build
+mv ../fastdeploy-linux-x64-gpu-x.x.x .
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-gpu-x.x.x
+make -j
+
+# 下载PP-TinyPose模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# 运行部署示例
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+关于如何进行多人关键点检测,请参考[PPTinyPose Pipeline示例](./det_keypoint_unite/)
+
+- 关于如何通过FastDeploy使用更多不同的推理后端,以及如何使用不同的硬件,请参考文档:[如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 5. PaddleDetection C++接口
+FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`所有类名的构造函数和预测函数在参数上完全一致。所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+### 5.1 目标检测及实例分割模型
+```c++
+fastdeploy::vision::detection::PicoDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SOLOv2(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOE(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLO(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::YOLOv3(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOX(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FasterRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::MaskRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SSD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv5(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv6(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv7(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv8(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::CascadeRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PSSDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::RetinaNet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOESOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FCOS(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::TOOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::GFL(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+### 5.2 关键点检测模型
+```C++
+fastdeploy::vision::keypointdetection::PPTinyPose(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+PaddleDetection模型加载和初始化,其中model_file, params_file为导出的Paddle部署模型格式, config_file为PaddleDetection同时导出的部署配置yaml文件
+
+## 6. 更多指南
+- [PaddleDetection C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [Python部署](../python)
+
+## 7. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/CMakeLists.txt b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/CMakeLists.txt
new file mode 100644
index 00000000000..71cbaa0fde1
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/det_keypoint_unite_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/README.md b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/README.md
new file mode 100644
index 00000000000..2dd0fd26a15
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/README.md
@@ -0,0 +1,70 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) 昆仑芯 XPU C++部署示例
+
+本目录下提供`det_keypoint_unite_infer.cc`快速完成多人模型配置 PP-PicoDet + PP-TinyPose 在昆仑芯 XPU上部署的`单图多人关键点检测`示例。执行如下脚本即可完成。**注意**: PP-TinyPose单模型独立部署,请参考[PP-TinyPose 单模型](../README.md)
+
+## 1. 部署环境准备
+在部署前,需确认软硬件环境,同时下载预编译部署库,参考[FastDeploy安装文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装)安装FastDeploy预编译库。
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../../README.md)或者[自行导出PaddleDetection部署模型](../../README.md)。
+
+## 3. 运行部署示例
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+```bash
+mkdir build
+cd build
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载PP-TinyPose和PP-PicoDet模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+
+# 运行部署示例
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+- 注意,以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: [如何在Windows中使用FastDeploy C++ SDK](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md)
+- 关于如何通过FastDeploy使用更多不同的推理后端,以及如何使用不同的硬件,请参考文档:[如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. PP-TinyPose 模型串联 C++ 接口
+
+```c++
+fastdeploy::pipeline::PPTinyPose(
+ fastdeploy::vision::detection::PicoDet* det_model,
+ fastdeploy::vision::keypointdetection::PPTinyPose* pptinypose_model)
+```
+
+PPTinyPose Pipeline模型加载和初始化。det_model表示初始化后的检测模型,pptinypose_model表示初始化后的关键点检测模型。
+
+
+## 5. 更多指南
+- [PaddleDetection C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../../)
+- [Python部署](../../python/det_keypoint_unite/)
+
+## 6. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
new file mode 100755
index 00000000000..089213ff942
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
@@ -0,0 +1,84 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#include "fastdeploy/pipeline.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void KunlunXinInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto option = fastdeploy::RuntimeOption();
+ option.UseKunlunXin();
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file, option);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+ auto pipeline =
+ fastdeploy::pipeline::PPTinyPose(
+ &det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 5) {
+ std::cout << "Usage: infer_demo path/to/detection_model_dir "
+ "path/to/pptinypose_model_dir path/to/image, "
+ "e.g ./infer_model ./picodet_model_dir ./pptinypose_model_dir "
+ "./test.jpeg 0"
+ << std::endl;
+ return -1;
+ }
+
+ KunlunXinInfer(argv[1], argv[2], argv[3]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/kunlunxin/cpp/infer.cc b/deploy/fastdeploy/kunlunxin/cpp/infer.cc
new file mode 100644
index 00000000000..4f80bb43986
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/infer.cc
@@ -0,0 +1,60 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseKunlunXin();
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image, "
+ "e.g ./infer_demo ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+ KunlunXinInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/kunlunxin/cpp/pptinypose_infer.cc b/deploy/fastdeploy/kunlunxin/cpp/pptinypose_infer.cc
new file mode 100644
index 00000000000..168d167d1c6
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/pptinypose_infer.cc
@@ -0,0 +1,65 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void KunlunXinInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseKunlunXin();
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image, "
+ "e.g ./infer_demo ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+ KunlunXinInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/kunlunxin/python/README.md b/deploy/fastdeploy/kunlunxin/python/README.md
new file mode 100644
index 00000000000..45ce3168a61
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/README.md
@@ -0,0 +1,117 @@
+[English](README.md) | 简体中文
+# PaddleDetection 昆仑芯 XPU Python部署示例
+
+本目录下提供`infer.py`快速完成PPYOLOE模型在昆仑芯 XPU上的加速部署的示例。
+
+## 1. 说明
+PaddleDetection支持利用FastDeploy在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上快速部署PaddleDetection模型。FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`所有类名的构造函数和预测函数在参数上完全一致。所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+## 2. 部署环境准备
+在部署前,需自行编译基于昆仑XPU的FastDeploy python wheel包并安装,参考文档[昆仑芯XPU部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 3. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+## 4. 运行部署示例
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+### 4.1 目标检测示例
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/python
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载PPYOLOE模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# 运行部署示例
+# 昆仑芯推理
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+### 4.2 关键点检测示例
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/python
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载PP-TinyPose模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# 运行部署示例
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+关于如何进行多人关键点检测,请参考[PPTinyPose Pipeline示例](./det_keypoint_unite/)
+
+
+## 5. 部署示例选项说明
+
+|参数|含义|默认值|
+|---|---|---|
+|--model_dir|指定模型文件夹所在的路径|None|
+|--image_file|指定测试图片所在的路径|None|
+
+## 6. PaddleDetection Python接口
+FastDeploy目前支持的模型系列,包括但不限于`PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`,`SSD`,`PaddleYOLOv5`,`PaddleYOLOv6`,`PaddleYOLOv7`,`RTMDet`,`CascadeRCNN`,`PSSDet`,`RetinaNet`,`PPYOLOESOD`,`FCOS`,`TTFNet`,`TOOD`,`GFL`所有类名的构造函数和预测函数在参数上完全一致。所有模型的调用,只需要参考PPYOLOE的示例,即可快速调用。
+
+### 6.1 目标检测及实例分割模型
+```python
+fastdeploy.vision.detection.PPYOLOE(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PicoDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOX(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.YOLOv3(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLO(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FasterRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.MaskRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.SSD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv5(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv6(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv7(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RTMDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.CascadeRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PSSDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RetinaNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLOESOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FCOS(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TTFNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TOOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.GFL(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+
+### 6.2 关键点检测模型
+```python
+fd.vision.keypointdetection.PPTinyPose(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+
+PaddleDetection模型加载和初始化,其中model_file, params_file为导出的Paddle部署模型格式, config_file为PaddleDetection同时导出的部署配置yaml文件
+
+## 7. 更多指南
+- [PaddleDetection Python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [C++部署](../cpp)
+
+## 8. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
diff --git a/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/README.md b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/README.md
new file mode 100644
index 00000000000..3a7359f23f1
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/README.md
@@ -0,0 +1,65 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) 昆仑芯 XPU Python部署示例
+
+本目录下提供`det_keypoint_unite_infer.py`快速完成多人模型配置 PP-PicoDet + PP-TinyPose 在昆仑芯 XPU上部署的`单图多人关键点检测`示例。执行如下脚本即可完成。**注意**: PP-TinyPose单模型独立部署,请参考[PP-TinyPose 单模型](../README.md)
+
+## 1. 部署环境准备
+在部署前,需自行编译基于昆仑芯 XPU的FastDeploy python wheel包并安装,参考文档[昆仑芯XPU部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../../README.md)或者[自行导出PaddleDetection部署模型](../../README.md)。
+
+## 3. 运行部署示例
+
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载PP-TinyPose模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+
+# 运行部署示例
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg
+```
+
+运行完成可视化结果如下图所示
+
+

+
+
+- 关于如何通过FastDeploy使用更多不同的推理后端,以及如何使用不同的硬件,请参考文档:[如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. 部署示例选项说明
+
+|参数|含义|默认值|
+|---|---|---|
+|--tinypose_model_dir|指定关键点模型文件夹所在的路径|None|
+|--det_model_dir|指定目标模型文件夹所在的路径|None|
+|--image_file|指定测试图片所在的路径|None|
+
+## 5. PPTinyPose 模型串联 Python接口
+
+```python
+fd.pipeline.PPTinyPose(det_model=None, pptinypose_model=None)
+```
+
+PPTinyPose Pipeline 模型加载和初始化,其中det_model是使用`fd.vision.detection.PicoDet`初始化的检测模型,pptinypose_model是使用`fd.vision.keypointdetection.PPTinyPose`初始化的关键点检测模型。
+
+## 6. 更多指南
+- [PaddleDetection Python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../../)
+- [C++部署](../../cpp)
+
+## 7. 常见问题
+- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/det_keypoint_unite_infer.py b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/det_keypoint_unite_infer.py
new file mode 100755
index 00000000000..48e99b26fd8
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/det_keypoint_unite_infer.py
@@ -0,0 +1,67 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--tinypose_model_dir",
+ required=True,
+ help="path of paddletinypose model directory")
+ parser.add_argument(
+ "--det_model_dir", help="path of paddledetection model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ return parser.parse_args()
+
+
+def build_picodet_option(args):
+ option = fd.RuntimeOption()
+ option.use_kunlunxin()
+ return option
+
+
+def build_tinypose_option(args):
+ option = fd.RuntimeOption()
+ option.use_kunlunxin()
+ return option
+
+
+args = parse_arguments()
+picodet_model_file = os.path.join(args.det_model_dir, "model.pdmodel")
+picodet_params_file = os.path.join(args.det_model_dir, "model.pdiparams")
+picodet_config_file = os.path.join(args.det_model_dir, "infer_cfg.yml")
+
+# setup runtime
+runtime_option = build_picodet_option(args)
+det_model = fd.vision.detection.PicoDet(
+ picodet_model_file,
+ picodet_params_file,
+ picodet_config_file,
+ runtime_option=runtime_option)
+
+tinypose_model_file = os.path.join(args.tinypose_model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.tinypose_model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.tinypose_model_dir, "infer_cfg.yml")
+# setup runtime
+runtime_option = build_tinypose_option(args)
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+
+# predict
+im = cv2.imread(args.image_file)
+pipeline = fd.pipeline.PPTinyPose(det_model, tinypose_model)
+pipeline.detection_model_score_threshold = 0.5
+pipeline_result = pipeline.predict(im)
+print("Paddle TinyPose Result:\n", pipeline_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, pipeline_result, conf_threshold=0.2)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/kunlunxin/python/infer.py b/deploy/fastdeploy/kunlunxin/python/infer.py
new file mode 100755
index 00000000000..2916bd66836
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/infer.py
@@ -0,0 +1,45 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir", required=True, help="Path of PaddleDetection model.")
+ parser.add_argument(
+ "--image_file", type=str, required=True, help="Path of test image file.")
+ return parser.parse_args()
+
+args = parse_arguments()
+
+runtime_option = fd.RuntimeOption()
+runtime_option.use_kunlunxin()
+
+if args.model_dir is None:
+ model_dir = fd.download_model(name='ppyoloe_crn_l_300e_coco')
+else:
+ model_dir = args.model_dir
+
+model_file = os.path.join(model_dir, "model.pdmodel")
+params_file = os.path.join(model_dir, "model.pdiparams")
+config_file = os.path.join(model_dir, "infer_cfg.yml")
+
+# settting for runtime
+model = fd.vision.detection.PPYOLOE(
+ model_file, params_file, config_file, runtime_option=runtime_option)
+
+# predict
+if args.image_file is None:
+ image_file = fd.utils.get_detection_test_image()
+else:
+ image_file = args.image_file
+im = cv2.imread(image_file)
+result = model.predict(im)
+print(result)
+
+# visualize
+vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/kunlunxin/python/pptinypose_infer.py b/deploy/fastdeploy/kunlunxin/python/pptinypose_infer.py
new file mode 100644
index 00000000000..f30f594b7ff
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/pptinypose_infer.py
@@ -0,0 +1,42 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir",
+ required=True,
+ help="path of PP-TinyPose model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ return parser.parse_args()
+
+
+args = parse_arguments()
+
+runtime_option = fd.RuntimeOption()
+runtime_option.use_kunlunxin()
+
+tinypose_model_file = os.path.join(args.model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.model_dir, "infer_cfg.yml")
+# setup runtime
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+
+# predict
+im = cv2.imread(args.image_file)
+tinypose_result = tinypose_model.predict(im)
+print("Paddle TinyPose Result:\n", tinypose_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, tinypose_result, conf_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/quantize/README.md b/deploy/fastdeploy/quantize/README.md
new file mode 100644
index 00000000000..aff080ef644
--- /dev/null
+++ b/deploy/fastdeploy/quantize/README.md
@@ -0,0 +1,64 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection 量化模型部署-FastDeploy
+
+FastDeploy已支持部署量化模型,并提供一键模型自动化压缩的工具.
+用户可以使用一键模型自动化压缩工具,自行对模型量化后部署, 也可以直接下载FastDeploy提供的量化模型进行部署.
+
+## 1. FastDeploy一键模型自动化压缩工具
+
+FastDeploy 提供了一键模型自动化压缩工具, 能够简单地通过输入一个配置文件, 对模型进行量化.
+详细教程请见: [一键模型自动化压缩工具](https://github.com/PaddlePaddle/FastDeploy/tree/develop/tools/common_tools/auto_compression)。**注意**: 推理量化后的检测模型仍然需要FP32模型文件夹下的infer_cfg.yml文件, 自行量化的模型文件夹内不包含此yaml文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。
+
+## 2. 量化完成的PaddleDetection模型
+
+用户也可以直接下载下表中的量化模型进行部署.(点击模型名字即可下载)
+
+Benchmark表格说明:
+- Runtime时延为模型在各种Runtime上的推理时延,包含CPU->GPU数据拷贝,GPU推理,GPU->CPU数据拷贝时间. 不包含模型各自的前后处理时间.
+- 端到端时延为模型在实际推理场景中的时延, 包含模型的前后处理.
+- 所测时延均为推理1000次后求得的平均值, 单位是毫秒.
+- INT8 + FP16 为在推理INT8量化模型的同时, 给Runtime 开启FP16推理选项
+- INT8 + FP16 + PM, 为在推理INT8量化模型和开启FP16的同时, 开启使用Pinned Memory的选项,可加速GPU->CPU数据拷贝的速度
+- 最大加速比, 为FP32时延除以INT8推理的最快时延,得到最大加速比.
+- 策略为量化蒸馏训练时, 采用少量无标签数据集训练得到量化模型, 并在全量验证集上验证精度, INT8精度并不代表最高的INT8精度.
+- CPU为Intel(R) Xeon(R) Gold 6271C, 所有测试中固定CPU线程数为1. GPU为Tesla T4, TensorRT版本8.4.15.
+
+
+- Runtime Benchmark
+| 模型 |推理后端 |部署硬件 | FP32 Runtime时延 | INT8 Runtime时延 | INT8 + FP16 Runtime时延 | INT8+FP16+PM Runtime时延 | 最大加速比 | FP32 mAP | INT8 mAP | 量化方式 |
+| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | TensorRT | GPU | 27.90 | 6.39 |6.44|5.95 | 4.67 | 51.4 | 50.7 | 量化蒸馏训练 |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | Paddle-TensorRT | GPU | 30.89 |None | 13.78 |14.01 | 2.24 | 51.4 | 50.5 | 量化蒸馏训练 |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar) | ONNX Runtime | CPU | 1057.82 | 449.52 |None|None | 2.35 |51.4 | 50.0 |量化蒸馏训练 |
+
+NOTE:
+- TensorRT比Paddle-TensorRT快的原因是在runtime移除了multiclass_nms3算子
+
+- 端到端 Benchmark
+| 模型 |推理后端 |部署硬件 | FP32 End2End时延 | INT8 End2End时延 | INT8 + FP16 End2End时延 | INT8+FP16+PM End2End时延 | 最大加速比 | FP32 mAP | INT8 mAP | 量化方式 |
+| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | TensorRT | GPU | 35.75 | 15.42 |20.70|20.85 | 2.32 | 51.4 | 50.7 | 量化蒸馏训练 |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | Paddle-TensorRT | GPU | 33.48 |None | 18.47 |18.03 | 1.81 | 51.4 | 50.5 | 量化蒸馏训练 |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar) | ONNX Runtime | CPU | 1067.17 | 461.037 |None|None | 2.31 |51.4 | 50.0 |量化蒸馏训练 |
+
+
+量化后模型的Benchmark比较,请参考[量化模型 Benchmark](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md)
+
+## 3. 支持部署量化模型的硬件
+
+FastDeploy 量化模型部署的过程大致都与FP32模型类似,只是模型量化与非量化的区别,如果硬件在量化模型部署过程有特殊处理,也会在文档中特别标明,因此量化模型部署可以参考如下硬件的链接
+
+|硬件类型|该硬件是否支持|使用指南|Python|C++|
+|:---:|:---:|:---:|:---:|:---:|
+|X86 CPU|✅|[链接](cpu-gpu)|✅|✅|
+|NVIDIA GPU|✅|[链接](cpu-gpu)|✅|✅|
+|飞腾CPU|✅|[链接](cpu-gpu)|✅|✅|
+|ARM CPU|✅|[链接](cpu-gpu)|✅|✅|
+|Intel GPU(集成显卡)|✅|[链接](cpu-gpu)|✅|✅|
+|Intel GPU(独立显卡)|✅|[链接](cpu-gpu)|✅|✅|
+|昆仑|✅|[链接](kunlun)|✅|✅|
+|昇腾|✅|[链接](ascend)|✅|✅|
+|瑞芯微|✅|[链接](rockchip)|✅|✅|
+|晶晨|✅|[链接](amlogic)|--|✅|
+|算能|✅|[链接](sophgo)|✅|✅|
diff --git a/deploy/fastdeploy/rockchip/rknpu2/README.md b/deploy/fastdeploy/rockchip/rknpu2/README.md
new file mode 100644
index 00000000000..18a19cc04eb
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/README.md
@@ -0,0 +1,121 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection RKNPU2部署示例
+
+## 1. 说明
+RKNPU2 提供了一个高性能接口来访问 Rockchip NPU,支持如下硬件的部署
+- RK3566/RK3568
+- RK3588/RK3588S
+- RV1103/RV1106
+
+在RKNPU2上已经通过测试的PaddleDetection模型如下:
+
+- Picodet
+- PPYOLOE(int8)
+- YOLOV8
+
+如果你需要查看详细的速度信息,请查看[RKNPU2模型速度一览表](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)
+
+## 2. 使用预导出的模型列表
+
+### ONNX模型转RKNN模型
+
+为了方便大家使用,我们提供了python脚本,通过我们预配置的config文件,你将能够快速地转换ONNX模型到RKNN模型
+
+```bash
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \
+ --target_platform rk3588
+```
+
+### RKNN模型列表
+
+为了方便大家测试,我们提供picodet和ppyoloe两个模型,解压后即可使用:
+
+| 模型名称 | 下载地址 |
+|-----------------------------|-----------------------------------------------------------------------------------|
+| picodet_s_416_coco_lcnet | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/picodet_s_416_coco_lcnet.zip |
+| ppyoloe_plus_crn_s_80e_coco | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/ppyoloe_plus_crn_s_80e_coco.zip |
+
+
+## 3. 自行导出PaddleDetection部署模型以及转换模型
+
+RKNPU部署模型前需要将Paddle模型转换成RKNN模型,具体步骤如下:
+
+* Paddle动态图模型转换为ONNX模型,请参考[PaddleDetection导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/EXPORT_MODEL.md)
+,注意在转换时请设置**export.nms=True**.
+* ONNX模型转换RKNN模型的过程,请参考[转换文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/export.md)进行转换。
+
+### 3.1 模型转换example
+
+#### 3.1.1 注意点
+
+PPDetection模型在RKNPU2上部署时要注意以下几点:
+
+* 模型导出需要包含Decode
+* 由于RKNPU2不支持NMS,因此输出节点必须裁剪至NMS之前
+* 由于RKNPU2 Div算子的限制,模型的输出节点需要裁剪至Div算子之前
+
+#### 3.1.2 Paddle模型转换为ONNX模型
+
+由于Rockchip提供的rknn-toolkit2工具暂时不支持Paddle模型直接导出为RKNN模型,因此需要先将Paddle模型导出为ONNX模型,再将ONNX模型转为RKNN模型。
+
+```bash
+# 以Picodet为例
+# 下载Paddle静态图模型并解压
+wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
+tar xvf picodet_s_416_coco_lcnet.tar
+
+# 静态图转ONNX模型,注意,这里的save_file请和压缩包名对齐
+paddle2onnx --model_dir picodet_s_416_coco_lcnet \
+ --model_filename model.pdmodel \
+ --params_filename model.pdiparams \
+ --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+ --enable_dev_version True
+
+# 固定shape
+python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+ --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+ --input_shape_dict "{'image':[1,3,416,416], 'scale_factor':[1,2]}"
+```
+
+#### 3.1.3 编写yaml文件
+
+**修改normalize参数**
+
+如果你需要在NPU上执行normalize操作,请根据你的模型配置normalize参数,例如:
+
+```yaml
+mean:
+ -
+ - 123.675
+ - 116.28
+ - 103.53
+std:
+ -
+ - 58.395
+ - 57.12
+ - 57.375
+```
+
+**修改outputs参数**
+
+由于Paddle2ONNX版本的不同,转换模型的输出节点名称也有所不同,请使用[Netron](https://netron.app)对模型进行可视化,并找到以下蓝色方框标记的NonMaxSuppression节点,红色方框的节点名称即为目标名称。
+
+## 4. 模型可视化
+例如,使用Netron可视化后,得到以下图片:
+
+
+
+找到蓝色方框标记的NonMaxSuppression节点,可以看到红色方框标记的两个节点名称为p2o.Mul.179和p2o.Concat.9,因此需要修改outputs参数,修改后如下:
+
+```yaml
+outputs_nodes:
+ - 'p2o.Mul.179'
+ - 'p2o.Concat.9'
+```
+
+
+## 5. 详细的部署示例
+- [RKNN总体部署教程](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)
+- [C++部署](cpp)
+- [Python部署](python)
diff --git a/deploy/fastdeploy/rockchip/rknpu2/cpp/CMakeLists.txt b/deploy/fastdeploy/rockchip/rknpu2/cpp/CMakeLists.txt
new file mode 100644
index 00000000000..a46b11f8138
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/cpp/CMakeLists.txt
@@ -0,0 +1,11 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
+project(infer_demo)
+
+set(CMAKE_CXX_STANDARD 14)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
+include_directories(${FastDeploy_INCLUDE_DIRS})
+add_executable(infer_demo infer.cc)
+target_link_libraries(infer_demo ${FastDeploy_LIBS})
diff --git a/deploy/fastdeploy/rockchip/rknpu2/cpp/README.md b/deploy/fastdeploy/rockchip/rknpu2/cpp/README.md
new file mode 100644
index 00000000000..d67581fabd9
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/cpp/README.md
@@ -0,0 +1,47 @@
+[English](README.md) | 简体中文
+# PaddleDetection RKNPU2 C++部署示例
+
+本目录下用于展示PaddleDetection系列模型在RKNPU2上的部署,以下的部署过程以PPYOLOE为例子。
+
+## 1. 部署环境准备
+在部署前,需确认以下两个步骤:
+
+1. 软硬件环境满足要求
+2. 根据开发环境,下载预编译部署库或者从头编译FastDeploy仓库
+
+以上步骤请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)实现
+
+## 2. 部署模型准备
+
+模型转换代码请参考[模型转换文档](../README.md)
+
+## 3. 运行部署示例
+
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/rockchip/rknpu2/cpp
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 编译部署示例
+mkdir build && cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j8
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/ppyoloe_plus_crn_s_80e_coco.zip
+unzip ppyoloe_plus_crn_s_80e_coco.zip
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# 运行部署示例
+# CPU推理
+./infer_demo ./ppyoloe_plus_crn_s_80e_coco 000000014439.jpg 0
+# RKNPU2推理
+./infer_demo ./ppyoloe_plus_crn_s_80e_coco 000000014439.jpg 1
+```
+
+## 4. 更多指南
+RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用`DisableNormalize`和`DisablePermute`(C++)或`disable_normalize`和`disable_permute`(Python),在预处理阶段禁用归一化以及数据格式的转换。
+
+- [Python部署](../python)
+- [转换PaddleDetection RKNN模型文档](../README.md)
diff --git a/deploy/fastdeploy/rockchip/rknpu2/cpp/infer.cc b/deploy/fastdeploy/rockchip/rknpu2/cpp/infer.cc
new file mode 100644
index 00000000000..79b2091f205
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/cpp/infer.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+void ONNXInfer(const std::string& model_dir, const std::string& image_file) {
+ std::string model_file = model_dir + "/ppyoloe_plus_crn_s_80e_coco.onnx";
+ std::string params_file;
+ std::string config_file = model_dir + "/infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseCpu();
+ auto format = fastdeploy::ModelFormat::ONNX;
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option, format);
+
+ fastdeploy::TimeCounter tc;
+ tc.Start();
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ tc.End();
+ tc.PrintInfo("PPDet in ONNX");
+
+ std::cout << res.Str() << std::endl;
+ cv::imwrite("infer_onnx.jpg", vis_im);
+ std::cout << "Visualized result saved in ./infer_onnx.jpg" << std::endl;
+}
+
+void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file =
+ model_dir + "/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn";
+ auto params_file = "";
+ auto config_file = model_dir + "/infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseRKNPU2();
+
+ auto format = fastdeploy::ModelFormat::RKNN;
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option, format);
+
+ model.GetPreprocessor().DisablePermute();
+ model.GetPreprocessor().DisableNormalize();
+ model.GetPostprocessor().ApplyNMS();
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ fastdeploy::TimeCounter tc;
+ tc.Start();
+ if (!model.Predict(&im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+ tc.End();
+ tc.PrintInfo("PPDet in RKNPU2");
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("infer_rknpu2.jpg", vis_im);
+ std::cout << "Visualized result saved in ./infer_rknpu2.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 4) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+ "e.g ./infer_demo ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+
+ if (std::atoi(argv[3]) == 0) {
+ ONNXInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 1) {
+ RKNPU2Infer(argv[1], argv[2]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/rockchip/rknpu2/python/README.md b/deploy/fastdeploy/rockchip/rknpu2/python/README.md
new file mode 100644
index 00000000000..d231cdaa305
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/python/README.md
@@ -0,0 +1,41 @@
+[English](README.md) | 简体中文
+# PaddleDetection RKNPU2 Python部署示例
+
+本目录下用于展示PaddleDetection系列模型在RKNPU2上的部署,以下的部署过程以PPYOLOE为例子。
+
+## 1. 部署环境准备
+在部署前,需确认以下步骤
+
+- 1. 软硬件环境满足要求,RKNPU2环境部署等参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)
+
+## 2. 部署模型准备
+
+模型转换代码请参考[模型转换文档](../README.md)
+
+## 3. 运行部署示例
+
+本目录下提供`infer.py`快速完成PPYOLOE在RKNPU上部署的示例。执行如下脚本即可完成
+
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/rockchip/rknpu2/python
+# 注意:如果当前分支找不到下面的fastdeploy测试代码,请切换到develop分支
+# git checkout develop
+
+# 下载图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/ppyoloe_plus_crn_s_80e_coco.zip
+unzip ppyoloe_plus_crn_s_80e_coco.zip
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# 运行部署示例
+python3 infer.py --model_file ./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn \
+ --config_file ./ppyoloe_plus_crn_s_80e_coco/infer_cfg.yml \
+ --image_file 000000014439.jpg
+```
+
+## 4. 更多指南
+RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用`DisableNormalize`和`DisablePermute`(C++)或`disable_normalize`和`disable_permute`(Python),在预处理阶段禁用归一化以及数据格式的转换。
+
+- [C++部署](../cpp)
+- [转换PaddleDetection RKNN模型文档](../README.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/rockchip/rknpu2/python/infer.py b/deploy/fastdeploy/rockchip/rknpu2/python/infer.py
new file mode 100644
index 00000000000..e5ac057ba08
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/python/infer.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_file",
+ default="./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn",
+ help="Path of rknn model.")
+ parser.add_argument(
+ "--config_file",
+ default="./ppyoloe_plus_crn_s_80e_coco/infer_cfg.yml",
+ help="Path of config.")
+ parser.add_argument(
+ "--image_file",
+ type=str,
+ default="./000000014439.jpg",
+ help="Path of test image file.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+
+ model_file = args.model_file
+ params_file = ""
+ config_file = args.config_file
+
+ # setup runtime
+ runtime_option = fd.RuntimeOption()
+ runtime_option.use_rknpu2()
+
+ model = fd.vision.detection.PPYOLOE(
+ model_file,
+ params_file,
+ config_file,
+ runtime_option=runtime_option,
+ model_format=fd.ModelFormat.RKNN)
+ model.preprocessor.disable_normalize()
+ model.preprocessor.disable_permute()
+ model.postprocessor.apply_nms()
+
+ # predict
+ im = cv2.imread(args.image_file)
+ result = model.predict(im)
+ print(result)
+
+ # visualize
+ vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+ cv2.imwrite("visualized_result.jpg", vis_im)
+ print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/rockchip/rv1126/README.md b/deploy/fastdeploy/rockchip/rv1126/README.md
new file mode 100644
index 00000000000..02b41153274
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/README.md
@@ -0,0 +1,17 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection 检测模型在瑞芯微NPU上的部署方案-FastDeploy
+
+## 1. 说明
+本示例基于RV1126来介绍如何使用FastDeploy部署PaddleDetection模型,支持如下芯片的部署:
+- Rockchip RV1109
+- Rockchip RV1126
+- Rockchip RK1808
+
+模型的量化和量化模型的下载请参考:[模型量化](../../quantize/README.md)
+
+## 详细部署文档
+
+在 RV1126 上只支持 C++ 的部署。
+
+- [C++部署](cpp)
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/CMakeLists.txt b/deploy/fastdeploy/rockchip/rv1126/cpp/CMakeLists.txt
new file mode 100755
index 00000000000..af493f6b67d
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/CMakeLists.txt
@@ -0,0 +1,27 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+include_directories(${FastDeploy_INCLUDE_DIRS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
+
+install(TARGETS infer_demo DESTINATION ./)
+
+install(DIRECTORY models DESTINATION ./)
+install(DIRECTORY images DESTINATION ./)
+
+file(GLOB_RECURSE FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/lib*.so*)
+file(GLOB_RECURSE ALL_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/lib*.so*)
+list(APPEND ALL_LIBS ${FASTDEPLOY_LIBS})
+install(PROGRAMS ${ALL_LIBS} DESTINATION lib)
+
+file(GLOB ADB_TOOLS run_with_adb.sh)
+install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/README.md b/deploy/fastdeploy/rockchip/rv1126/cpp/README.md
new file mode 100644
index 00000000000..811b8a1029c
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/README.md
@@ -0,0 +1,64 @@
+[English](README.md) | 简体中文
+# PaddleDetection 量化模型 RV1126 C++ 部署示例
+
+本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-YOLOE 量化模型在 RV1126 上的部署推理加速。
+
+## 1. 部署环境准备
+### 1.1 FastDeploy 交叉编译环境准备
+软硬件环境满足要求,以及交叉编译环境的准备,请参考:[瑞芯微RV1126部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. 部署模型准备
+1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。
+2. 用户可以先使用 PaddleDetection 自行导出 Float32 模型,注意导出模型模型时设置参数:use_shared_conv=False,更多细节请参考:[PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe)
+3. 用户可以使用 FastDeploy 提供的[一键模型自动化压缩工具](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署。(注意: 推理量化后的检测模型仍然需要FP32模型文件夹下的 infer_cfg.yml 文件,自行量化的模型文件夹内不包含此 yaml 文件,用户从 FP32 模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。)
+4. 模型需要异构计算,异构计算文件可以参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。
+
+更多量化相关相关信息可查阅[模型量化](../../../quantize/README.md)
+
+## 3. 运行部署示例
+请按照以下步骤完成在 RV1126 上部署 PP-YOLOE 量化模型:
+1. 交叉编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/rv1126.md)
+
+2. 将编译后的库拷贝到当前目录,可使用如下命令:
+```bash
+cp -r FastDeploy/build/fastdeploy-timvx/ PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp
+```
+
+3. 在当前路径下载部署所需的模型和示例图片:
+```bash
+cd PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp
+mkdir models && mkdir images
+wget https://bj.bcebos.com/fastdeploy/models/ppyoloe_noshare_qat.tar.gz
+tar -xvf ppyoloe_noshare_qat.tar.gz
+cp -r ppyoloe_noshare_qat models
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+cp -r 000000014439.jpg images
+```
+
+4. 编译部署示例,可使用如下命令:
+```bash
+cd PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp
+mkdir build && cd build
+cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=armhf ..
+make -j8
+make install
+# 成功编译之后,会生成 install 文件夹,里面有一个运行 demo 和部署所需的库
+```
+
+5. 基于 adb 工具部署 PP-YOLOE 检测模型到 Rockchip RV1126,可使用如下命令:
+```bash
+# 进入 install 目录
+cd PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp/build/install/
+# 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID
+bash run_with_adb.sh infer_demo ppyoloe_noshare_qat 000000014439.jpg $DEVICE_ID
+```
+
+部署成功后运行结果如下:
+
+
+
+需要特别注意的是,在 RV1126 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md)
+
+## 4. 更多指南
+- [PaddleDetection C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [FastDeploy部署PaddleDetection模型概览](../../)
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/infer.cc b/deploy/fastdeploy/rockchip/rv1126/cpp/infer.cc
new file mode 100644
index 00000000000..d4a69b49238
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/infer.cc
@@ -0,0 +1,66 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto subgraph_file = model_dir + sep + "subgraph.txt";
+  fastdeploy::vision::EnableFlyCV();
+  fastdeploy::RuntimeOption option;
+  option.UseTimVX();
+  option.paddle_lite_option.nnadapter_subgraph_partition_config_path =
+      subgraph_file;  // fix: statement was missing its terminating semicolon
+
+  auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+                                                      config_file, option);
+  assert(model.Initialized());
+
+  auto im = cv::imread(image_file);
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout << "Usage: infer_demo path/to/quant_model "
+ "path/to/image "
+ "e.g ./infer_demo ./PPYOLOE_L_quant ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+
+ std::string model_dir = argv[1];
+ std::string test_image = argv[2];
+ InitAndInfer(model_dir, test_image);
+ return 0;
+}
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/run_with_adb.sh b/deploy/fastdeploy/rockchip/rv1126/cpp/run_with_adb.sh
new file mode 100755
index 00000000000..aacaed4c516
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/run_with_adb.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+HOST_SPACE=${PWD}
+echo ${HOST_SPACE}
+WORK_SPACE=/data/local/tmp/test
+
+# The first parameter represents the demo name
+DEMO_NAME=image_classification_demo
+if [ -n "$1" ]; then
+ DEMO_NAME=$1
+fi
+
+# The second parameter represents the model name
+MODEL_NAME=mobilenet_v1_fp32_224
+if [ -n "$2" ]; then
+ MODEL_NAME=$2
+fi
+
+# The third parameter indicates the name of the image to be tested
+IMAGE_NAME=0001.jpg
+if [ -n "$3" ]; then
+ IMAGE_NAME=$3
+fi
+
+# The fourth parameter represents the ID of the device
+ADB_DEVICE_NAME=
+if [ -n "$4" ]; then
+ ADB_DEVICE_NAME="-s $4"
+fi
+
+# Set the environment variables required during the running process
+EXPORT_ENVIRONMENT_VARIABLES="export GLOG_v=5; export VIV_VX_ENABLE_GRAPH_TRANSFORM=-pcq:1; export VIV_VX_SET_PER_CHANNEL_ENTROPY=100; export TIMVX_BATCHNORM_FUSION_MAX_ALLOWED_QUANT_SCALE_DEVIATION=300000; export VSI_NN_LOG_LEVEL=5;"
+
+EXPORT_ENVIRONMENT_VARIABLES="${EXPORT_ENVIRONMENT_VARIABLES}export LD_LIBRARY_PATH=${WORK_SPACE}/lib:\$LD_LIBRARY_PATH;"
+
+# Please install adb, and DON'T run this in the docker.
+set -e
+adb $ADB_DEVICE_NAME shell "rm -rf $WORK_SPACE"
+adb $ADB_DEVICE_NAME shell "mkdir -p $WORK_SPACE"
+
+# Upload the demo, librarys, model and test images to the device
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/lib $WORK_SPACE
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/${DEMO_NAME} $WORK_SPACE
+adb $ADB_DEVICE_NAME push models $WORK_SPACE
+adb $ADB_DEVICE_NAME push images $WORK_SPACE
+
+# Execute the deployment demo
+adb $ADB_DEVICE_NAME shell "cd $WORK_SPACE; ${EXPORT_ENVIRONMENT_VARIABLES} chmod +x ./${DEMO_NAME}; ./${DEMO_NAME} ./models/${MODEL_NAME} ./images/$IMAGE_NAME"
diff --git a/deploy/fastdeploy/serving/README.md b/deploy/fastdeploy/serving/README.md
new file mode 100644
index 00000000000..8e6bbc0631b
--- /dev/null
+++ b/deploy/fastdeploy/serving/README.md
@@ -0,0 +1,111 @@
+[English](README.md) | 简体中文
+# PaddleDetection 服务化部署示例
+
+本文档以PP-YOLOE模型(ppyoloe_crn_l_300e_coco)为例,进行详细介绍。其他PaddleDetection模型都已支持服务化部署,只需将下述命令中的模型和配置名字修改成要部署模型的名字。
+
+PaddleDetection模型导出和预训练模型下载请看[PaddleDetection模型部署](../README.md)文档。
+
+## 1. 部署环境准备
+在服务化部署前,需确认
+
+- 1. 服务化镜像的软硬件环境要求和镜像拉取命令请参考[FastDeploy服务化部署](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md)
+
+
+## 2. 启动服务
+
+```bash
+#下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/serving
+
+#下载PPYOLOE模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# 将配置文件放入预处理目录
+mv ppyoloe_crn_l_300e_coco/infer_cfg.yml models/preprocess/1/
+
+# 将模型放入 models/runtime/1目录下, 并重命名为model.pdmodel和model.pdiparams
+mv ppyoloe_crn_l_300e_coco/model.pdmodel models/runtime/1/model.pdmodel
+mv ppyoloe_crn_l_300e_coco/model.pdiparams models/runtime/1/model.pdiparams
+
+# 将ppdet和runtime中的ppyoloe配置文件重命名成标准的config名字
+# 其他模型比如faster_rcnn就将faster_rcnn_config.pbtxt重命名为config.pbtxt
+cp models/ppdet/ppyoloe_config.pbtxt models/ppdet/config.pbtxt
+cp models/runtime/ppyoloe_runtime_config.pbtxt models/runtime/config.pbtxt
+
+# 注意: 由于mask_rcnn模型多一个输出,需要将后处理目录(models/postprocess)中的mask_config.pbtxt重命名为config.pbtxt
+
+# 拉取fastdeploy镜像(x.y.z为镜像版本号,需替换成fastdeploy版本数字)
+# GPU镜像
+docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
+# CPU镜像
+docker pull paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
+
+# 运行容器.容器名字为 fd_serving, 并挂载当前目录为容器的 /serving 目录
+nvidia-docker run -it --net=host --name fd_serving --shm-size="1g" -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
+
+# 启动服务(不设置CUDA_VISIBLE_DEVICES环境变量,会拥有所有GPU卡的调度权限)
+CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models
+```
+>> **注意**:
+
+>> 由于mask_rcnn模型多一个输出,部署mask_rcnn需要将后处理目录(models/postprocess)中的mask_config.pbtxt重命名为config.pbtxt
+
+>> 拉取镜像请看[服务化部署主文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md)
+
+>> 执行fastdeployserver启动服务出现"Address already in use", 请使用`--grpc-port`指定grpc端口号来启动服务,同时更改客户端示例中的请求端口号.
+
+>> 其他启动参数可以使用 fastdeployserver --help 查看
+
+服务启动成功后, 会有以下输出:
+```
+......
+I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001
+I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000
+I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002
+```
+
+
+## 3. 客户端请求
+
+在物理机器中执行以下命令,发送grpc请求并输出结果
+```
+#下载测试图片
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+#安装客户端依赖
+python3 -m pip install tritonclient[all]
+
+# 发送请求
+python3 paddledet_grpc_client.py
+```
+
+发送请求成功后,会返回json格式的检测结果并打印输出:
+```
+output_name: DET_RESULT
+[[159.93016052246094, 82.35527038574219, 199.8546600341797, 164.68682861328125],
+... ...,
+[60.200584411621094, 123.73260498046875, 108.83859252929688, 169.07467651367188]]
+```
+
+## 4. 配置修改
+
+当前默认配置在GPU上运行Paddle引擎, 如果要在CPU或其他推理引擎上运行。 需要修改`models/runtime/config.pbtxt`中配置,详情请参考[配置文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/model_configuration.md)
+
+
+## 5. 使用VisualDL进行可视化部署
+
+可以使用VisualDL进行[Serving可视化部署](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/vdl_management.md),上述启动服务、配置修改以及客户端请求的操作都可以基于VisualDL进行。
+
+通过VisualDL的可视化界面对PaddleDetection进行服务化部署只需要如下三步:
+```text
+1. 载入模型库:./vision/detection/paddledetection/serving/models
+2. 下载模型资源文件:点击preprocess模型,点击版本号1添加预训练模型,选择检测模型ppyoloe_crn_l_300e_coco进行下载,此时preprocess中将会有资源文件infer_cfg.yml。点击runtime模型,点击版本号1添加预训练模型,选择检测模型ppyoloe_crn_l_300e_coco进行下载,此时runtime中将会有资源文件model.pdmodel和model.pdiparams。
+3. 设置启动配置文件:点击ensemble配置按钮,选择配置文件ppyoloe_config.pbtxt,并设为启动配置文件。点击runtime模型,选择配置文件ppyoloe_runtime_config.pbtxt,并设为启动配置文件。
+4. 启动服务:点击启动服务按钮,输入启动参数。
+```
+
+
+
diff --git a/deploy/fastdeploy/serving/models/postprocess/1/model.py b/deploy/fastdeploy/serving/models/postprocess/1/model.py
new file mode 100644
index 00000000000..35054e51657
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/postprocess/1/model.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import numpy as np
+import time
+
+import fastdeploy as fd
+
+# triton_python_backend_utils is available in every Triton Python model. You
+# need to use this module to create inference requests and responses. It also
+# contains some utility functions for extracting information from model_config
+# and converting Triton input/output types to numpy types.
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+ """Your Python model must use the same class name. Every Python model
+ that is created must have "TritonPythonModel" as the class name.
+ """
+
+ def initialize(self, args):
+ """`initialize` is called only once when the model is being loaded.
+ Implementing `initialize` function is optional. This function allows
+ the model to intialize any state associated with this model.
+ Parameters
+ ----------
+ args : dict
+ Both keys and values are strings. The dictionary keys and values are:
+ * model_config: A JSON string containing the model configuration
+ * model_instance_kind: A string containing model instance kind
+ * model_instance_device_id: A string containing model instance device ID
+ * model_repository: Model repository path
+ * model_version: Model version
+ * model_name: Model name
+ """
+ # You must parse model_config. JSON string is not parsed here
+ self.model_config = json.loads(args['model_config'])
+ print("model_config:", self.model_config)
+
+ self.input_names = []
+ for input_config in self.model_config["input"]:
+ self.input_names.append(input_config["name"])
+ print("postprocess input names:", self.input_names)
+
+ self.output_names = []
+ self.output_dtype = []
+ for output_config in self.model_config["output"]:
+ self.output_names.append(output_config["name"])
+ dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
+ self.output_dtype.append(dtype)
+ print("postprocess output names:", self.output_names)
+
+ self.postprocess_ = fd.vision.detection.PaddleDetPostprocessor()
+
+ def execute(self, requests):
+ """`execute` must be implemented in every Python model. `execute`
+ function receives a list of pb_utils.InferenceRequest as the only
+ argument. This function is called when an inference is requested
+ for this model. Depending on the batching configuration (e.g. Dynamic
+ Batching) used, `requests` may contain multiple requests. Every
+ Python model, must create one pb_utils.InferenceResponse for every
+ pb_utils.InferenceRequest in `requests`. If there is an error, you can
+ set the error argument when creating a pb_utils.InferenceResponse.
+ Parameters
+ ----------
+ requests : list
+ A list of pb_utils.InferenceRequest
+ Returns
+ -------
+ list
+ A list of pb_utils.InferenceResponse. The length of this list must
+ be the same as `requests`
+ """
+ responses = []
+ for request in requests:
+ infer_outputs = []
+ for name in self.input_names:
+ infer_output = pb_utils.get_input_tensor_by_name(request, name)
+ if infer_output:
+ infer_output = infer_output.as_numpy()
+ infer_outputs.append(infer_output)
+
+ results = self.postprocess_.run(infer_outputs)
+ r_str = fd.vision.utils.fd_result_to_json(results)
+
+ r_np = np.array(r_str, dtype=np.object_)
+ out_tensor = pb_utils.Tensor(self.output_names[0], r_np)
+ inference_response = pb_utils.InferenceResponse(
+ output_tensors=[out_tensor, ])
+ responses.append(inference_response)
+ return responses
+
+ def finalize(self):
+ """`finalize` is called only once when the model is being unloaded.
+ Implementing `finalize` function is optional. This function allows
+ the model to perform any necessary clean ups before exit.
+ """
+ print('Cleaning up...')
diff --git a/deploy/fastdeploy/serving/models/postprocess/config.pbtxt b/deploy/fastdeploy/serving/models/postprocess/config.pbtxt
new file mode 100644
index 00000000000..bb09e32c6d7
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/postprocess/config.pbtxt
@@ -0,0 +1,30 @@
+name: "postprocess"
+backend: "python"
+
+input [
+ {
+ name: "post_input1"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "post_input2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+output [
+ {
+ name: "post_output"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_CPU
+ }
+]
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/postprocess/mask_config.pbtxt b/deploy/fastdeploy/serving/models/postprocess/mask_config.pbtxt
new file mode 100644
index 00000000000..8985cc78a24
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/postprocess/mask_config.pbtxt
@@ -0,0 +1,34 @@
+backend: "python"
+
+input [
+ {
+ name: "post_input1"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "post_input2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ },
+ {
+ name: "post_input3"
+ data_type: TYPE_INT32
+ dims: [ -1, -1, -1 ]
+ }
+]
+
+output [
+ {
+ name: "post_output"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_CPU
+ }
+]
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/1/README.md b/deploy/fastdeploy/serving/models/ppdet/1/README.md
new file mode 100644
index 00000000000..877efdf8de7
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/1/README.md
@@ -0,0 +1,3 @@
+# PaddleDetection Pipeline
+
+The pipeline directory does not have model files, but a version number directory needs to be maintained.
diff --git a/deploy/fastdeploy/serving/models/ppdet/faster_rcnn_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/faster_rcnn_config.pbtxt
new file mode 100644
index 00000000000..91d132b9adc
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/faster_rcnn_config.pbtxt
@@ -0,0 +1,80 @@
+platform: "ensemble"
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "preprocess_output3"
+ value: "RUNTIME_INPUT3"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ input_map {
+ key: "im_shape"
+ value: "RUNTIME_INPUT3"
+ }
+ output_map {
+ key: "concat_12.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "concat_8.tmp_0"
+ value: "RUNTIME_OUTPUT2"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/mask_rcnn_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/mask_rcnn_config.pbtxt
new file mode 100644
index 00000000000..b0ee4e092af
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/mask_rcnn_config.pbtxt
@@ -0,0 +1,88 @@
+platform: "ensemble"
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "preprocess_output3"
+ value: "RUNTIME_INPUT3"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ input_map {
+ key: "im_shape"
+ value: "RUNTIME_INPUT3"
+ }
+ output_map {
+ key: "concat_9.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "concat_5.tmp_0"
+ value: "RUNTIME_OUTPUT2"
+ },
+ output_map {
+ key: "tmp_109"
+ value: "RUNTIME_OUTPUT3"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ input_map {
+ key: "post_input3"
+ value: "RUNTIME_OUTPUT3"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/ppyolo_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/ppyolo_config.pbtxt
new file mode 100644
index 00000000000..f7c1fe6121b
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/ppyolo_config.pbtxt
@@ -0,0 +1,80 @@
+platform: "ensemble"
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "preprocess_output3"
+ value: "RUNTIME_INPUT3"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ input_map {
+ key: "im_shape"
+ value: "RUNTIME_INPUT3"
+ }
+ output_map {
+ key: "matrix_nms_0.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "matrix_nms_0.tmp_2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/ppyoloe_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/ppyoloe_config.pbtxt
new file mode 100644
index 00000000000..3cb479b46f5
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/ppyoloe_config.pbtxt
@@ -0,0 +1,72 @@
+platform: "ensemble"
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "multiclass_nms3_0.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "multiclass_nms3_0.tmp_2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/preprocess/1/model.py b/deploy/fastdeploy/serving/models/preprocess/1/model.py
new file mode 100644
index 00000000000..2ea72054de3
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/preprocess/1/model.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import numpy as np
+import os
+
+import fastdeploy as fd
+
+# triton_python_backend_utils is available in every Triton Python model. You
+# need to use this module to create inference requests and responses. It also
+# contains some utility functions for extracting information from model_config
+# and converting Triton input/output types to numpy types.
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+ """Your Python model must use the same class name. Every Python model
+ that is created must have "TritonPythonModel" as the class name.
+ """
+
+ def initialize(self, args):
+ """`initialize` is called only once when the model is being loaded.
+ Implementing `initialize` function is optional. This function allows
+ the model to intialize any state associated with this model.
+ Parameters
+ ----------
+ args : dict
+ Both keys and values are strings. The dictionary keys and values are:
+ * model_config: A JSON string containing the model configuration
+ * model_instance_kind: A string containing model instance kind
+ * model_instance_device_id: A string containing model instance device ID
+ * model_repository: Model repository path
+ * model_version: Model version
+ * model_name: Model name
+ """
+ # You must parse model_config. JSON string is not parsed here
+ self.model_config = json.loads(args['model_config'])
+ print("model_config:", self.model_config)
+
+ self.input_names = []
+ for input_config in self.model_config["input"]:
+ self.input_names.append(input_config["name"])
+ print("preprocess input names:", self.input_names)
+
+ self.output_names = []
+ self.output_dtype = []
+ for output_config in self.model_config["output"]:
+ self.output_names.append(output_config["name"])
+ # dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
+ # self.output_dtype.append(dtype)
+ self.output_dtype.append(output_config["data_type"])
+ print("preprocess output names:", self.output_names)
+
+ # init PaddleClasPreprocess class
+ yaml_path = os.path.abspath(os.path.dirname(
+ __file__)) + "/infer_cfg.yml"
+ self.preprocess_ = fd.vision.detection.PaddleDetPreprocessor(yaml_path)
+
+ def execute(self, requests):
+ """`execute` must be implemented in every Python model. `execute`
+ function receives a list of pb_utils.InferenceRequest as the only
+ argument. This function is called when an inference is requested
+ for this model. Depending on the batching configuration (e.g. Dynamic
+ Batching) used, `requests` may contain multiple requests. Every
+ Python model, must create one pb_utils.InferenceResponse for every
+ pb_utils.InferenceRequest in `requests`. If there is an error, you can
+ set the error argument when creating a pb_utils.InferenceResponse.
+ Parameters
+ ----------
+ requests : list
+ A list of pb_utils.InferenceRequest
+ Returns
+ -------
+ list
+ A list of pb_utils.InferenceResponse. The length of this list must
+ be the same as `requests`
+ """
+ responses = []
+ for request in requests:
+ data = pb_utils.get_input_tensor_by_name(request,
+ self.input_names[0])
+ data = data.as_numpy()
+ outputs = self.preprocess_.run(data)
+
+ output_tensors = []
+ for idx, name in enumerate(self.output_names):
+ dlpack_tensor = outputs[idx].to_dlpack()
+ output_tensor = pb_utils.Tensor.from_dlpack(name,
+ dlpack_tensor)
+ output_tensors.append(output_tensor)
+
+ inference_response = pb_utils.InferenceResponse(
+ output_tensors=output_tensors)
+ responses.append(inference_response)
+ return responses
+
+ def finalize(self):
+ """`finalize` is called only once when the model is being unloaded.
+ Implementing `finalize` function is optional. This function allows
+ the model to perform any necessary clean ups before exit.
+ """
+ print('Cleaning up...')
diff --git a/deploy/fastdeploy/serving/models/preprocess/config.pbtxt b/deploy/fastdeploy/serving/models/preprocess/config.pbtxt
new file mode 100644
index 00000000000..39a42113bce
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/preprocess/config.pbtxt
@@ -0,0 +1,35 @@
+name: "preprocess"
+backend: "python"
+
+input [
+ {
+ name: "preprocess_input"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+
+output [
+ {
+ name: "preprocess_output1"
+ data_type: TYPE_FP32
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "preprocess_output2"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "preprocess_output3"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_CPU
+ }
+]
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/runtime/1/README.md b/deploy/fastdeploy/serving/models/runtime/1/README.md
new file mode 100644
index 00000000000..1e5d914b439
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/1/README.md
@@ -0,0 +1,5 @@
+# Runtime Directory
+
+This directory holds the model files.
+Paddle models must be model.pdmodel and model.pdiparams files.
+ONNX models must be model.onnx files.
diff --git a/deploy/fastdeploy/serving/models/runtime/faster_rcnn_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/faster_rcnn_runtime_config.pbtxt
new file mode 100644
index 00000000000..9f4b9833e82
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/faster_rcnn_runtime_config.pbtxt
@@ -0,0 +1,58 @@
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+ # input type such as TYPE_FP32、TYPE_UINT8、TYPE_INT8、TYPE_INT16、TYPE_INT32、TYPE_INT64、TYPE_FP16、TYPE_STRING
+ data_type: TYPE_FP32
+ # input shape, The batch dimension is omitted and the actual shape is [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "im_shape"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "concat_12.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "concat_8.tmp_0"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+ # Use GPU, CPU inference option is:KIND_CPU
+ kind: KIND_GPU
+ # The instance is deployed on the 0th GPU card
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
diff --git a/deploy/fastdeploy/serving/models/runtime/mask_rcnn_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/mask_rcnn_runtime_config.pbtxt
new file mode 100644
index 00000000000..13fdd5b41de
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/mask_rcnn_runtime_config.pbtxt
@@ -0,0 +1,63 @@
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+ # input type such as TYPE_FP32、TYPE_UINT8、TYPE_INT8、TYPE_INT16、TYPE_INT32、TYPE_INT64、TYPE_FP16、TYPE_STRING
+ data_type: TYPE_FP32
+ # input shape, The batch dimension is omitted and the actual shape is [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "im_shape"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "concat_9.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "concat_5.tmp_0"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ },
+ {
+ name: "tmp_109"
+ data_type: TYPE_INT32
+ dims: [ -1, -1, -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+ # Use GPU, CPU inference option is:KIND_CPU
+ kind: KIND_GPU
+ # The instance is deployed on the 0th GPU card
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
diff --git a/deploy/fastdeploy/serving/models/runtime/ppyolo_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/ppyolo_runtime_config.pbtxt
new file mode 100644
index 00000000000..0f7b6330846
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/ppyolo_runtime_config.pbtxt
@@ -0,0 +1,58 @@
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+ # input type such as TYPE_FP32、TYPE_UINT8、TYPE_INT8、TYPE_INT16、TYPE_INT32、TYPE_INT64、TYPE_FP16、TYPE_STRING
+ data_type: TYPE_FP32
+ # input shape, The batch dimension is omitted and the actual shape is [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "im_shape"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "matrix_nms_0.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "matrix_nms_0.tmp_2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+ # Use GPU, CPU inference option is:KIND_CPU
+ kind: KIND_GPU
+ # The instance is deployed on the 0th GPU card
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
diff --git a/deploy/fastdeploy/serving/models/runtime/ppyoloe_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/ppyoloe_runtime_config.pbtxt
new file mode 100644
index 00000000000..dc8d15845ce
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/ppyoloe_runtime_config.pbtxt
@@ -0,0 +1,55 @@
+# optional, If name is specified it must match the name of the model repository directory containing the model.
+name: "runtime"
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+ # input type such as TYPE_FP32、TYPE_UINT8、TYPE_INT8、TYPE_INT16、TYPE_INT32、TYPE_INT64、TYPE_FP16、TYPE_STRING
+ data_type: TYPE_FP32
+ # input shape, The batch dimension is omitted and the actual shape is [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "multiclass_nms3_0.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "multiclass_nms3_0.tmp_2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+ # Use GPU, CPU inference option is:KIND_CPU
+ kind: KIND_GPU
+ # The instance is deployed on the 0th GPU card
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
diff --git a/deploy/fastdeploy/serving/paddledet_grpc_client.py b/deploy/fastdeploy/serving/paddledet_grpc_client.py
new file mode 100644
index 00000000000..84223949678
--- /dev/null
+++ b/deploy/fastdeploy/serving/paddledet_grpc_client.py
@@ -0,0 +1,109 @@
+import logging
+import numpy as np
+import time
+from typing import Optional
+import cv2
+import json
+
+from tritonclient import utils as client_utils
+from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput, service_pb2_grpc, service_pb2
+
+LOGGER = logging.getLogger("run_inference_on_triton")
+
+
+class SyncGRPCTritonRunner:
+ DEFAULT_MAX_RESP_WAIT_S = 120
+
+ def __init__(
+ self,
+ server_url: str,
+ model_name: str,
+ model_version: str,
+ *,
+ verbose=False,
+ resp_wait_s: Optional[float]=None, ):
+ self._server_url = server_url
+ self._model_name = model_name
+ self._model_version = model_version
+ self._verbose = verbose
+ self._response_wait_t = self.DEFAULT_MAX_RESP_WAIT_S if resp_wait_s is None else resp_wait_s
+
+ self._client = InferenceServerClient(
+ self._server_url, verbose=self._verbose)
+ error = self._verify_triton_state(self._client)
+ if error:
+ raise RuntimeError(
+ f"Could not communicate to Triton Server: {error}")
+
+ LOGGER.debug(
+ f"Triton server {self._server_url} and model {self._model_name}:{self._model_version} "
+ f"are up and ready!")
+
+ model_config = self._client.get_model_config(self._model_name,
+ self._model_version)
+ model_metadata = self._client.get_model_metadata(self._model_name,
+ self._model_version)
+ LOGGER.info(f"Model config {model_config}")
+ LOGGER.info(f"Model metadata {model_metadata}")
+
+ for tm in model_metadata.inputs:
+ print("tm:", tm)
+ self._inputs = {tm.name: tm for tm in model_metadata.inputs}
+ self._input_names = list(self._inputs)
+ self._outputs = {tm.name: tm for tm in model_metadata.outputs}
+ self._output_names = list(self._outputs)
+ self._outputs_req = [
+ InferRequestedOutput(name) for name in self._outputs
+ ]
+
+ def Run(self, inputs):
+ """
+ Args:
+ inputs: list, Each value corresponds to an input name of self._input_names
+ Returns:
+ results: dict, {name : numpy.array}
+ """
+ infer_inputs = []
+ for idx, data in enumerate(inputs):
+ infer_input = InferInput(self._input_names[idx], data.shape,
+ "UINT8")
+ infer_input.set_data_from_numpy(data)
+ infer_inputs.append(infer_input)
+
+ results = self._client.infer(
+ model_name=self._model_name,
+ model_version=self._model_version,
+ inputs=infer_inputs,
+ outputs=self._outputs_req,
+ client_timeout=self._response_wait_t, )
+ results = {name: results.as_numpy(name) for name in self._output_names}
+ return results
+
+ def _verify_triton_state(self, triton_client):
+ if not triton_client.is_server_live():
+ return f"Triton server {self._server_url} is not live"
+ elif not triton_client.is_server_ready():
+ return f"Triton server {self._server_url} is not ready"
+ elif not triton_client.is_model_ready(self._model_name,
+ self._model_version):
+ return f"Model {self._model_name}:{self._model_version} is not ready"
+ return None
+
+
+if __name__ == "__main__":
+ model_name = "ppdet"
+ model_version = "1"
+ url = "localhost:8001"
+ runner = SyncGRPCTritonRunner(url, model_name, model_version)
+ im = cv2.imread("000000014439.jpg")
+ im = np.array([im, ])
+ # batch input
+ # im = np.array([im, im, im])
+ for i in range(1):
+ result = runner.Run([im, ])
+ for name, values in result.items():
+ print("output_name:", name)
+ # values is batch
+ for value in values:
+ value = json.loads(value)
+ print(value['boxes'])
diff --git a/deploy/fastdeploy/sophgo/README.md b/deploy/fastdeploy/sophgo/README.md
new file mode 100644
index 00000000000..1da9a4b2247
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/README.md
@@ -0,0 +1,108 @@
+# PaddleDetection SOPHGO部署示例
+
+## 1. 支持模型列表
+
+目前SOPHGO支持如下模型的部署
+- [PP-YOLOE系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe)
+- [PicoDet系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/picodet)
+- [YOLOV8系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4)
+
+## 2. 准备PP-YOLOE YOLOV8或者PicoDet部署模型以及转换模型
+
+SOPHGO-TPU部署模型前需要将Paddle模型转换成bmodel模型,具体步骤如下:
+- Paddle动态图模型转换为ONNX模型,请参考[PaddleDetection导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/EXPORT_MODEL.md).
+- ONNX模型转换bmodel模型的过程,请参考[TPU-MLIR](https://github.com/sophgo/tpu-mlir)
+
+## 3. 模型转换example
+
+PP-YOLOE YOLOV8和PicoDet模型转换过程类似,下面以ppyoloe_crn_s_300e_coco为例子,教大家如何转换Paddle模型到SOPHGO-TPU模型
+
+### 导出ONNX模型
+```shell
+#导出paddle模型
+python tools/export_model.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml --output_dir=output_inference -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams
+
+#paddle模型转ONNX模型
+paddle2onnx --model_dir ppyoloe_crn_s_300e_coco \
+ --model_filename model.pdmodel \
+ --params_filename model.pdiparams \
+ --save_file ppyoloe_crn_s_300e_coco.onnx \
+ --enable_dev_version True
+
+#进入Paddle2ONNX文件夹,固定ONNX模型shape
+python -m paddle2onnx.optimize --input_model ppyoloe_crn_s_300e_coco.onnx \
+ --output_model ppyoloe_crn_s_300e_coco.onnx \
+ --input_shape_dict "{'image':[1,3,640,640]}"
+
+```
+### 导出bmodel模型
+
+以转化BM1684x的bmodel模型为例子,我们需要下载[TPU-MLIR](https://github.com/sophgo/tpu-mlir)工程,安装过程具体参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。
+## 4. 安装
+``` shell
+docker pull sophgo/tpuc_dev:latest
+
+# myname1234是一个示例,也可以设置其他名字
+docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest
+
+source ./envsetup.sh
+./build.sh
+```
+
+## 5. ONNX模型转换为bmodel模型
+``` shell
+mkdir ppyoloe_crn_s_300e_coco && cd ppyoloe_crn_s_300e_coco
+
+# 下载测试图片,并将图片转换为npz格式
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+#使用python获得模型转换所需要的npz文件
+im = cv2.imread(im)
+im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+#[640 640]为ppyoloe_crn_s_300e_coco的输入大小
+im_scale_y = 640 / float(im.shape[0])
+im_scale_x = 640 / float(im.shape[1])
+inputs = {}
+inputs['image'] = np.array((im, )).astype('float32')
+inputs['scale_factor'] = np.array([im_scale_y, im_scale_x]).astype('float32')
+np.savez('inputs.npz', image = inputs['image'], scale_factor = inputs['scale_factor'])
+
+#放入onnx模型文件ppyoloe_crn_s_300e_coco.onnx
+
+mkdir workspace && cd workspace
+
+# 将ONNX模型转换为mlir模型
+model_transform.py \
+ --model_name ppyoloe_crn_s_300e_coco \
+ --model_def ../ppyoloe_crn_s_300e_coco.onnx \
+ --input_shapes [[1,3,640,640],[1,2]] \
+ --keep_aspect_ratio \
+ --pixel_format rgb \
+ --output_names p2o.Div.1,p2o.Concat.29 \
+ --test_input ../inputs.npz \
+ --test_result ppyoloe_crn_s_300e_coco_top_outputs.npz \
+ --mlir ppyoloe_crn_s_300e_coco.mlir
+```
+## 6. 注意
+**由于TPU-MLIR当前不支持后处理算法,所以需要查看后处理的输入作为网络的输出**
+具体方法为:output_names需要通过[Netron](https://netron.app/) 查看,网页中打开需要转换的ONNX模型,搜索NonMaxSuppression节点
+查看INPUTS中boxes和scores的名字,这两个名字就是我们所需的output_names
+例如使用Netron可视化后,可以得到如下图片
+
+找到蓝色方框标记的NonMaxSuppression节点,可以看到红色方框标记的两个节点名称为p2o.Div.1,p2o.Concat.29
+
+``` bash
+# 将mlir模型转换为BM1684x的F32 bmodel模型
+model_deploy.py \
+ --mlir ppyoloe_crn_s_300e_coco.mlir \
+ --quantize F32 \
+ --chip bm1684x \
+ --test_input ppyoloe_crn_s_300e_coco_in_f32.npz \
+ --test_reference ppyoloe_crn_s_300e_coco_top_outputs.npz \
+ --model ppyoloe_crn_s_300e_coco_1684x_f32.bmodel
+```
+最终获得可以在BM1684x上能够运行的bmodel模型ppyoloe_crn_s_300e_coco_1684x_f32.bmodel。如果需要进一步对模型进行加速,可以将ONNX模型转换为INT8 bmodel,具体步骤参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。
+
+## 7. 详细的部署示例
+- [Cpp部署](./cpp)
+- [python部署](./python)
diff --git a/deploy/fastdeploy/sophgo/cpp/CMakeLists.txt b/deploy/fastdeploy/sophgo/cpp/CMakeLists.txt
new file mode 100644
index 00000000000..f2749337284
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/cpp/CMakeLists.txt
@@ -0,0 +1,14 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+set(ENABLE_LITE_BACKEND OFF)
+#set(FDLIB ${FASTDEPLOY_INSTALL_DIR})
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+include_directories(${FastDeploy_INCLUDE_DIRS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/sophgo/cpp/README.md b/deploy/fastdeploy/sophgo/cpp/README.md
new file mode 100644
index 00000000000..44179ddffe6
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/cpp/README.md
@@ -0,0 +1,57 @@
+# PaddleDetection 算能 C++部署示例
+
+本目录下提供`infer.cc`,快速完成 PP-YOLOE 在SOPHGO BM1684x板子上加速部署的示例。PP-YOLOV8和 PicoDet的部署逻辑类似,只需要切换模型即可。
+
+## 1. 部署环境准备
+在部署前,需自行编译基于算能硬件的预测库,参考文档[算能硬件部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#算能硬件部署环境)
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+## 3. 生成基本目录文件
+
+该例程由以下几个部分组成
+```text
+.
+├── CMakeLists.txt
+├── fastdeploy-sophgo # 编译文件夹
+├── images # 存放图片的文件夹
+├── infer.cc
+└── model # 存放模型文件的文件夹
+```
+
+## 4. 运行部署示例
+
+### 4.1 编译并拷贝SDK到thirdpartys文件夹
+
+请参考[SOPHGO部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-sophgo目录.
+
+### 4.2 拷贝模型文件,以及配置文件至model文件夹
+将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README.md)
+将转换后的SOPHGO bmodel模型文件拷贝至model中
+
+### 4.3 准备测试图片至image文件夹
+```bash
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+cp 000000014439.jpg ./images
+```
+
+### 4.4 编译example
+
+```bash
+cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-sophgo
+make
+```
+
+### 4.5 运行例程
+
+```bash
+#ppyoloe推理示例
+./infer_demo model images/000000014439.jpg
+```
+
+## 5. 更多指南
+- [FastDeploy部署PaddleDetection模型概览](../../)
+- [Python部署](../python)
+- [模型转换](../README.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/sophgo/cpp/infer.cc b/deploy/fastdeploy/sophgo/cpp/infer.cc
new file mode 100644
index 00000000000..6ee6aeb1c44
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/cpp/infer.cc
@@ -0,0 +1,60 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <iostream>
+
+#include <string>
+#include <vector>
+
+#include "fastdeploy/vision.h"
+
+void SophgoInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + "/ppyoloe_crn_s_300e_coco_1684x_f32.bmodel";
+ auto params_file = "";
+ auto config_file = model_dir + "/infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseSophgo();
+
+ auto format = fastdeploy::ModelFormat::SOPHGO;
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option, format);
+
+ model.GetPostprocessor().ApplyNMS();
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(&im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("infer_sophgo.jpg", vis_im);
+ std::cout << "Visualized result saved in ./infer_sophgo.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image, "
+ "e.g ./infer_demo ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+ SophgoInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/sophgo/python/README.md b/deploy/fastdeploy/sophgo/python/README.md
new file mode 100644
index 00000000000..e8a1f59833a
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/python/README.md
@@ -0,0 +1,30 @@
+# PaddleDetection Python部署示例
+
+## 1. 部署环境准备
+
+在部署前,需自行编译基于算能硬件的FastDeploy python wheel包并安装,参考文档[算能硬件部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#算能硬件部署环境)
+
+本目录下提供`infer.py`, 快速完成 PP-YOLOE 在SOPHGO TPU上部署的示例,执行如下脚本即可完成。PP-YOLOV8和 PicoDet的部署逻辑类似,只需要切换模型即可。
+
+## 2. 部署模型准备
+在部署前,请准备好您所需要运行的推理模型,你可以选择使用[预导出的推理模型](../README.md)或者[自行导出PaddleDetection部署模型](../README.md)。
+
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/sophgo/python
+
+# 下载图片
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# 推理
+#ppyoloe推理示例
+python3 infer.py --model_file model/ppyoloe_crn_s_300e_coco_1684x_f32.bmodel --config_file model/infer_cfg.yml --image_file ./000000014439.jpg
+
+# 运行完成后返回结果如下所示
+可视化结果存储在sophgo_result.jpg中
+```
+
+## 3. 更多指南
+- [C++部署](../cpp)
+- [转换PP-YOLOE SOPHGO模型文档](../README.md)
diff --git a/deploy/fastdeploy/sophgo/python/infer.py b/deploy/fastdeploy/sophgo/python/infer.py
new file mode 100644
index 00000000000..f10418f9c47
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/python/infer.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_file", required=True, help="Path of sophgo model.")
+ parser.add_argument("--config_file", required=True, help="Path of config.")
+ parser.add_argument(
+ "--image_file", type=str, required=True, help="Path of test image file.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+
+ model_file = args.model_file
+ params_file = ""
+ config_file = args.config_file
+
+ # setup runtime
+ runtime_option = fd.RuntimeOption()
+ runtime_option.use_sophgo()
+
+ model = fd.vision.detection.PPYOLOE(
+ model_file,
+ params_file,
+ config_file,
+ runtime_option=runtime_option,
+ model_format=fd.ModelFormat.SOPHGO)
+
+ model.postprocessor.apply_nms()
+
+ # predict
+ im = cv2.imread(args.image_file)
+ result = model.predict(im)
+ print(result)
+
+ # visualize
+ vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+ cv2.imwrite("sophgo_result.jpg", vis_im)
+ print("Visualized result save in ./sophgo_result.jpg")
From 0afb4d5ac3a2a8d0e7cb2cd0dc2b9a714f017b0b Mon Sep 17 00:00:00 2001
From: wjm <897383984@qq.com>
Date: Tue, 4 Apr 2023 16:59:52 +0800
Subject: [PATCH 060/116] fix export for FCOS (#8028)
---
ppdet/modeling/heads/fcos_head.py | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
diff --git a/ppdet/modeling/heads/fcos_head.py b/ppdet/modeling/heads/fcos_head.py
index 89c933fe531..f9757897512 100644
--- a/ppdet/modeling/heads/fcos_head.py
+++ b/ppdet/modeling/heads/fcos_head.py
@@ -262,23 +262,10 @@ def forward(self, fpn_feats, targets=None):
bboxes_reg_list.append(bbox_reg)
centerness_list.append(centerness)
- if targets is not None:
- self.is_teacher = targets.get('ARSL_teacher', False)
- if self.is_teacher:
- return [cls_logits_list, bboxes_reg_list, centerness_list]
-
- if targets is not None:
- self.is_student = targets.get('ARSL_student', False)
- if self.is_student:
- return [cls_logits_list, bboxes_reg_list, centerness_list]
-
if targets is not None:
self.is_teacher = targets.get('is_teacher', False)
if self.is_teacher:
- return [
- locations_list, cls_logits_list, bboxes_reg_list,
- centerness_list
- ]
+ return [cls_logits_list, bboxes_reg_list, centerness_list]
if self.training and targets is not None:
get_data = targets.get('get_data', False)
From 8f37ad0fdc817411f421ae816d480737b7342665 Mon Sep 17 00:00:00 2001
From: LokeZhou
Date: Thu, 6 Apr 2023 16:33:51 +0800
Subject: [PATCH 061/116] [tipc] enable tinypose (#8047)
---
.../configs/keypoint/tinypose_128x96.yml | 30 +++++++++----------
.../tinypose_128x96_train_infer_python.txt | 2 +-
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/test_tipc/configs/keypoint/tinypose_128x96.yml b/test_tipc/configs/keypoint/tinypose_128x96.yml
index 338d9793c47..50720e9accd 100644
--- a/test_tipc/configs/keypoint/tinypose_128x96.yml
+++ b/test_tipc/configs/keypoint/tinypose_128x96.yml
@@ -83,25 +83,25 @@ TestDataset:
!ImageFolder
anno_path: dataset/coco/keypoint_imagelist.txt
-worker_num: 2
+worker_num: 16
global_mean: &global_mean [0.485, 0.456, 0.406]
global_std: &global_std [0.229, 0.224, 0.225]
TrainReader:
sample_transforms:
- - RandomFlipHalfBodyTransform:
- scale: 0.25
- rot: 30
- num_joints_half_body: 8
- prob_half_body: 0.3
- pixel_std: *pixel_std
- trainsize: *trainsize
- upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- flip_pairs: *flip_perm
- - AugmentationbyInformantionDropping:
- prob_cutout: 0.5
- offset_factor: 0.05
- num_patch: 1
- trainsize: *trainsize
+ # - RandomFlipHalfBodyTransform:
+ # scale: 0.25
+ # rot: 30
+ # num_joints_half_body: 8
+ # prob_half_body: 0.3
+ # pixel_std: *pixel_std
+ # trainsize: *trainsize
+ # upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ # flip_pairs: *flip_perm
+ # - AugmentationbyInformantionDropping:
+ # prob_cutout: 0.5
+ # offset_factor: 0.05
+ # num_patch: 1
+ # trainsize: *trainsize
- TopDownAffine:
trainsize: *trainsize
use_udp: true
diff --git a/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt b/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt
index 7a61216a596..9e87cb0c3ae 100644
--- a/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt
+++ b/test_tipc/configs/keypoint/tinypose_128x96_train_infer_python.txt
@@ -49,7 +49,7 @@ inference:./deploy/python/keypoint_infer.py
--save_log_path:null
--run_benchmark:False
null:null
-===========================disable_train_benchmark==========================
+===========================train_benchmark_params==========================
batch_size:512
fp_items:fp32|fp16
epoch:1
From 92752b020fa18826258856cfc08fcb78bb70cc7d Mon Sep 17 00:00:00 2001
From: Feng Ni
Date: Tue, 11 Apr 2023 20:07:20 +0800
Subject: [PATCH 062/116] [doc] fix focalnet swin docs (#7994)
---
configs/focalnet/README.md | 2 +-
configs/swin/README.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/configs/focalnet/README.md b/configs/focalnet/README.md
index be7c0fd4224..816382a960b 100644
--- a/configs/focalnet/README.md
+++ b/configs/focalnet/README.md
@@ -3,7 +3,7 @@
## 模型库
### FocalNet on COCO
-| 网络网络 | 输入尺寸| 图片数/GPU | 学习率策略 | mAPval
0.5:0.95 | mAPval
0.5 | 下载链接 | 配置文件 |
+| 网络网络 | 输入尺寸| 图片数/GPU | 学习率策略 | 推理时间(fps) | mAPval
0.5:0.95 | 下载链接 | 配置文件 |
| :--------- | :---- | :-------: | :------: | :---------------------: | :----------------: | :-------: |:------: |
| PP-YOLOE+ FocalNet-tiny | 640 | 8 | 36e | - | 46.6 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_focalnet_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_plus_focalnet_tiny_36e_coco.yml) |
diff --git a/configs/swin/README.md b/configs/swin/README.md
index 6cef97b0fb0..eea2d8d492c 100644
--- a/configs/swin/README.md
+++ b/configs/swin/README.md
@@ -2,7 +2,7 @@
## COCO Model Zoo
-| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
+| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | mAPval
0.5:0.95 | 下载 | 配置文件 |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| swin_T_224 | Faster R-CNN | 2 | 36e | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
| swin_T_224 | PP-YOLOE+ | 8 | 36e | ---- | 44.7 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_swin_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_plus_swin_tiny_36e_coco.yml) |
From 5d1f888362241790000950e2b63115dc8d1c6019 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Tue, 18 Apr 2023 16:35:52 +0800
Subject: [PATCH 063/116] add rtdetr final (#8094)
* [exp] add r50vd in dino
add yoloe reader
alter reference points to unsigmoid
fix amp training
alter usage in paddle-inference
update new base
alter ext_ops
add hybrid encoder
* add pp rt-detr
---------
Co-authored-by: ghostxsl <451323469@qq.com>
---
configs/rtdetr/README.md | 41 ++
configs/rtdetr/_base_/optimizer_6x.yml | 19 +
configs/rtdetr/_base_/rtdetr_r50vd.yml | 71 +++
configs/rtdetr/_base_/rtdetr_reader.yml | 43 ++
configs/rtdetr/rtdetr_r101vd_6x_coco.yml | 37 ++
configs/rtdetr/rtdetr_r50vd_6x_coco.yml | 11 +
ppdet/data/transform/batch_operators.py | 30 +-
ppdet/data/transform/operators.py | 22 +-
ppdet/modeling/architectures/detr.py | 3 +-
ppdet/modeling/losses/detr_loss.py | 79 ++-
ppdet/modeling/transformers/__init__.py | 4 +
ppdet/modeling/transformers/hybrid_encoder.py | 301 ++++++++++
ppdet/modeling/transformers/matchers.py | 7 +-
.../transformers/rtdetr_transformer.py | 546 ++++++++++++++++++
ppdet/modeling/transformers/utils.py | 15 +-
15 files changed, 1189 insertions(+), 40 deletions(-)
create mode 100644 configs/rtdetr/README.md
create mode 100644 configs/rtdetr/_base_/optimizer_6x.yml
create mode 100644 configs/rtdetr/_base_/rtdetr_r50vd.yml
create mode 100644 configs/rtdetr/_base_/rtdetr_reader.yml
create mode 100644 configs/rtdetr/rtdetr_r101vd_6x_coco.yml
create mode 100644 configs/rtdetr/rtdetr_r50vd_6x_coco.yml
create mode 100644 ppdet/modeling/transformers/hybrid_encoder.py
create mode 100644 ppdet/modeling/transformers/rtdetr_transformer.py
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
new file mode 100644
index 00000000000..3a11e87bca3
--- /dev/null
+++ b/configs/rtdetr/README.md
@@ -0,0 +1,41 @@
+# DETRs Beat YOLOs on Real-time Object Detection
+
+## Introduction
+We propose a **R**eal-**T**ime **DE**tection **TR**ansformer (RT-DETR), the first real-time end-to-end object detector to our best knowledge. Specifically, we design an efficient hybrid encoder to efficiently process multi-scale features by decoupling the intra-scale interaction and cross-scale fusion, and propose IoU-aware query selection to improve the initialization of object queries. In addition, our proposed detector supports flexible adjustment of the inference speed by using different decoder layers without the need for retraining, which facilitates the practical application of real-time object detectors. Our RT-DETR-L achieves 53.0% AP on COCO val2017 and 114 FPS on T4 GPU, while RT-DETR-X achieves 54.8% AP and 74 FPS, outperforming all YOLO detectors of the same scale in both speed and accuracy. Furthermore, our RT-DETR-R50 achieves 53.1% AP and 108 FPS, outperforming DINO-Deformable-DETR-R50 by 2.2% AP in accuracy and by about 21 times in FPS. For more details, please refer to our [paper](https://arxiv.org/abs/2304.08069).
+
+
+

+
+
+
+## Model Zoo
+
+### Model Zoo on COCO
+
+| Model | Epoch | backbone | input shape | $AP^{val}$ | $AP^{val}_{50}$| Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | Pretrained Model | config |
+|:--------------:|:-----:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
+| RT-DETR-R50 | 80 | ResNet-50 | 640 | 53.1 | 71.3 | 42 | 136 | 108 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams) | [config](./rtdetr_r50vd_6x_coco.yml)
+| RT-DETR-R101 | 80 | ResNet-101 | 640 | 54.3 | 72.7 | 76 | 259 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams) | [config](./rtdetr_r101vd_6x_coco.yml)
+| RT-DETR-L | 80 | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [coming soon](rtdetr_hgnetv2_l_6x_coco.yml)
+| RT-DETR-X | 80 | HGNetv2 | 640 | 54.8 | 73.1 | 67 | 234 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_x_6x_coco.pdparams) | [coming soon](rtdetr_hgnetv2_x_6x_coco.yml)
+
+**Notes:**
+- RT-DETR uses 4GPU to train.
+- RT-DETR is trained on COCO train2017 dataset and evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
+
+GPU multi-card training
+```bash
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --fleet --eval
+```
+
+## Citations
+```
+@misc{lv2023detrs,
+ title={DETRs Beat YOLOs on Real-time Object Detection},
+ author={Wenyu Lv and Shangliang Xu and Yian Zhao and Guanzhong Wang and Jinman Wei and Cheng Cui and Yuning Du and Qingqing Dang and Yi Liu},
+ year={2023},
+ eprint={2304.08069},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
diff --git a/configs/rtdetr/_base_/optimizer_6x.yml b/configs/rtdetr/_base_/optimizer_6x.yml
new file mode 100644
index 00000000000..5abe2f75a2c
--- /dev/null
+++ b/configs/rtdetr/_base_/optimizer_6x.yml
@@ -0,0 +1,19 @@
+epoch: 72
+
+LearningRate:
+ base_lr: 0.0001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 1.0
+ milestones: [100]
+ use_warmup: true
+ - !LinearWarmup
+ start_factor: 0.001
+ steps: 2000
+
+OptimizerBuilder:
+ clip_grad_by_norm: 0.1
+ regularizer: false
+ optimizer:
+ type: AdamW
+ weight_decay: 0.0001
diff --git a/configs/rtdetr/_base_/rtdetr_r50vd.yml b/configs/rtdetr/_base_/rtdetr_r50vd.yml
new file mode 100644
index 00000000000..fc5fb3ada4f
--- /dev/null
+++ b/configs/rtdetr/_base_/rtdetr_r50vd.yml
@@ -0,0 +1,71 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+norm_type: sync_bn
+use_ema: True
+ema_decay: 0.9999
+ema_decay_type: "exponential"
+ema_filter_no_grad: True
+hidden_dim: 256
+use_focal_loss: True
+eval_size: [640, 640]
+
+
+DETR:
+ backbone: ResNet
+ neck: HybridEncoder
+ transformer: RTDETRTransformer
+ detr_head: DINOHead
+ post_process: DETRPostProcess
+
+ResNet:
+ # index 0 stands for res2
+ depth: 50
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [1, 2, 3]
+ lr_mult_list: [0.1, 0.1, 0.1, 0.1]
+ num_stages: 4
+ freeze_stem_only: True
+
+HybridEncoder:
+ hidden_dim: 256
+ use_encoder_idx: [2]
+ num_encoder_layers: 1
+ encoder_layer:
+ name: TransformerLayer
+ d_model: 256
+ nhead: 8
+ dim_feedforward: 1024
+ dropout: 0.
+ activation: 'gelu'
+ expansion: 1.0
+
+
+RTDETRTransformer:
+ num_queries: 300
+ position_embed_type: sine
+ feat_strides: [8, 16, 32]
+ num_levels: 3
+ nhead: 8
+ num_decoder_layers: 6
+ dim_feedforward: 1024
+ dropout: 0.0
+ activation: relu
+ num_denoising: 100
+ label_noise_ratio: 0.5
+ box_noise_scale: 1.0
+ learnt_init_query: False
+
+DINOHead:
+ loss:
+ name: DINOLoss
+ loss_coeff: {class: 1, bbox: 5, giou: 2}
+ aux_loss: True
+ use_vfl: True
+ matcher:
+ name: HungarianMatcher
+ matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRPostProcess:
+ num_top_queries: 300
diff --git a/configs/rtdetr/_base_/rtdetr_reader.yml b/configs/rtdetr/_base_/rtdetr_reader.yml
new file mode 100644
index 00000000000..1b6f86c8c50
--- /dev/null
+++ b/configs/rtdetr/_base_/rtdetr_reader.yml
@@ -0,0 +1,43 @@
+worker_num: 4
+TrainReader:
+ sample_transforms:
+ - Decode: {}
+ - RandomDistort: {prob: 0.8}
+ - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCrop: {prob: 0.8}
+ - RandomFlip: {}
+ batch_transforms:
+ - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
+ - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+ - NormalizeBox: {}
+ - BboxXYXY2XYWH: {}
+ - Permute: {}
+ batch_size: 4
+ shuffle: true
+ drop_last: true
+ collate_batch: false
+ use_shared_memory: false
+
+
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+ - Permute: {}
+ batch_size: 4
+ shuffle: false
+ drop_last: false
+
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 640, 640]
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+ - Permute: {}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
diff --git a/configs/rtdetr/rtdetr_r101vd_6x_coco.yml b/configs/rtdetr/rtdetr_r101vd_6x_coco.yml
new file mode 100644
index 00000000000..fd2f55ae1ae
--- /dev/null
+++ b/configs/rtdetr/rtdetr_r101vd_6x_coco.yml
@@ -0,0 +1,37 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_6x.yml',
+ '_base_/rtdetr_r50vd.yml',
+ '_base_/rtdetr_reader.yml',
+]
+
+weights: output/rtdetr_r101vd_6x_coco/model_final
+find_unused_parameters: True
+log_iter: 200
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
+
+ResNet:
+ # index 0 stands for res2
+ depth: 101
+ variant: d
+ norm_type: bn
+ freeze_at: 0
+ return_idx: [1, 2, 3]
+ lr_mult_list: [0.01, 0.01, 0.01, 0.01]
+ num_stages: 4
+ freeze_stem_only: True
+
+HybridEncoder:
+ hidden_dim: 384
+ use_encoder_idx: [2]
+ num_encoder_layers: 1
+ encoder_layer:
+ name: TransformerLayer
+ d_model: 384
+ nhead: 8
+ dim_feedforward: 2048
+ dropout: 0.
+ activation: 'gelu'
+ expansion: 1.0
diff --git a/configs/rtdetr/rtdetr_r50vd_6x_coco.yml b/configs/rtdetr/rtdetr_r50vd_6x_coco.yml
new file mode 100644
index 00000000000..51bf4437fb9
--- /dev/null
+++ b/configs/rtdetr/rtdetr_r50vd_6x_coco.yml
@@ -0,0 +1,11 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_6x.yml',
+ '_base_/rtdetr_r50vd.yml',
+ '_base_/rtdetr_reader.yml',
+]
+
+weights: output/rtdetr_r50vd_6x_coco/model_final
+find_unused_parameters: True
+log_iter: 200
diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py
index 2637db43d21..5b8bbcd3b63 100644
--- a/ppdet/data/transform/batch_operators.py
+++ b/ppdet/data/transform/batch_operators.py
@@ -950,7 +950,7 @@ def __call__(self, samples, context=None):
@register_op
class PadMaskBatch(BaseOperator):
"""
- Pad a batch of samples so they can be divisible by a stride.
+ Pad a batch of samples so that they can be divisible by a stride.
The layout of each image should be 'CHW'.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
@@ -959,7 +959,7 @@ class PadMaskBatch(BaseOperator):
`pad_mask` for transformer.
"""
- def __init__(self, pad_to_stride=0, return_pad_mask=False):
+ def __init__(self, pad_to_stride=0, return_pad_mask=True):
super(PadMaskBatch, self).__init__()
self.pad_to_stride = pad_to_stride
self.return_pad_mask = return_pad_mask
@@ -984,7 +984,7 @@ def __call__(self, samples, context=None):
im_c, im_h, im_w = im.shape[:]
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
- padding_im[:, :im_h, :im_w] = im
+ padding_im[:, :im_h, :im_w] = im.astype(np.float32)
data['image'] = padding_im
if 'semantic' in data and data['semantic'] is not None:
semantic = data['semantic']
@@ -1108,12 +1108,13 @@ def __init__(self, return_gt_mask=True, pad_img=False, minimum_gtnum=0):
self.pad_img = pad_img
self.minimum_gtnum = minimum_gtnum
- def _impad(self, img: np.ndarray,
- *,
- shape = None,
- padding = None,
- pad_val = 0,
- padding_mode = 'constant') -> np.ndarray:
+ def _impad(self,
+ img: np.ndarray,
+ *,
+ shape=None,
+ padding=None,
+ pad_val=0,
+ padding_mode='constant') -> np.ndarray:
"""Pad the given image to a certain shape or pad on all sides with
specified padding mode and padding value.
@@ -1169,7 +1170,7 @@ def _impad(self, img: np.ndarray,
padding = (padding, padding, padding, padding)
else:
raise ValueError('Padding must be a int or a 2, or 4 element tuple.'
- f'But received {padding}')
+ f'But received {padding}')
# check padding mode
assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
@@ -1194,10 +1195,10 @@ def _impad(self, img: np.ndarray,
def checkmaxshape(self, samples):
maxh, maxw = 0, 0
for sample in samples:
- h,w = sample['im_shape']
- if h>maxh:
+ h, w = sample['im_shape']
+ if h > maxh:
maxh = h
- if w>maxw:
+ if w > maxw:
maxw = w
return (maxh, maxw)
@@ -1246,7 +1247,8 @@ def __call__(self, samples, context=None):
sample['difficult'] = pad_diff
if 'gt_joints' in sample:
num_joints = sample['gt_joints'].shape[1]
- pad_gt_joints = np.zeros((num_max_boxes, num_joints, 3), dtype=np.float32)
+ pad_gt_joints = np.zeros(
+ (num_max_boxes, num_joints, 3), dtype=np.float32)
if num_gt > 0:
pad_gt_joints[:num_gt] = sample['gt_joints']
sample['gt_joints'] = pad_gt_joints
diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py
index 25f3452993e..206d9a48d0f 100644
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -501,7 +501,8 @@ def __init__(self,
brightness=[0.5, 1.5, 0.5],
random_apply=True,
count=4,
- random_channel=False):
+ random_channel=False,
+ prob=1.0):
super(RandomDistort, self).__init__()
self.hue = hue
self.saturation = saturation
@@ -510,6 +511,7 @@ def __init__(self,
self.random_apply = random_apply
self.count = count
self.random_channel = random_channel
+ self.prob = prob
def apply_hue(self, img):
low, high, prob = self.hue
@@ -563,6 +565,8 @@ def apply_brightness(self, img):
return img
def apply(self, sample, context=None):
+ if random.random() > self.prob:
+ return sample
img = sample['image']
if self.random_apply:
functions = [
@@ -1488,7 +1492,8 @@ def __init__(self,
allow_no_crop=True,
cover_all_box=False,
is_mask_crop=False,
- ioumode="iou"):
+ ioumode="iou",
+ prob=1.0):
super(RandomCrop, self).__init__()
self.aspect_ratio = aspect_ratio
self.thresholds = thresholds
@@ -1498,6 +1503,7 @@ def __init__(self,
self.cover_all_box = cover_all_box
self.is_mask_crop = is_mask_crop
self.ioumode = ioumode
+ self.prob = prob
def crop_segms(self, segms, valid_ids, crop, height, width):
def _crop_poly(segm, crop):
@@ -1588,6 +1594,9 @@ def set_fake_bboxes(self, sample):
return sample
def apply(self, sample, context=None):
+ if random.random() > self.prob:
+ return sample
+
if 'gt_bbox' not in sample:
# only used in semi-det as unsup data
sample = self.set_fake_bboxes(sample)
@@ -2829,22 +2838,23 @@ def __init__(self,
def get_size_with_aspect_ratio(self, image_shape, size, max_size=None):
h, w = image_shape
+ max_clip = False
if max_size is not None:
min_original_size = float(min((w, h)))
max_original_size = float(max((w, h)))
if max_original_size / min_original_size * size > max_size:
- size = int(
- round(max_size * min_original_size / max_original_size))
+ size = int(max_size * min_original_size / max_original_size)
+ max_clip = True
if (w <= h and w == size) or (h <= w and h == size):
return (w, h)
if w < h:
ow = size
- oh = int(round(size * h / w))
+ oh = int(round(size * h / w)) if not max_clip else max_size
else:
oh = size
- ow = int(round(size * w / h))
+ ow = int(round(size * w / h)) if not max_clip else max_size
return (ow, oh)
diff --git a/ppdet/modeling/architectures/detr.py b/ppdet/modeling/architectures/detr.py
index 2d599258592..7839a1263ff 100644
--- a/ppdet/modeling/architectures/detr.py
+++ b/ppdet/modeling/architectures/detr.py
@@ -40,9 +40,9 @@ def __init__(self,
exclude_post_process=False):
super(DETR, self).__init__()
self.backbone = backbone
- self.neck = neck
self.transformer = transformer
self.detr_head = detr_head
+ self.neck = neck
self.post_process = post_process
self.with_mask = with_mask
self.exclude_post_process = exclude_post_process
@@ -54,6 +54,7 @@ def from_config(cls, cfg, *args, **kwargs):
# neck
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs) if cfg['neck'] else None
+
# transformer
if neck is not None:
kwargs = {'input_shape': neck.out_shape}
diff --git a/ppdet/modeling/losses/detr_loss.py b/ppdet/modeling/losses/detr_loss.py
index 45a2d5e14d7..24f14c3d489 100644
--- a/ppdet/modeling/losses/detr_loss.py
+++ b/ppdet/modeling/losses/detr_loss.py
@@ -21,7 +21,8 @@
import paddle.nn.functional as F
from ppdet.core.workspace import register
from .iou_loss import GIoULoss
-from ..transformers import bbox_cxcywh_to_xyxy, sigmoid_focal_loss
+from ..transformers import bbox_cxcywh_to_xyxy, sigmoid_focal_loss, varifocal_loss_with_logits
+from ..bbox_utils import bbox_iou
__all__ = ['DETRLoss', 'DINOLoss']
@@ -43,7 +44,10 @@ def __init__(self,
'dice': 1
},
aux_loss=True,
- use_focal_loss=False):
+ use_focal_loss=False,
+ use_vfl=False,
+ use_uni_match=False,
+ uni_match_ind=0):
r"""
Args:
num_classes (int): The number of classes.
@@ -60,6 +64,9 @@ def __init__(self,
self.loss_coeff = loss_coeff
self.aux_loss = aux_loss
self.use_focal_loss = use_focal_loss
+ self.use_vfl = use_vfl
+ self.use_uni_match = use_uni_match
+ self.uni_match_ind = uni_match_ind
if not self.use_focal_loss:
self.loss_coeff['class'] = paddle.full([num_classes + 1],
@@ -73,13 +80,15 @@ def _get_loss_class(self,
match_indices,
bg_index,
num_gts,
- postfix=""):
+ postfix="",
+ iou_score=None):
# logits: [b, query, num_classes], gt_class: list[[n, 1]]
name_class = "loss_class" + postfix
target_label = paddle.full(logits.shape[:2], bg_index, dtype='int64')
bs, num_query_objects = target_label.shape
- if sum(len(a) for a in gt_class) > 0:
+ num_gt = sum(len(a) for a in gt_class)
+ if num_gt > 0:
index, updates = self._get_index_updates(num_query_objects,
gt_class, match_indices)
target_label = paddle.scatter(
@@ -88,12 +97,23 @@ def _get_loss_class(self,
if self.use_focal_loss:
target_label = F.one_hot(target_label,
self.num_classes + 1)[..., :-1]
- return {
- name_class: self.loss_coeff['class'] * sigmoid_focal_loss(
- logits, target_label, num_gts / num_query_objects)
- if self.use_focal_loss else F.cross_entropy(
+ if iou_score is not None and self.use_vfl:
+ target_score = paddle.zeros([bs, num_query_objects])
+ if num_gt > 0:
+ target_score = paddle.scatter(
+ target_score.reshape([-1, 1]), index, iou_score)
+ target_score = target_score.reshape(
+ [bs, num_query_objects, 1]) * target_label
+ loss_ = self.loss_coeff['class'] * varifocal_loss_with_logits(
+ logits, target_score, target_label,
+ num_gts / num_query_objects)
+ else:
+ loss_ = self.loss_coeff['class'] * sigmoid_focal_loss(
+ logits, target_label, num_gts / num_query_objects)
+ else:
+ loss_ = F.cross_entropy(
logits, target_label, weight=self.loss_coeff['class'])
- }
+ return {name_class: loss_}
def _get_loss_bbox(self, boxes, gt_bbox, match_indices, num_gts,
postfix=""):
@@ -167,9 +187,19 @@ def _get_loss_aux(self,
loss_class = []
loss_bbox, loss_giou = [], []
loss_mask, loss_dice = [], []
+ if dn_match_indices is not None:
+ match_indices = dn_match_indices
+ elif self.use_uni_match:
+ match_indices = self.matcher(
+ boxes[self.uni_match_ind],
+ logits[self.uni_match_ind],
+ gt_bbox,
+ gt_class,
+ masks=masks[self.uni_match_ind] if masks is not None else None,
+ gt_mask=gt_mask)
for i, (aux_boxes, aux_logits) in enumerate(zip(boxes, logits)):
aux_masks = masks[i] if masks is not None else None
- if dn_match_indices is None:
+ if not self.use_uni_match and dn_match_indices is None:
match_indices = self.matcher(
aux_boxes,
aux_logits,
@@ -177,12 +207,21 @@ def _get_loss_aux(self,
gt_class,
masks=aux_masks,
gt_mask=gt_mask)
+ if self.use_vfl:
+ if sum(len(a) for a in gt_bbox) > 0:
+ src_bbox, target_bbox = self._get_src_target_assign(
+ aux_boxes.detach(), gt_bbox, match_indices)
+ iou_score = bbox_iou(
+ bbox_cxcywh_to_xyxy(src_bbox).split(4, -1),
+ bbox_cxcywh_to_xyxy(target_bbox).split(4, -1))
+ else:
+ iou_score = None
else:
- match_indices = dn_match_indices
+ iou_score = None
loss_class.append(
self._get_loss_class(aux_logits, gt_class, match_indices,
- bg_index, num_gts, postfix)['loss_class' +
- postfix])
+ bg_index, num_gts, postfix, iou_score)[
+ 'loss_class' + postfix])
loss_ = self._get_loss_bbox(aux_boxes, gt_bbox, match_indices,
num_gts, postfix)
loss_bbox.append(loss_['loss_bbox' + postfix])
@@ -252,10 +291,22 @@ def _get_prediction_loss(self,
else:
match_indices = dn_match_indices
+ if self.use_vfl:
+ if sum(len(a) for a in gt_bbox) > 0:
+ src_bbox, target_bbox = self._get_src_target_assign(
+ boxes.detach(), gt_bbox, match_indices)
+ iou_score = bbox_iou(
+ bbox_cxcywh_to_xyxy(src_bbox).split(4, -1),
+ bbox_cxcywh_to_xyxy(target_bbox).split(4, -1))
+ else:
+ iou_score = None
+ else:
+ iou_score = None
+
loss = dict()
loss.update(
self._get_loss_class(logits, gt_class, match_indices,
- self.num_classes, num_gts, postfix))
+ self.num_classes, num_gts, postfix, iou_score))
loss.update(
self._get_loss_bbox(boxes, gt_bbox, match_indices, num_gts,
postfix))
diff --git a/ppdet/modeling/transformers/__init__.py b/ppdet/modeling/transformers/__init__.py
index e20bd6203ce..33a12402656 100644
--- a/ppdet/modeling/transformers/__init__.py
+++ b/ppdet/modeling/transformers/__init__.py
@@ -20,6 +20,8 @@
from . import dino_transformer
from . import group_detr_transformer
from . import mask_dino_transformer
+from . import rtdetr_transformer
+from . import hybrid_encoder
from .detr_transformer import *
from .utils import *
@@ -30,3 +32,5 @@
from .petr_transformer import *
from .group_detr_transformer import *
from .mask_dino_transformer import *
+from .rtdetr_transformer import *
+from .hybrid_encoder import *
diff --git a/ppdet/modeling/transformers/hybrid_encoder.py b/ppdet/modeling/transformers/hybrid_encoder.py
new file mode 100644
index 00000000000..b64c4ee3ba6
--- /dev/null
+++ b/ppdet/modeling/transformers/hybrid_encoder.py
@@ -0,0 +1,301 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from ppdet.core.workspace import register, serializable
+from ppdet.modeling.ops import get_act_fn
+from ..shape_spec import ShapeSpec
+from ..backbones.csp_darknet import BaseConv
+from ..backbones.cspresnet import RepVggBlock
+from ppdet.modeling.transformers.detr_transformer import TransformerEncoder
+from ..initializer import xavier_uniform_, linear_init_
+from ..layers import MultiHeadAttention
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+
+__all__ = ['HybridEncoder']
+
+
+class CSPRepLayer(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ num_blocks=3,
+ expansion=1.0,
+ bias=False,
+ act="silu"):
+ super(CSPRepLayer, self).__init__()
+ hidden_channels = int(out_channels * expansion)
+ self.conv1 = BaseConv(
+ in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+ self.conv2 = BaseConv(
+ in_channels, hidden_channels, ksize=1, stride=1, bias=bias, act=act)
+ self.bottlenecks = nn.Sequential(*[
+ RepVggBlock(
+ hidden_channels, hidden_channels, act=act)
+ for _ in range(num_blocks)
+ ])
+ if hidden_channels != out_channels:
+ self.conv3 = BaseConv(
+ hidden_channels,
+ out_channels,
+ ksize=1,
+ stride=1,
+ bias=bias,
+ act=act)
+ else:
+ self.conv3 = nn.Identity()
+
+ def forward(self, x):
+ x_1 = self.conv1(x)
+ x_1 = self.bottlenecks(x_1)
+ x_2 = self.conv2(x)
+ return self.conv3(x_1 + x_2)
+
+
+@register
+class TransformerLayer(nn.Layer):
+ def __init__(self,
+ d_model,
+ nhead,
+ dim_feedforward=1024,
+ dropout=0.,
+ activation="relu",
+ attn_dropout=None,
+ act_dropout=None,
+ normalize_before=False):
+ super(TransformerLayer, self).__init__()
+ attn_dropout = dropout if attn_dropout is None else attn_dropout
+ act_dropout = dropout if act_dropout is None else act_dropout
+ self.normalize_before = normalize_before
+
+ self.self_attn = MultiHeadAttention(d_model, nhead, attn_dropout)
+ # Implementation of Feedforward model
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
+ self.dropout = nn.Dropout(act_dropout, mode="upscale_in_train")
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+ self.norm1 = nn.LayerNorm(d_model)
+ self.norm2 = nn.LayerNorm(d_model)
+ self.dropout1 = nn.Dropout(dropout, mode="upscale_in_train")
+ self.dropout2 = nn.Dropout(dropout, mode="upscale_in_train")
+ self.activation = getattr(F, activation)
+ self._reset_parameters()
+
+ def _reset_parameters(self):
+ linear_init_(self.linear1)
+ linear_init_(self.linear2)
+
+ @staticmethod
+ def with_pos_embed(tensor, pos_embed):
+ return tensor if pos_embed is None else tensor + pos_embed
+
+ def forward(self, src, src_mask=None, pos_embed=None):
+ residual = src
+ if self.normalize_before:
+ src = self.norm1(src)
+ q = k = self.with_pos_embed(src, pos_embed)
+ src = self.self_attn(q, k, value=src, attn_mask=src_mask)
+
+ src = residual + self.dropout1(src)
+ if not self.normalize_before:
+ src = self.norm1(src)
+
+ residual = src
+ if self.normalize_before:
+ src = self.norm2(src)
+ src = self.linear2(self.dropout(self.activation(self.linear1(src))))
+ src = residual + self.dropout2(src)
+ if not self.normalize_before:
+ src = self.norm2(src)
+ return src
+
+
+@register
+@serializable
+class HybridEncoder(nn.Layer):
+ __shared__ = ['depth_mult', 'act', 'trt', 'eval_size']
+ __inject__ = ['encoder_layer']
+
+ def __init__(self,
+ in_channels=[512, 1024, 2048],
+ feat_strides=[8, 16, 32],
+ hidden_dim=256,
+ use_encoder_idx=[2],
+ num_encoder_layers=1,
+ encoder_layer='TransformerLayer',
+ pe_temperature=10000,
+ expansion=1.0,
+ depth_mult=1.0,
+ act='silu',
+ trt=False,
+ eval_size=None):
+ super(HybridEncoder, self).__init__()
+ self.in_channels = in_channels
+ self.feat_strides = feat_strides
+ self.hidden_dim = hidden_dim
+ self.use_encoder_idx = use_encoder_idx
+ self.num_encoder_layers = num_encoder_layers
+ self.pe_temperature = pe_temperature
+ self.eval_size = eval_size
+
+ # channel projection
+ self.input_proj = nn.LayerList()
+ for in_channel in in_channels:
+ self.input_proj.append(
+ nn.Sequential(
+ nn.Conv2D(
+ in_channel, hidden_dim, kernel_size=1, bias_attr=False),
+ nn.BatchNorm2D(
+ hidden_dim,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))))
+ # encoder transformer
+ self.encoder = nn.LayerList([
+ TransformerEncoder(encoder_layer, num_encoder_layers)
+ for _ in range(len(use_encoder_idx))
+ ])
+
+ act = get_act_fn(
+ act, trt=trt) if act is None or isinstance(act,
+ (str, dict)) else act
+ # top-down fpn
+ self.lateral_convs = nn.LayerList()
+ self.fpn_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1, 0, -1):
+ self.lateral_convs.append(
+ BaseConv(
+ hidden_dim, hidden_dim, 1, 1, act=act))
+ self.fpn_blocks.append(
+ CSPRepLayer(
+ hidden_dim * 2,
+ hidden_dim,
+ round(3 * depth_mult),
+ act=act,
+ expansion=expansion))
+
+ # bottom-up pan
+ self.downsample_convs = nn.LayerList()
+ self.pan_blocks = nn.LayerList()
+ for idx in range(len(in_channels) - 1):
+ self.downsample_convs.append(
+ BaseConv(
+ hidden_dim, hidden_dim, 3, stride=2, act=act))
+ self.pan_blocks.append(
+ CSPRepLayer(
+ hidden_dim * 2,
+ hidden_dim,
+ round(3 * depth_mult),
+ act=act,
+ expansion=expansion))
+
+ self._reset_parameters()
+
+ def _reset_parameters(self):
+ if self.eval_size:
+ for idx in self.use_encoder_idx:
+ stride = self.feat_strides[idx]
+ pos_embed = self.build_2d_sincos_position_embedding(
+ self.eval_size[1] // stride, self.eval_size[0] // stride,
+ self.hidden_dim, self.pe_temperature)
+ setattr(self, f'pos_embed{idx}', pos_embed)
+
+ @staticmethod
+ def build_2d_sincos_position_embedding(w,
+ h,
+ embed_dim=256,
+ temperature=10000.):
+ grid_w = paddle.arange(int(w), dtype=paddle.float32)
+ grid_h = paddle.arange(int(h), dtype=paddle.float32)
+ grid_w, grid_h = paddle.meshgrid(grid_w, grid_h)
+ assert embed_dim % 4 == 0, \
+ 'Embed dimension must be divisible by 4 for 2D sin-cos position embedding'
+ pos_dim = embed_dim // 4
+ omega = paddle.arange(pos_dim, dtype=paddle.float32) / pos_dim
+ omega = 1. / (temperature**omega)
+
+ out_w = grid_w.flatten()[..., None] @omega[None]
+ out_h = grid_h.flatten()[..., None] @omega[None]
+
+ return paddle.concat(
+ [
+ paddle.sin(out_w), paddle.cos(out_w), paddle.sin(out_h),
+ paddle.cos(out_h)
+ ],
+ axis=1)[None, :, :]
+
+ def forward(self, feats, for_mot=False):
+ assert len(feats) == len(self.in_channels)
+ # get projection features
+ proj_feats = [self.input_proj[i](feat) for i, feat in enumerate(feats)]
+ # encoder
+ if self.num_encoder_layers > 0:
+ for i, enc_ind in enumerate(self.use_encoder_idx):
+ h, w = proj_feats[enc_ind].shape[2:]
+ # flatten [B, C, H, W] to [B, HxW, C]
+ src_flatten = proj_feats[enc_ind].flatten(2).transpose(
+ [0, 2, 1])
+ if self.training or self.eval_size is None:
+ pos_embed = self.build_2d_sincos_position_embedding(
+ w, h, self.hidden_dim, self.pe_temperature)
+ else:
+ pos_embed = getattr(self, f'pos_embed{enc_ind}', None)
+ memory = self.encoder[i](src_flatten, pos_embed=pos_embed)
+ proj_feats[enc_ind] = memory.transpose([0, 2, 1]).reshape(
+ [-1, self.hidden_dim, h, w])
+
+ # top-down fpn
+ inner_outs = [proj_feats[-1]]
+ for idx in range(len(self.in_channels) - 1, 0, -1):
+ feat_heigh = inner_outs[0]
+ feat_low = proj_feats[idx - 1]
+ feat_heigh = self.lateral_convs[len(self.in_channels) - 1 - idx](
+ feat_heigh)
+ inner_outs[0] = feat_heigh
+
+ upsample_feat = F.interpolate(
+ feat_heigh, scale_factor=2., mode="nearest")
+ inner_out = self.fpn_blocks[len(self.in_channels) - 1 - idx](
+ paddle.concat(
+ [upsample_feat, feat_low], axis=1))
+ inner_outs.insert(0, inner_out)
+
+ # bottom-up pan
+ outs = [inner_outs[0]]
+ for idx in range(len(self.in_channels) - 1):
+ feat_low = outs[-1]
+ feat_height = inner_outs[idx + 1]
+ downsample_feat = self.downsample_convs[idx](feat_low)
+ out = self.pan_blocks[idx](paddle.concat(
+ [downsample_feat, feat_height], axis=1))
+ outs.append(out)
+
+ return outs
+
+ @classmethod
+ def from_config(cls, cfg, input_shape):
+ return {
+ 'in_channels': [i.channels for i in input_shape],
+ 'feat_strides': [i.stride for i in input_shape]
+ }
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=self.hidden_dim, stride=self.feat_strides[idx])
+ for idx in range(len(self.in_channels))
+ ]
diff --git a/ppdet/modeling/transformers/matchers.py b/ppdet/modeling/transformers/matchers.py
index f163a6eeae9..72459a3f909 100644
--- a/ppdet/modeling/transformers/matchers.py
+++ b/ppdet/modeling/transformers/matchers.py
@@ -107,16 +107,15 @@ def forward(self,
tgt_bbox = paddle.concat(gt_bbox)
# Compute the classification cost
+ out_prob = paddle.gather(out_prob, tgt_ids, axis=1)
if self.use_focal_loss:
neg_cost_class = (1 - self.alpha) * (out_prob**self.gamma) * (-(
1 - out_prob + 1e-8).log())
pos_cost_class = self.alpha * (
(1 - out_prob)**self.gamma) * (-(out_prob + 1e-8).log())
- cost_class = paddle.gather(
- pos_cost_class, tgt_ids, axis=1) - paddle.gather(
- neg_cost_class, tgt_ids, axis=1)
+ cost_class = pos_cost_class - neg_cost_class
else:
- cost_class = -paddle.gather(out_prob, tgt_ids, axis=1)
+ cost_class = -out_prob
# Compute the L1 cost between boxes
cost_bbox = (
diff --git a/ppdet/modeling/transformers/rtdetr_transformer.py b/ppdet/modeling/transformers/rtdetr_transformer.py
new file mode 100644
index 00000000000..672590edfde
--- /dev/null
+++ b/ppdet/modeling/transformers/rtdetr_transformer.py
@@ -0,0 +1,546 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Modified from Deformable-DETR (https://github.com/fundamentalvision/Deformable-DETR)
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Modified from detrex (https://github.com/IDEA-Research/detrex)
+# Copyright 2022 The IDEA Authors. All rights reserved.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.regularizer import L2Decay
+
+from ppdet.core.workspace import register
+from ..layers import MultiHeadAttention
+from ..heads.detr_head import MLP
+from .deformable_transformer import MSDeformableAttention
+from ..initializer import (linear_init_, constant_, xavier_uniform_, normal_,
+ bias_init_with_prob)
+from .utils import (_get_clones, get_sine_pos_embed,
+ get_contrastive_denoising_training_group, inverse_sigmoid)
+
+__all__ = ['RTDETRTransformer']
+
+
+class PPMSDeformableAttention(MSDeformableAttention):
+ def forward(self,
+ query,
+ reference_points,
+ value,
+ value_spatial_shapes,
+ value_level_start_index,
+ value_mask=None):
+ """
+ Args:
+ query (Tensor): [bs, query_length, C]
+ reference_points (Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
+ bottom-right (1, 1), including padding area
+ value (Tensor): [bs, value_length, C]
+ value_spatial_shapes (List): [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})]
+ value_level_start_index (List): [n_levels], [0, H_0*W_0, H_0*W_0+H_1*W_1, ...]
+ value_mask (Tensor): [bs, value_length], True for non-padding elements, False for padding elements
+
+ Returns:
+ output (Tensor): [bs, Length_{query}, C]
+ """
+ bs, Len_q = query.shape[:2]
+ Len_v = value.shape[1]
+
+ value = self.value_proj(value)
+ if value_mask is not None:
+ value_mask = value_mask.astype(value.dtype).unsqueeze(-1)
+ value *= value_mask
+ value = value.reshape([bs, Len_v, self.num_heads, self.head_dim])
+
+ sampling_offsets = self.sampling_offsets(query).reshape(
+ [bs, Len_q, self.num_heads, self.num_levels, self.num_points, 2])
+ attention_weights = self.attention_weights(query).reshape(
+ [bs, Len_q, self.num_heads, self.num_levels * self.num_points])
+ attention_weights = F.softmax(attention_weights).reshape(
+ [bs, Len_q, self.num_heads, self.num_levels, self.num_points])
+
+ if reference_points.shape[-1] == 2:
+ offset_normalizer = paddle.to_tensor(value_spatial_shapes)
+ offset_normalizer = offset_normalizer.flip([1]).reshape(
+ [1, 1, 1, self.num_levels, 1, 2])
+ sampling_locations = reference_points.reshape([
+ bs, Len_q, 1, self.num_levels, 1, 2
+ ]) + sampling_offsets / offset_normalizer
+ elif reference_points.shape[-1] == 4:
+ sampling_locations = (
+ reference_points[:, :, None, :, None, :2] + sampling_offsets /
+ self.num_points * reference_points[:, :, None, :, None, 2:] *
+ 0.5)
+ else:
+ raise ValueError(
+ "Last dim of reference_points must be 2 or 4, but get {} instead.".
+ format(reference_points.shape[-1]))
+
+ if not isinstance(query, paddle.Tensor):
+ from ppdet.modeling.transformers.utils import deformable_attention_core_func
+ output = deformable_attention_core_func(
+ value, value_spatial_shapes, value_level_start_index,
+ sampling_locations, attention_weights)
+ else:
+ value_spatial_shapes = paddle.to_tensor(value_spatial_shapes)
+ value_level_start_index = paddle.to_tensor(value_level_start_index)
+ output = self.ms_deformable_attn_core(
+ value, value_spatial_shapes, value_level_start_index,
+ sampling_locations, attention_weights)
+ output = self.output_proj(output)
+
+ return output
+
+
+class TransformerDecoderLayer(nn.Layer):
+ def __init__(self,
+ d_model=256,
+ n_head=8,
+ dim_feedforward=1024,
+ dropout=0.,
+ activation="relu",
+ n_levels=4,
+ n_points=4,
+ weight_attr=None,
+ bias_attr=None):
+ super(TransformerDecoderLayer, self).__init__()
+
+ # self attention
+ self.self_attn = MultiHeadAttention(d_model, n_head, dropout=dropout)
+ self.dropout1 = nn.Dropout(dropout)
+ self.norm1 = nn.LayerNorm(
+ d_model,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+ # cross attention
+ self.cross_attn = PPMSDeformableAttention(d_model, n_head, n_levels,
+ n_points, 1.0)
+ self.dropout2 = nn.Dropout(dropout)
+ self.norm2 = nn.LayerNorm(
+ d_model,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+
+ # ffn
+ self.linear1 = nn.Linear(d_model, dim_feedforward, weight_attr,
+ bias_attr)
+ self.activation = getattr(F, activation)
+ self.dropout3 = nn.Dropout(dropout)
+ self.linear2 = nn.Linear(dim_feedforward, d_model, weight_attr,
+ bias_attr)
+ self.dropout4 = nn.Dropout(dropout)
+ self.norm3 = nn.LayerNorm(
+ d_model,
+ weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+ bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+ self._reset_parameters()
+
+ def _reset_parameters(self):
+ linear_init_(self.linear1)
+ linear_init_(self.linear2)
+ xavier_uniform_(self.linear1.weight)
+ xavier_uniform_(self.linear2.weight)
+
+ def with_pos_embed(self, tensor, pos):
+ return tensor if pos is None else tensor + pos
+
+ def forward_ffn(self, tgt):
+ return self.linear2(self.dropout3(self.activation(self.linear1(tgt))))
+
+ def forward(self,
+ tgt,
+ reference_points,
+ memory,
+ memory_spatial_shapes,
+ memory_level_start_index,
+ attn_mask=None,
+ memory_mask=None,
+ query_pos_embed=None):
+ # self attention
+ q = k = self.with_pos_embed(tgt, query_pos_embed)
+ if attn_mask is not None:
+ attn_mask = paddle.where(
+ attn_mask.astype('bool'),
+ paddle.zeros(attn_mask.shape, tgt.dtype),
+ paddle.full(attn_mask.shape, float("-inf"), tgt.dtype))
+ tgt2 = self.self_attn(q, k, value=tgt, attn_mask=attn_mask)
+ tgt = tgt + self.dropout1(tgt2)
+ tgt = self.norm1(tgt)
+
+ # cross attention
+ tgt2 = self.cross_attn(
+ self.with_pos_embed(tgt, query_pos_embed), reference_points, memory,
+ memory_spatial_shapes, memory_level_start_index, memory_mask)
+ tgt = tgt + self.dropout2(tgt2)
+ tgt = self.norm2(tgt)
+
+ # ffn
+ tgt2 = self.forward_ffn(tgt)
+ tgt = tgt + self.dropout4(tgt2)
+ tgt = self.norm3(tgt)
+
+ return tgt
+
+
class TransformerDecoder(nn.Layer):
    """Decoder stack with iterative bounding-box refinement.

    During training the logits/boxes of every layer are collected (for the
    auxiliary losses); at eval time only the layer selected by `eval_idx`
    contributes and the loop stops there.
    """

    def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
        super(TransformerDecoder, self).__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        # Normalize negative indices (e.g. -1 -> last layer).
        self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx

    def forward(self,
                tgt,
                ref_points_unact,
                memory,
                memory_spatial_shapes,
                memory_level_start_index,
                bbox_head,
                score_head,
                query_pos_head,
                attn_mask=None,
                memory_mask=None):
        """Returns stacked per-layer boxes (sigmoid space) and class logits."""
        output = tgt
        dec_out_bboxes = []
        dec_out_logits = []
        ref_points_detach = F.sigmoid(ref_points_unact)
        for i, layer in enumerate(self.layers):
            ref_points_input = ref_points_detach.unsqueeze(2)
            query_pos_embed = query_pos_head(ref_points_detach)

            output = layer(output, ref_points_input, memory,
                           memory_spatial_shapes, memory_level_start_index,
                           attn_mask, memory_mask, query_pos_embed)

            # Refinement from the detached reference: used as the next layer's
            # reference and as the eval-time output.
            inter_ref_bbox = F.sigmoid(bbox_head[i](output) + inverse_sigmoid(
                ref_points_detach))

            if self.training:
                dec_out_logits.append(score_head[i](output))
                if i == 0:
                    dec_out_bboxes.append(inter_ref_bbox)
                else:
                    # Later layers refine the *non-detached* previous reference
                    # so gradients flow through the whole refinement chain.
                    dec_out_bboxes.append(
                        F.sigmoid(bbox_head[i](output) + inverse_sigmoid(
                            ref_points)))
            elif i == self.eval_idx:
                dec_out_logits.append(score_head[i](output))
                dec_out_bboxes.append(inter_ref_bbox)
                # Fix: layers after eval_idx cannot affect the eval output,
                # so stop early instead of running them for nothing.
                break

            ref_points = inter_ref_bbox
            ref_points_detach = inter_ref_bbox.detach(
            ) if self.training else inter_ref_bbox

        return paddle.stack(dec_out_bboxes), paddle.stack(dec_out_logits)
+
+
@register
class RTDETRTransformer(nn.Layer):
    """RT-DETR transformer head.

    The encoder memory is scored, the top-`num_queries` proposals become the
    initial decoder queries / reference boxes, and a deformable-attention
    decoder iteratively refines them. Supports contrastive denoising training
    and, when `eval_size` is given, pre-computed anchors for a faster eval
    path.
    """
    __shared__ = ['num_classes', 'hidden_dim', 'eval_size']

    def __init__(self,
                 num_classes=80,
                 hidden_dim=256,
                 num_queries=300,
                 position_embed_type='sine',
                 backbone_feat_channels=[512, 1024, 2048],
                 feat_strides=[8, 16, 32],
                 num_levels=3,
                 num_decoder_points=4,
                 nhead=8,
                 num_decoder_layers=6,
                 dim_feedforward=1024,
                 dropout=0.,
                 activation="relu",
                 num_denoising=100,
                 label_noise_ratio=0.5,
                 box_noise_scale=1.0,
                 learnt_init_query=True,
                 eval_size=None,
                 eval_idx=-1,
                 eps=1e-2):
        super(RTDETRTransformer, self).__init__()
        assert position_embed_type in ['sine', 'learned'], \
            f'ValueError: position_embed_type not supported {position_embed_type}!'
        assert len(backbone_feat_channels) <= num_levels
        assert len(feat_strides) == len(backbone_feat_channels)
        # Fix: copy before extending, so the mutable default argument list is
        # never mutated in place across instantiations.
        feat_strides = list(feat_strides)
        # Extra (downsampled) levels double the last stride.
        for _ in range(num_levels - len(feat_strides)):
            feat_strides.append(feat_strides[-1] * 2)

        self.hidden_dim = hidden_dim
        self.nhead = nhead
        self.feat_strides = feat_strides
        self.num_levels = num_levels
        self.num_classes = num_classes
        self.num_queries = num_queries
        self.eps = eps
        self.num_decoder_layers = num_decoder_layers
        self.eval_size = eval_size

        # backbone feature projection
        self._build_input_proj_layer(backbone_feat_channels)

        # Transformer module
        decoder_layer = TransformerDecoderLayer(
            hidden_dim, nhead, dim_feedforward, dropout, activation, num_levels,
            num_decoder_points)
        self.decoder = TransformerDecoder(hidden_dim, decoder_layer,
                                          num_decoder_layers, eval_idx)

        # denoising part
        self.denoising_class_embed = nn.Embedding(
            num_classes,
            hidden_dim,
            weight_attr=ParamAttr(initializer=nn.initializer.Normal()))
        self.num_denoising = num_denoising
        self.label_noise_ratio = label_noise_ratio
        self.box_noise_scale = box_noise_scale

        # decoder embedding
        self.learnt_init_query = learnt_init_query
        if learnt_init_query:
            self.tgt_embed = nn.Embedding(num_queries, hidden_dim)
        # Maps a 4-d reference box to a query positional embedding.
        self.query_pos_head = MLP(4, 2 * hidden_dim, hidden_dim, num_layers=2)

        # encoder head
        self.enc_output = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(
                hidden_dim,
                weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
                bias_attr=ParamAttr(regularizer=L2Decay(0.0))))
        self.enc_score_head = nn.Linear(hidden_dim, num_classes)
        self.enc_bbox_head = MLP(hidden_dim, hidden_dim, 4, num_layers=3)

        # decoder head (one classification and one regression head per layer)
        self.dec_score_head = nn.LayerList([
            nn.Linear(hidden_dim, num_classes)
            for _ in range(num_decoder_layers)
        ])
        self.dec_bbox_head = nn.LayerList([
            MLP(hidden_dim, hidden_dim, 4, num_layers=3)
            for _ in range(num_decoder_layers)
        ])

        self._reset_parameters()

    def _reset_parameters(self):
        """Initialize heads, projections and (optionally) cached anchors."""
        # class and bbox head init: class bias set for a 0.01 prior, final
        # regression layers zero-initialized so refinement starts from the anchor.
        bias_cls = bias_init_with_prob(0.01)
        linear_init_(self.enc_score_head)
        constant_(self.enc_score_head.bias, bias_cls)
        constant_(self.enc_bbox_head.layers[-1].weight)
        constant_(self.enc_bbox_head.layers[-1].bias)
        for cls_, reg_ in zip(self.dec_score_head, self.dec_bbox_head):
            linear_init_(cls_)
            constant_(cls_.bias, bias_cls)
            constant_(reg_.layers[-1].weight)
            constant_(reg_.layers[-1].bias)

        linear_init_(self.enc_output[0])
        xavier_uniform_(self.enc_output[0].weight)
        if self.learnt_init_query:
            xavier_uniform_(self.tgt_embed.weight)
        xavier_uniform_(self.query_pos_head.layers[0].weight)
        xavier_uniform_(self.query_pos_head.layers[1].weight)
        for l in self.input_proj:
            xavier_uniform_(l[0].weight)

        # init encoder output anchors and valid_mask (fixed-size eval fast path)
        if self.eval_size:
            self.anchors, self.valid_mask = self._generate_anchors()

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'backbone_feat_channels': [i.channels for i in input_shape]}

    def _build_input_proj_layer(self, backbone_feat_channels):
        """1x1 conv+BN per backbone level, plus strided 3x3 conv+BN for any
        extra pyramid levels beyond the backbone outputs."""
        self.input_proj = nn.LayerList()
        for in_channels in backbone_feat_channels:
            self.input_proj.append(
                nn.Sequential(
                    ('conv', nn.Conv2D(
                        in_channels,
                        self.hidden_dim,
                        kernel_size=1,
                        bias_attr=False)), ('norm', nn.BatchNorm2D(
                            self.hidden_dim,
                            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
                            bias_attr=ParamAttr(regularizer=L2Decay(0.0))))))
        in_channels = backbone_feat_channels[-1]
        for _ in range(self.num_levels - len(backbone_feat_channels)):
            self.input_proj.append(
                nn.Sequential(
                    ('conv', nn.Conv2D(
                        in_channels,
                        self.hidden_dim,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        bias_attr=False)), ('norm', nn.BatchNorm2D(
                            self.hidden_dim,
                            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
                            bias_attr=ParamAttr(regularizer=L2Decay(0.0))))))
            in_channels = self.hidden_dim

    def _get_encoder_input(self, feats):
        """Project backbone maps and flatten them into one [b, sum(h*w), c]
        sequence, returning per-level spatial shapes and start offsets."""
        # get projection features
        proj_feats = [self.input_proj[i](feat) for i, feat in enumerate(feats)]
        if self.num_levels > len(proj_feats):
            len_srcs = len(proj_feats)
            # Extra levels: first one from the last backbone map, the rest
            # cascaded from the previous extra level.
            for i in range(len_srcs, self.num_levels):
                if i == len_srcs:
                    proj_feats.append(self.input_proj[i](feats[-1]))
                else:
                    proj_feats.append(self.input_proj[i](proj_feats[-1]))

        # get encoder inputs
        feat_flatten = []
        spatial_shapes = []
        level_start_index = [0, ]
        for i, feat in enumerate(proj_feats):
            _, _, h, w = feat.shape
            # [b, c, h, w] -> [b, h*w, c]
            feat_flatten.append(feat.flatten(2).transpose([0, 2, 1]))
            # [num_levels, 2]
            spatial_shapes.append([h, w])
            # [l], start index of each level
            level_start_index.append(h * w + level_start_index[-1])

        # [b, l, c]
        feat_flatten = paddle.concat(feat_flatten, 1)
        # Drop the trailing total so the list has one start offset per level.
        level_start_index.pop()
        return (feat_flatten, spatial_shapes, level_start_index)

    def forward(self, feats, pad_mask=None, gt_meta=None):
        """Run query selection + decoder.

        Returns (dec boxes, dec logits, encoder top-k boxes, encoder top-k
        logits, denoising meta). NOTE(review): pad_mask is currently unused.
        """
        # input projection and embedding
        (memory, spatial_shapes,
         level_start_index) = self._get_encoder_input(feats)

        # prepare denoising training
        if self.training:
            denoising_class, denoising_bbox_unact, attn_mask, dn_meta = \
                get_contrastive_denoising_training_group(gt_meta,
                                            self.num_classes,
                                            self.num_queries,
                                            self.denoising_class_embed.weight,
                                            self.num_denoising,
                                            self.label_noise_ratio,
                                            self.box_noise_scale)
        else:
            denoising_class, denoising_bbox_unact, attn_mask, dn_meta = None, None, None, None

        target, init_ref_points_unact, enc_topk_bboxes, enc_topk_logits = \
            self._get_decoder_input(
            memory, spatial_shapes, denoising_class, denoising_bbox_unact)

        # decoder
        out_bboxes, out_logits = self.decoder(
            target,
            init_ref_points_unact,
            memory,
            spatial_shapes,
            level_start_index,
            self.dec_bbox_head,
            self.dec_score_head,
            self.query_pos_head,
            attn_mask=attn_mask)
        return (out_bboxes, out_logits, enc_topk_bboxes, enc_topk_logits,
                dn_meta)

    def _generate_anchors(self,
                          spatial_shapes=None,
                          grid_size=0.05,
                          dtype="float32"):
        """Build inverse-sigmoid anchor boxes (cx, cy, w, h) for every cell of
        every level, and a mask of anchors safely inside (eps, 1 - eps)."""
        if spatial_shapes is None:
            spatial_shapes = [
                [int(self.eval_size[0] / s), int(self.eval_size[1] / s)]
                for s in self.feat_strides
            ]
        anchors = []
        for lvl, (h, w) in enumerate(spatial_shapes):
            grid_y, grid_x = paddle.meshgrid(
                paddle.arange(
                    end=h, dtype=dtype),
                paddle.arange(
                    end=w, dtype=dtype))
            grid_xy = paddle.stack([grid_x, grid_y], -1)

            # Fix: grid_xy is ordered (x, y), so normalize by (W, H); the
            # previous [h, w] order transposed the normalizer for non-square
            # inputs (harmless only when h == w).
            valid_WH = paddle.to_tensor([w, h]).astype(dtype)
            grid_xy = (grid_xy.unsqueeze(0) + 0.5) / valid_WH
            # Anchor size doubles at each pyramid level.
            wh = paddle.ones_like(grid_xy) * grid_size * (2.0**lvl)
            anchors.append(
                paddle.concat([grid_xy, wh], -1).reshape([-1, h * w, 4]))

        anchors = paddle.concat(anchors, 1)
        valid_mask = ((anchors > self.eps) *
                      (anchors < 1 - self.eps)).all(-1, keepdim=True)
        # Inverse sigmoid; invalid anchors are pushed to +inf so they never win.
        anchors = paddle.log(anchors / (1 - anchors))
        anchors = paddle.where(valid_mask, anchors,
                               paddle.to_tensor(float("inf")))
        return anchors, valid_mask

    def _get_decoder_input(self,
                           memory,
                           spatial_shapes,
                           denoising_class=None,
                           denoising_bbox_unact=None):
        """Select the top-`num_queries` encoder proposals as decoder input and
        prepend the denoising queries (training only)."""
        bs, _, _ = memory.shape
        # prepare input for decoder
        if self.training or self.eval_size is None:
            anchors, valid_mask = self._generate_anchors(spatial_shapes)
        else:
            # Fixed-size eval path: reuse anchors cached in _reset_parameters.
            anchors, valid_mask = self.anchors, self.valid_mask
        # Zero out features at positions whose anchors fall outside (eps, 1-eps).
        memory = paddle.where(valid_mask, memory, paddle.to_tensor(0.))
        output_memory = self.enc_output(memory)

        enc_outputs_class = self.enc_score_head(output_memory)
        enc_outputs_coord_unact = self.enc_bbox_head(output_memory) + anchors

        # Rank proposals by their best class score.
        _, topk_ind = paddle.topk(
            enc_outputs_class.max(-1), self.num_queries, axis=1)
        # extract region proposal boxes
        batch_ind = paddle.arange(end=bs, dtype=topk_ind.dtype)
        batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_queries])
        topk_ind = paddle.stack([batch_ind, topk_ind], axis=-1)

        reference_points_unact = paddle.gather_nd(enc_outputs_coord_unact,
                                                  topk_ind)  # unsigmoided.
        enc_topk_bboxes = F.sigmoid(reference_points_unact)
        if denoising_bbox_unact is not None:
            reference_points_unact = paddle.concat(
                [denoising_bbox_unact, reference_points_unact], 1)
        if self.training:
            # Detach so the decoder does not backprop into query selection.
            reference_points_unact = reference_points_unact.detach()
        enc_topk_logits = paddle.gather_nd(enc_outputs_class, topk_ind)

        # extract region features
        if self.learnt_init_query:
            target = self.tgt_embed.weight.unsqueeze(0).tile([bs, 1, 1])
        else:
            target = paddle.gather_nd(output_memory, topk_ind)
            if self.training:
                target = target.detach()
        if denoising_class is not None:
            target = paddle.concat([denoising_class, target], 1)

        return target, reference_points_unact, enc_topk_bboxes, enc_topk_logits
diff --git a/ppdet/modeling/transformers/utils.py b/ppdet/modeling/transformers/utils.py
index b19233fdeec..a6f211a78f2 100644
--- a/ppdet/modeling/transformers/utils.py
+++ b/ppdet/modeling/transformers/utils.py
@@ -32,7 +32,7 @@
__all__ = [
'_get_clones', 'bbox_overlaps', 'bbox_cxcywh_to_xyxy',
'bbox_xyxy_to_cxcywh', 'sigmoid_focal_loss', 'inverse_sigmoid',
- 'deformable_attention_core_func'
+ 'deformable_attention_core_func', 'varifocal_loss_with_logits'
]
@@ -395,3 +395,16 @@ def mask_to_box_coordinate(mask,
out_bbox /= paddle.to_tensor([w, h, w, h]).astype(dtype)
return out_bbox if format == "xyxy" else bbox_xyxy_to_cxcywh(out_bbox)
+
+
def varifocal_loss_with_logits(pred_logits,
                               gt_score,
                               label,
                               normalizer=1.0,
                               alpha=0.75,
                               gamma=2.0):
    """Varifocal loss (IoU-aware classification loss) from raw logits.

    Positives (label == 1) are weighted by their target IoU score; negatives
    are down-weighted by alpha * p^gamma, as in VarifocalNet.
    """
    prob = F.sigmoid(pred_logits)
    focal_weight = gt_score * label + alpha * prob.pow(gamma) * (1 - label)
    per_element = F.binary_cross_entropy_with_logits(
        pred_logits, gt_score, weight=focal_weight, reduction='none')
    return per_element.mean(1).sum() / normalizer
From 71eb35687de84830cb60c4c2b4d077f12720c7b7 Mon Sep 17 00:00:00 2001
From: dust8
Date: Tue, 18 Apr 2023 16:52:57 +0800
Subject: [PATCH 064/116] fix picodet docs (#8005)
---
configs/picodet/README.md | 2 +-
configs/picodet/README_en.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/configs/picodet/README.md b/configs/picodet/README.md
index 17ea8566e81..206730a8453 100644
--- a/configs/picodet/README.md
+++ b/configs/picodet/README.md
@@ -155,7 +155,7 @@ python tools/export_model.py -c configs/picodet/picodet_s_320_coco_lcnet.yml \
--output_dir=output_inference
```
-- 如无需导出后处理,请指定:`-o export.benchmark=True`(如果-o已出现过,此处删掉-o)或者手动修改[runtime.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/runtime.yml) 中相应字段。
+- 如无需导出后处理,请指定:`-o export.post_process=False`(如果-o已出现过,此处删掉-o)或者手动修改[runtime.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/runtime.yml) 中相应字段。
- 如无需导出NMS,请指定:`-o export.nms=False`或者手动修改[runtime.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/runtime.yml) 中相应字段。 许多导出至ONNX场景只支持单输入及固定shape输出,所以如果导出至ONNX,推荐不导出NMS。
diff --git a/configs/picodet/README_en.md b/configs/picodet/README_en.md
index 46136678b82..7a4b38d1874 100644
--- a/configs/picodet/README_en.md
+++ b/configs/picodet/README_en.md
@@ -141,7 +141,7 @@ python tools/export_model.py -c configs/picodet/picodet_s_320_coco_lcnet.yml \
--output_dir=output_inference
```
-- If no post processing is required, please specify: `-o export.benchmark=True` (if -o has already appeared, delete -o here) or manually modify corresponding fields in [runtime.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/runtime.yml).
+- If no post processing is required, please specify: `-o export.post_process=False` (if -o has already appeared, delete -o here) or manually modify corresponding fields in [runtime.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/runtime.yml).
- If no NMS is required, please specify: `-o export.nms=False` or manually modify corresponding fields in [runtime.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/runtime.yml). Many scenes exported to ONNX only support single input and fixed shape output, so if exporting to ONNX, it is recommended not to export NMS.
From 0da41eac58ff393e17a9a3542590eb1be3de47a1 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Tue, 18 Apr 2023 18:44:15 +0800
Subject: [PATCH 065/116] update readme (#8099)
---
configs/rtdetr/README.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index 3a11e87bca3..92ca0347ba9 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -14,10 +14,10 @@ We propose a **R**eal-**T**ime **DE**tection **TR**ansformer (RT-DETR), the firs
| Model | Epoch | backbone | input shape | $AP^{val}$ | $AP^{val}_{50}$| Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | Pretrained Model | config |
|:--------------:|:-----:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
-| RT-DETR-R50 | 80 | ResNet-50 | 640 | 53.1 | 71.3 | 42 | 136 | 108 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams) | [config](./rtdetr_r50vd_6x_coco.yml)
-| RT-DETR-R101 | 80 | ResNet-101 | 640 | 54.3 | 72.7 | 76 | 259 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams) | [config](./rtdetr_r101vd_6x_coco.yml)
-| RT-DETR-L | 80 | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [comming soon](rtdetr_hgnetv2_l_6x_coco.yml)
-| RT-DETR-X | 80 | HGNetv2 | 640 | 54.8 | 73.1 | 67 | 234 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_x_6x_coco.pdparams) | [comming soon](rtdetr_hgnetv2_x_6x_coco.yml)
+| RT-DETR-R50 | 6x | ResNet-50 | 640 | 53.1 | 71.3 | 42 | 136 | 108 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams) | [config](./rtdetr_r50vd_6x_coco.yml)
+| RT-DETR-R101 | 6x | ResNet-101 | 640 | 54.3 | 72.7 | 76 | 259 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams) | [config](./rtdetr_r101vd_6x_coco.yml)
+| RT-DETR-L | 6x | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [coming soon](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [coming soon](rtdetr_hgnetv2_l_6x_coco.yml)
+| RT-DETR-X | 6x | HGNetv2 | 640 | 54.8 | 73.1 | 67 | 234 | 74 | [coming soon](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_x_6x_coco.pdparams) | [coming soon](rtdetr_hgnetv2_x_6x_coco.yml)
**Notes:**
- RT-DETR uses 4GPU to train.
From 4406f68e8dbe057b4083b9b1e58962bf1e494a9a Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Wed, 19 Apr 2023 14:15:04 +0800
Subject: [PATCH 066/116] Add rtdetr hgnetv2 l&x (#8105)
* add rtdetr hgnet
---
configs/rtdetr/README.md | 121 +++++-
configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml | 24 ++
configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml | 40 ++
ppdet/modeling/backbones/__init__.py | 4 +-
ppdet/modeling/backbones/hgnet_v2.py | 446 ++++++++++++++++++++
5 files changed, 619 insertions(+), 16 deletions(-)
create mode 100644 configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml
create mode 100644 configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml
create mode 100644 ppdet/modeling/backbones/hgnet_v2.py
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index 92ca0347ba9..e1f2a5be894 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -1,34 +1,125 @@
# DETRs Beat YOLOs on Real-time Object Detection
-## Introduction
-We propose a **R**eal-**T**ime **DE**tection **TR**ansformer (RT-DETR), the first real-time end-to-end object detector to our best knowledge. Specifically, we design an efficient hybrid encoder to efficiently process multi-scale features by decoupling the intra-scale interaction and cross-scale fusion, and propose IoU-aware query selection to improve the initialization of object queries. In addition, our proposed detector supports flexibly adjustment of the inference speed by using different decoder layers without the need for retraining, which facilitates the practical application of real-time object detectors. Our RT-DETR-L achieves 53.0% AP on COCO val2017 and 114 FPS on T4 GPU, while RT-DETR-X achieves 54.8% AP and 74 FPS, outperforming all YOLO detectors of the same scale in both speed and accuracy. Furthermore, our RT-DETR-R50 achieves 53.1% AP and 108 FPS, outperforming DINO-Deformable-DETR-R50 by 2.2% AP in accuracy and by about 21 times in FPS. For more details, please refer to our [paper](https://arxiv.org/abs/2304.08069).
+## 最新动态
-
-

-
+- 发布RT-DETR-R50和RT-DETR-R101的代码和预训练模型。
+- 发布RT-DETR-L和RT-DETR-X的代码和预训练模型。
+## 简介
+
+RT-DETR是第一个实时端到端目标检测器。具体而言,我们设计了一个高效的混合编码器,通过解耦尺度内交互和跨尺度融合来高效处理多尺度特征,并提出了IoU感知的查询选择机制,以优化解码器查询的初始化。此外,RT-DETR支持通过使用不同的解码器层来灵活调整推理速度,而不需要重新训练,这有助于实时目标检测器的实际应用。RT-DETR-L在COCO val2017上实现了53.0%的AP,在T4 GPU上实现了114FPS,RT-DETR-X实现了54.8%的AP和74FPS,在速度和精度方面都优于相同规模的所有YOLO检测器。RT-DETR-R50实现了53.1%的AP和108FPS,RT-DETR-R101实现了54.3%的AP和74FPS,在精度上超过了全部使用相同骨干网络的DETR检测器。
+若要了解更多细节,请参考我们的论文[paper](https://arxiv.org/abs/2304.08069).
-## Model Zoo
+
+

+
-### Model Zoo on COCO
+## 模型
| Model | Epoch | backbone | input shape | $AP^{val}$ | $AP^{val}_{50}$| Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | Pretrained Model | config |
|:--------------:|:-----:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
| RT-DETR-R50 | 6x | ResNet-50 | 640 | 53.1 | 71.3 | 42 | 136 | 108 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams) | [config](./rtdetr_r50vd_6x_coco.yml)
| RT-DETR-R101 | 6x | ResNet-101 | 640 | 54.3 | 72.7 | 76 | 259 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams) | [config](./rtdetr_r101vd_6x_coco.yml)
-| RT-DETR-L | 6x | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [coming soon](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [coming soon](rtdetr_hgnetv2_l_6x_coco.yml)
-| RT-DETR-X | 6x | HGNetv2 | 640 | 54.8 | 73.1 | 67 | 234 | 74 | [coming soon](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_x_6x_coco.pdparams) | [coming soon](rtdetr_hgnetv2_x_6x_coco.yml)
+| RT-DETR-L | 6x | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [config](rtdetr_hgnetv2_l_6x_coco.yml)
+| RT-DETR-X | 6x | HGNetv2 | 640 | 54.8 | 73.1 | 67 | 234 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_x_6x_coco.pdparams) | [config](rtdetr_hgnetv2_x_6x_coco.yml)
+
+**注意事项:**
+- RT-DETR 使用4个GPU训练。
+- RT-DETR 在COCO train2017上训练,并在val2017上评估。
+
+## 快速开始
+
+
+依赖包:
+
+- PaddlePaddle == 2.4.1
+
+
+
+
+安装
-**Notes:**
-- RT-DETR uses 4GPU to train.
-- RT-DETR is trained on COCO train2017 dataset and evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
+- [安装指导文档](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/INSTALL.md)
-GPU multi-card training
-```bash
+
+
+
+训练&评估
+
+- 单卡GPU上训练:
+
+```shell
+# training on single-GPU
+export CUDA_VISIBLE_DEVICES=0
+python tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --eval
+```
+
+- 多卡GPU上训练:
+
+```shell
+# training on multi-GPU
+export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --fleet --eval
```
-## Citations
+- 评估:
+
+```shell
+python tools/eval.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
+ -o weights=https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams
+```
+
+- 测试:
+
+```shell
+python tools/infer.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
+ -o weights=https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams
+```
+
+详情请参考[快速开始文档](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED.md).
+
+
+
+## 部署
+
+### 导出及转换模型
+
+
+1. 导出模型
+
+```shell
+cd PaddleDetection
+python tools/export_model.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
+ -o weights=https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams trt=True \
+ --output_dir=output_inference
+```
+
+
+
+
+2. 转换模型至ONNX (点击展开)
+
+- 安装[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) 和 ONNX
+
+```shell
+pip install onnx==1.13.0
+pip install paddle2onnx==1.0.5
+```
+
+- 转换模型:
+
+```shell
+paddle2onnx --model_dir=./output_inference/rtdetr_r50vd_6x_coco/ \
+ --model_filename model.pdmodel \
+ --params_filename model.pdiparams \
+ --opset_version 16 \
+ --save_file rtdetr_r50vd_6x_coco.onnx
+```
+
+
+
+## 引用RT-DETR
+如果需要在你的研究中使用RT-DETR,请通过以下方式引用我们的论文:
```
@misc{lv2023detrs,
title={DETRs Beat YOLOs on Real-time Object Detection},
diff --git a/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml b/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml
new file mode 100644
index 00000000000..4f3e77df187
--- /dev/null
+++ b/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml
@@ -0,0 +1,24 @@
# RT-DETR with PP-HGNetV2-L backbone, 6x schedule on COCO.
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_6x.yml',
  '_base_/rtdetr_r50vd.yml',
  '_base_/rtdetr_reader.yml',
]

weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
find_unused_parameters: True
log_iter: 200


DETR:
  backbone: PPHGNetV2

PPHGNetV2:
  arch: 'L'
  return_idx: [1, 2, 3]
  freeze_stem_only: True
  freeze_at: 0
  freeze_norm: True
  # stem gets lr 0; the later stages are fine-tuned with a small multiplier
  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
diff --git a/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml b/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml
new file mode 100644
index 00000000000..37f5d17930c
--- /dev/null
+++ b/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml
@@ -0,0 +1,40 @@
# RT-DETR with PP-HGNetV2-X backbone, 6x schedule on COCO.
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_6x.yml',
  '_base_/rtdetr_r50vd.yml',
  '_base_/rtdetr_reader.yml',
]

# Fix: was output/rtdetr_hgnetv2_l_6x_coco — copy-paste from the L config.
weights: output/rtdetr_hgnetv2_x_6x_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams
find_unused_parameters: True
log_iter: 200



DETR:
  backbone: PPHGNetV2


PPHGNetV2:
  arch: 'X'
  return_idx: [1, 2, 3]
  freeze_stem_only: True
  freeze_at: 0
  freeze_norm: True
  # stem gets lr 0; the later stages are fine-tuned with a small multiplier
  lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01]


# X variant widens the hybrid encoder to 384 channels.
HybridEncoder:
  hidden_dim: 384
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 384
    nhead: 8
    dim_feedforward: 2048
    dropout: 0.
    activation: 'gelu'
  expansion: 1.0
diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py
index a20189c9487..e61ff711186 100644
--- a/ppdet/modeling/backbones/__init__.py
+++ b/ppdet/modeling/backbones/__init__.py
@@ -37,6 +37,7 @@
from . import trans_encoder
from . import focalnet
from . import vit_mae
+from . import hgnet_v2
from .vgg import *
from .resnet import *
@@ -63,4 +64,5 @@
from .trans_encoder import *
from .focalnet import *
from .vitpose import *
-from .vit_mae import *
\ No newline at end of file
+from .vit_mae import *
+from .hgnet_v2 import *
diff --git a/ppdet/modeling/backbones/hgnet_v2.py b/ppdet/modeling/backbones/hgnet_v2.py
new file mode 100644
index 00000000000..c4cc0787a23
--- /dev/null
+++ b/ppdet/modeling/backbones/hgnet_v2.py
@@ -0,0 +1,446 @@
+# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn.initializer import KaimingNormal, Constant
+from paddle.nn import Conv2D, BatchNorm2D, ReLU, AdaptiveAvgPool2D, MaxPool2D
+from paddle.regularizer import L2Decay
+from paddle import ParamAttr
+
+import copy
+
+from ppdet.core.workspace import register, serializable
+from ..shape_spec import ShapeSpec
+
+__all__ = ['PPHGNetV2']
+
+kaiming_normal_ = KaimingNormal()
+zeros_ = Constant(value=0.)
+ones_ = Constant(value=1.)
+
+
+class LearnableAffineBlock(nn.Layer):
+ def __init__(self,
+ scale_value=1.0,
+ bias_value=0.0,
+ lr_mult=1.0,
+ lab_lr=0.01):
+ super().__init__()
+ self.scale = self.create_parameter(
+ shape=[1, ],
+ default_initializer=Constant(value=scale_value),
+ attr=ParamAttr(learning_rate=lr_mult * lab_lr))
+ self.add_parameter("scale", self.scale)
+ self.bias = self.create_parameter(
+ shape=[1, ],
+ default_initializer=Constant(value=bias_value),
+ attr=ParamAttr(learning_rate=lr_mult * lab_lr))
+ self.add_parameter("bias", self.bias)
+
+ def forward(self, x):
+ return self.scale * x + self.bias
+
+
+class ConvBNAct(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ groups=1,
+ use_act=True,
+ use_lab=False,
+ lr_mult=1.0):
+ super().__init__()
+ self.use_act = use_act
+ self.use_lab = use_lab
+ self.conv = Conv2D(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ padding=padding
+ if isinstance(padding, str) else (kernel_size - 1) // 2,
+ groups=groups,
+ bias_attr=False)
+ self.bn = BatchNorm2D(
+ out_channels,
+ weight_attr=ParamAttr(
+ regularizer=L2Decay(0.0), learning_rate=lr_mult),
+ bias_attr=ParamAttr(
+ regularizer=L2Decay(0.0), learning_rate=lr_mult))
+ if self.use_act:
+ self.act = ReLU()
+ if self.use_lab:
+ self.lab = LearnableAffineBlock(lr_mult=lr_mult)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ if self.use_act:
+ x = self.act(x)
+ if self.use_lab:
+ x = self.lab(x)
+ return x
+
+
+class LightConvBNAct(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ groups=1,
+ use_lab=False,
+ lr_mult=1.0):
+ super().__init__()
+ self.conv1 = ConvBNAct(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=1,
+ use_act=False,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+ self.conv2 = ConvBNAct(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ groups=out_channels,
+ use_act=True,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.conv2(x)
+ return x
+
+
+class StemBlock(nn.Layer):
+ def __init__(self,
+ in_channels,
+ mid_channels,
+ out_channels,
+ use_lab=False,
+ lr_mult=1.0):
+ super().__init__()
+ self.stem1 = ConvBNAct(
+ in_channels=in_channels,
+ out_channels=mid_channels,
+ kernel_size=3,
+ stride=2,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+ self.stem2a = ConvBNAct(
+ in_channels=mid_channels,
+ out_channels=mid_channels // 2,
+ kernel_size=2,
+ stride=1,
+ padding="SAME",
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+ self.stem2b = ConvBNAct(
+ in_channels=mid_channels // 2,
+ out_channels=mid_channels,
+ kernel_size=2,
+ stride=1,
+ padding="SAME",
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+ self.stem3 = ConvBNAct(
+ in_channels=mid_channels * 2,
+ out_channels=mid_channels,
+ kernel_size=3,
+ stride=2,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+ self.stem4 = ConvBNAct(
+ in_channels=mid_channels,
+ out_channels=out_channels,
+ kernel_size=1,
+ stride=1,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+ self.pool = nn.MaxPool2D(
+ kernel_size=2, stride=1, ceil_mode=True, padding="SAME")
+
+ def forward(self, x):
+ x = self.stem1(x)
+ x2 = self.stem2a(x)
+ x2 = self.stem2b(x2)
+ x1 = self.pool(x)
+ x = paddle.concat([x1, x2], 1)
+ x = self.stem3(x)
+ x = self.stem4(x)
+
+ return x
+
+
+class HG_Block(nn.Layer):
+ def __init__(self,
+ in_channels,
+ mid_channels,
+ out_channels,
+ kernel_size=3,
+ layer_num=6,
+ identity=False,
+ light_block=True,
+ use_lab=False,
+ lr_mult=1.0):
+ super().__init__()
+ self.identity = identity
+
+ self.layers = nn.LayerList()
+ block_type = "LightConvBNAct" if light_block else "ConvBNAct"
+ for i in range(layer_num):
+ self.layers.append(
+ eval(block_type)(in_channels=in_channels
+ if i == 0 else mid_channels,
+ out_channels=mid_channels,
+ stride=1,
+ kernel_size=kernel_size,
+ use_lab=use_lab,
+ lr_mult=lr_mult))
+ # feature aggregation
+ total_channels = in_channels + layer_num * mid_channels
+ self.aggregation_squeeze_conv = ConvBNAct(
+ in_channels=total_channels,
+ out_channels=out_channels // 2,
+ kernel_size=1,
+ stride=1,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+ self.aggregation_excitation_conv = ConvBNAct(
+ in_channels=out_channels // 2,
+ out_channels=out_channels,
+ kernel_size=1,
+ stride=1,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+
+ def forward(self, x):
+ identity = x
+ output = []
+ output.append(x)
+ for layer in self.layers:
+ x = layer(x)
+ output.append(x)
+ x = paddle.concat(output, axis=1)
+ x = self.aggregation_squeeze_conv(x)
+ x = self.aggregation_excitation_conv(x)
+ if self.identity:
+ x += identity
+ return x
+
+
+class HG_Stage(nn.Layer):
+ def __init__(self,
+ in_channels,
+ mid_channels,
+ out_channels,
+ block_num,
+ layer_num=6,
+ downsample=True,
+ light_block=True,
+ kernel_size=3,
+ use_lab=False,
+ lr_mult=1.0):
+ super().__init__()
+ self.downsample = downsample
+ if downsample:
+ self.downsample = ConvBNAct(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ kernel_size=3,
+ stride=2,
+ groups=in_channels,
+ use_act=False,
+ use_lab=use_lab,
+ lr_mult=lr_mult)
+
+ blocks_list = []
+ for i in range(block_num):
+ blocks_list.append(
+ HG_Block(
+ in_channels=in_channels if i == 0 else out_channels,
+ mid_channels=mid_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ layer_num=layer_num,
+ identity=False if i == 0 else True,
+ light_block=light_block,
+ use_lab=use_lab,
+ lr_mult=lr_mult))
+ self.blocks = nn.Sequential(*blocks_list)
+
+ def forward(self, x):
+ if self.downsample:
+ x = self.downsample(x)
+ x = self.blocks(x)
+ return x
+
+
+def _freeze_norm(m: nn.BatchNorm2D):
+ param_attr = ParamAttr(
+ learning_rate=0., regularizer=L2Decay(0.), trainable=False)
+ bias_attr = ParamAttr(
+ learning_rate=0., regularizer=L2Decay(0.), trainable=False)
+ global_stats = True
+ norm = nn.BatchNorm2D(
+ m._num_features,
+ weight_attr=param_attr,
+ bias_attr=bias_attr,
+ use_global_stats=global_stats)
+ for param in norm.parameters():
+ param.stop_gradient = True
+ return norm
+
+
+def reset_bn(model: nn.Layer, reset_func=_freeze_norm):
+ if isinstance(model, nn.BatchNorm2D):
+ model = reset_func(model)
+ else:
+ for name, child in model.named_children():
+ _child = reset_bn(child, reset_func)
+ if _child is not child:
+ setattr(model, name, _child)
+ return model
+
+
+@register
+@serializable
+class PPHGNetV2(nn.Layer):
+ """
+ PPHGNetV2
+ Args:
+        arch: str. Architecture variant, 'L' or 'X', selecting the stem/stage configuration.
+        use_lab: boolean. Whether to use LearnableAffineBlock in network.
+        lr_mult_list: list. Control the learning rate of different stages.
+        return_idx: list. Indices of the stages whose feature maps are returned.
+ Returns:
+ model: nn.Layer. Specific PPHGNetV2 model depends on args.
+ """
+
+ arch_configs = {
+ 'L': {
+ 'stem_channels': [3, 32, 48],
+ 'stage_config': {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [48, 48, 128, 1, False, False, 3, 6],
+ "stage2": [128, 96, 512, 1, True, False, 3, 6],
+ "stage3": [512, 192, 1024, 3, True, True, 5, 6],
+ "stage4": [1024, 384, 2048, 1, True, True, 5, 6],
+ }
+ },
+ 'X': {
+ 'stem_channels': [3, 32, 64],
+ 'stage_config': {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [64, 64, 128, 1, False, False, 3, 6],
+ "stage2": [128, 128, 512, 2, True, False, 3, 6],
+ "stage3": [512, 256, 1024, 5, True, True, 5, 6],
+ "stage4": [1024, 512, 2048, 2, True, True, 5, 6],
+ }
+ }
+ }
+
+ def __init__(self,
+ arch,
+ use_lab=False,
+ lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
+ return_idx=[1, 2, 3],
+ freeze_stem_only=True,
+ freeze_at=0,
+ freeze_norm=True):
+ super().__init__()
+ self.use_lab = use_lab
+ self.return_idx = return_idx
+
+ stem_channels = self.arch_configs[arch]['stem_channels']
+ stage_config = self.arch_configs[arch]['stage_config']
+
+ self._out_strides = [4, 8, 16, 32]
+ self._out_channels = [stage_config[k][2] for k in stage_config]
+
+ # stem
+ self.stem = StemBlock(
+ in_channels=stem_channels[0],
+ mid_channels=stem_channels[1],
+ out_channels=stem_channels[2],
+ use_lab=use_lab,
+ lr_mult=lr_mult_list[0])
+
+ # stages
+ self.stages = nn.LayerList()
+ for i, k in enumerate(stage_config):
+ in_channels, mid_channels, out_channels, block_num, downsample, light_block, kernel_size, layer_num = stage_config[
+ k]
+ self.stages.append(
+ HG_Stage(
+ in_channels,
+ mid_channels,
+ out_channels,
+ block_num,
+ layer_num,
+ downsample,
+ light_block,
+ kernel_size,
+ use_lab,
+ lr_mult=lr_mult_list[i + 1]))
+
+ if freeze_at >= 0:
+ self._freeze_parameters(self.stem)
+ if not freeze_stem_only:
+ for i in range(min(freeze_at + 1, len(self.stages))):
+ self._freeze_parameters(self.stages[i])
+
+ if freeze_norm:
+ reset_bn(self, reset_func=_freeze_norm)
+
+ self._init_weights()
+
+ def _freeze_parameters(self, m):
+ for p in m.parameters():
+ p.stop_gradient = True
+
+ def _init_weights(self):
+ for m in self.sublayers():
+ if isinstance(m, nn.Conv2D):
+ kaiming_normal_(m.weight)
+ elif isinstance(m, (nn.BatchNorm2D)):
+ ones_(m.weight)
+ zeros_(m.bias)
+ elif isinstance(m, nn.Linear):
+ zeros_(m.bias)
+
+ @property
+ def out_shape(self):
+ return [
+ ShapeSpec(
+ channels=self._out_channels[i], stride=self._out_strides[i])
+ for i in self.return_idx
+ ]
+
+ def forward(self, inputs):
+ x = inputs['image']
+ x = self.stem(x)
+ outs = []
+ for idx, stage in enumerate(self.stages):
+ x = stage(x)
+ if idx in self.return_idx:
+ outs.append(x)
+ return outs
From 72ae571da36aff55058c70f302e57097ad6278fc Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Wed, 19 Apr 2023 19:40:35 +0800
Subject: [PATCH 067/116] update readme (#8108)
---
configs/rtdetr/README.md | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index e1f2a5be894..e68533e1acb 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -32,7 +32,7 @@ RT-DETR是第一个实时端到端目标检测器。具体而言,我们设计
依赖包:
-- PaddlePaddle == 2.4.1
+- PaddlePaddle >= 2.4.1
@@ -85,7 +85,7 @@ python tools/infer.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
### 导出及转换模型
-1. 导出模型
+1. 导出模型
```shell
cd PaddleDetection
@@ -97,7 +97,7 @@ python tools/export_model.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
-2. 转换模型至ONNX (点击展开)
+2. 转换模型至ONNX
- 安装[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) 和 ONNX
@@ -116,6 +116,18 @@ paddle2onnx --model_dir=./output_inference/rtdetr_r50vd_6x_coco/ \
--save_file rtdetr_r50vd_6x_coco.onnx
```
+- 转换成TensorRT(可选):
+
+```shell
+# 保证TensorRT的版本>=8.5.1
+trtexec --onnx=./rtdetr_r50vd_6x_coco.onnx \
+ --workspace=4096 \
+ --shapes=image:1x3x640x640 \
+ --saveEngine=rtdetr_r50vd_6x_coco.trt \
+ --avgRuns=100 \
+ --fp16
+```
+
## 引用RT-DETR
From 4ee5e38bc092e967b0ae9a466abb43203ab71fab Mon Sep 17 00:00:00 2001
From: JYChen
Date: Thu, 20 Apr 2023 16:53:17 +0800
Subject: [PATCH 068/116] make solov2 & faster-rcnn adaptive to 0d-output for
__getitem__ (#8112)
* make solov2 adaptive to getitem output 0-D
* make faster-rcnn adaptive to getitem output 0-D
---
ppdet/modeling/heads/solov2_head.py | 12 ++++++------
ppdet/modeling/layers.py | 4 ++--
ppdet/modeling/post_process.py | 14 +++++++-------
ppdet/modeling/proposal_generator/rpn_head.py | 2 +-
4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/ppdet/modeling/heads/solov2_head.py b/ppdet/modeling/heads/solov2_head.py
index 6989abb3a8a..0fd0f619f2b 100644
--- a/ppdet/modeling/heads/solov2_head.py
+++ b/ppdet/modeling/heads/solov2_head.py
@@ -449,7 +449,7 @@ def get_prediction(self, cate_preds, kernel_preds, seg_pred, im_shape,
seg_masks, cate_labels, cate_scores = self.get_seg_single(
cate_pred_list, seg_pred_list, kernel_pred_list, featmap_size,
im_shape[idx], scale_factor[idx][0])
- bbox_num = paddle.shape(cate_labels)[0]
+ bbox_num = paddle.shape(cate_labels)[0:1]
return seg_masks, cate_labels, cate_scores, bbox_num
def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
@@ -458,8 +458,8 @@ def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
The code of this function is based on:
https://github.com/WXinlong/SOLO/blob/master/mmdet/models/anchor_heads/solov2_head.py#L385
"""
- h = paddle.cast(im_shape[0], 'int32')[0]
- w = paddle.cast(im_shape[1], 'int32')[0]
+ h = paddle.cast(im_shape[0], 'int32')
+ w = paddle.cast(im_shape[1], 'int32')
upsampled_size_out = [featmap_size[0] * 4, featmap_size[1] * 4]
y = paddle.zeros(shape=paddle.shape(cate_preds), dtype='float32')
@@ -467,7 +467,7 @@ def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
inds = paddle.nonzero(inds)
cate_preds = paddle.reshape(cate_preds, shape=[-1])
# Prevent empty and increase fake data
- ind_a = paddle.cast(paddle.shape(kernel_preds)[0], 'int64')
+ ind_a = paddle.cast(paddle.shape(kernel_preds)[0:1], 'int64')
ind_b = paddle.zeros(shape=[1], dtype='int64')
inds_end = paddle.unsqueeze(paddle.concat([ind_a, ind_b]), 0)
inds = paddle.concat([inds, inds_end])
@@ -513,9 +513,9 @@ def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
keep = paddle.squeeze(keep, axis=[1])
# Prevent empty and increase fake data
keep_other = paddle.concat(
- [keep, paddle.cast(paddle.shape(sum_masks)[0] - 1, 'int64')])
+ [keep, paddle.cast(paddle.shape(sum_masks)[0:1] - 1, 'int64')])
keep_scores = paddle.concat(
- [keep, paddle.cast(paddle.shape(sum_masks)[0], 'int64')])
+ [keep, paddle.cast(paddle.shape(sum_masks)[0:1], 'int64')])
cate_scores_end = paddle.zeros(shape=[1], dtype='float32')
cate_scores = paddle.concat([cate_scores, cate_scores_end])
diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py
index f267b174584..86c6d9697ff 100644
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -1003,7 +1003,7 @@ def __call__(self,
keep = paddle.squeeze(keep, axis=[1])
# Prevent empty and increase fake data
keep = paddle.concat(
- [keep, paddle.cast(paddle.shape(cate_scores)[0] - 1, 'int64')])
+ [keep, paddle.cast(paddle.shape(cate_scores)[0:1] - 1, 'int64')])
seg_preds = paddle.gather(seg_preds, index=keep)
cate_scores = paddle.gather(cate_scores, index=keep)
@@ -1337,7 +1337,7 @@ def conv_mixer(
Seq, ActBn = nn.Sequential, lambda x: Seq(x, nn.GELU(), nn.BatchNorm2D(dim))
Residual = type('Residual', (Seq, ),
{'forward': lambda self, x: self[0](x) + x})
- return Seq(*[
+ return Seq(* [
Seq(Residual(
ActBn(
nn.Conv2D(
diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py
index 65e8d0b8bcf..24722ff6740 100644
--- a/ppdet/modeling/post_process.py
+++ b/ppdet/modeling/post_process.py
@@ -127,8 +127,8 @@ def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
bbox_num_i = fake_bbox_num
else:
bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
- bbox_num_i = bbox_num[i]
- id_start += bbox_num[i]
+ bbox_num_i = bbox_num[i:i + 1]
+ id_start += bbox_num[i:i + 1]
bboxes_list.append(bboxes_i)
bbox_num_list.append(bbox_num_i)
bboxes = paddle.concat(bboxes_list)
@@ -142,10 +142,10 @@ def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
# scale_factor: scale_y, scale_x
for i in range(bbox_num.shape[0]):
expand_shape = paddle.expand(origin_shape[i:i + 1, :],
- [bbox_num[i], 2])
- scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
+ [bbox_num[i:i + 1], 2])
+ scale_y, scale_x = scale_factor[i, 0:1], scale_factor[i, 1:2]
scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
- expand_scale = paddle.expand(scale, [bbox_num[i], 4])
+ expand_scale = paddle.expand(scale, [bbox_num[i:i + 1], 4])
origin_shape_list.append(expand_shape)
scale_factor_list.append(expand_scale)
@@ -158,8 +158,8 @@ def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
scale = paddle.concat(
[scale_x, scale_y, scale_x, scale_y]).unsqueeze(0)
self.origin_shape_list = paddle.expand(origin_shape,
- [bbox_num[0], 2])
- scale_factor_list = paddle.expand(scale, [bbox_num[0], 4])
+ [bbox_num[0:1], 2])
+ scale_factor_list = paddle.expand(scale, [bbox_num[0:1], 4])
# bboxes: [N, 6], label, score, bbox
pred_label = bboxes[:, 0:1]
diff --git a/ppdet/modeling/proposal_generator/rpn_head.py b/ppdet/modeling/proposal_generator/rpn_head.py
index 8a431eeac20..7c56d8d0a51 100644
--- a/ppdet/modeling/proposal_generator/rpn_head.py
+++ b/ppdet/modeling/proposal_generator/rpn_head.py
@@ -229,7 +229,7 @@ def _gen_proposal(self, scores, bbox_deltas, anchors, inputs):
topk_prob = rpn_prob_list[0].flatten()
bs_rois_collect.append(topk_rois)
- bs_rois_num_collect.append(paddle.shape(topk_rois)[0])
+ bs_rois_num_collect.append(paddle.shape(topk_rois)[0:1])
bs_rois_num_collect = paddle.concat(bs_rois_num_collect)
From 78c6b82fbcd633bdf6f27fa12d820a3581770ca5 Mon Sep 17 00:00:00 2001
From: wjm <897383984@qq.com>
Date: Fri, 21 Apr 2023 18:57:06 +0800
Subject: [PATCH 069/116] Fix bug in load weight (#8097)
* fix_weight_load
* fix_load_weight
---
ppdet/utils/checkpoint.py | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py
index ed0433764ba..f3dafd40f0f 100644
--- a/ppdet/utils/checkpoint.py
+++ b/ppdet/utils/checkpoint.py
@@ -155,7 +155,7 @@ def student_match(a, b):
return a == b or a.endswith("." + b) or b.endswith("." + a)
def match(a, b):
- if a.startswith('backbone.res5'):
+ if b.startswith('backbone.res5'):
b = b[9:]
return a == b or a.endswith("." + b)
@@ -174,15 +174,28 @@ def match(a, b):
max_id = match_matrix.argmax(1)
max_len = match_matrix.max(1)
max_id[max_len == 0] = -1
+ load_id = set(max_id)
+ load_id.discard(-1)
not_load_weight_name = []
+ if weight_keys[0].startswith('modelStudent') or weight_keys[0].startswith(
+ 'modelTeacher'):
+ for match_idx in range(len(max_id)):
+ if max_id[match_idx] == -1:
+ not_load_weight_name.append(model_keys[match_idx])
+ if len(not_load_weight_name) > 0:
+ logger.info('{} in model is not matched with pretrained weights, '
+ 'and its will be trained from scratch'.format(
+ not_load_weight_name))
- for match_idx in range(len(max_id)):
- if max_id[match_idx] == -1:
- not_load_weight_name.append(model_keys[match_idx])
- if len(not_load_weight_name) > 0:
- logger.info('{} in model is not matched with pretrained weights, '
- 'and its will be trained from scratch'.format(
- not_load_weight_name))
+ else:
+ for idx in range(len(weight_keys)):
+ if idx not in load_id:
+ not_load_weight_name.append(weight_keys[idx])
+
+ if len(not_load_weight_name) > 0:
+ logger.info('{} in pretrained weight is not used in the model, '
+ 'and its will not be loaded'.format(
+ not_load_weight_name))
matched_keys = {}
result_state_dict = {}
for model_id, weight_id in enumerate(max_id):
From 70dbf936794a162fcbca99ca494f4cdad7110f0b Mon Sep 17 00:00:00 2001
From: thinkthinking <61035602+thinkthinking@users.noreply.github.com>
Date: Mon, 24 Apr 2023 21:38:20 +0800
Subject: [PATCH 070/116] Update README_cn.md
---
README_cn.md | 52 +++++++++++++++++++++++-----------------------------
1 file changed, 23 insertions(+), 29 deletions(-)
diff --git a/README_cn.md b/README_cn.md
index a7c587f8ad0..3d46263aec5 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -77,40 +77,16 @@ PaddleDetection是一个基于PaddlePaddle的目标检测端到端开发套件
## 📣最新进展
-**🔥PaddleDetection v2.6版本更新解读**
+**🔥超越YOLOv8,飞桨推出精度最高的实时检测器RT-DETR!**
-

+
- - `v2.6版本版本更新解读文章传送门`:
- - [《PaddleDetection v2.6发布:目标小?数据缺?标注累?泛化差?PP新员逐一应对!》](https://mp.weixin.qq.com/s/SLITj5k120d_fQc7jEO8Vw)
- - `v2.6版本重点更新体验传送门`:
- - [PP-YOLOE+:高精度通用目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
- - [PP-YOLOE-R:旋转框检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rotate/ppyoloe_r)
- - [PP-YOLOE-SOD:小目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
- - [PP-YOLOE-DOD:密集检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe/application)
- - [PP-YOLOE+_t:超轻量通用目标检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
- - [PP-YOLOE+少样本学习方案](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/few-shot)
- - [PP-YOLOE+半监督学习方案](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/semi_det/baseline)
- - [PP-YOLOE+模型蒸馏方案](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe/distill)
- - [PP-Human:行人分析工具箱,推理提速、多路视频流支持](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/deploy/pipeline)
- - [PP-Vehicle:车辆分析工具箱,新增逆行、压线分析、推理提速、多路视频流支持](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/deploy/pipeline)
- - [半监督检测算法合集](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/semi_det)
- - [少样本学习算法合集](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/configs/few-shot)
- - [模型蒸馏算法合集](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe/distill)
- - [YOLO新增模型YOLOv8、YOLOv6-3.0](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs)
- - [目标检测算法新增DINO](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/dino)
- - [目标检测算法新增YOLOF](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolof)
- - [新增ViTDet系列检测模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/vitdet)
- - [新增目标检测算法CenterTrack](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mot/centertrack)
- - [新增旋转框检测算法FCOSR](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rotate/fcosr)
- - [新增实例分割算法QueryInst](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/queryinst)
- - [新增3D关键点检测算法Metro3d](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/pose3d)
- - [新增检测热力图可视化能力](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/docs/tutorials/GradCAM_cn.md)
- - [Roadmap of PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/issues/7892)
- - [飞桨黑客松第四期-PaddleDetection任务专区](https://github.com/PaddlePaddle/PaddleDetection/issues/7906)
+ - `RT-DETR解读文章传送门`:
+ - [《超越YOLOv8,飞桨推出精度最高的实时检测器RT-DETR!》](https://mp.weixin.qq.com/s/o03QM2rZNjHVto36gcV0Yw)
+ - `代码传送门`:[RT-DETR](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rtdetr)
## 👫开源社区
@@ -132,6 +108,24 @@ PaddleDetection是一个基于PaddlePaddle的目标检测端到端开发套件
- **🎈社区近期活动**
+ - **🔥PaddleDetection v2.6版本更新解读**
+
+
+

+
+
+
+ - `v2.6版本版本更新解读文章传送门`:[《PaddleDetection v2.6发布:目标小?数据缺?标注累?泛化差?PP新员逐一应对!》](https://mp.weixin.qq.com/s/SLITj5k120d_fQc7jEO8Vw)
+
+ - **🏆半监督检测**
+
+ - `文章传送门`:[CVPR 2023 | 单阶段半监督目标检测SOTA:ARSL](https://mp.weixin.qq.com/s/UZLIGL6va2KBfofC-nKG4g)
+ - `代码传送门`:[ARSL](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/semi_det)
+
+
+

+
+
- **👀YOLO系列专题**
From 2285e0c9c1e3de6b4d59d19016b71827e7ed78a9 Mon Sep 17 00:00:00 2001
From: JYChen
Date: Tue, 25 Apr 2023 10:34:26 +0800
Subject: [PATCH 071/116] fix training error brought by 0-d getitem (#8140)
* fix training error brought by 0-d getitem
* fix other model
---
ppdet/modeling/architectures/faster_rcnn.py | 27 +++++++++++--------
ppdet/modeling/heads/cascade_head.py | 2 +-
ppdet/modeling/heads/s2anet_head.py | 2 +-
ppdet/modeling/heads/tood_head.py | 10 +++++--
ppdet/modeling/proposal_generator/target.py | 6 ++---
.../transformers/deformable_transformer.py | 5 +++-
6 files changed, 33 insertions(+), 19 deletions(-)
diff --git a/ppdet/modeling/architectures/faster_rcnn.py b/ppdet/modeling/architectures/faster_rcnn.py
index 41c286fe02e..93fd0f9c64e 100644
--- a/ppdet/modeling/architectures/faster_rcnn.py
+++ b/ppdet/modeling/architectures/faster_rcnn.py
@@ -86,15 +86,16 @@ def _forward(self):
preds, _ = self.bbox_head(body_feats, rois, rois_num, None)
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
- bbox, bbox_num, nms_keep_idx = self.bbox_post_process(preds, (rois, rois_num),
- im_shape, scale_factor)
+ bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
+ preds, (rois, rois_num), im_shape, scale_factor)
# rescale the prediction back to origin image
bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
bbox, bbox_num, im_shape, scale_factor)
if self.use_extra_data:
- extra_data = {} # record the bbox output before nms, such like scores and nms_keep_idx
+ extra_data = {
+ } # record the bbox output before nms, such like scores and nms_keep_idx
"""extra_data:{
'scores': predict scores,
'nms_keep_idx': bbox index before nms,
@@ -102,12 +103,12 @@ def _forward(self):
"""
extra_data['scores'] = preds[1] # predict scores (probability)
# Todo: get logits output
- extra_data['nms_keep_idx'] = nms_keep_idx # bbox index before nms
+ extra_data[
+ 'nms_keep_idx'] = nms_keep_idx # bbox index before nms
return bbox_pred, bbox_num, extra_data
else:
return bbox_pred, bbox_num
-
def get_loss(self, ):
rpn_loss, bbox_loss = self._forward()
loss = {}
@@ -120,7 +121,11 @@ def get_loss(self, ):
def get_pred(self):
if self.use_extra_data:
bbox_pred, bbox_num, extra_data = self._forward()
- output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'extra_data': extra_data}
+ output = {
+ 'bbox': bbox_pred,
+ 'bbox_num': bbox_num,
+ 'extra_data': extra_data
+ }
else:
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
@@ -131,7 +136,7 @@ def target_bbox_forward(self, data):
if self.neck is not None:
body_feats = self.neck(body_feats)
rois = [roi for roi in data['gt_bbox']]
- rois_num = paddle.concat([paddle.shape(roi)[0] for roi in rois])
+ rois_num = paddle.concat([paddle.shape(roi)[0:1] for roi in rois])
preds, _ = self.bbox_head(body_feats, rois, rois_num, None, cot=True)
return preds
@@ -142,13 +147,13 @@ def relationship_learning(self, loader, num_classes_novel):
label_list = []
for step_id, data in enumerate(loader):
- _, bbox_prob = self.target_bbox_forward(data)
+ _, bbox_prob = self.target_bbox_forward(data)
batch_size = data['im_id'].shape[0]
for i in range(batch_size):
- num_bbox = data['gt_class'][i].shape[0]
+ num_bbox = data['gt_class'][i].shape[0]
train_labels = data['gt_class'][i]
train_labels_list.append(train_labels.numpy().squeeze(1))
- base_labels = bbox_prob.detach().numpy()[:,:-1]
+ base_labels = bbox_prob.detach().numpy()[:, :-1]
label_list.append(base_labels)
labels = np.concatenate(train_labels_list, 0)
@@ -159,4 +164,4 @@ def relationship_learning(self, loader, num_classes_novel):
this_class = probabilities[labels == i]
average = np.mean(this_class, axis=0, keepdims=True)
conditional.append(average)
- return np.concatenate(conditional)
\ No newline at end of file
+ return np.concatenate(conditional)
diff --git a/ppdet/modeling/heads/cascade_head.py b/ppdet/modeling/heads/cascade_head.py
index bb0beadbd38..d6f21d20cab 100644
--- a/ppdet/modeling/heads/cascade_head.py
+++ b/ppdet/modeling/heads/cascade_head.py
@@ -301,7 +301,7 @@ def _get_rois_from_boxes(self, boxes, im_shape):
keep = paddle.zeros([1], dtype='int32')
clip_box = paddle.gather(clip_box, keep)
rois.append(clip_box)
- rois_num = paddle.concat([paddle.shape(r)[0] for r in rois])
+ rois_num = paddle.concat([paddle.shape(r)[0:1] for r in rois])
return rois, rois_num
def _get_pred_bbox(self, deltas, proposals, weights):
diff --git a/ppdet/modeling/heads/s2anet_head.py b/ppdet/modeling/heads/s2anet_head.py
index 8abddcff135..99fd13a9a8a 100644
--- a/ppdet/modeling/heads/s2anet_head.py
+++ b/ppdet/modeling/heads/s2anet_head.py
@@ -360,7 +360,7 @@ def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
for i in range(bbox_num.shape[0]):
expand_shape = paddle.expand(origin_shape[i:i + 1, :],
[bbox_num[i], 2])
- scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
+ scale_y, scale_x = scale_factor[i, 0:1], scale_factor[i, 1:2]
scale = paddle.concat([
scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x,
scale_y
diff --git a/ppdet/modeling/heads/tood_head.py b/ppdet/modeling/heads/tood_head.py
index f463ef2397b..be840984f00 100644
--- a/ppdet/modeling/heads/tood_head.py
+++ b/ppdet/modeling/heads/tood_head.py
@@ -86,7 +86,10 @@ def _init_weights(self):
normal_(self.la_conv2.weight, std=0.001)
def forward(self, feat, avg_feat):
- b, _, h, w = get_static_shape(feat)
+ feat_shape = get_static_shape(feat)
+ b = feat_shape[0:1]
+ h = feat_shape[2:3]
+ w = feat_shape[3:4]
weight = F.relu(self.la_conv1(avg_feat))
weight = F.sigmoid(self.la_conv2(weight)).unsqueeze(-1)
feat = paddle.reshape(
@@ -204,7 +207,10 @@ def _init_weights(self):
constant_(self.reg_offset_conv2.bias)
def _reg_grid_sample(self, feat, offset, anchor_points):
- b, _, h, w = get_static_shape(feat)
+ feat_shape = get_static_shape(feat)
+ b = feat_shape[0:1]
+ h = feat_shape[2:3]
+ w = feat_shape[3:4]
feat = paddle.reshape(feat, [-1, 1, h, w])
offset = paddle.reshape(offset, [-1, 2, h, w]).transpose([0, 2, 3, 1])
grid_shape = paddle.concat([w, h]).astype('float32')
diff --git a/ppdet/modeling/proposal_generator/target.py b/ppdet/modeling/proposal_generator/target.py
index f95f906a27f..041b2c7915d 100644
--- a/ppdet/modeling/proposal_generator/target.py
+++ b/ppdet/modeling/proposal_generator/target.py
@@ -237,7 +237,7 @@ def generate_proposal_target(rpn_rois,
tgt_bboxes.append(sampled_bbox)
rois_with_gt.append(rois_per_image)
tgt_gt_inds.append(sampled_gt_ind)
- new_rois_num.append(paddle.shape(sampled_inds)[0])
+ new_rois_num.append(paddle.shape(sampled_inds)[0:1])
new_rois_num = paddle.concat(new_rois_num)
return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num
@@ -380,7 +380,7 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
mask_index.append(fg_inds)
mask_rois.append(fg_rois)
- mask_rois_num.append(paddle.shape(fg_rois)[0])
+ mask_rois_num.append(paddle.shape(fg_rois)[0:1])
tgt_classes.append(fg_classes)
tgt_masks.append(tgt_mask)
tgt_weights.append(weight)
@@ -672,7 +672,7 @@ def libra_generate_proposal_target(rpn_rois,
rois_with_gt.append(rois_per_image)
sampled_max_overlaps.append(sampled_overlap)
tgt_gt_inds.append(sampled_gt_ind)
- new_rois_num.append(paddle.shape(sampled_inds)[0])
+ new_rois_num.append(paddle.shape(sampled_inds)[0:1])
new_rois_num = paddle.concat(new_rois_num)
# rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num
return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num
diff --git a/ppdet/modeling/transformers/deformable_transformer.py b/ppdet/modeling/transformers/deformable_transformer.py
index fcb5a0aab1c..ab05704f4e8 100644
--- a/ppdet/modeling/transformers/deformable_transformer.py
+++ b/ppdet/modeling/transformers/deformable_transformer.py
@@ -486,7 +486,10 @@ def forward(self, src_feats, src_mask=None, *args, **kwargs):
spatial_shapes = []
valid_ratios = []
for level, src in enumerate(srcs):
- bs, _, h, w = paddle.shape(src)
+ src_shape = paddle.shape(src)
+ bs = src_shape[0:1]
+ h = src_shape[2:3]
+ w = src_shape[3:4]
spatial_shapes.append(paddle.concat([h, w]))
src = src.flatten(2).transpose([0, 2, 1])
src_flatten.append(src)
From 77333393c2b6c4e7a5a2a9169e54ddd6caa8d6c7 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Thu, 27 Apr 2023 17:18:57 +0800
Subject: [PATCH 072/116] fix typos (#8159)
---
configs/rtdetr/README.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index e68533e1acb..92e8ba67a75 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -73,7 +73,8 @@ python tools/eval.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
```shell
python tools/infer.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
- -o weights=https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams
+ -o weights=https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams \
+ --infer_img=./demo/000000570688.jpg
```
详情请参考[快速开始文档](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED.md).
From 4383428f3e9d316622032cadea2eaaa07d360b18 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Thu, 4 May 2023 10:46:26 +0800
Subject: [PATCH 073/116] fix infer layer (#8162)
---
ppdet/modeling/transformers/rtdetr_transformer.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/ppdet/modeling/transformers/rtdetr_transformer.py b/ppdet/modeling/transformers/rtdetr_transformer.py
index 672590edfde..0d4b108a37a 100644
--- a/ppdet/modeling/transformers/rtdetr_transformer.py
+++ b/ppdet/modeling/transformers/rtdetr_transformer.py
@@ -245,6 +245,7 @@ def forward(self,
elif i == self.eval_idx:
dec_out_logits.append(score_head[i](output))
dec_out_bboxes.append(inter_ref_bbox)
+ break
ref_points = inter_ref_bbox
ref_points_detach = inter_ref_bbox.detach(
From 5ef0264712fbf6d5e8b499e82011af19afb80636 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Thu, 4 May 2023 11:16:18 +0800
Subject: [PATCH 074/116] fix hgnetv2 conv lr multi (#8163)
---
ppdet/modeling/backbones/hgnet_v2.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/ppdet/modeling/backbones/hgnet_v2.py b/ppdet/modeling/backbones/hgnet_v2.py
index c4cc0787a23..88f989a2853 100644
--- a/ppdet/modeling/backbones/hgnet_v2.py
+++ b/ppdet/modeling/backbones/hgnet_v2.py
@@ -76,6 +76,7 @@ def __init__(self,
padding=padding
if isinstance(padding, str) else (kernel_size - 1) // 2,
groups=groups,
+ weight_attr=ParamAttr(learning_rate=lr_mult),
bias_attr=False)
self.bn = BatchNorm2D(
out_channels,
From 9ea3ac2dcaeaeb010c806facdb159eb80a9d8d9c Mon Sep 17 00:00:00 2001
From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com>
Date: Fri, 5 May 2023 10:23:24 +0800
Subject: [PATCH 075/116] update npu api (#8179)
---
deploy/python/infer.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index b167b04ce9c..1c7be6373b8 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -865,7 +865,7 @@ def load_predictor(model_dir,
elif device == 'NPU':
if config.lite_engine_enabled():
config.enable_lite_engine()
- config.enable_npu()
+ config.enable_custom_device('npu')
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
From 01f9e9d6c6f75ef054eeb047c9fa13dcce996330 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Sat, 6 May 2023 13:52:13 +0800
Subject: [PATCH 076/116] fix threshold abnormal value (#8180)
---
deploy/pptracking/cpp/include/jde_predictor.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/deploy/pptracking/cpp/include/jde_predictor.h b/deploy/pptracking/cpp/include/jde_predictor.h
index 32f5921061c..72d2360ac8c 100644
--- a/deploy/pptracking/cpp/include/jde_predictor.h
+++ b/deploy/pptracking/cpp/include/jde_predictor.h
@@ -57,6 +57,7 @@ class JDEPredictor {
preprocessor_.Init(config_.preprocess_info_);
LoadModel(model_dir, run_mode);
this->conf_thresh_ = config_.conf_thresh_;
+ this->threshold_ = config_.conf_thresh_;
}
// Load Paddle inference model
From b66308dee68d9edd5dd43bd45e0191f2d9342d23 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Mon, 8 May 2023 14:44:01 +0800
Subject: [PATCH 077/116] add rtdetr m (#8181)
---
configs/rtdetr/README.md | 1 +
configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml | 28 +++++++++++++++++++++++
2 files changed, 29 insertions(+)
create mode 100644 configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index 92e8ba67a75..1a8c0026863 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -18,6 +18,7 @@ RT-DETR是第一个实时端到端目标检测器。具体而言,我们设计
| Model | Epoch | backbone | input shape | $AP^{val}$ | $AP^{val}_{50}$| Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | Pretrained Model | config |
|:--------------:|:-----:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
+| RT-DETR-R50-scaled | 6x | ResNet-50 | 640 | 51.3 | - | - | - | 145 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_m_6x_coco.pdparams) | [config](./rtdetr_r50vd_m_6x_coco.yml)
| RT-DETR-R50 | 6x | ResNet-50 | 640 | 53.1 | 71.3 | 42 | 136 | 108 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams) | [config](./rtdetr_r50vd_6x_coco.yml)
| RT-DETR-R101 | 6x | ResNet-101 | 640 | 54.3 | 72.7 | 76 | 259 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams) | [config](./rtdetr_r101vd_6x_coco.yml)
| RT-DETR-L | 6x | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [config](rtdetr_hgnetv2_l_6x_coco.yml)
diff --git a/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml b/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml
new file mode 100644
index 00000000000..d4ab6f9f318
--- /dev/null
+++ b/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml
@@ -0,0 +1,28 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_6x.yml',
+ '_base_/rtdetr_r50vd.yml',
+ '_base_/rtdetr_reader.yml',
+]
+
+weights: output/rtdetr_r50vd_m_6x_coco/model_final
+find_unused_parameters: True
+log_iter: 200
+
+HybridEncoder:
+ hidden_dim: 256
+ use_encoder_idx: [2]
+ num_encoder_layers: 1
+ encoder_layer:
+ name: TransformerLayer
+ d_model: 256
+ nhead: 8
+ dim_feedforward: 1024
+ dropout: 0.
+ activation: 'gelu'
+ expansion: 0.5
+ depth_mult: 1.0
+
+RTDETRTransformer:
+ eval_idx: 2 # use 3rd decoder layer to eval
From d0076cb9d012944406a563b822bce1efc0a3c3e1 Mon Sep 17 00:00:00 2001
From: xiaoting <31891223+tink2123@users.noreply.github.com>
Date: Mon, 8 May 2023 17:35:53 +0800
Subject: [PATCH 078/116] update layout config (#8206)
---
.../application/layout_analysis/picodet_lcnet_x1_0_layout.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/configs/picodet/legacy_model/application/layout_analysis/picodet_lcnet_x1_0_layout.yml b/configs/picodet/legacy_model/application/layout_analysis/picodet_lcnet_x1_0_layout.yml
index b4bec58d7a9..db9a8c683ad 100644
--- a/configs/picodet/legacy_model/application/layout_analysis/picodet_lcnet_x1_0_layout.yml
+++ b/configs/picodet/legacy_model/application/layout_analysis/picodet_lcnet_x1_0_layout.yml
@@ -26,14 +26,14 @@ metric: COCO
num_classes: 5
TrainDataset:
- !COCODataSet
+ name: COCODataSet
image_dir: train
anno_path: train.json
dataset_dir: ./dataset/publaynet/
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
- !COCODataSet
+ name: COCODataSet
image_dir: val
anno_path: val.json
dataset_dir: ./dataset/publaynet/
From 6042fcd802c29b22000486514496a34edde86171 Mon Sep 17 00:00:00 2001
From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com>
Date: Mon, 8 May 2023 19:42:53 +0800
Subject: [PATCH 079/116] [npu-tipc] fix npu tipc (#8196)
* add npu inference support
* change aligned=false for npu
* fix typo
---
deploy/pipeline/pipeline.py | 4 +--
deploy/pipeline/pphuman/action_infer.py | 10 +++---
deploy/pipeline/pphuman/attr_infer.py | 6 ++--
deploy/pipeline/pphuman/reid.py | 2 +-
deploy/pipeline/pphuman/video_action_infer.py | 10 ++++--
deploy/pipeline/ppvehicle/vehicle_attr.py | 6 ++--
deploy/pipeline/ppvehicle/vehicle_plate.py | 4 +--
deploy/pptracking/python/det_infer.py | 11 ++++--
deploy/python/keypoint_infer.py | 6 ++--
deploy/python/mot_centertrack_infer.py | 34 ++++++++-----------
deploy/python/mot_jde_infer.py | 6 ++--
deploy/python/mot_keypoint_unite_infer.py | 4 +--
deploy/python/mot_keypoint_unite_utils.py | 2 +-
deploy/python/mot_sde_infer.py | 6 ++--
deploy/python/utils.py | 2 +-
test_tipc/test_train_inference_python_npu.sh | 7 ++--
16 files changed, 63 insertions(+), 57 deletions(-)
diff --git a/deploy/pipeline/pipeline.py b/deploy/pipeline/pipeline.py
index 76aebe341ab..eb82a4f5aba 100644
--- a/deploy/pipeline/pipeline.py
+++ b/deploy/pipeline/pipeline.py
@@ -1315,7 +1315,7 @@ def main():
parser = argsparser()
FLAGS = parser.parse_args()
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
main()
diff --git a/deploy/pipeline/pphuman/action_infer.py b/deploy/pipeline/pphuman/action_infer.py
index 45c04ad5198..731d560ff3a 100644
--- a/deploy/pipeline/pphuman/action_infer.py
+++ b/deploy/pipeline/pphuman/action_infer.py
@@ -41,7 +41,7 @@ class SkeletonActionRecognizer(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -285,7 +285,7 @@ class DetActionRecognizer(object):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -454,7 +454,7 @@ class ClsActionRecognizer(AttrDetector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -684,8 +684,8 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
diff --git a/deploy/pipeline/pphuman/attr_infer.py b/deploy/pipeline/pphuman/attr_infer.py
index bf9e80bec40..cd970c447b9 100644
--- a/deploy/pipeline/pphuman/attr_infer.py
+++ b/deploy/pipeline/pphuman/attr_infer.py
@@ -42,7 +42,7 @@ class AttrDetector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -341,8 +341,8 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
diff --git a/deploy/pipeline/pphuman/reid.py b/deploy/pipeline/pphuman/reid.py
index 21b725ce4c7..ad40a8e9fcb 100644
--- a/deploy/pipeline/pphuman/reid.py
+++ b/deploy/pipeline/pphuman/reid.py
@@ -32,7 +32,7 @@ class ReID(object):
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of per batch in inference, default 50 means at most
50 sub images can be made a batch and send into ReID model
diff --git a/deploy/pipeline/pphuman/video_action_infer.py b/deploy/pipeline/pphuman/video_action_infer.py
index 6a10355f385..3683f3cc5bd 100644
--- a/deploy/pipeline/pphuman/video_action_infer.py
+++ b/deploy/pipeline/pphuman/video_action_infer.py
@@ -47,7 +47,7 @@ class VideoActionRecognizer(object):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -105,6 +105,10 @@ def __init__(self,
if device == "GPU" or device == "gpu":
self.config.enable_use_gpu(8000, 0)
+ elif device == "XPU" or device == "xpu":
+ self.config.enable_xpu(10 * 1024 * 1024)
+ elif device == "NPU" or device == "npu":
+ self.config.enable_custom_device('npu')
else:
self.config.disable_gpu()
if self.enable_mkldnn:
@@ -308,7 +312,7 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
main()
diff --git a/deploy/pipeline/ppvehicle/vehicle_attr.py b/deploy/pipeline/ppvehicle/vehicle_attr.py
index eb1b9423b64..3cc28e22440 100644
--- a/deploy/pipeline/ppvehicle/vehicle_attr.py
+++ b/deploy/pipeline/ppvehicle/vehicle_attr.py
@@ -41,7 +41,7 @@ class VehicleAttr(AttrDetector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -143,8 +143,8 @@ def postprocess(self, inputs, result):
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
diff --git a/deploy/pipeline/ppvehicle/vehicle_plate.py b/deploy/pipeline/ppvehicle/vehicle_plate.py
index 01f260e7f18..c43b7ffa18e 100644
--- a/deploy/pipeline/ppvehicle/vehicle_plate.py
+++ b/deploy/pipeline/ppvehicle/vehicle_plate.py
@@ -325,7 +325,7 @@ def main():
parser = argsparser()
FLAGS = parser.parse_args()
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
main()
diff --git a/deploy/pptracking/python/det_infer.py b/deploy/pptracking/python/det_infer.py
index 3dec3e6d635..bff95526aaa 100644
--- a/deploy/pptracking/python/det_infer.py
+++ b/deploy/pptracking/python/det_infer.py
@@ -70,7 +70,7 @@ class Detector(object):
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -400,7 +400,7 @@ def load_predictor(model_dir,
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
@@ -432,8 +432,13 @@ def load_predictor(model_dir,
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
- config.enable_lite_engine()
+ if config.lite_engine_enabled():
+ config.enable_lite_engine()
config.enable_xpu(10 * 1024 * 1024)
+ elif device == 'NPU':
+ if config.lite_engine_enabled():
+ config.enable_lite_engine()
+ config.enable_custom_device('npu')
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
index 52e12fda74f..03695f10ebf 100644
--- a/deploy/python/keypoint_infer.py
+++ b/deploy/python/keypoint_infer.py
@@ -50,7 +50,7 @@ class KeyPointDetector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -408,8 +408,8 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
main()
diff --git a/deploy/python/mot_centertrack_infer.py b/deploy/python/mot_centertrack_infer.py
index c04a96876ae..3442ef5347a 100644
--- a/deploy/python/mot_centertrack_infer.py
+++ b/deploy/python/mot_centertrack_infer.py
@@ -65,7 +65,7 @@ class CenterTrack(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -130,7 +130,7 @@ def __init__(
vertical_ratio=vertical_ratio,
track_thresh=track_thresh,
pre_thresh=pre_thresh)
-
+
self.pre_image = None
def get_additional_inputs(self, dets, meta, with_hm=True):
@@ -173,11 +173,10 @@ def preprocess(self, image_list):
#inputs = create_inputs(im, im_info)
inputs = {}
inputs['image'] = np.array((im, )).astype('float32')
- inputs['im_shape'] = np.array(
- (im_info['im_shape'], )).astype('float32')
+ inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32')
inputs['scale_factor'] = np.array(
(im_info['scale_factor'], )).astype('float32')
-
+
inputs['trans_input'] = im_info['trans_input']
inputs['inp_width'] = im_info['inp_width']
inputs['inp_height'] = im_info['inp_height']
@@ -185,7 +184,7 @@ def preprocess(self, image_list):
inputs['scale'] = im_info['scale']
inputs['out_height'] = im_info['out_height']
inputs['out_width'] = im_info['out_width']
-
+
if self.pre_image is None:
self.pre_image = inputs['image']
# initializing tracker for the first frame
@@ -196,7 +195,7 @@ def preprocess(self, image_list):
# render input heatmap from tracker status
pre_hm = self.get_additional_inputs(
self.tracker.tracks, inputs, with_hm=True)
- inputs['pre_hm'] = pre_hm #.to_tensor(pre_hm)
+ inputs['pre_hm'] = pre_hm #.to_tensor(pre_hm)
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
@@ -256,8 +255,8 @@ def centertrack_post_process(self, dets, meta, out_thresh):
return preds
def tracking(self, inputs, det_results):
- result = self.centertrack_post_process(
- det_results, inputs, self.tracker.out_thresh)
+ result = self.centertrack_post_process(det_results, inputs,
+ self.tracker.out_thresh)
online_targets = self.tracker.update(result)
online_tlwhs, online_scores, online_ids = [], [], []
@@ -292,10 +291,7 @@ def predict(self, repeats=1):
tracking_tensor = self.predictor.get_output_handle(output_names[2])
np_tracking = tracking_tensor.copy_to_cpu()
- result = dict(
- bboxes=np_bboxes,
- cts=np_cts,
- tracking=np_tracking)
+ result = dict(bboxes=np_bboxes, cts=np_cts, tracking=np_tracking)
return result
def predict_image(self,
@@ -333,8 +329,8 @@ def predict_image(self,
# tracking
result_warmup = self.tracking(inputs, det_result)
self.det_times.tracking_time_s.start()
- online_tlwhs, online_scores, online_ids = self.tracking(inputs,
- det_result)
+ online_tlwhs, online_scores, online_ids = self.tracking(
+ inputs, det_result)
self.det_times.tracking_time_s.end()
self.det_times.img_num += 1
@@ -358,8 +354,8 @@ def predict_image(self,
# tracking process
self.det_times.tracking_time_s.start()
- online_tlwhs, online_scores, online_ids = self.tracking(inputs,
- det_result)
+ online_tlwhs, online_scores, online_ids = self.tracking(
+ inputs, det_result)
self.det_times.tracking_time_s.end()
self.det_times.img_num += 1
@@ -499,7 +495,7 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
main()
diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py
index 51a2562ee55..793d5271bf0 100644
--- a/deploy/python/mot_jde_infer.py
+++ b/deploy/python/mot_jde_infer.py
@@ -45,7 +45,7 @@ class JDE_Detector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -375,7 +375,7 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
main()
diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py
index edf394152c2..d69622b1a67 100644
--- a/deploy/python/mot_keypoint_unite_infer.py
+++ b/deploy/python/mot_keypoint_unite_infer.py
@@ -295,7 +295,7 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
main()
diff --git a/deploy/python/mot_keypoint_unite_utils.py b/deploy/python/mot_keypoint_unite_utils.py
index 246f46fe95d..48bc86e6fce 100644
--- a/deploy/python/mot_keypoint_unite_utils.py
+++ b/deploy/python/mot_keypoint_unite_utils.py
@@ -78,7 +78,7 @@ def argsparser():
"--device",
type=str,
default='cpu',
- help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+ help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
)
parser.add_argument(
"--run_benchmark",
diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py
index b4a487facdd..acfc940d50c 100644
--- a/deploy/python/mot_sde_infer.py
+++ b/deploy/python/mot_sde_infer.py
@@ -40,7 +40,7 @@ class SDE_Detector(Detector):
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
tracker_config (str): tracker config path
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -516,7 +516,7 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, XPU or NPU"
main()
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
index 7fc8148b3a0..b05a5d03dfe 100644
--- a/deploy/python/utils.py
+++ b/deploy/python/utils.py
@@ -64,7 +64,7 @@ def argsparser():
"--device",
type=str,
default='cpu',
- help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+ help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
)
parser.add_argument(
"--use_gpu",
diff --git a/test_tipc/test_train_inference_python_npu.sh b/test_tipc/test_train_inference_python_npu.sh
index 5b51ac7ac36..9ca6ee3a748 100644
--- a/test_tipc/test_train_inference_python_npu.sh
+++ b/test_tipc/test_train_inference_python_npu.sh
@@ -49,8 +49,8 @@ grep -n '.yml' $FILENAME | cut -d ":" -f 1 \
| while read line_num ; do
train_cmd=$(func_parser_value "${lines[line_num-1]}")
trainer_config=$(func_parser_config ${train_cmd})
- echo ${trainer_config}
sed -i 's/use_gpu/use_npu/g' "$REPO_ROOT_PATH/$trainer_config"
+ sed -i 's/aligned: True/aligned: False/g' "$REPO_ROOT_PATH/$trainer_config"
# fine use_gpu in those included yaml
sub_datalinee=`cat $REPO_ROOT_PATH/$trainer_config`
IFS=$'\n'
@@ -60,9 +60,10 @@ grep -n '.yml' $FILENAME | cut -d ":" -f 1 \
sub_config=${sub_lines[sub_line_num-1]}
dst=${#sub_config}-5
sub_path=$(func_parser_dir "${trainer_config}")
- sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config:3:${dst}}"
- echo ${sub_config_path}
+ sub_config_name=$(echo "$sub_config" | awk -F"'" '{ print $2 }')
+ sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config_name}"
sed -i 's/use_gpu/use_npu/g' "$sub_config_path"
+ sed -i 's/aligned: True/aligned: False/g' "$sub_config_path"
done
done
# pass parameters to test_train_inference_python.sh
From 73291012622c0f0845912d77850cdd9e39ed88c5 Mon Sep 17 00:00:00 2001
From: xiaoluomi <49263480+xiaoluomi@users.noreply.github.com>
Date: Tue, 9 May 2023 10:34:06 +0800
Subject: [PATCH 080/116] add rtdetr act demo (#8211)
* add rtdetr act demo
* add act rtdetr
---
deploy/auto_compression/README.md | 21 ++++++++++++
.../configs/rtdetr_hgnetv2_l_qat_dis.yaml | 32 +++++++++++++++++++
.../configs/rtdetr_hgnetv2_x_qat_dis.yaml | 32 +++++++++++++++++++
.../configs/rtdetr_r101vd_qat_dis.yaml | 32 +++++++++++++++++++
.../configs/rtdetr_r50vd_qat_dis.yaml | 32 +++++++++++++++++++
.../configs/rtdetr_reader.yml | 26 +++++++++++++++
6 files changed, 175 insertions(+)
create mode 100644 deploy/auto_compression/configs/rtdetr_hgnetv2_l_qat_dis.yaml
create mode 100644 deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml
create mode 100644 deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml
create mode 100644 deploy/auto_compression/configs/rtdetr_r50vd_qat_dis.yaml
create mode 100644 deploy/auto_compression/configs/rtdetr_reader.yml
diff --git a/deploy/auto_compression/README.md b/deploy/auto_compression/README.md
index 7b50430693b..11e50138663 100644
--- a/deploy/auto_compression/README.md
+++ b/deploy/auto_compression/README.md
@@ -60,6 +60,27 @@ git
- mAP的指标均在COCO val2017数据集中评测得到,IoU=0.5:0.95。
+### RT-DETR
+
+| 模型 | Base mAP | ACT量化mAP | TRT-FP32 | TRT-FP16 | TRT-INT8 | 配置文件 | 量化模型 |
+| :---------------- | :------- | :--------: | :------: | :------: | :--------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| RT-DETR-R50 | 53.1 | 53.0 | 32.05ms | 9.12ms | **6.96ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_r50vd_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_r50vd_6x_coco_quant.tar) |
+| RT-DETR-R101 | 54.3 | 54.1 | 54.13ms | 12.68ms | **9.20ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_r101vd_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_r101vd_6x_coco_quant.tar) |
+| RT-DETR-HGNetv2-L | 53.0 | 52.9 | 26.16ms | 8.54ms | **6.65ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_hgnetv2_l_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_hgnetv2_l_6x_coco_quant.tar) |
+| RT-DETR-HGNetv2-X | 54.8 | 54.6 | 49.22ms | 12.50ms | **9.24ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_hgnetv2_x_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_hgnetv2_x_6x_coco_quant.tar) |
+
+- 上表测试环境:Tesla T4,TensorRT 8.6.0,CUDA 11.7,batch_size=1。
+
+| 模型 | Base mAP | ACT量化mAP | TRT-FP32 | TRT-FP16 | TRT-INT8 | 配置文件 | 量化模型 |
+| :---------------- | :------- | :--------: | :------: | :------: | :--------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
+| RT-DETR-R50 | 53.1 | 53.0 | 9.64ms | 5.00ms | **3.99ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_r50vd_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_r50vd_6x_coco_quant.tar) |
+| RT-DETR-R101 | 54.3 | 54.1 | 14.93ms | 7.15ms | **5.12ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_r101vd_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_r101vd_6x_coco_quant.tar) |
+| RT-DETR-HGNetv2-L | 53.0 | 52.9 | 8.17ms | 4.77ms | **4.00ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_hgnetv2_l_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_hgnetv2_l_6x_coco_quant.tar) |
+| RT-DETR-HGNetv2-X | 54.8 | 54.6 | 12.81ms | 6.97ms | **5.32ms** | [config](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/detection/configs/rtdetr_hgnetv2_x_qat_dis.yaml) | [Model](https://bj.bcebos.com/v1/paddle-slim-models/act/rtdetr_hgnetv2_x_6x_coco_quant.tar) |
+
+- 上表测试环境:A10,TensorRT 8.6.0,CUDA 11.6,batch_size=1。
+- mAP的指标均在COCO val2017数据集中评测得到,IoU=0.5:0.95。
+
## 3. 自动压缩流程
#### 3.1 准备环境
diff --git a/deploy/auto_compression/configs/rtdetr_hgnetv2_l_qat_dis.yaml b/deploy/auto_compression/configs/rtdetr_hgnetv2_l_qat_dis.yaml
new file mode 100644
index 00000000000..83ebd78b1a3
--- /dev/null
+++ b/deploy/auto_compression/configs/rtdetr_hgnetv2_l_qat_dis.yaml
@@ -0,0 +1,32 @@
+
+Global:
+ reader_config: configs/rtdetr_reader.yml
+ include_nms: True
+ Evaluation: True
+ model_dir: ./rtdetr_hgnetv2_l_6x_coco/
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+QuantAware:
+ onnx_format: true
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+ - matmul_v2
+
+TrainConfig:
+ train_iter: 200
+ eval_iter: 50
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 10000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
diff --git a/deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml b/deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml
new file mode 100644
index 00000000000..c4e2889bbd3
--- /dev/null
+++ b/deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml
@@ -0,0 +1,32 @@
+
+Global:
+ reader_config: configs/rtdetr_reader.yml
+ include_nms: True
+ Evaluation: True
+  model_dir: ./rtdetr_hgnetv2_x_6x_coco/
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+QuantAware:
+ onnx_format: true
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+ - matmul_v2
+
+TrainConfig:
+ train_iter: 500
+ eval_iter: 100
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 10000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
diff --git a/deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml b/deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml
new file mode 100644
index 00000000000..bd96d085e8a
--- /dev/null
+++ b/deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml
@@ -0,0 +1,32 @@
+
+Global:
+ reader_config: configs/rtdetr_reader.yml
+ include_nms: True
+ Evaluation: True
+  model_dir: ./rtdetr_r101vd_6x_coco/
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+QuantAware:
+ onnx_format: true
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+ - matmul_v2
+
+TrainConfig:
+ train_iter: 200
+ eval_iter: 50
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 10000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
diff --git a/deploy/auto_compression/configs/rtdetr_r50vd_qat_dis.yaml b/deploy/auto_compression/configs/rtdetr_r50vd_qat_dis.yaml
new file mode 100644
index 00000000000..c4e2889bbd3
--- /dev/null
+++ b/deploy/auto_compression/configs/rtdetr_r50vd_qat_dis.yaml
@@ -0,0 +1,32 @@
+
+Global:
+ reader_config: configs/rtdetr_reader.yml
+ include_nms: True
+ Evaluation: True
+ model_dir: ./rtdetr_r50vd_6x_coco/
+ model_filename: model.pdmodel
+ params_filename: model.pdiparams
+
+Distillation:
+ alpha: 1.0
+ loss: soft_label
+
+QuantAware:
+ onnx_format: true
+ activation_quantize_type: 'moving_average_abs_max'
+ quantize_op_types:
+ - conv2d
+ - depthwise_conv2d
+ - matmul_v2
+
+TrainConfig:
+ train_iter: 500
+ eval_iter: 100
+ learning_rate:
+ type: CosineAnnealingDecay
+ learning_rate: 0.00003
+ T_max: 10000
+ optimizer_builder:
+ optimizer:
+ type: SGD
+ weight_decay: 4.0e-05
diff --git a/deploy/auto_compression/configs/rtdetr_reader.yml b/deploy/auto_compression/configs/rtdetr_reader.yml
new file mode 100644
index 00000000000..7b213ffa202
--- /dev/null
+++ b/deploy/auto_compression/configs/rtdetr_reader.yml
@@ -0,0 +1,28 @@
+metric: COCO
+num_classes: 80
+
+# Dataset configuration
+TrainDataset:
+  !COCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco/
+
+EvalDataset:
+  !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco/
+
+worker_num: 0
+
+# preprocess reader in test
+EvalReader:
+ sample_transforms:
+ - Decode: {}
+ - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
+ - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
+ - Permute: {}
+ batch_size: 1
+ shuffle: false
+ drop_last: false
From 597ee656bd2f8ab4da5e5d0f7b1dbd1d3dd37ff0 Mon Sep 17 00:00:00 2001
From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com>
Date: Tue, 9 May 2023 10:43:18 +0800
Subject: [PATCH 081/116] fix npu tipc script (#8213)
---
deploy/pptracking/python/mot_jde_infer.py | 6 +++---
deploy/pptracking/python/mot_sde_infer.py | 6 +++---
deploy/pptracking/python/mot_utils.py | 2 +-
test_tipc/test_train_inference_python_npu.sh | 2 ++
4 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/deploy/pptracking/python/mot_jde_infer.py b/deploy/pptracking/python/mot_jde_infer.py
index e3a9958f7a7..d381a211fca 100644
--- a/deploy/pptracking/python/mot_jde_infer.py
+++ b/deploy/pptracking/python/mot_jde_infer.py
@@ -45,7 +45,7 @@ class JDE_Detector(Detector):
"""
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -502,7 +502,7 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, NPU or XPU"
main()
diff --git a/deploy/pptracking/python/mot_sde_infer.py b/deploy/pptracking/python/mot_sde_infer.py
index 499ee2c2dfe..5a2693eecaa 100644
--- a/deploy/pptracking/python/mot_sde_infer.py
+++ b/deploy/pptracking/python/mot_sde_infer.py
@@ -46,7 +46,7 @@ class SDE_Detector(Detector):
Args:
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
tracker_config (str): tracker config path
- device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+ device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
batch_size (int): size of pre batch in inference
trt_min_shape (int): min shape for dynamic shape in trt
@@ -946,7 +946,7 @@ def main():
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
- assert FLAGS.device in ['CPU', 'GPU', 'XPU'
- ], "device should be CPU, GPU or XPU"
+ assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+ ], "device should be CPU, GPU, NPU or XPU"
main()
diff --git a/deploy/pptracking/python/mot_utils.py b/deploy/pptracking/python/mot_utils.py
index 9d7b18f921d..055d3817a2f 100644
--- a/deploy/pptracking/python/mot_utils.py
+++ b/deploy/pptracking/python/mot_utils.py
@@ -64,7 +64,7 @@ def argsparser():
"--device",
type=str,
default='cpu',
- help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+ help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
)
parser.add_argument(
"--use_gpu",
diff --git a/test_tipc/test_train_inference_python_npu.sh b/test_tipc/test_train_inference_python_npu.sh
index 9ca6ee3a748..e759738d359 100644
--- a/test_tipc/test_train_inference_python_npu.sh
+++ b/test_tipc/test_train_inference_python_npu.sh
@@ -33,6 +33,8 @@ FILENAME=$1
# change gpu to npu in tipc txt configs
sed -i "s/use_gpu:True/use_npu:True/g" $FILENAME
sed -i "s/--device:gpu|cpu/--device:npu|cpu/g" $FILENAME
+ sed -i "s/--device:gpu/--device:npu/g" $FILENAME
+ sed -i "s/--device:cpu|gpu/--device:cpu|npu/g" $FILENAME
sed -i "s/trainer:pact_train/trainer:norm_train/g" $FILENAME
sed -i "s/trainer:fpgm_train/trainer:norm_train/g" $FILENAME
sed -i "s/--slim_config _template_pact/ /g" $FILENAME
From ae2ff473d1b55014c75cb3aadad5416359a59e3b Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Tue, 9 May 2023 17:26:56 +0800
Subject: [PATCH 082/116] update rtdetr doc (#8207)
* update rtdetr doc
---
configs/rtdetr/README.md | 56 ++++++++++++++++++++++++++++++++++------
1 file changed, 48 insertions(+), 8 deletions(-)
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index 1a8c0026863..eb208af4163 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -2,8 +2,9 @@
## 最新动态
-- 发布RT-DETR-R50和RT-DETR-R101的代码和预训练模型。
-- 发布RT-DETR-L和RT-DETR-X的代码和预训练模型。
+- 发布RT-DETR-R50和RT-DETR-R101的代码和预训练模型
+- 发布RT-DETR-L和RT-DETR-X的代码和预训练模型
+- 发布RT-DETR-R50-Scaled伸缩更小的范例模型
## 简介
@@ -11,18 +12,19 @@ RT-DETR是第一个实时端到端目标检测器。具体而言,我们设计
若要了解更多细节,请参考我们的论文[paper](https://arxiv.org/abs/2304.08069).
-

+
## 模型
| Model | Epoch | backbone | input shape | $AP^{val}$ | $AP^{val}_{50}$| Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | Pretrained Model | config |
|:--------------:|:-----:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
-| RT-DETR-R50-scaled | 6x | ResNet-50 | 640 | 51.3 | - | - | - | 145 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_m_6x_coco.pdparams) | [config](./rtdetr_r50vd_m_6x_coco.yml)
| RT-DETR-R50 | 6x | ResNet-50 | 640 | 53.1 | 71.3 | 42 | 136 | 108 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams) | [config](./rtdetr_r50vd_6x_coco.yml)
| RT-DETR-R101 | 6x | ResNet-101 | 640 | 54.3 | 72.7 | 76 | 259 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams) | [config](./rtdetr_r101vd_6x_coco.yml)
| RT-DETR-L | 6x | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [config](rtdetr_hgnetv2_l_6x_coco.yml)
| RT-DETR-X | 6x | HGNetv2 | 640 | 54.8 | 73.1 | 67 | 234 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_x_6x_coco.pdparams) | [config](rtdetr_hgnetv2_x_6x_coco.yml)
+| RT-DETR-R50-Scaled | 6x | ResNet-50 | 640 | 51.3 | 69.6 | 35 | 100 | 145 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_m_6x_coco.pdparams) | [config](./rtdetr_r50vd_m_6x_coco.yml)
+
**注意事项:**
- RT-DETR 使用4个GPU训练。
@@ -84,8 +86,6 @@ python tools/infer.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml \
## 部署
-### 导出及转换模型
-
1. 导出模型
@@ -117,11 +117,15 @@ paddle2onnx --model_dir=./output_inference/rtdetr_r50vd_6x_coco/ \
--opset_version 16 \
--save_file rtdetr_r50vd_6x_coco.onnx
```
+
-- 转换成TensorRT(可选):
+
+3. 转换成TensorRT(可选)
+
+- 确保TensorRT的版本>=8.5.1
+- 推理可以[参考例子的部分代码](https://github.com/lyuwenyu/AI/tree/master/rt)或者其他网络资源
```shell
-# 保证TensorRT的版本>=8.5.1
trtexec --onnx=./rtdetr_r50vd_6x_coco.onnx \
--workspace=4096 \
--shapes=image:1x3x640x640 \
@@ -130,8 +134,44 @@ trtexec --onnx=./rtdetr_r50vd_6x_coco.onnx \
--fp16
```
+-
+
+
+
+## 其他
+
+
+1. 参数量和计算量统计
+可以使用以下代码片段实现参数量和计算量的统计
+
+```
+import paddle
+from ppdet.core.workspace import load_config, merge_config
+from ppdet.core.workspace import create
+
+cfg_path = './configs/dino/ppdetr_r50_3x_coco.yml'
+cfg = load_config(cfg_path)
+model = create(cfg.architecture)
+
+blob = {
+ 'image': paddle.randn([1, 3, 640, 640]),
+ 'im_shape': paddle.to_tensor([[640], [640]]),
+ 'scale_factor': paddle.to_tensor([[1.], [1.]])
+}
+paddle.flops(model, None, blob, custom_ops=None, print_detail=False)
+```
+
+
+
+
+2. YOLOs端到端速度测速
+
+- 可以[参考的部分代码](https://github.com/lyuwenyu/AI/tree/master/rt)或者其他网络资源
+
+
+
## 引用RT-DETR
如果需要在你的研究中使用RT-DETR,请通过以下方式引用我们的论文:
```
From ac9cc8ec8ef5317e98b5ed7dc1960e136a138aa8 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Tue, 9 May 2023 18:55:53 +0800
Subject: [PATCH 083/116] update doc, test=document_fix (#8220)
---
configs/rtdetr/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index eb208af4163..6af6645e51b 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -149,7 +149,7 @@ import paddle
from ppdet.core.workspace import load_config, merge_config
from ppdet.core.workspace import create
-cfg_path = './configs/dino/ppdetr_r50_3x_coco.yml'
+cfg_path = './configs/rtdetr/rtdetr_r50vd_6x_coco.yml'
cfg = load_config(cfg_path)
model = create(cfg.architecture)
From 13bed646036f38dd81273b0a93b43508cf14d3f7 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Wed, 10 May 2023 11:57:50 +0800
Subject: [PATCH 084/116] update contribution, test=document_fix (#8218)
---
docs/contribution/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/contribution/README.md b/docs/contribution/README.md
index e8adc2a4847..dc285947009 100644
--- a/docs/contribution/README.md
+++ b/docs/contribution/README.md
@@ -28,6 +28,6 @@ PaddleDetection非常欢迎你加入到飞桨社区的开源建设中,你可
- 感谢[yangyudong](https://github.com/yangyudong2020), [hchhtc123](https://github.com/hchhtc123) 开发PP-Tracking GUI界面
- 感谢Shigure19 开发PP-TinyPose健身APP
- 感谢[manangoel99](https://github.com/manangoel99)贡献Wandb可视化方式
-
+- 感谢百度ACG政务产品部统管通办研发组视觉研发团队贡献PP-YOLOE蒸馏方案
非常感谢大家为飞桨贡献!共建飞桨繁荣社区!
From ec37e66685f3bc5a38cd13f60685acea175922e1 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Wed, 10 May 2023 19:04:20 +0800
Subject: [PATCH 085/116] update, test=document_fix (#8228)
---
configs/rtdetr/README.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index 6af6645e51b..a07d0d8f8ea 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -123,7 +123,7 @@ paddle2onnx --model_dir=./output_inference/rtdetr_r50vd_6x_coco/ \
3. 转换成TensorRT(可选)
- 确保TensorRT的版本>=8.5.1
-- 推理可以[参考例子的部分代码](https://github.com/lyuwenyu/AI/tree/master/rt)或者其他网络资源
+- TRT推理可以参考[RT-DETR](https://github.com/lyuwenyu/RT-DETR)的部分代码或者其他网络资源
```shell
trtexec --onnx=./rtdetr_r50vd_6x_coco.onnx \
@@ -163,10 +163,10 @@ paddle.flops(model, None, blob, custom_ops=None, print_detail=False)
-
+
2. YOLOs端到端速度测速
-- 可以[参考的部分代码](https://github.com/lyuwenyu/AI/tree/master/rt)或者其他网络资源
+- 可以参考[RT-DETR](https://github.com/lyuwenyu/RT-DETR) benchmark部分或者其他网络资源
From 33d8697f1fc5d70c5dadcdccf895735a3b59b2b1 Mon Sep 17 00:00:00 2001
From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com>
Date: Wed, 10 May 2023 19:11:53 +0800
Subject: [PATCH 086/116] [npu-tipc] Minimize coco datasets for npu tipc
(#8226)
* change coco dataset for npu tipc
* update link
---
test_tipc/test_train_inference_python_npu.sh | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/test_tipc/test_train_inference_python_npu.sh b/test_tipc/test_train_inference_python_npu.sh
index e759738d359..13fa7114380 100644
--- a/test_tipc/test_train_inference_python_npu.sh
+++ b/test_tipc/test_train_inference_python_npu.sh
@@ -68,6 +68,22 @@ grep -n '.yml' $FILENAME | cut -d ":" -f 1 \
sed -i 's/aligned: True/aligned: False/g' "$sub_config_path"
done
done
+
+
+# NPU lacks operators such as deformable_conv, depthwise_conv2d_transpose,
+# which will affect ips. Here, we reduce the number of coco training sets
+# for npu tipc benchmark. This is a temporary hack.
+# TODO(duanyanhui): add vision ops for npu
+train_img_num=`cat $REPO_ROOT_PATH/dataset/coco/annotations/instances_train2017.json | grep -o file_name | wc -l`
+exp_num=8
+if [ ${train_img_num} != ${exp_num} ];then
+ echo "Replace with npu tipc coco training annotations"
+ mv $REPO_ROOT_PATH/dataset/coco/annotations/instances_train2017.json $REPO_ROOT_PATH/dataset/coco/annotations/instances_train2017_bak.json
+ wget https://paddle-device.bj.bcebos.com/tipc/instances_train2017.json
+ mv instances_train2017.json $REPO_ROOT_PATH/dataset/coco/annotations/
+ rm -f instances_train2017.json
+fi
+
# pass parameters to test_train_inference_python.sh
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $2"
echo $cmd
From 2fedefa3f2211fe04f7bf69d412124a09eacba3c Mon Sep 17 00:00:00 2001
From: zhoujun <572459439@qq.com>
Date: Mon, 15 May 2023 16:09:28 +0800
Subject: [PATCH 087/116] add rt-detr, picodet-lcnet1x, picodet-lcnet2.5x to
tipc (#8202)
* add ppyoloe and picodet config
* add config
* update ppyoloe config
* update picodet lcnet 2.5x
* update picodet lcnet tipc
* update picodet lcnet2.5x
* make picodet stable
* update batchsize
* update ppyoloe tipc batchsize
* update detr config
* update dino config
* update ppyoloe config
---------
Co-authored-by: user3984 <2287245853@qq.com>
---
.../picodet_lcnet_1_0x_416_coco.yml | 26 ++++++++
.../picodet_lcnet_2_5x_416_coco.yml | 26 ++++++++
ppdet/modeling/heads/simota_head.py | 8 +--
..._r50_4scale_1x_coco_train_infer_python.txt | 2 +-
...lcnet_1_0x_416_coco_train_infer_python.txt | 60 ++++++++++++++++++
...lcnet_2_5x_416_coco_train_infer_python.txt | 60 ++++++++++++++++++
.../picodet_s_320_coco_train_infer_python.txt | 2 +-
...plus_crn_l_80e_coco_train_infer_python.txt | 62 +++++++++++++++++++
...r_hgnetv2_l_6x_coco_train_infer_python.txt | 58 +++++++++++++++++
9 files changed, 298 insertions(+), 6 deletions(-)
create mode 100644 configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml
create mode 100644 configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml
create mode 100644 test_tipc/configs/picodet/picodet_lcnet_1_0x_416_coco_train_infer_python.txt
create mode 100644 test_tipc/configs/picodet/picodet_lcnet_2_5x_416_coco_train_infer_python.txt
create mode 100644 test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_l_80e_coco_train_infer_python.txt
create mode 100644 test_tipc/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco_train_infer_python.txt
diff --git a/configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml b/configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml
new file mode 100644
index 00000000000..bf4fb1cac9d
--- /dev/null
+++ b/configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+ '../../../datasets/coco_detection.yml',
+ '../../../runtime.yml',
+ '../_base_/picodet_esnet.yml',
+ '../_base_/optimizer_300e.yml',
+ '../_base_/picodet_416_reader.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x1_0_pretrained.pdparams
+weights: output/picodet_lcnet_1_5x_416_coco/model_final
+find_unused_parameters: True
+use_ema: true
+cycle_epoch: 40
+snapshot_epoch: 10
+
+PicoDet:
+ backbone: LCNet
+ neck: CSPPAN
+ head: PicoHead
+
+LCNet:
+ scale: 1.0
+ feature_maps: [3, 4, 5]
+
+TrainReader:
+ batch_size: 90
\ No newline at end of file
diff --git a/configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml b/configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml
new file mode 100644
index 00000000000..6708cba3e88
--- /dev/null
+++ b/configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+ '../../../datasets/coco_detection.yml',
+ '../../../runtime.yml',
+ '../_base_/picodet_esnet.yml',
+ '../_base_/optimizer_300e.yml',
+ '../_base_/picodet_416_reader.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/LCNet_x2_5_ssld_pretrained.pdparams
+weights: output/picodet_lcnet_1_5x_416_coco/model_final
+find_unused_parameters: True
+use_ema: true
+cycle_epoch: 40
+snapshot_epoch: 10
+
+PicoDet:
+ backbone: LCNet
+ neck: CSPPAN
+ head: PicoHead
+
+LCNet:
+ scale: 2.5
+ feature_maps: [3, 4, 5]
+
+TrainReader:
+ batch_size: 48
\ No newline at end of file
diff --git a/ppdet/modeling/heads/simota_head.py b/ppdet/modeling/heads/simota_head.py
index e74f017570e..037c395c68d 100644
--- a/ppdet/modeling/heads/simota_head.py
+++ b/ppdet/modeling/heads/simota_head.py
@@ -179,7 +179,7 @@ def get_loss(self, head_outs, gt_meta):
num_level_anchors)
num_total_pos = sum(pos_num_l)
try:
- paddle.distributed.all_reduce(num_total_pos)
+ paddle.distributed.all_reduce(paddle.to_tensor(num_total_pos))
num_total_pos = paddle.clip(
num_total_pos / paddle.distributed.get_world_size(), min=1.)
except:
@@ -256,7 +256,7 @@ def get_loss(self, head_outs, gt_meta):
avg_factor = sum(avg_factor)
try:
- paddle.distributed.all_reduce(avg_factor)
+ paddle.distributed.all_reduce(paddle.to_tensor(avg_factor))
avg_factor = paddle.clip(
avg_factor / paddle.distributed.get_world_size(), min=1)
except:
@@ -397,7 +397,7 @@ def get_loss(self, head_outs, gt_meta):
num_level_anchors)
num_total_pos = sum(pos_num_l)
try:
- paddle.distributed.all_reduce(num_total_pos)
+ paddle.distributed.all_reduce(paddle.to_tensor(num_total_pos))
num_total_pos = paddle.clip(
num_total_pos / paddle.distributed.get_world_size(), min=1.)
except:
@@ -477,7 +477,7 @@ def get_loss(self, head_outs, gt_meta):
avg_factor = sum(avg_factor)
try:
- paddle.distributed.all_reduce(avg_factor)
+ paddle.distributed.all_reduce(paddle.to_tensor(avg_factor))
avg_factor = paddle.clip(
avg_factor / paddle.distributed.get_world_size(), min=1)
except:
diff --git a/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
index ee694fdd7ab..475397867e9 100644
--- a/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
+++ b/test_tipc/configs/dino/dino_r50_4scale_1x_coco_train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./dataset/coco/test2017/
filename:null
##
trainer:norm_train
-norm_train:tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml -o
+norm_train:tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml -o worker_num=32
pact_train:tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --slim_config _template_pact -o
fpgm_train:tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --slim_config _template_fpgm -o
distill_train:null
diff --git a/test_tipc/configs/picodet/picodet_lcnet_1_0x_416_coco_train_infer_python.txt b/test_tipc/configs/picodet/picodet_lcnet_1_0x_416_coco_train_infer_python.txt
new file mode 100644
index 00000000000..f79e641526e
--- /dev/null
+++ b/test_tipc/configs/picodet/picodet_lcnet_1_0x_416_coco_train_infer_python.txt
@@ -0,0 +1,60 @@
+===========================train_params===========================
+model_name:picodet_lcnet_1_0x_416_coco
+python:python3.7
+gpu_list:0|0,1
+use_gpu:True
+auto_cast:null
+epoch:lite_train_lite_infer=1|lite_train_whole_infer=1|whole_train_whole_infer=300
+save_dir:null
+TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_train_whole_infer=80
+pretrain_weights:null
+trained_model_name:model_final.pdparams
+train_infer_img_dir:./dataset/coco/test2017/
+filename:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml -o
+pact_train:tools/train.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml --slim_config _template_pact -o
+fpgm_train:tools/train.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml --slim_config _template_fpgm -o
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml -o
+null:null
+##
+===========================infer_params===========================
+--output_dir:./output_inference
+weights:null
+norm_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml -o
+pact_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml --slim_config _template_pact -o
+fpgm_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml --slim_config _template_fpgm -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_0x_416_coco.yml --slim_config configs/slim/post_quant/yolov3_darknet53_ptq.yml -o
+##
+infer_mode:norm|kl_quant
+infer_quant:False|True
+inference:./deploy/python/infer.py
+--device:gpu|cpu
+--enable_mkldnn:False
+--cpu_threads:4
+--batch_size:1
+--use_tensorrt:null
+--run_mode:paddle
+--model_dir:
+--image_dir:./dataset/coco/test2017/
+--save_log_path:null
+--run_benchmark:False
+null:null
+===========================train_benchmark_params==========================
+batch_size:90
+fp_items:fp32|fp16
+epoch:1
+repeat:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:null
+===========================infer_benchmark_params===========================
+numpy_infer_input:3x416x416_2.npy
\ No newline at end of file
diff --git a/test_tipc/configs/picodet/picodet_lcnet_2_5x_416_coco_train_infer_python.txt b/test_tipc/configs/picodet/picodet_lcnet_2_5x_416_coco_train_infer_python.txt
new file mode 100644
index 00000000000..eeb2cf25f64
--- /dev/null
+++ b/test_tipc/configs/picodet/picodet_lcnet_2_5x_416_coco_train_infer_python.txt
@@ -0,0 +1,60 @@
+===========================train_params===========================
+model_name:picodet_lcnet_2_5x_416_coco
+python:python3.7
+gpu_list:0|0,1
+use_gpu:True
+auto_cast:null
+epoch:lite_train_lite_infer=1|lite_train_whole_infer=1|whole_train_whole_infer=300
+save_dir:null
+TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_train_whole_infer=80
+pretrain_weights:null
+trained_model_name:model_final.pdparams
+train_infer_img_dir:./dataset/coco/test2017/
+filename:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml -o
+pact_train:tools/train.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml --slim_config _template_pact -o
+fpgm_train:tools/train.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml --slim_config _template_fpgm -o
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml -o
+null:null
+##
+===========================infer_params===========================
+--output_dir:./output_inference
+weights:null
+norm_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml -o
+pact_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml --slim_config _template_pact -o
+fpgm_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml --slim_config _template_fpgm -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_2_5x_416_coco.yml --slim_config configs/slim/post_quant/yolov3_darknet53_ptq.yml -o
+##
+infer_mode:norm|kl_quant
+infer_quant:False|True
+inference:./deploy/python/infer.py
+--device:gpu|cpu
+--enable_mkldnn:False
+--cpu_threads:4
+--batch_size:1
+--use_tensorrt:null
+--run_mode:paddle
+--model_dir:
+--image_dir:./dataset/coco/test2017/
+--save_log_path:null
+--run_benchmark:False
+null:null
+===========================train_benchmark_params==========================
+batch_size:48
+fp_items:fp32|fp16
+epoch:1
+repeat:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:null
+===========================infer_benchmark_params===========================
+numpy_infer_input:3x416x416_2.npy
\ No newline at end of file
diff --git a/test_tipc/configs/picodet/picodet_s_320_coco_train_infer_python.txt b/test_tipc/configs/picodet/picodet_s_320_coco_train_infer_python.txt
index 0317fcf6f64..01aa4644ca5 100644
--- a/test_tipc/configs/picodet/picodet_s_320_coco_train_infer_python.txt
+++ b/test_tipc/configs/picodet/picodet_s_320_coco_train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./dataset/coco/test2017/
filename:null
##
trainer:norm_train
-norm_train:tools/train.py -c configs/picodet/legacy_model/picodet_s_320_coco.yml -o
+norm_train:tools/train.py -c configs/picodet/legacy_model/picodet_s_320_coco.yml -o worker_num=16
pact_train:tools/train.py -c configs/picodet/legacy_model/picodet_s_320_coco.yml --slim_config _template_pact -o
fpgm_train:tools/train.py -c configs/picodet/legacy_model/picodet_s_320_coco.yml --slim_config _template_fpgm -o
distill_train:null
diff --git a/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_l_80e_coco_train_infer_python.txt b/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_l_80e_coco_train_infer_python.txt
new file mode 100644
index 00000000000..2f8c4558b8f
--- /dev/null
+++ b/test_tipc/configs/ppyoloe/ppyoloe+/ppyoloe_plus_crn_l_80e_coco_train_infer_python.txt
@@ -0,0 +1,62 @@
+===========================train_params===========================
+model_name:ppyoloe_plus_crn_l_80e_coco
+python:python3.7
+gpu_list:0|0,1
+use_gpu:True
+auto_cast:null
+epoch:lite_train_lite_infer=1|lite_train_whole_infer=1|whole_train_whole_infer=300
+save_dir:null
+TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_train_whole_infer=2
+pretrain_weights:https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams
+trained_model_name:model_final.pdparams
+train_infer_img_dir:./dataset/coco/test2017/
+filename:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml -o
+pact_train:tools/train.py -c configs/ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml --slim_config _template_pact -o
+fpgm_train:tools/train.py -c configs/ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml --slim_config _template_fpgm -o
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml -o
+null:null
+##
+===========================infer_params===========================
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams
+norm_export:tools/export_model.py -c configs/ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml -o
+pact_export:tools/export_model.py -c configs/ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml --slim_config _template_pact -o
+fpgm_export:tools/export_model.py -c configs/ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml --slim_config _template_fpgm -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml --slim_config configs/slim/post_quant/ppyoloe_crn_s_300e_coco_ptq.yml -o
+##
+infer_mode:norm|kl_quant
+infer_quant:False|True
+inference:./deploy/python/infer.py
+--device:gpu|cpu
+--enable_mkldnn:False
+--cpu_threads:4
+--batch_size:1|2
+--use_tensorrt:null
+--run_mode:paddle
+--model_dir:
+--image_dir:./dataset/coco/test2017/
+--save_log_path:null
+--run_benchmark:False
+--trt_max_shape:1600
+===========================train_benchmark_params==========================
+batch_size:16
+fp_items:fp32|fp16
+epoch:1
+repeat:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:null
+===========================infer_benchmark_params===========================
+numpy_infer_input:3x640x640_2.npy
+===========================to_static_train_benchmark_params===========================
+to_static_train:--to_static
diff --git a/test_tipc/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco_train_infer_python.txt b/test_tipc/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco_train_infer_python.txt
new file mode 100644
index 00000000000..db9dafd716e
--- /dev/null
+++ b/test_tipc/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco_train_infer_python.txt
@@ -0,0 +1,58 @@
+===========================train_params===========================
+model_name:rtdetr_hgnetv2_l_6x_coco
+python:python3.7
+gpu_list:0|0,1
+use_gpu:True
+auto_cast:null
+epoch:lite_train_lite_infer=1|lite_train_whole_infer=1|whole_train_whole_infer=50
+save_dir:null
+TrainReader.batch_size:lite_train_lite_infer=1|lite_train_whole_infer=4|whole_train_whole_infer=4
+pretrain_weights:https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams
+trained_model_name:model_final.pdparams
+train_infer_img_dir:./dataset/coco/test2017/
+filename:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml -o worker_num=16
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml -o
+null:null
+##
+===========================infer_params===========================
+--output_dir:./output_inference
+weights:https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams
+norm_export:tools/export_model.py -c configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml -o
+pact_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:null
+##
+infer_mode:norm
+infer_quant:False
+inference:./deploy/python/infer.py
+--device:gpu|cpu
+--enable_mkldnn:False
+--cpu_threads:4
+--batch_size:1|2
+--use_tensorrt:null
+--run_mode:paddle
+--model_dir:
+--image_dir:./dataset/coco/test2017/
+--save_log_path:null
+--run_benchmark:False
+--trt_max_shape:1600
+===========================train_benchmark_params==========================
+batch_size:16
+fp_items:fp32|fp16
+epoch:1
+repeat:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:null
\ No newline at end of file
From caf4602065e8664ab5eb78e2f821dab8fb16e18b Mon Sep 17 00:00:00 2001
From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com>
Date: Tue, 16 May 2023 16:57:09 +0800
Subject: [PATCH 088/116] fix syn_bn for npu (#8246)
---
ppdet/engine/trainer.py | 6 ++----
ppdet/modeling/heads/sparsercnn_head.py | 5 ++++-
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 55890a979ec..f022793b61b 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -429,10 +429,8 @@ def train(self, validate=False):
model = self.model
if self.cfg.get('to_static', False):
model = apply_to_static(self.cfg, model)
- sync_bn = (
- getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
- (self.cfg.use_gpu or self.cfg.use_npu or self.cfg.use_mlu) and
- self._nranks > 1)
+ sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
+ (self.cfg.use_gpu or self.cfg.use_mlu) and self._nranks > 1)
if sync_bn:
model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
diff --git a/ppdet/modeling/heads/sparsercnn_head.py b/ppdet/modeling/heads/sparsercnn_head.py
index 801ff04fb77..0534cf4ad70 100644
--- a/ppdet/modeling/heads/sparsercnn_head.py
+++ b/ppdet/modeling/heads/sparsercnn_head.py
@@ -317,12 +317,15 @@ def _init_box_pooler(input_shape):
pooler_scales = [1.0 / 4.0, 1.0 / 8.0, 1.0 / 16.0, 1.0 / 32.0]
end_level = 3
+ aligned = True
+ if paddle.device.is_compiled_with_custom_device('npu'):
+ aligned = False
box_pooler = RoIAlign(
resolution=pooler_resolution,
spatial_scale=pooler_scales,
sampling_ratio=sampling_ratio,
end_level=end_level,
- aligned=True)
+ aligned=aligned)
return box_pooler
def forward(self, features, input_whwh):
From 90b0aada430ea5852f83af23a299acb006759c8e Mon Sep 17 00:00:00 2001
From: UsielLau <824797605@qq.com>
Date: Wed, 17 May 2023 14:40:39 +0800
Subject: [PATCH 089/116] Update GETTING_STARTED_cn.md (#8251)
---
docs/tutorials/GETTING_STARTED_cn.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/tutorials/GETTING_STARTED_cn.md b/docs/tutorials/GETTING_STARTED_cn.md
index c0230f51474..1aa4ed5d7c1 100644
--- a/docs/tutorials/GETTING_STARTED_cn.md
+++ b/docs/tutorials/GETTING_STARTED_cn.md
@@ -159,7 +159,7 @@ python tools/eval.py -c configs/yolov3/yolov3_mobilenet_v1_roadsign.yml \
## 6 预测
```bash
- python tools/infer.py -c configs/yolov3/yolov3_mobilenet_v1_roadsign.yml --infer_img=demo/000000570688.jpg -o weights=https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_roadsign.pdparams
+ python tools/infer.py -c configs/yolov3/yolov3_mobilenet_v1_roadsign.yml --infer_img=demo/road554.png -o weights=https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_roadsign.pdparams
```
* 设置参数预测
From 79267419e1743157f376a7cb251e01caa3338ce0 Mon Sep 17 00:00:00 2001
From: Wenyu
Date: Wed, 17 May 2023 14:40:55 +0800
Subject: [PATCH 090/116] add rtdetr r18/r34 (#8253)
* add r18 r34
* rtdetr add r18 r34 models
* update rtdetr, test=document_fix
---
configs/rtdetr/README.md | 10 +++++--
configs/rtdetr/rtdetr_r18vd_6x_coco.yml | 38 +++++++++++++++++++++++++
configs/rtdetr/rtdetr_r34vd_6x_coco.yml | 38 +++++++++++++++++++++++++
3 files changed, 83 insertions(+), 3 deletions(-)
create mode 100644 configs/rtdetr/rtdetr_r18vd_6x_coco.yml
create mode 100644 configs/rtdetr/rtdetr_r34vd_6x_coco.yml
diff --git a/configs/rtdetr/README.md b/configs/rtdetr/README.md
index a07d0d8f8ea..47c5d98d346 100644
--- a/configs/rtdetr/README.md
+++ b/configs/rtdetr/README.md
@@ -4,7 +4,9 @@
- 发布RT-DETR-R50和RT-DETR-R101的代码和预训练模型
- 发布RT-DETR-L和RT-DETR-X的代码和预训练模型
-- 发布RT-DETR-R50-Scaled伸缩更小的范例模型
+- 发布RT-DETR-R50-m模型(scale模型的范例)
+- 发布RT-DETR-R34模型
+- 发布RT-DETR-R18模型
## 简介
@@ -12,18 +14,20 @@ RT-DETR是第一个实时端到端目标检测器。具体而言,我们设计
若要了解更多细节,请参考我们的论文[paper](https://arxiv.org/abs/2304.08069).
-

+
## 模型
| Model | Epoch | backbone | input shape | $AP^{val}$ | $AP^{val}_{50}$| Params(M) | FLOPs(G) | T4 TensorRT FP16(FPS) | Pretrained Model | config |
|:--------------:|:-----:|:----------:| :-------:|:--------------------------:|:---------------------------:|:---------:|:--------:| :---------------------: |:------------------------------------------------------------------------------------:|:-------------------------------------------:|
+| RT-DETR-R18 | 6x | ResNet-18 | 640 | 46.5 | 63.8 | 20 | 60 | 217 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r18vd_dec3_6x_coco.pdparams) | [config](./rtdetr_r18vd_6x_coco.yml)
+| RT-DETR-R34 | 6x | ResNet-34 | 640 | 48.9 | 66.8 | 31 | 92 | 161 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r34vd_dec4_6x_coco.pdparams) | [config](./rtdetr_r34vd_6x_coco.yml)
+| RT-DETR-R50-m | 6x | ResNet-50 | 640 | 51.3 | 69.6 | 36 | 100 | 145 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_m_6x_coco.pdparams) | [config](./rtdetr_r50vd_m_6x_coco.yml)
| RT-DETR-R50 | 6x | ResNet-50 | 640 | 53.1 | 71.3 | 42 | 136 | 108 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_6x_coco.pdparams) | [config](./rtdetr_r50vd_6x_coco.yml)
| RT-DETR-R101 | 6x | ResNet-101 | 640 | 54.3 | 72.7 | 76 | 259 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r101vd_6x_coco.pdparams) | [config](./rtdetr_r101vd_6x_coco.yml)
| RT-DETR-L | 6x | HGNetv2 | 640 | 53.0 | 71.6 | 32 | 110 | 114 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_l_6x_coco.pdparams) | [config](rtdetr_hgnetv2_l_6x_coco.yml)
| RT-DETR-X | 6x | HGNetv2 | 640 | 54.8 | 73.1 | 67 | 234 | 74 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_hgnetv2_x_6x_coco.pdparams) | [config](rtdetr_hgnetv2_x_6x_coco.yml)
-| RT-DETR-R50-Scaled | 6x | ResNet-50 | 640 | 51.3 | 69.6 | 35 | 100 | 145 | [download](https://bj.bcebos.com/v1/paddledet/models/rtdetr_r50vd_m_6x_coco.pdparams) | [config](./rtdetr_r50vd_m_6x_coco.yml)
**注意事项:**
diff --git a/configs/rtdetr/rtdetr_r18vd_6x_coco.yml b/configs/rtdetr/rtdetr_r18vd_6x_coco.yml
new file mode 100644
index 00000000000..8cf98187679
--- /dev/null
+++ b/configs/rtdetr/rtdetr_r18vd_6x_coco.yml
@@ -0,0 +1,38 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_6x.yml',
+ '_base_/rtdetr_r50vd.yml',
+ '_base_/rtdetr_reader.yml',
+]
+
+weights: output/rtdetr_r18_6x_coco/model_final
+find_unused_parameters: True
+log_iter: 200
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
+ResNet:
+ depth: 18
+ variant: d
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+HybridEncoder:
+ hidden_dim: 256
+ use_encoder_idx: [2]
+ num_encoder_layers: 1
+ encoder_layer:
+ name: TransformerLayer
+ d_model: 256
+ nhead: 8
+ dim_feedforward: 1024
+ dropout: 0.
+ activation: 'gelu'
+ expansion: 0.5
+ depth_mult: 1.0
+
+RTDETRTransformer:
+ eval_idx: -1
+ num_decoder_layers: 3
diff --git a/configs/rtdetr/rtdetr_r34vd_6x_coco.yml b/configs/rtdetr/rtdetr_r34vd_6x_coco.yml
new file mode 100644
index 00000000000..2ab07baa976
--- /dev/null
+++ b/configs/rtdetr/rtdetr_r34vd_6x_coco.yml
@@ -0,0 +1,38 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_6x.yml',
+ '_base_/rtdetr_r50vd.yml',
+ '_base_/rtdetr_reader.yml',
+]
+
+weights: output/rtdetr_r34vd_6x_coco/model_final
+find_unused_parameters: True
+log_iter: 200
+
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams
+ResNet:
+ depth: 34
+ variant: d
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+HybridEncoder:
+ hidden_dim: 256
+ use_encoder_idx: [2]
+ num_encoder_layers: 1
+ encoder_layer:
+ name: TransformerLayer
+ d_model: 256
+ nhead: 8
+ dim_feedforward: 1024
+ dropout: 0.
+ activation: 'gelu'
+ expansion: 0.5
+ depth_mult: 1.0
+
+RTDETRTransformer:
+ eval_idx: -1
+ num_decoder_layers: 4
From eeebef9f3f8347eb18f6b55cf45c2ed38221c3f6 Mon Sep 17 00:00:00 2001
From: xiaoluomi <49263480+xiaoluomi@users.noreply.github.com>
Date: Fri, 19 May 2023 15:54:52 +0800
Subject: [PATCH 091/116] fix rtdetr yaml and infer (#8268)
---
.../configs/rtdetr_hgnetv2_x_qat_dis.yaml | 2 +-
.../configs/rtdetr_r101vd_qat_dis.yaml | 2 +-
.../configs/rtdetr_reader.yml | 12 +++++
.../auto_compression/paddle_inference_eval.py | 53 -------------------
4 files changed, 14 insertions(+), 55 deletions(-)
diff --git a/deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml b/deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml
index c4e2889bbd3..a8a4ac970a5 100644
--- a/deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml
+++ b/deploy/auto_compression/configs/rtdetr_hgnetv2_x_qat_dis.yaml
@@ -3,7 +3,7 @@ Global:
reader_config: configs/rtdetr_reader.yml
include_nms: True
Evaluation: True
- model_dir: ./rtdetr_r50vd_6x_coco/
+ model_dir: ./rtdetr_hgnetv2_x_6x_coco/
model_filename: model.pdmodel
params_filename: model.pdiparams
diff --git a/deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml b/deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml
index bd96d085e8a..45162b7189d 100644
--- a/deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml
+++ b/deploy/auto_compression/configs/rtdetr_r101vd_qat_dis.yaml
@@ -3,7 +3,7 @@ Global:
reader_config: configs/rtdetr_reader.yml
include_nms: True
Evaluation: True
- model_dir: ./rtdetr_hgnetv2_x_6x_coco/
+ model_dir: ./rtdetr_r101vd_6x_coco/
model_filename: model.pdmodel
params_filename: model.pdiparams
diff --git a/deploy/auto_compression/configs/rtdetr_reader.yml b/deploy/auto_compression/configs/rtdetr_reader.yml
index 7b213ffa202..04b0db6a7fd 100644
--- a/deploy/auto_compression/configs/rtdetr_reader.yml
+++ b/deploy/auto_compression/configs/rtdetr_reader.yml
@@ -12,6 +12,18 @@ TrainDataset:
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco/
+EvalDataset:
+ !COCODataSet
+ image_dir: val2017
+ anno_path: annotations/instances_val2017.json
+ dataset_dir: dataset/coco/
+
+TestDataset:
+ !COCODataSet
+ image_dir: val2017
+ anno_path: annotations/instances_val2017.json
+ dataset_dir: dataset/coco/
+
worker_num: 0
# preprocess reader in test
diff --git a/deploy/auto_compression/paddle_inference_eval.py b/deploy/auto_compression/paddle_inference_eval.py
index 053ee35e752..b128d524987 100644
--- a/deploy/auto_compression/paddle_inference_eval.py
+++ b/deploy/auto_compression/paddle_inference_eval.py
@@ -284,48 +284,6 @@ def load_predictor(
return predictor, rerun_flag
-def get_current_memory_mb():
- """
- It is used to Obtain the memory usage of the CPU and GPU during the running of the program.
- And this function Current program is time-consuming.
- """
- try:
- pkg.require('pynvml')
- except:
- from pip._internal import main
- main(['install', 'pynvml'])
- try:
- pkg.require('psutil')
- except:
- from pip._internal import main
- main(['install', 'psutil'])
- try:
- pkg.require('GPUtil')
- except:
- from pip._internal import main
- main(['install', 'GPUtil'])
- import pynvml
- import psutil
- import GPUtil
-
- gpu_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", 0))
-
- pid = os.getpid()
- p = psutil.Process(pid)
- info = p.memory_full_info()
- cpu_mem = info.uss / 1024.0 / 1024.0
- gpu_mem = 0
- gpu_percent = 0
- gpus = GPUtil.getGPUs()
- if gpu_id is not None and len(gpus) > 0:
- gpu_percent = gpus[gpu_id].load
- pynvml.nvmlInit()
- handle = pynvml.nvmlDeviceGetHandleByIndex(0)
- meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
- gpu_mem = meminfo.used / 1024.0 / 1024.0
- return round(cpu_mem, 4), round(gpu_mem, 4)
-
-
def predict_image(predictor,
image_file,
image_shape=[640, 640],
@@ -367,13 +325,7 @@ def predict_image(predictor,
time_min = min(time_min, timed)
time_max = max(time_max, timed)
predict_time += timed
- cpu_mem, gpu_mem = get_current_memory_mb()
- cpu_mems += cpu_mem
- gpu_mems += gpu_mem
-
time_avg = predict_time / repeats
- print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
- cpu_mems / repeats, gpu_mems / repeats))
print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
round(time_min * 1000, 2),
round(time_max * 1000, 1), round(time_avg * 1000, 1)))
@@ -418,9 +370,6 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
time_min = min(time_min, timed)
time_max = max(time_max, timed)
predict_time += timed
- cpu_mem, gpu_mem = get_current_memory_mb()
- cpu_mems += cpu_mem
- gpu_mems += gpu_mem
if not FLAGS.include_nms:
postprocess = PPYOLOEPostProcess(
score_threshold=0.3, nms_threshold=0.6)
@@ -436,8 +385,6 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
map_res = metric.get_results()
metric.reset()
time_avg = predict_time / sample_nums
- print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
- cpu_mems / sample_nums, gpu_mems / sample_nums))
print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
round(time_min * 1000, 2),
round(time_max * 1000, 1), round(time_avg * 1000, 1)))
From a694be1e948dc3437f525e3aedad5bde41183f97 Mon Sep 17 00:00:00 2001
From: Yang Nie
Date: Mon, 22 May 2023 13:53:51 +0800
Subject: [PATCH 092/116] =?UTF-8?q?=E3=80=90Hackathon=20+=20No.163?=
=?UTF-8?q?=E3=80=91=E5=9F=BA=E4=BA=8EPaddleDetection=20PP-TinyPose?=
=?UTF-8?q?=EF=BC=8C=E6=96=B0=E5=A2=9E=E6=89=8B=E5=8A=BF=E5=85=B3=E9=94=AE?=
=?UTF-8?q?=E7=82=B9=E6=A3=80=E6=B5=8B=E6=A8=A1=E5=9E=8B=20(#8066)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* support COCO Whole Bady Hand
* update transforms
* disable `AugmentationbyInformantionDropping`
* fix infer bug
* fix getImgIds
---
.../tiny_pose/tinypose_256x256_hand.yml | 145 +++++++++++++++
ppdet/data/source/category.py | 6 +-
ppdet/data/source/keypoint_coco.py | 116 ++++++++++++
ppdet/data/transform/keypoint_operators.py | 129 ++++++++++++++
ppdet/engine/trainer.py | 17 +-
ppdet/metrics/keypoint_metrics.py | 165 +++++++++++++++++-
ppdet/modeling/keypoint_utils.py | 148 ++++++++++++++++
7 files changed, 720 insertions(+), 6 deletions(-)
create mode 100644 configs/keypoint/tiny_pose/tinypose_256x256_hand.yml
diff --git a/configs/keypoint/tiny_pose/tinypose_256x256_hand.yml b/configs/keypoint/tiny_pose/tinypose_256x256_hand.yml
new file mode 100644
index 00000000000..db691f06bae
--- /dev/null
+++ b/configs/keypoint/tiny_pose/tinypose_256x256_hand.yml
@@ -0,0 +1,145 @@
+use_gpu: true
+log_iter: 5
+save_dir: output
+snapshot_epoch: 10
+weights: output/tinypose_256x256_hand/model_final
+epoch: 210
+num_joints: &num_joints 21
+pixel_std: &pixel_std 200
+metric: KeyPointTopDownCOCOWholeBadyHandEval
+num_classes: 1
+train_height: &train_height 256
+train_width: &train_width 256
+trainsize: &trainsize [*train_width, *train_height]
+hmsize: &hmsize [64, 64]
+flip_perm: &flip_perm []
+
+
+#####model
+architecture: TopDownHRNet
+
+TopDownHRNet:
+ backbone: LiteHRNet
+ post_process: HRNetPostProcess
+ flip_perm: *flip_perm
+ num_joints: *num_joints
+ width: &width 40
+ loss: KeyPointMSELoss
+ use_dark: true
+
+LiteHRNet:
+ network_type: wider_naive
+ freeze_at: -1
+ freeze_norm: false
+ return_idx: [0]
+
+KeyPointMSELoss:
+ use_target_weight: true
+ loss_scale: 1.0
+
+
+#####optimizer
+LearningRate:
+ base_lr: 0.002
+ schedulers:
+ - !PiecewiseDecay
+ milestones: [170, 200]
+ gamma: 0.1
+ - !LinearWarmup
+ start_factor: 0.001
+ steps: 500
+
+OptimizerBuilder:
+ optimizer:
+ type: Adam
+ regularizer:
+ factor: 0.0
+ type: L2
+
+
+#####data
+TrainDataset:
+ !KeypointTopDownCocoWholeBodyHandDataset
+ image_dir: train2017
+ anno_path: annotations/coco_wholebody_train_v1.0.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ trainsize: *trainsize
+ pixel_std: *pixel_std
+
+EvalDataset:
+ !KeypointTopDownCocoWholeBodyHandDataset
+ image_dir: val2017
+ anno_path: annotations/coco_wholebody_val_v1.0.json
+ dataset_dir: dataset/coco
+ num_joints: *num_joints
+ trainsize: *trainsize
+ pixel_std: *pixel_std
+
+TestDataset:
+ !ImageFolder
+ anno_path: dataset/coco/keypoint_imagelist.txt
+
+worker_num: 2
+global_mean: &global_mean [0.485, 0.456, 0.406]
+global_std: &global_std [0.229, 0.224, 0.225]
+TrainReader:
+ sample_transforms:
+ - TopDownRandomShiftBboxCenter:
+ shift_prob: 0.3
+ shift_factor: 0.16
+ - TopDownRandomFlip:
+ flip_prob: 0.5
+ flip_perm: *flip_perm
+ - TopDownGetRandomScaleRotation:
+ rot_prob: 0.6
+ rot_factor: 90
+ scale_factor: 0.3
+ # - AugmentationbyInformantionDropping:
+ # prob_cutout: 0.5
+ # offset_factor: 0.05
+ # num_patch: 1
+ # trainsize: *trainsize
+ - TopDownAffine:
+ trainsize: *trainsize
+ use_udp: true
+ - ToHeatmapsTopDown_DARK:
+ hmsize: *hmsize
+ sigma: 2
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 128
+ shuffle: true
+ drop_last: false
+
+EvalReader:
+ sample_transforms:
+ - TopDownAffine:
+ trainsize: *trainsize
+ use_udp: true
+ batch_transforms:
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 128
+
+TestReader:
+ inputs_def:
+ image_shape: [3, *train_height, *train_width]
+ sample_transforms:
+ - Decode: {}
+ - TopDownEvalAffine:
+ trainsize: *trainsize
+ - NormalizeImage:
+ mean: *global_mean
+ std: *global_std
+ is_scale: true
+ - Permute: {}
+ batch_size: 1
+ fuse_normalize: false
diff --git a/ppdet/data/source/category.py b/ppdet/data/source/category.py
index 4da25a2d2f5..8ed1f9e0461 100644
--- a/ppdet/data/source/category.py
+++ b/ppdet/data/source/category.py
@@ -114,8 +114,10 @@ def get_categories(metric_type, anno_file=None, arch=None):
elif metric_type.lower() == 'widerface':
return _widerface_category()
- elif metric_type.lower() == 'keypointtopdowncocoeval' or metric_type.lower(
- ) == 'keypointtopdownmpiieval':
+ elif metric_type.lower() in [
+ 'keypointtopdowncocoeval', 'keypointtopdownmpiieval',
+ 'keypointtopdowncocowholebadyhandeval'
+ ]:
return (None, {'id': 'keypoint'})
elif metric_type.lower() == 'pose3deval':
diff --git a/ppdet/data/source/keypoint_coco.py b/ppdet/data/source/keypoint_coco.py
index 6e072dc6e88..86d83439b5e 100644
--- a/ppdet/data/source/keypoint_coco.py
+++ b/ppdet/data/source/keypoint_coco.py
@@ -635,6 +635,122 @@ def _load_coco_person_detection_results(self):
return kpt_db
+@register
+@serializable
+class KeypointTopDownCocoWholeBodyHandDataset(KeypointTopDownBaseDataset):
+ """CocoWholeBody dataset for top-down hand pose estimation.
+
+ The dataset loads raw features and apply specified transforms
+ to return a dict containing the image tensors and other information.
+
+ COCO-WholeBody Hand keypoint indexes:
+
+ 0: 'wrist',
+ 1: 'thumb1',
+ 2: 'thumb2',
+ 3: 'thumb3',
+ 4: 'thumb4',
+ 5: 'forefinger1',
+ 6: 'forefinger2',
+ 7: 'forefinger3',
+ 8: 'forefinger4',
+ 9: 'middle_finger1',
+ 10: 'middle_finger2',
+ 11: 'middle_finger3',
+ 12: 'middle_finger4',
+ 13: 'ring_finger1',
+ 14: 'ring_finger2',
+ 15: 'ring_finger3',
+ 16: 'ring_finger4',
+ 17: 'pinky_finger1',
+ 18: 'pinky_finger2',
+ 19: 'pinky_finger3',
+ 20: 'pinky_finger4'
+
+ Args:
+ dataset_dir (str): Root path to the dataset.
+ image_dir (str): Path to a directory where images are held.
+ anno_path (str): Relative path to the annotation file.
+ num_joints (int): Keypoint numbers
+ trainsize (list):[w, h] Image target size
+ transform (composed(operators)): A sequence of data transforms.
+ pixel_std (int): The pixel std of the scale
+ Default: 200.
+ """
+
+ def __init__(self,
+ dataset_dir,
+ image_dir,
+ anno_path,
+ num_joints,
+ trainsize,
+ transform=[],
+ pixel_std=200):
+ super().__init__(dataset_dir, image_dir, anno_path, num_joints,
+ transform)
+
+ self.trainsize = trainsize
+ self.pixel_std = pixel_std
+ self.dataset_name = 'coco_wholebady_hand'
+
+ def _box2cs(self, box):
+ x, y, w, h = box[:4]
+ center = np.zeros((2), dtype=np.float32)
+ center[0] = x + w * 0.5
+ center[1] = y + h * 0.5
+ aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]
+
+ if w > aspect_ratio * h:
+ h = w * 1.0 / aspect_ratio
+ elif w < aspect_ratio * h:
+ w = h * aspect_ratio
+ scale = np.array(
+ [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
+ dtype=np.float32)
+ if center[0] != -1:
+ scale = scale * 1.25
+
+ return center, scale
+
+ def parse_dataset(self):
+ gt_db = []
+ num_joints = self.ann_info['num_joints']
+ coco = COCO(self.get_anno())
+ img_ids = list(coco.imgs.keys())
+ for img_id in img_ids:
+ im_ann = coco.loadImgs(img_id)[0]
+ image_file = os.path.join(self.img_prefix, im_ann['file_name'])
+ im_id = int(im_ann["id"])
+
+ ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ for type in ['left', 'right']:
+ if (obj[f'{type}hand_valid'] and
+ max(obj[f'{type}hand_kpts']) > 0):
+
+ joints = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_vis = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj[f'{type}hand_kpts'])
+ keypoints = keypoints.reshape(-1, 3)
+ joints[:, :2] = keypoints[:, :2]
+ joints_vis[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ center, scale = self._box2cs(obj[f'{type}hand_box'][:4])
+ gt_db.append({
+ 'image_file': image_file,
+ 'center': center,
+ 'scale': scale,
+ 'gt_joints': joints,
+ 'joints_vis': joints_vis,
+ 'im_id': im_id,
+ })
+
+ self.db = gt_db
+
+
@register
@serializable
class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
diff --git a/ppdet/data/transform/keypoint_operators.py b/ppdet/data/transform/keypoint_operators.py
index fea23d696c2..d29aa2397ce 100644
--- a/ppdet/data/transform/keypoint_operators.py
+++ b/ppdet/data/transform/keypoint_operators.py
@@ -38,6 +38,7 @@
__all__ = [
'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
+ 'TopDownRandomFlip', 'TopDownRandomShiftBboxCenter', 'TopDownGetRandomScaleRotation',
'TopDownAffine', 'ToHeatmapsTopDown', 'ToHeatmapsTopDown_DARK',
'ToHeatmapsTopDown_UDP', 'TopDownEvalAffine',
'AugmentationbyInformantionDropping', 'SinglePoseAffine', 'NoiseJitter',
@@ -687,6 +688,134 @@ def __call__(self, records):
return records
+@register_keypointop
+class TopDownRandomFlip(object):
+ """Data augmentation with random image flip.
+
+ Args:
+ flip_perm: (list[tuple]): Pairs of keypoints which are mirrored
+ (for example, left ear and right ear).
+ flip_prob (float): Probability of flip.
+ """
+
+ def __init__(self, flip_perm=[], flip_prob=0.5):
+ self.flip_perm = flip_perm
+ self.flip_prob = flip_prob
+
+ def flip_joints(self, joints_3d, joints_3d_visible, img_width, flip_pairs):
+ assert len(joints_3d) == len(joints_3d_visible)
+ assert img_width > 0
+
+ joints_3d_flipped = joints_3d.copy()
+ joints_3d_visible_flipped = joints_3d_visible.copy()
+
+ # Swap left-right parts
+ for left, right in flip_pairs:
+ joints_3d_flipped[left, :] = joints_3d[right, :]
+ joints_3d_flipped[right, :] = joints_3d[left, :]
+
+ joints_3d_visible_flipped[left, :] = joints_3d_visible[right, :]
+ joints_3d_visible_flipped[right, :] = joints_3d_visible[left, :]
+
+ # Flip horizontally
+ joints_3d_flipped[:, 0] = img_width - 1 - joints_3d_flipped[:, 0]
+ joints_3d_flipped = joints_3d_flipped * (joints_3d_visible_flipped > 0)
+
+ return joints_3d_flipped, joints_3d_visible_flipped
+
+ def __call__(self, results):
+ """Perform data augmentation with random image flip."""
+ if np.random.rand() <= self.flip_prob:
+ return results
+
+ img = results['image']
+ joints_3d = results['gt_joints']
+ joints_3d_visible = results['joints_vis']
+ center = results['center']
+
+ # A flag indicating whether the image is flipped,
+ # which can be used by child class.
+ if not isinstance(img, list):
+ img = img[:, ::-1, :]
+ else:
+ img = [i[:, ::-1, :] for i in img]
+ if not isinstance(img, list):
+ joints_3d, joints_3d_visible = self.flip_joints(
+ joints_3d, joints_3d_visible, img.shape[1],
+ self.flip_perm)
+ center[0] = img.shape[1] - center[0] - 1
+ else:
+ joints_3d, joints_3d_visible = self.flip_joints(
+ joints_3d, joints_3d_visible, img[0].shape[1],
+ self.flip_perm)
+ center[0] = img[0].shape[1] - center[0] - 1
+
+ results['image'] = img
+ results['gt_joints'] = joints_3d
+ results['joints_vis'] = joints_3d_visible
+ results['center'] = center
+
+ return results
+
+
+@register_keypointop
+class TopDownRandomShiftBboxCenter(object):
+ """Random shift the bbox center.
+
+ Args:
+ shift_factor (float): The factor to control the shift range, which is
+ scale*pixel_std*scale_factor. Default: 0.16
+ shift_prob (float): Probability of applying random shift. Default: 0.3
+ """
+
+ def __init__(self, shift_factor=0.16, shift_prob=0.3):
+ self.shift_factor = shift_factor
+ self.shift_prob = shift_prob
+
+ def __call__(self, results):
+ center = results['center']
+ scale = results['scale']
+ if np.random.rand() < self.shift_prob:
+ center += np.random.uniform(
+ -1, 1, 2) * self.shift_factor * scale * 200.0
+
+ results['center'] = center
+ return results
+
+@register_keypointop
+class TopDownGetRandomScaleRotation(object):
+ """Data augmentation with random scaling & rotating.
+
+ Args:
+ rot_factor (int): Rotating to ``[-2*rot_factor, 2*rot_factor]``.
+ scale_factor (float): Scaling to ``[1-scale_factor, 1+scale_factor]``.
+ rot_prob (float): Probability of random rotation.
+ """
+
+ def __init__(self, rot_factor=40, scale_factor=0.5, rot_prob=0.6):
+ self.rot_factor = rot_factor
+ self.scale_factor = scale_factor
+ self.rot_prob = rot_prob
+
+ def __call__(self, results):
+ """Perform data augmentation with random scaling & rotating."""
+ s = results['scale']
+
+ sf = self.scale_factor
+ rf = self.rot_factor
+
+ s_factor = np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+ s = s * s_factor
+
+ r_factor = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
+ r = r_factor if np.random.rand() <= self.rot_prob else 0
+
+ results['scale'] = s
+ results['rotate'] = r
+
+ return results
+
+
@register_keypointop
class TopDownAffine(object):
"""apply affine transform to image and coords
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index f022793b61b..bfd92fd62fb 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -38,8 +38,8 @@
from ppdet.core.workspace import create
from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from ppdet.utils.visualizer import visualize_results, save_result
-from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval, Pose3DEval
-from ppdet.metrics import RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
+from ppdet.metrics import get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownCOCOWholeBadyHandEval, KeyPointTopDownMPIIEval, Pose3DEval
+from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, RBoxMetric, JDEDetMetric, SNIPERCOCOMetric
from ppdet.data.source.sniper_coco import SniperCOCODataSet
from ppdet.data.source.category import get_categories
import ppdet.utils.stats as stats
@@ -348,6 +348,19 @@ def _init_metrics(self, validate=False):
self.cfg.save_dir,
save_prediction_only=save_prediction_only)
]
+ elif self.cfg.metric == 'KeyPointTopDownCOCOWholeBadyHandEval':
+ eval_dataset = self.cfg['EvalDataset']
+ eval_dataset.check_or_download_dataset()
+ anno_file = eval_dataset.get_anno()
+ save_prediction_only = self.cfg.get('save_prediction_only', False)
+ self._metrics = [
+ KeyPointTopDownCOCOWholeBadyHandEval(
+ anno_file,
+ len(eval_dataset),
+ self.cfg.num_joints,
+ self.cfg.save_dir,
+ save_prediction_only=save_prediction_only)
+ ]
elif self.cfg.metric == 'KeyPointTopDownMPIIEval':
eval_dataset = self.cfg['EvalDataset']
eval_dataset.check_or_download_dataset()
diff --git a/ppdet/metrics/keypoint_metrics.py b/ppdet/metrics/keypoint_metrics.py
index cbd52d02d4a..26e9ecb5edc 100644
--- a/ppdet/metrics/keypoint_metrics.py
+++ b/ppdet/metrics/keypoint_metrics.py
@@ -19,12 +19,15 @@
import paddle
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
-from ..modeling.keypoint_utils import oks_nms
+from ..modeling.keypoint_utils import oks_nms, keypoint_pck_accuracy, keypoint_auc, keypoint_epe
from scipy.io import loadmat, savemat
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
-__all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']
+__all__ = [
+ 'KeyPointTopDownCOCOEval', 'KeyPointTopDownCOCOWholeBadyHandEval',
+ 'KeyPointTopDownMPIIEval'
+]
class KeyPointTopDownCOCOEval(object):
@@ -226,6 +229,164 @@ def get_results(self):
return self.eval_results
+class KeyPointTopDownCOCOWholeBadyHandEval(object):
+ def __init__(self,
+ anno_file,
+ num_samples,
+ num_joints,
+ output_eval,
+ save_prediction_only=False):
+ super(KeyPointTopDownCOCOWholeBadyHandEval, self).__init__()
+ self.coco = COCO(anno_file)
+ self.num_samples = num_samples
+ self.num_joints = num_joints
+ self.output_eval = output_eval
+ self.res_file = os.path.join(output_eval, "keypoints_results.json")
+ self.save_prediction_only = save_prediction_only
+ self.parse_dataset()
+ self.reset()
+
+ def parse_dataset(self):
+ gt_db = []
+ num_joints = self.num_joints
+ coco = self.coco
+ img_ids = coco.getImgIds()
+ for img_id in img_ids:
+ ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
+ objs = coco.loadAnns(ann_ids)
+
+ for obj in objs:
+ for type in ['left', 'right']:
+ if (obj[f'{type}hand_valid'] and
+ max(obj[f'{type}hand_kpts']) > 0):
+
+ joints = np.zeros((num_joints, 3), dtype=np.float32)
+ joints_vis = np.zeros((num_joints, 3), dtype=np.float32)
+
+ keypoints = np.array(obj[f'{type}hand_kpts'])
+ keypoints = keypoints.reshape(-1, 3)
+ joints[:, :2] = keypoints[:, :2]
+ joints_vis[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+ gt_db.append({
+ 'bbox': obj[f'{type}hand_box'],
+ 'gt_joints': joints,
+ 'joints_vis': joints_vis,
+ })
+ self.db = gt_db
+
+ def reset(self):
+ self.results = {
+ 'preds': np.zeros(
+ (self.num_samples, self.num_joints, 3), dtype=np.float32),
+ }
+ self.eval_results = {}
+ self.idx = 0
+
+ def update(self, inputs, outputs):
+ kpts, _ = outputs['keypoint'][0]
+ num_images = inputs['image'].shape[0]
+ self.results['preds'][self.idx:self.idx + num_images, :, 0:
+ 3] = kpts[:, :, 0:3]
+ self.idx += num_images
+
+ def accumulate(self):
+ self.get_final_results(self.results['preds'])
+ if self.save_prediction_only:
+ logger.info(f'The keypoint result is saved to {self.res_file} '
+ 'and do not evaluate the mAP.')
+ return
+
+ self.eval_results = self.evaluate(self.res_file, ('PCK', 'AUC', 'EPE'))
+
+ def get_final_results(self, preds):
+ kpts = []
+ for idx, kpt in enumerate(preds):
+ kpts.append({'keypoints': kpt.tolist()})
+
+ self._write_keypoint_results(kpts)
+
+ def _write_keypoint_results(self, keypoints):
+ if not os.path.exists(self.output_eval):
+ os.makedirs(self.output_eval)
+ with open(self.res_file, 'w') as f:
+ json.dump(keypoints, f, sort_keys=True, indent=4)
+ logger.info(f'The keypoint result is saved to {self.res_file}.')
+ try:
+ json.load(open(self.res_file))
+ except Exception:
+ content = []
+ with open(self.res_file, 'r') as f:
+ for line in f:
+ content.append(line)
+ content[-1] = ']'
+ with open(self.res_file, 'w') as f:
+ for c in content:
+ f.write(c)
+
+ def log(self):
+ if self.save_prediction_only:
+ return
+ for item, value in self.eval_results.items():
+ print("{} : {}".format(item, value))
+
+ def get_results(self):
+ return self.eval_results
+
+ def evaluate(self, res_file, metrics, pck_thr=0.2, auc_nor=30):
+ """Keypoint evaluation.
+
+ Args:
+ res_file (str): Json file stored prediction results.
+ metrics (str | list[str]): Metric to be performed.
+ Options: 'PCK', 'AUC', 'EPE'.
+ pck_thr (float): PCK threshold, default as 0.2.
+ auc_nor (float): AUC normalization factor, default as 30 pixel.
+
+ Returns:
+ List: Evaluation results for evaluation metric.
+ """
+ info_str = []
+
+ with open(res_file, 'r') as fin:
+ preds = json.load(fin)
+ assert len(preds) == len(self.db)
+
+ outputs = []
+ gts = []
+ masks = []
+ threshold_bbox = []
+
+ for pred, item in zip(preds, self.db):
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
+ gts.append(np.array(item['gt_joints'])[:, :-1])
+ masks.append((np.array(item['joints_vis'])[:, 0]) > 0)
+ if 'PCK' in metrics:
+ bbox = np.array(item['bbox'])
+ bbox_thr = np.max(bbox[2:])
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
+
+ outputs = np.array(outputs)
+ gts = np.array(gts)
+ masks = np.array(masks)
+ threshold_bbox = np.array(threshold_bbox)
+
+ if 'PCK' in metrics:
+ _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+ threshold_bbox)
+ info_str.append(('PCK', pck))
+
+ if 'AUC' in metrics:
+ info_str.append(('AUC', keypoint_auc(outputs, gts, masks, auc_nor)))
+
+ if 'EPE' in metrics:
+ info_str.append(('EPE', keypoint_epe(outputs, gts, masks)))
+
+ name_value = OrderedDict(info_str)
+
+ return name_value
+
+
class KeyPointTopDownMPIIEval(object):
def __init__(self,
anno_file,
diff --git a/ppdet/modeling/keypoint_utils.py b/ppdet/modeling/keypoint_utils.py
index 377f1d75c94..382e3731716 100644
--- a/ppdet/modeling/keypoint_utils.py
+++ b/ppdet/modeling/keypoint_utils.py
@@ -401,3 +401,151 @@ def flip_back(output_flipped, flip_pairs, target_type='GaussianHeatmap'):
# Flip horizontally
output_flipped_back = output_flipped_back[..., ::-1]
return output_flipped_back
+
+
+def _calc_distances(preds, targets, mask, normalize):
+ """Calculate the normalized distances between preds and target.
+
+ Note:
+ batch_size: N
+ num_keypoints: K
+ dimension of keypoints: D (normally, D=2 or D=3)
+
+ Args:
+ preds (np.ndarray[N, K, D]): Predicted keypoint location.
+ targets (np.ndarray[N, K, D]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ normalize (np.ndarray[N, D]): Typical value is heatmap_size
+
+ Returns:
+ np.ndarray[K, N]: The normalized distances. \
+ If target keypoints are missing, the distance is -1.
+ """
+ N, K, _ = preds.shape
+ # set mask=0 when normalize==0
+ _mask = mask.copy()
+ _mask[np.where((normalize == 0).sum(1))[0], :] = False
+ distances = np.full((N, K), -1, dtype=np.float32)
+ # handle invalid values
+ normalize[np.where(normalize <= 0)] = 1e6
+ distances[_mask] = np.linalg.norm(
+ ((preds - targets) / normalize[:, None, :])[_mask], axis=-1)
+ return distances.T
+
+
+def _distance_acc(distances, thr=0.5):
+ """Return the percentage below the distance threshold, while ignoring
+ distances values with -1.
+
+ Note:
+ batch_size: N
+ Args:
+ distances (np.ndarray[N, ]): The normalized distances.
+ thr (float): Threshold of the distances.
+
+ Returns:
+ float: Percentage of distances below the threshold. \
+ If all target keypoints are missing, return -1.
+ """
+ distance_valid = distances != -1
+ num_distance_valid = distance_valid.sum()
+ if num_distance_valid > 0:
+ return (distances[distance_valid] < thr).sum() / num_distance_valid
+ return -1
+
+
+def keypoint_pck_accuracy(pred, gt, mask, thr, normalize):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints for coordinates.
+
+ Note:
+ PCK metric measures accuracy of the localization of the body joints.
+ The distances between predicted positions and the ground-truth ones
+ are typically normalized by the bounding box size.
+ The threshold (thr) of the normalized distance is commonly set
+ as 0.05, 0.1 or 0.2 etc.
+
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ thr (float): Threshold of PCK calculation.
+ normalize (np.ndarray[N, 2]): Normalization factor for H&W.
+
+ Returns:
+ tuple: A tuple containing keypoint accuracy.
+
+ - acc (np.ndarray[K]): Accuracy of each keypoint.
+ - avg_acc (float): Averaged accuracy across all keypoints.
+ - cnt (int): Number of valid keypoints.
+ """
+ distances = _calc_distances(pred, gt, mask, normalize)
+
+ acc = np.array([_distance_acc(d, thr) for d in distances])
+ valid_acc = acc[acc >= 0]
+ cnt = len(valid_acc)
+ avg_acc = valid_acc.mean() if cnt > 0 else 0
+ return acc, avg_acc, cnt
+
+
+def keypoint_auc(pred, gt, mask, normalize, num_step=20):
+ """Calculate the pose accuracy of PCK for each individual keypoint and the
+ averaged accuracy across all keypoints for coordinates.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+ normalize (float): Normalization factor.
+
+ Returns:
+ float: Area under curve.
+ """
+ nor = np.tile(np.array([[normalize, normalize]]), (pred.shape[0], 1))
+ x = [1.0 * i / num_step for i in range(num_step)]
+ y = []
+ for thr in x:
+ _, avg_acc, _ = keypoint_pck_accuracy(pred, gt, mask, thr, nor)
+ y.append(avg_acc)
+
+ auc = 0
+ for i in range(num_step):
+ auc += 1.0 / num_step * y[i]
+ return auc
+
+
+def keypoint_epe(pred, gt, mask):
+ """Calculate the end-point error.
+
+ Note:
+ - batch_size: N
+ - num_keypoints: K
+
+ Args:
+ pred (np.ndarray[N, K, 2]): Predicted keypoint location.
+ gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
+ mask (np.ndarray[N, K]): Visibility of the target. False for invisible
+ joints, and True for visible. Invisible joints will be ignored for
+ accuracy calculation.
+
+ Returns:
+ float: Average end-point error.
+ """
+
+ normalize = np.ones((pred.shape[0], pred.shape[2]), dtype=np.float32)
+ distances = _calc_distances(pred, gt, mask, normalize)
+ distance_valid = distances[distances != -1]
+ return distance_valid.sum() / max(1, len(distance_valid))
From 129ddbb219d22ef697bf2d3d16a5eb0277e40359 Mon Sep 17 00:00:00 2001
From: Kehan Yin <838278270@qq.com>
Date: Thu, 1 Jun 2023 14:15:48 +0800
Subject: [PATCH 093/116] =?UTF-8?q?=E3=80=90Hackathon=20+=20No.161?=
=?UTF-8?q?=E3=80=91=E8=AE=BA=E6=96=87=E5=A4=8D=E7=8E=B0=EF=BC=9ACLRNet:?=
=?UTF-8?q?=20Cross=20Layer=20Refinement=20Network=20for=20Lane=20Detectio?=
=?UTF-8?q?n=20(#8278)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* Feat add CLRNet
* Update CLRNet README.md
* Update requirements: add imgaug>=0.4.0 for CLRNet
* Update CLRNet README.md
* Update README.md
* Update Rename clrnet_utils.py
* Update CLRNet demo & delete demo result
* Update README.md add weight for culane
* Update README.cn.md add training logs
* Feat add dataset download
* Fix bugs when lanes is empty
* Update README
* Update README for dataset info
* Fix export model
* Update configs & README
* style: update codestyle
* Style update op codestyle
* Fix eval process
* Fix eval process
* Update README&configs
* Fix deploy infer
* Fix mkdir in lane visualize
* Docs Update README
* Docs Rename configs
* Docs update weights
---------
Co-authored-by: LokeZhou
---
configs/clrnet/README.cn.md | 68 ++
configs/clrnet/README.md | 68 ++
configs/clrnet/_base_/clrnet_r18_fpn.yml | 41 ++
configs/clrnet/_base_/clrnet_reader.yml | 37 +
configs/clrnet/_base_/optimizer_1x.yml | 14 +
configs/clrnet/clrnet_resnet18_culane.yml | 9 +
configs/datasets/culane.yml | 28 +
demo/lane00000.jpg | Bin 0 -> 263368 bytes
deploy/python/clrnet_postprocess.py | 180 +++++
deploy/python/infer.py | 125 +++-
deploy/python/preprocess.py | 27 +
deploy/python/visualize.py | 60 ++
ppdet/data/culane_utils.py | 130 ++++
ppdet/data/source/__init__.py | 2 +
ppdet/data/source/culane.py | 206 ++++++
ppdet/data/transform/__init__.py | 3 +
ppdet/data/transform/culane_operators.py | 366 +++++++++
ppdet/engine/export_utils.py | 32 +-
ppdet/engine/trainer.py | 122 ++-
ppdet/metrics/__init__.py | 6 +-
ppdet/metrics/culane_metrics.py | 327 ++++++++
ppdet/modeling/architectures/__init__.py | 4 +-
ppdet/modeling/architectures/clrnet.py | 67 ++
ppdet/modeling/assigners/clrnet_assigner.py | 147 ++++
ppdet/modeling/backbones/__init__.py | 2 +
ppdet/modeling/backbones/clrnet_resnet.py | 697 ++++++++++++++++++
ppdet/modeling/clrnet_utils.py | 309 ++++++++
ppdet/modeling/heads/__init__.py | 4 +-
ppdet/modeling/heads/clrnet_head.py | 399 ++++++++++
ppdet/modeling/lane_utils.py | 111 +++
ppdet/modeling/losses/__init__.py | 4 +
ppdet/modeling/losses/clrnet_line_iou_loss.py | 41 ++
ppdet/modeling/losses/clrnet_loss.py | 283 +++++++
ppdet/modeling/necks/__init__.py | 2 +
ppdet/modeling/necks/clrnet_fpn.py | 254 +++++++
ppdet/utils/download.py | 3 +-
requirements.txt | 3 +
tools/infer_culane.py | 165 +++++
38 files changed, 4334 insertions(+), 12 deletions(-)
create mode 100644 configs/clrnet/README.cn.md
create mode 100644 configs/clrnet/README.md
create mode 100644 configs/clrnet/_base_/clrnet_r18_fpn.yml
create mode 100644 configs/clrnet/_base_/clrnet_reader.yml
create mode 100644 configs/clrnet/_base_/optimizer_1x.yml
create mode 100644 configs/clrnet/clrnet_resnet18_culane.yml
create mode 100644 configs/datasets/culane.yml
create mode 100644 demo/lane00000.jpg
create mode 100644 deploy/python/clrnet_postprocess.py
create mode 100644 ppdet/data/culane_utils.py
create mode 100644 ppdet/data/source/culane.py
create mode 100644 ppdet/data/transform/culane_operators.py
create mode 100644 ppdet/metrics/culane_metrics.py
create mode 100644 ppdet/modeling/architectures/clrnet.py
create mode 100644 ppdet/modeling/assigners/clrnet_assigner.py
create mode 100644 ppdet/modeling/backbones/clrnet_resnet.py
create mode 100644 ppdet/modeling/clrnet_utils.py
create mode 100644 ppdet/modeling/heads/clrnet_head.py
create mode 100644 ppdet/modeling/lane_utils.py
create mode 100644 ppdet/modeling/losses/clrnet_line_iou_loss.py
create mode 100644 ppdet/modeling/losses/clrnet_loss.py
create mode 100644 ppdet/modeling/necks/clrnet_fpn.py
create mode 100644 tools/infer_culane.py
diff --git a/configs/clrnet/README.cn.md b/configs/clrnet/README.cn.md
new file mode 100644
index 00000000000..422709e4329
--- /dev/null
+++ b/configs/clrnet/README.cn.md
@@ -0,0 +1,68 @@
+简体中文 | [English](README.md)
+
+# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
+
+## 目录
+- [简介](#简介)
+- [模型库](#模型库)
+- [引用](#引用)
+
+## 简介
+
+[CLRNet](https://arxiv.org/abs/2203.10350)是一个车道线检测模型。CLRNet模型设计了车道线检测的直线先验轨迹,车道线iou以及nms方法,融合提取车道线轨迹的上下文高层特征与底层特征,利用FPN多尺度进行refine,在车道线检测相关数据集取得了SOTA的性能。
+
+## 模型库
+
+### CLRNet在CUlane上结果
+
+| 骨架网络 | mF1 | F1@50 | F1@75 | 下载链接 | 配置文件 |训练日志|
+| :--------------| :------- | :----: | :------: | :----: |:-----: |:-----: |
+| ResNet-18 | 54.98 | 79.46 | 62.10 | [下载链接](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [配置文件](./clrnet_resnet18_culane.yml) |[训练日志](https://bj.bcebos.com/v1/paddledet/logs/train_clrnet_r18_15_culane.log)|
+
+### 数据集下载
+下载[CULane数据集](https://xingangpan.github.io/projects/CULane.html)并解压到`dataset/culane`目录。
+
+您的数据集目录结构如下:
+```shell
+culane/driver_xx_xxframe # data folders x6
+culane/laneseg_label_w16 # lane segmentation labels
+culane/list # data lists
+```
+如果您使用百度云链接下载,注意确保`driver_23_30frame_part1.tar.gz`和`driver_23_30frame_part2.tar.gz`解压后的文件都在`driver_23_30frame`目录下。
+
+现已将用于测试的小数据集上传到PaddleDetection,可通过运行训练脚本,自动下载并解压数据,如需复现结果请下载链接中的全量数据集训练。
+
+### 训练
+- GPU单卡训练
+```shell
+python tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
+```
+- GPU多卡训练
+```shell
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
+```
+
+### 评估
+```shell
+python tools/eval.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
+```
+
+### 预测
+```shell
+python tools/infer_culane.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
+```
+
+注意:预测功能暂不支持模型静态图推理部署。
+
+## 引用
+```
+@InProceedings{Zheng_2022_CVPR,
+ author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
+ title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2022},
+ pages = {898-907}
+}
+```
diff --git a/configs/clrnet/README.md b/configs/clrnet/README.md
new file mode 100644
index 00000000000..f61b0c86c0d
--- /dev/null
+++ b/configs/clrnet/README.md
@@ -0,0 +1,68 @@
+English | [简体中文](README.cn.md)
+
+# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
+
+## Table of Contents
+- [Introduction](#Introduction)
+- [Model Zoo](#Model_Zoo)
+- [Citations](#Citations)
+
+## Introduction
+
+[CLRNet](https://arxiv.org/abs/2203.10350) is a lane detection model. The CLRNet model is designed with line prior for lane detection, line iou loss as well as nms method, fused to extract contextual high-level features of lane line with low-level features, and refined by FPN multi-scale. Finally, the model achieved SOTA performance in lane detection datasets.
+
+## Model Zoo
+
+### CLRNet Results on CULane dataset
+
+| backbone | mF1 | F1@50 | F1@75 | download | config |
+| :--------------| :------- | :----: | :------: | :----: |:-----: |
+| ResNet-18 | 54.98 | 79.46 | 62.10 | [model](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [config](./clrnet_resnet18_culane.yml) |
+
+### Download
+Download [CULane](https://xingangpan.github.io/projects/CULane.html). Then extract them to `dataset/culane`.
+
+For CULane, you should have structure like this:
+```shell
+culane/driver_xx_xxframe # data folders x6
+culane/laneseg_label_w16 # lane segmentation labels
+culane/list # data lists
+```
+If you use Baidu Cloud, make sure that images in `driver_23_30frame_part1.tar.gz` and `driver_23_30frame_part2.tar.gz` are located in one folder `driver_23_30frame` instead of two separate folders after you decompress them.
+
+Now we have uploaded a small subset of the CULane dataset to PaddleDetection for code checking. You can simply run the training script below to download it automatically. If you want to reproduce the results, you need to download the full dataset at the link for training.
+
+### Training
+- single GPU
+```shell
+python tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
+```
+- multi GPU
+```shell
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
+```
+
+### Evaluation
+```shell
+python tools/eval.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
+```
+
+### Inference
+```shell
+python tools/infer_culane.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
+```
+
+Notice: The inference phase does not support static model graph deploy at present.
+
+## Citations
+```
+@InProceedings{Zheng_2022_CVPR,
+ author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
+ title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2022},
+ pages = {898-907}
+}
+```
diff --git a/configs/clrnet/_base_/clrnet_r18_fpn.yml b/configs/clrnet/_base_/clrnet_r18_fpn.yml
new file mode 100644
index 00000000000..5b109814c8a
--- /dev/null
+++ b/configs/clrnet/_base_/clrnet_r18_fpn.yml
@@ -0,0 +1,41 @@
+architecture: CLRNet
+
+CLRNet:
+ backbone: CLRResNet
+ neck: CLRFPN
+ clr_head: CLRHead
+
+CLRResNet:
+ resnet: 'resnet18'
+ pretrained: True
+
+CLRFPN:
+ in_channels: [128,256,512]
+ out_channel: 64
+ extra_stage: 0
+
+CLRHead:
+ prior_feat_channels: 64
+ fc_hidden_dim: 64
+ num_priors: 192
+ num_fc: 2
+ refine_layers: 3
+ sample_points: 36
+ loss: CLRNetLoss
+ conf_threshold: 0.4
+ nms_thres: 0.8
+
+CLRNetLoss:
+ cls_loss_weight : 2.0
+ xyt_loss_weight : 0.2
+ iou_loss_weight : 2.0
+ seg_loss_weight : 1.0
+ refine_layers : 3
+ ignore_label: 255
+ bg_weight: 0.4
+
+# for visualize lane detection results
+sample_y:
+ start: 589
+ end: 230
+ step: -20
diff --git a/configs/clrnet/_base_/clrnet_reader.yml b/configs/clrnet/_base_/clrnet_reader.yml
new file mode 100644
index 00000000000..b5eb77daed1
--- /dev/null
+++ b/configs/clrnet/_base_/clrnet_reader.yml
@@ -0,0 +1,37 @@
+worker_num: 10
+
+img_h: &img_h 320
+img_w: &img_w 800
+ori_img_h: &ori_img_h 590
+ori_img_w: &ori_img_w 1640
+num_points: &num_points 72
+max_lanes: &max_lanes 4
+
+TrainReader:
+ batch_size: 24
+ batch_transforms:
+ - CULaneTrainProcess: {img_h: *img_h, img_w: *img_w}
+ - CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
+ shuffle: True
+ drop_last: False
+
+
+
+
+EvalReader:
+ batch_size: 24
+ batch_transforms:
+ - CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
+ - CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
+ shuffle: False
+ drop_last: False
+
+
+
+TestReader:
+ batch_size: 24
+ batch_transforms:
+ - CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
+ - CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
+ shuffle: False
+ drop_last: False
diff --git a/configs/clrnet/_base_/optimizer_1x.yml b/configs/clrnet/_base_/optimizer_1x.yml
new file mode 100644
index 00000000000..f35407e1edd
--- /dev/null
+++ b/configs/clrnet/_base_/optimizer_1x.yml
@@ -0,0 +1,14 @@
+epoch: 15
+snapshot_epoch: 5
+
+LearningRate:
+ base_lr: 0.6e-3
+ schedulers:
+ - !CosineDecay
+ max_epochs: 15
+ use_warmup: False
+
+OptimizerBuilder:
+ regularizer: False
+ optimizer:
+ type: AdamW
diff --git a/configs/clrnet/clrnet_resnet18_culane.yml b/configs/clrnet/clrnet_resnet18_culane.yml
new file mode 100644
index 00000000000..f7e7acd34f8
--- /dev/null
+++ b/configs/clrnet/clrnet_resnet18_culane.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+ '../datasets/culane.yml',
+ '_base_/clrnet_reader.yml',
+ '_base_/clrnet_r18_fpn.yml',
+ '_base_/optimizer_1x.yml',
+ '../runtime.yml'
+]
+
+weights: output/clr_resnet18_culane/model_final
diff --git a/configs/datasets/culane.yml b/configs/datasets/culane.yml
new file mode 100644
index 00000000000..79e59e3ebd5
--- /dev/null
+++ b/configs/datasets/culane.yml
@@ -0,0 +1,28 @@
+metric: CULaneMetric
+num_classes: 5 # 4 lanes + background
+
+cut_height: &cut_height 270
+dataset_dir: &dataset_dir dataset/culane
+
+TrainDataset:
+ name: CULaneDataSet
+ dataset_dir: *dataset_dir
+ list_path: 'list/train_gt.txt'
+ split: train
+ cut_height: *cut_height
+
+
+EvalDataset:
+ name: CULaneDataSet
+ dataset_dir: *dataset_dir
+ list_path: 'list/test.txt'
+ split: test
+ cut_height: *cut_height
+
+
+TestDataset:
+ name: CULaneDataSet
+ dataset_dir: *dataset_dir
+ list_path: 'list/test.txt'
+ split: test
+ cut_height: *cut_height
diff --git a/demo/lane00000.jpg b/demo/lane00000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..01f3d1db1fee6eeb8ceb9ee084ebd4a666544061
GIT binary patch
literal 263368
zcmbTdWl$VX)IPen1b270;O+$X1(qNoxGe7OK?93Jg1fuBySqbRaSaX$l0U!qt-ANi
z{c!K?sX0AUbLQ0ZRM&L(k@>g!ZwG*-B&R3`fPn!3VBQbFzYTy402b!I_Me3PuffCp
zClTP`;ouPw5E1|9L_$GEL_$GAL`438jDq@Kd%r^afQt6t&3{Jz_f=Q~I5-4UBt)eD
zQThKQ{`CT|kzv&kt>Iv30I=9FaM&>a1_6}sI1&B_-aFX;Z7{HK@Cb-V?=U{RD>Pxf
z!-s==2aWKK_+8uo{W$<08v%!!O9~NJ(*%je8ILl&ICn+IW9kxuf=WP0
zL`*`@z{teR!pp}mASfg(^GQ}tUO`bwOIrt|`&m!l%-q7#%Gw6(>gMj@>E-Pc`YkLx
zA~GsEF)2AEHSNdG^!$RtqT-U$vhwN=KyQo0>jpZPrxWINYQ_tHWgG-sCUS+Y>Y
z2qdbvCONwK@zLH-Jg1nND&ijV<5KtVrb#$FC@@ELc*TaTKqOwknQi0&f&gHkl#k8f
zbXAO{!$5kbhtyz6RUO_0={Ezs55JUzv_zFIZLMLGBbQc4pnezl7-MIjWX$uMz~umD
z*);R@6rGuyYQpP+UMu$=D>;5=-kTiYXi@u0A=~6j#~0x+a9`b$Kr4d6Q4V+|YGcx%
z(5{w9)EyrKaV~2LIpIU1$c?N@!o`6>mxjS^4_wSrsl%r%bg6oqG^_*f3Jl|pT7OoJ
zCJ1z0Gm8BvhFf4s9bZjIkdjYNQP*Bsmw4>y*G_dTl#6^jqRgq0qB9@ayCoh9YzQg0jz!Fv
zX!q?%;qe%G2<;
zNEPOnux;(lc-~M~w4D^H-5^miP-Sg65$lvUL(dhfwqo&$AG5dEe&O%*(>3gm+NJ%S
zbFg*WX3u8_Q*lr}$Nn7BreEj=le+HeIL>0=@KWcI29m*R*EQ=@IQclnsQg|ioN(&{wemRJVc*o;S>Y;
zMON0GtWIERT5!sJDAT3kF?F)|>r6}o8h0jV`FeI-$yvK!JE*Fp&)Z%YFMIirK2Zqo
z9t0YtU0rI7ksFp?LrngWxexHZ8*->zF48Jv;noUm(9c$e&a6)C=tc=N^@JVG(|b%P
z7jE$e2FY&`QOe>U8ZrL^g!rDur)Kp217Pi>vEOW*E>}^^{igG+58Q$6F1OVER-Ww(
zH>E3c7_$#69BSl18&NOn{#t~WQ7Jd4}+u%jwdz?_)V8hKm{o5(hZ)Qr?x!oaek1|5|rDdI>sKUz^M9H?I??1UL
zU{-^uFs>k+eg9YB7A9VTq@?PY1S!g1qTxj`PY+D+XC}cHoLC$XgItp_bVZK9VaIMo
zc}@2ZfN$A@doNmYLjI%Fmu&?BHNi(*Zm%TMVmha2Ui{0G)jt4&_-oS}VRHBk^kjuS
zUozqIQ-Qj2>q-s%%0$-8^L%#7o%(>p6UTBMz`!vk5GH0cA@*^baB*>r^)}OB6JDEK
zczVk=`P%W^|B7&M_aJTn=R`}v#cIAfUDO}DMNR^tM&R1<1$X&EX44B7O6Gv_y3||+
z)`i>te4F#UqnpK6@ZIJ2uKM~%#=w64*O8{TCa(`eJvW?vm7>3lk>zYmmgPc>-H#XL4Z7Y~)H|X%$S_(sS;On8eYBd4av_
zors1)6QM1`p_KZeL8Kx0WQ2GqK*6i#LE-6smzwRUFSb+)8|u&Co)h?77eXiTIOKiU
zl7IR}(w5F1DA!Tj8W!kFKGh-1MDYy^(;<5)1FG^1?h;z6_h>Wna>*i&=H)qv<2u$L
zz|_+)qh{}^Pq#Al#ocON;tJTazR(-k6alH^2q4#QI9ZpiQ}+F{XT9#SYAT&A|LFbp
z8{ncoLjiXk-m~L~d-BG4)m=cJT~ur&?`q*nJRr})X(9ds$@&XY@l3T`HS{m?YnFt{
z8cV|O;6i?ix@y_9`@dBLl0(8i7^$;{LAKYFMa+YcHK)0q)TOq&LDf9i-v`^rIi8Q>
zwp>&)7bdJQPIV4n$_jRP`+
zdQPzR4-%rNAmwt%$Lc!IwgKbq5*K<``p>;kg{mg6FAj~ie+X$v!^EB#Z8`P@-yVE*
zp>MI}FQ12d$vb$#HY5aj1dV0QG3rFiO2wo_62uRxq$)q
z4@@R8;c~PXg)YP{=&n?uqkPW^*GRjR=X=j4ei9-6fok)9ibY?j?M3uzQ}jRIAWsUQ
z+V51ITyPPz1NX${s2ud3sKKy1lVoJ`lkC9QqERG{@$d-hH^U?o%+$>@8NLeq_e
z#rSP!fl$dz@St@0+kSEn!*tFL($JLa&evgdIB0mNJDZD*aE!}jg3woC@`1^iof~2o
z%;g@B7oqBtZ1$_Ah-f$8Z`li)4G>>%t;=@daCl$im0PF-`yyeYg$^8j)XCSno-3ue
zNRod5NT8WN>aRhM%bJ1-_OQy!fyY_7Uma9dEU@uEs3?ou^(_w)Q+suLwDREQb3dS=VY|j9Q-=g6RbAcNz+0x&63?1ciN(dBV>Bx@qXx
zzEB|SzC$bM3L?K?TmHMysgU*!7)1d
z0O@G_B2d2lhJfD=wj|!GvIg(Ty(B~fCoo#8FNu+}K4ewelcF%yg`YxEhX(+kJYRRo)lYTkN
z4N)IOs{tb~Tqg~!Fa-T88_c>oUlddwbsHQ&Pl+SS?2BF!4Q(JOHVBijT)J31u;*9`
zxCv3FYyF6eqf1rK9}(WEQ{c7UZ=0biZZG^cEQL{*zs>n9mm|(?<|G^(b%9U$LRy0CNf^o`s%qgtCFM>{&|Xw4(mmk<5VfMG@3a1~4_lPd
zIIV3=61KOw5!_c~4z8l^Cb?Ek``(yyQ6;|p*)n>!s_BdH3G7VC*2iBVI&TET@E#T{
z6Dh{368IcFxnT5b++H8ot!;emYl2W54!#T>^a!c-8@5CK(d~|W2h`*^{aL=BO|7D4
zJ8n^Yjj_~=?A7#Zh)s4S-@Og85~~fyGbm|ss*+t;%?7fm=3dlG7$bR6_PN+D;IGv(
z7@D#$C{U8h6(Yj5?#Zx~lZ8~#;ehi^uMEEig~?AhSGO6SZIg-6xTWAJbb{gWMMIFO
zuxyn?F<}1cGM256?bs_F4Yw2I(5;J|5x*Fe=>N8gmQ-_Rq8-~@rJa!_c9)|1lM;Ft
zUkfYp-FMW%z9h<`sl7Q~SQQtoqKphn-X}f^oKYJZZ;ew{k_UpfYQbBu&ZmKM$ICV}
ze;J7Ob55g3F5Z^((GgR4@Q-|(ULALP%rkTH)^9=M=w&tv?fMx1=j8LavufV67;1;X
ze7reHGtGK{iAo~4X$GQaf>QuS!e-JxQq+qGodfV~TvDJB71cs@h9d!w2SR
zgLx>OL=E+%xFe2{w>DxGV0dJ>a8OYoSLJq%92=5#78uL)EbX34h~2U&{et&E#<-XM
zl}qtVlamM@c>b8rygAG*o1B$~SRB3I0xlt5=N!(iH!A?eikB|@XrUWQOHx%-5JJ~r
zE4&v%EJPVK-eOXv6^;Y;{*2&b%G^uP(Nj1Krp34$`^db>1N@j4mr(_Tcy2t_MW=9}
zkVOL;nshH0pr$4GM$A&9L&~HFt#=aRL2SJ{-Nfr`vUT|*TPzs!3qSc=m
zb9&DO;7j^r$<+_p>sc{%-tD|^cl+4*5>Iah8c{TIUsRi1H}C8~Oas
znA@Ix5c|Csc|)p)ggb`me~SGF@E2=iysfI@c|!?)2f9!n|7Go6MKl0WmW47^p&RFT
zeBfBN$(Ov*MAl|~u*f<6u!5-~Lr!3Jx0CppF9!$!QzoH9dgxgj7iVya#c0WAEf(
zkn78hjn9{qdi~Ya0fcrkHqK#u-D4!!#7|r?1uiyKyutiQpNADY(ocw1N3SW?%ozl)
z@|=*sz1gs2XdYwLCBV4JpsY5&FlSu0ALeI9-6Wb19-^Dt1}Wpr*dX-K5uXH3WN68a
z6gkt4yh`vFvN=I-Lg!#*3IvhSr1_!{ZV2Au!h75xAQN~L&G(JgejLikNK}@(b|v)6
ztZg2H+Caj?iQ4uTGTq{wu>F
zV9<-a;qK!NgJ>3EV*`XztD8#`MB
zz#%nYvgf{|HP`CCZ>0D#f;y&9B5=Ii}E%6-mk)TDg7i@Y+msn45
zeWV?uJ(ayx_`FbTKTm;j7}<9>1nau^Q_V@T%r!U&K{{d72ZO~z0UCSBQKC=~`(c8u
z5?@ntj4nF}-<~?HE_tDO4`M%Z{XK?>U$>SifNnN`mJaR)b&R#saOQ
zh2FMKn1ry!@Mm^75`cfp8hoj@Tl`7-YQ7Kx#9)^hyXA+N_$*>>{o}MmhJsq
zua&jq46oUY&8SqC2A29rc;tva=iOhF4EOq6v%9!n&W#6UHn*)n?wRbHNJn?w}kq_w$R+
zHkrHBch;2>mET;g^U#GbEP_jWU^}!vnk$H;m6Pu
ztN~dWU6Y=cI(!u(eo?;qq%vvE9@R0FXzLhSW~8Ks>|e^^@ENka@sWO$oepI2n+E)0
zDO}*^UuIVN8#{!(*;4~9l^OL2?yV3vx_urPyU>y{RoFLeC+r~2JQba|^DLEgA
zxm?Xoq$(ta8qSuP-hlSuAc8h&RwuG?jjmVwY`#YS7*b9=9f(@Ohlf3&WRGXuU?~!L
z*m!;cWHa|D8L_6NG9xNcWckzz+iC^;Ei+pZp|O+-RDc?RySC=r`4?hP!ui(YXXdf-@j+k=iQ`_uRlyA1nhOffftVQ*iiHc<}am+5djJ1^(j0|pC#TkoH7xOZi
zmIc1DU%NAXm%_3YHry_Q<*!Q1)6$3PaT8{mY>?Bn_oG=)g~PgY;8d9?KT6uBcVB->
zJ02sz7L)gxUA|}i5VpR&TX*WSOg{c0pT|YawN8e5^>3frvE_mCriD@3tEk(Nq}iMAmHUuag?#ZSr^)Rt>1u#H;5Fc9n=nk{M)
zePl-FEQmR(qH`W)u-Sb1y|A9S{OrYeUp|MKtOw8fc<=&KT#{Pv9sP4;iy<@h6j4=y
zCXU!F2G~v>KiK;DyFj|9z?a4WUaO>$4wCdOTFswY2j>Mn%f+b7i-yuLx!=SwZ2+%y%!R&UnVVgx4j=A-8e!DI2ip
zkbq;xW9Dx}zmE1FG(kkJF$*9*(59R!6;-N@XJmqqm~{mT7Kj5$yfr_#aBVLrZztVJ
zYmO+uUE6SYVgO(>#^WJ06rgMNSEa|)B3q|9z{P8hc-9XchDhECn
z4&Q5Sp>s_i*Ia}TStrW7`G6hQ(dL4*71uqI>0I0}1PMTS?t3zhtQ*Lv-2+k8oK&kCYl$F7k41P&?U^?;w1
z8DCUCz;Ku{mSVOA##da8*K<5=a^B--6i*2)$W|Eq`POY;z{%oGwNu5TclGcSIyUt>
z8@{3GhTUCAO<`6dL6=perar#2=#;~v#b!lOz-O#h&q=($48BUh2J@_#MX$WTK_y@&
zUTb!zEJXx8EDjb@4(heC;{b>Tnh?+8r*Ll_eAfX=vdZOa-j7kjDi4@$K_~55Kb~cq
zc7~N{b7^z{A$l0PPbs#QTEd*Z-b#Y|*z5J3Qdb-5*D0|>S%tNqmEFVJLzFeyQ}#l$
z7Nmy1&TF$$osSfG3@fb&k~5+v7H#MpJ7y@-H!Gik^r{ZN%uV4Uf+%vMBOJC@YlI`@
zp0ch$P=hJNlj^Dxp&a?1ZZk*POjm@7*xaNTwm!o>3X>@hBdn*C-Cu;GZm-Jf?Q`x#
zdor#j6$U>j7#xDak1=%|Th~bSDy+35+vt8)mR55smO8MH*Igo48B`HKxB2Cx;;$qv
zMsCX&WZI-g1ouI6wypx`4$%p6)Dg_o#@CGLu+-s7OMLQh;A|?EpHsWRP*m6QRi1%adXoJnS$H#
z-=7=ex_Ra+Y<)ZuDmVTCtZaRF1^6!oL|M5
z4?@CQ&fUmihvD!carS^-RH|SNrVot(zz)KWS?*quO1=!Mq9c>euyoSQd?Ci90C!vf
zEAm{4lI}O%O-R|H;5uXptfUKCRz2AOIlsr9-;lis_xRJ$<6_+A9qhx|`3y{P4yn{x
zJ44&l-Hqk<)=S#?KCnjCUdN_5Hy)Cu^^>cEsa-v?8`!%&|mxPXM{&8ml}P0>NhVaYPpH_9i6V@Oup#Gi6i+@JY~78r^SjQn0Wdg
zcv}HK`*jo$$11e-u-%eIpF)
zGwv}9Broc!iEMf`>u+&wUTR)xj9|&~_O9w&H6wGk!ax7}^?Q%y$92a~+}3{|L;So;
z1`kNbvsW~u1@^H5uC)3dp*=L6y3Ze8jORW~=*VOXIa%DRflJzo>9rF?*nWk9LJPzYVVM_g@AEk?`6d{^YSUkHPUzsYR;Y
zTkn$S>%r)xzM7GY=-)=2tGe6bM?+?}-|MddxNc^`j2%V4=s)>CyS=S`{a|(@O8G4F
zls3+d=;dx(SH=pE9~WPxXX>gA^E#IGAy0cDz9NYB|5ZNi>N^pLtj#+*AlV{SE17wii9r!p4{Jhdj2r`wY<2s4gA+mKacS_VGJk*2jSW
z%8n)Xv)0|lmf&otqB&|Fr|Z~`^N85ll_PYzX!rFi)Uu<@O9#tl0o>43lFS)~78_Df
zOB*j@g}>ov_1Y|9^A)o0Kb&%n3AMjl<|#!|Z_sH8+9Tu6CG8tlpu08Fp$pf;-)d#z
zx*KMloaPbM%g_v2ebw#HLBAD8E{UxAURnYwSDNigsaab8t#wlK#E1#WP`~DHx`N&q
z%9k2c-6tlE{3KrA@vX1VDRh5;A=X%}f%NQTenmf8?9?mwQ}?Ykt&4aTXL^KtJ|bPk
z_=76q_e;QnPGI#avrKEbyCuRZvM6OZ^Fg}p8B|ru5gW0nlQr%unC*VNy-~VLGL8Zq?
zgG5(}LhB9DLf{abw7IvKx-=~oLtswPwR*0aE^;CRcg3&RGa0qBU~Tm^<7Fw@NODn9
zO$kJjH6srM(FYwo;z8&2!;^3r{$Oi5CqzuYP{qbb9~>{&ArkdSw~9bKF9V=uY*tEg
zg*)hJK?%Thqmxr5+dFyGx2CDM}|X+GC=VI%}ea~u&$$0ckSZcnSY>6
zHtCf31FS};c|`$i4O!#>)$6{wG4ei^5MB)dq1?AW&6xuN@HptI06KFQbeD8)CaF7~Xw52BV5wld8AyAD7oyKXuSc3th6vi6Nth0xMoN
zqeC>qr@`)h8qVv
z8D2sAjoE8e8!~erxAJf4QJMLCdsGxmVUZ%P(LU3Ie*O78OoDfLQ=_HqeDsMb9LVLD
z+a@qR3qK#-c!$yc%Lr>4^sV>Xpj%l=x1FpPlpq==oq89pZdI7iWme5WK#FEuCno#!dhq*NAv`
z$#je9tXbKnnV^I$#&m$ESP*X*JLi9
zE&AK&1q)9q8!ra!99D!vWR=?@eAs$xiH;Di?@Ja*#~#blNadIT7gJ|I)@G#n{F0h2
zJizPGtmdJwmUS+YclM%j3r&W)T23rq8~SHFe0(~8;RWWCh3ksL+K6%<4D^pFjAnlq
z@}3+24r}=hXGY~yawxeJz|5B@44};`WT4xi(8)6s4BwSd(F)O&K=Ht9>RZvlU2nlK1IC=;s{6
zAJOTyghUqEei!DS^>x({v5M}T!YhU8?Jaj12~D}Lgr4KXuAxQiIkAdEr*+K^AnHxv
zKR}ocGOabI%oxjd6U`Ab;sqe2SldU~4k60&5<#zd&VFt9*?A?ErutK9j^}+@FR7;x
z-Zsq_J*yh1HPfIZ_u--WuW`07i&9M75V#HRwr95pbvsmG8^^M?WJ-*KS1I)`F)A+`m&FUWc*n$%~
zNexFZJg{7|{uwcj9>E|&4P=)LcU8q|`+13f?=BCT=F}9!zc4L{3^pm@{4RK=KLhK~
zi~keagYy*Fr%yf-*{NQ}-jq@v%3Jh-tPZujJh!2N3-w*{FaZ(~lxijs6szEo7nG^A
zW!cV0U?leweNlG{dluig4ZOt(mmKbTkQF&N_%?{qbbS`Va-eZZsqnQ(7Kex_n*F|@
zF)?dQRIeOpcP*bpq$XcP*q+9+IF?uV^Wn(fEy>JqqW}ZV8#IUb4s5b7SyOd>4J~lF
zQ&@wBb93FrMmc7MPl2+{S4ZJ@6~5@=)71D?DVeNfnpDU5+5n{a@V<*#ov7Si82J;;
zA8S%@d@Cg1%@_%yx0N7D^K9PnfKKfbT-?yi)LwuUL)(ep*r55@Cn!HVYybMvg)E|vswgZF~Oh=8fQhrptWzOLIvZpyouAw
zE{hAoOvAPogk}(+E*$_PB(F6CyaQD945QOw{hsfl462K5;Vc6&e|EdgG~-RUa6s3&
z^~`QET@%V5;}M)1Z9-!*@IYrD%$G<|W0pZ4F$yVH?pAaPjJ#GYGCjCFS82daO1Fs|
zb=hJD{<2v|Hwf8q9(zx5VwW&ir0lhze^VGa(5>R_fm)n^F6@>hsRM;O(7QPSudJv|
z_W8ocyYox&>#H(Q*XqdPNc
zRwVh@Icq+iz96H1);;4EU0mL38Qoex>i(*UhqKX`lbrr5O^Z{=aD0peh|MzG^NOj+4m9gE)TYJGBRAy^YM!W6oAkWpA+4!
zxf9Q9640Bh&TA%6@6-?35App%005UxUC&@kc7toXYMeb#liwSG^jvXEwy@UXguq$A)yu~(bAZ6+
zgBHMBHNqfuhsR##+?EMVVXvDtU%&wJgm_KCpp<8vK~MAeu#X4mp!>
zg|RmjIpoSAPorN&iex9*U@{Jo2JlicdT+eEj>Eml$9#Ab)PzkC_o-T_!#_aL=jN_k
zF%01tZDCzX9)7b$xtX6bnB$2yRr{m~#Hv<(&ct>Ym_7}p%#~UE-_-_UBy&%Rs%Ft{;o#22
zzOOQ4h)GwOZU?odFg{BUOZ`caVj4JE1YVHqgp0*JMR)Oaf}O@oHc4XE%E`*1jXHEu
zZqRXavX_acg|yIKe74$stkCS$QmR|J_yIbL&mOc@jwDe(h3!((Xb84h>&|ms30*CQTE%wdU)W;xxdro!MLiaF9U_>+-&PVW!{O{DD<^A-9yS@V+rC
zLWsqaQf?!&%VAm9F(^NRGHTWH9xGY2)V0CIKYPgg#gUUl-M|etPEtEqwitSkY7~rs
zN5GY>-1c#K9wT+cR~bzLXeIOWZA^QCNwvPk2RcQXe*ip}Tnm$9sh#MfPO~d3$?K^?
z=M!6dhwDVD`nJ|Y-k@eUQr|EJWuUxCJ8dRj-;~|P78igb#e@-X<0T%RCj2p>3xXtb
zZ8i8SbaETJ%Qp`9B>odxyBJiSpu{1RG@Gi2em*8H8!iUJ6p3%)$qGst5{flP99BO|RhHK7YgEnlS07%uF#MUs_IVrW2O3bEdu_eNxz*54E_
zq_s<*PXma;^{Us|t2`})2TCvCIf3V&bR$t?=uQpY^zKlcijE~zHhT#`ApeglbbI$PB5Twv{;xzeN8e0i#`pGIrdI}_3=?&@3?r+gv(vC
zM?7SQ2BdKTP`EPVF_q3sbQHmsJC~r()$q9^)JV;;>bMh3Q_FKLu>&jH#nG43O!({e
zYo?W`uL~*(n#DHQPks(YBirE#nYkCW9hwl77g-@V>hZC7UlPZ7TG9eWyvXN1a|^*!
znEe2}7(*=N2j!Lqw=&oiSr!Gm-Eg13B%efg;-<4dRqOms5ktPvm)2h$FBIE?kJVFC
zAbS{6%d3^PS|Og|-jc*5i^vU&F=uZz+cY;N~3<5qa)023YP^h!9;P8oJL>h=rMhW`O{O_LkFOjYj0
ziE^nXa>UpJUTn;HzLdw$co@dMAPe1?FKp?Sxv3Sp!(T%<->UQzQ@X6o%x-z>!Iiy4
z6~HL;CDC*Vi%!`G1mrFUW+w8>yfxmZ1fyYI?OmN2l(=R!&32NumRxF}%qUghCL+oz
z2qZf7T9Je8=5&D+^D!wS6iZn(Pev<4JZnaMfv;H=C-nsLe61$BsheDXcAdOZQ8?if
z1ki(|KsqK~h$@$wV`E3WL2^#cGo`!?vsI?qp*0*;{?56!4`U^^4+=HDg^rsf40P<1
z=>Guh#-!YaqmM)tA1I*|kN{z-dJ3G4W`m5~?W)se*
z9*+OjSbqq%aoBilz*$)?ZKJRjoxp;dLT{6D^8DI5+o(?XWWM@kyJ|Y4l*UkRdrd{)
z)9M!fAcy$=_ssB9On7&EmPW@imW;&NI=0WIv8xZf$AqlWQ}gbsl4m$x9|s??yX)*R
zbI|8M;fU!kUO$xf?r_(AU^BZEffbhny8Tp9L%Odx`VMNVp_OUoQw%-Pyfk(Ko6}V*
zlqQ?m<}Q93gE!4jHcx;o8M}$jqEvzea71=RM%m6DO_bU-zQ4hG$4l5B&Ty4;N&w;>z<*e^1u=BXW|
zNPo1;=Nu--=FhE&)b(-rF$hkbkSKvJ6NnPRSV?
z`z$ddTksXIxsYMn=>(JPB`BESOG-!gy8TvH!$2A3qN5@c(c(3xx7lnm_1zV~-{IYzJT0!pScnM+@HT2%
zIW-M|Hr6vsoTi!cYPni?4}{*x0`PG>epE)`mh1r;m7>M(&J$*r5E3jYJCt4nQNx9Z
zIC!>`M@PCW7;R#HzFTP`a{-8(DSef2TX0n+C`_#5HKV1tdm14wKjYZ0QsUs`B)u;K
z)?rMg?&0%t^g_#q1Zh+Gc)7!eNxM{DFT?s8WAQ}N1d9-){*)tMep3$d)&A~Q&8_M>
zp1Iuo(Pf=Q2{yoQQPHOgA!3;q
zWi-k1*|LURP=e5@JdW-~up4IX
z?@4*;&o8TGNt*C+8TXZB`*Zg6?_UiOmO)&9<R0e7R70jg~6+nnC2!;-|3W`ah
zccR~V)e})ks<%SI7kED54O`XTEIr}O3@=3?wI6bh-%OiAQ(-NEe&7$rB=VD9*40}!
z2HN9m7eUfLqxY}lW(pA9W;PZQz$B1bxXDYCIhbci;J@hACU6WF?K)yiR@UG&7=YU|
z&5iv|Wme_nHMpEygo4V@7oL0v)+NGuKZ}a(R#Bh}_pghkE_)=vL%7b74%J;0MbnqF
z$$xS2!PRL6gZ3A=>-5!=&r6HG3HISz=O?AR3>TV5<7M>?p8+FQf4F@eFZ&`--$B5<
zl3V>v4%UP)y`Ze0pTK11;0u?hJX?WTdTY9TfG%Rb%hQC;nlUIjo@YiM;;YW-f7PRZ
z83GbwWabD%5qIr_=9~Y4sH)Sv46>L}+*78m4YN!dX5a|+iJ~nH*|e)VnU{uK+e6^IBZf%`=xTG=eT33S)`&^k6##W}fprZL6^
zw~v=$&V=$_=n=t_PRYCJ|9iYb&;~;yOF>fWmPDPsP!O?61H_pnx0)cAdtbqs1i=ij
zsoMx67b;66@3KyemTNT3z)O^8i03e%as|r5#P4mtN7~AcEWLjlCMG+$t^WpgWmRxZ
zzy<4r9_`*T^vU%ZSH26p^7t_EPUN0`)D&KI;h3N5|d(>K^vZf;JnuXN#C`+?GPLeQCm5K#Tis
zj8xW!j9t%-H_MKJf-tE=IAcqNgw0L*Qu~u$b6(&J7*VO4}?`2)?9=gZku2Icz_+P$zNMyl$i4fwO
zZxk&0`tv(o9GxlKt~@w0{^xI?z_?JU;r@<>a+&jo921pIlrfipkeu4=#e7i;XQ>QN
zTDH1sb$nv~BO=#3d}|gAk6VQ;!I3@vp~7#U=ITr7x3s$ZVcxVWgEC`(T``Q6nW4!Y
zI`w+c!bVw)?#K0)AA4AML?#VUf1?@`5(_GstZHR&PDocaCjQ_WbNyT6-ei1q43+x{
zFJG{jW&5*n39Tosgi=aAXbfehiZNNk*jX<00LR-RgF%rykXmyJ>skD`h`J-`mBnjU
zYa$(Dj>C_82}#Bnh4f!SG1EVfa0nj>t0x@QVc8X-2ElecYzWo*qmbjyKNE4m#Ux_uWeO+kKeC5`*jL>*L(%2eb*<$p>s|`aGj|
zXObk?r=JuGoK|1Couuv_D-ynhK8Bgbelsb6hl@u(2AjA{jE?eF{`ixqpP|gFI--=w
ztf_{#Oy?6bD!G+#SK+kH+ZhzGAy~FAqxGF{3;L6Dl;@G7
zBPb?;8ehO>D+qvFK`NptW6Tx5yPB*!Hy5k|jnK`~#&skp?d)AHf=%FTG!^E*%88DI
zy6N>M`09N5sfW{>Z6X>ktCFY?K@80k*^8SVA%DtMQ9xgXFm;$}Hq5rCf0rjR+zB0^
z$dS>n1;f)cIH88f7r9>Z@JsEJG?5QR2UF6*#Vn3v4A-T}xh?+r4iD*52o>x1!U+BOx{bbFaqx#GvTkuo0Ym`Bf6I>We%#5Vk2Y$Jc@SHO$>
z6Fv3y$bbay0G?9s)~XRPrNU1qNjcZ9y9Yr*929EG{*!E8o#JNm8^gQ922>gIJ<$|F
zXfQ4{Ys_s-!lKVIW5^rdlgJRsqBz>Mc%1WYAa*D9bW*_FAJ-VnY!v)83u)K!RJi31
zgDPRcVe&xi+302b{jHzLr4`nu$X$pmpVZ|{?(praw7J%W(b8=VcC6CZbfUf&%h%S>
zhX&wfVuTdl*r2hoq*!0LkaNNGU;nD%gOAT=PEp(|9a6))DbCO8&jaR$a5~v2`Gn?H
zxgfDf_8n%#T%W0b-iKhPnPN(+*~Eqj?@1I{2(fOKN)-w-O14@I%j?tr$R?UStyI+N
zBNscxc~(O?xzHOcsk2ukfE!=iaJVezo-7FaJCBnXGw395roq-rfuVSg!Ln#TN_PJ{
zI-@>!;aJ3m&hC#_32W^>#9vmW`GVB62(sYGjpiGipfj0$-0`Ur0t4qDf!csuMhgDaoWp)>xy$iRkB-w*NVz%rdX?jdeqvFhqSK{X{l$aT~BE>
ztloZ^s@a@eY4d@BEKU5-{H{J$1aVLzWK{87v`1?kk3&+VwqQHMI=-=cay%*G`*>s5
zQZp=b*@7)~Xa)hfx6dk6kR*hX(UaAXtDQxIU8(p#+R$C%8|^oTU^@7
z<=olMx!W`jIY}2ic?Y8gQbCtU@kRWagnkb3&Y@`9%Of_OsM|tfgIbM2C6O_S)-VS9T*;zOKtxE2)j+K=oSq6UBj7mohH%(?F@|Bd)svb
zIV6)TDZ}K*pf=$pJ8O*eY;)eUEj2r<7fYK-O1dy1l=406IYF|b`9o3D?R-b1MQv}Z
zn1TNQO3?MaIz^D%NXSB5LQf6|ZdClPq^klLihhT!KE9LbTC{fd+TNHHYF1G&DJnKZ
zcTgPs(8vKJx)4#Y%4;J2>R$(&pApzvwAv(+H2SukYYAu5
zf;)p+a?h7THz?lByr_JZla2=<)p2zh>cXR!Q$<4<%H&{*tM-GpZU?nRr+ra8*4dNI
zGVVAv5S5gW%}m^1oOYy>=2M#1oUBZ}?MYREIn7TC5ZoG}103>mR+eA}K{=|LzN1?k
z+Bj0Uaxt2_Hk`8j!1nK4<+iCMxf@!wY_-One6}4uE0$4rxvG>}u<5OWAyP(axrI+*
z`BkZ%WF@oBSKaMh5W7YbN-T}xz{;A5k%MCys}9OWGf}uX&MR2m893aB1a%Zt%Gm3g
zPcJ-xD`h0eacGPoegL4xdSFzrpbwJ0s%@xGPu81~K%&|y`B)>4^zj}D>(;C^0eJ`X
zrx-<2lMvAh5C}COg|XIwkTHss6)BK8s#AB=%{gCCRBfY*u^KT!$7*a*t0h>8cCIT+
zqOO7!J8_DY17o3~I({`QZV2QGW)p+Hgifca^r1R_H7k_QPtuzh7(XcM=|Gg6P#^?y
zDd?c*ih4qv@k(OH+@#gfT=5&ReJI$6s?zcf5Avl|4c4QUMW=K%R3|}5;GTo>6(+&!
zPDD%{$MmUjmf%Mcf(}JLVD&WNAK^>o`_j7ss<;>w^pAI3ihL=YigAZMz^hS;&~BP0$DbGlbXzS?EQn1FN5O>1yQb6Met?@xt@Z0EgZ
z=%$U~R?)jPl2UmtDIVfMz-<0CUIaTcntgfhjk+Bws*z&e?SbUfruBj0kLOg-jP<92
zI~rchIniKVw&8(O+s@Lk&m7e#)U$vp%mBtgH4>zf&=l^?F((6xu?c4w0r3$aFkxcBp}N&jss8|6KJ~l@
zAo*NK_yP7V=-)m`GOmkPUMJQ3cXuX*;0xcgY4N_x{{Ryz21AT)iqjyhvJBwM%yG9L
zJF6n>=sE_6;XNsIpAXuYAX3c5q%S1S$1Nm|#I%mbC7p?8B$7!q`%!OWX%3yI+C^_|
zZwke4Zv
zeJ{gzaOty0E#{+ijP`n6rqeviq;7whg+v({cPgNeGm-(?FtQ_*(7n2X&eB~<_T?dh
z2%1@?mBOkbf?1f7000bIhI{vw*xeWvC-5KWv?sR{|{
zJgQWIl#RnIg6+S6?z}^NZ{mN7CfefS2NK!ncC5zA(Z)8jgO9aLqyi*x6-uc{(T4NC
zaWHqnpup)=knB;=yg8zHTFdNO1%QG!c9K_TFD&vmEhMrwURfB7mvUJ5HE~^(qToFjIS=^npqpGQ)s0@lT21ZQ`BGzS}Co@4I?1PHBSADrdhjHnQ}%2L3azf;0oFw
z5sZw9UNp`HQMfHEn~&jIlG!K+by1lW3BeWL2Dx-Nq@DFEq$KqeiEc~u;-BXPB(qX|
zt{}q%pQUP(Fr1@%7E&b&Dc)Ekii$ge{BS;|pCrLY4N&0jUpi4Em<(i^t8&q*%~TtgFlQB;zCw)vYK>=RHc=YBKsjNyTQ)+#YIMo5pTPz|ZqI{&iW*l0HJ=aopFV
zNphpat5G&XI3!eJ9x_c#)^{^LLJ&PoEY`+2$?J;QYR+jpvJjx)De7tR?qh+0#~7%_
z5N~h8nwcdkM#b&UYi?MhCn-IOvL;_{Iqz0&pe0AWD1fwu$mE{&e%9C!2?XMlszxf~
zg(=P4i6USy1vp!txT~Vv4%9L#oE6P-du1o0J7Z17U6I<`Am&LnfI8I1`pyBe84Dj;
zj!=a@ZapfLvMPciU-%KCG3H?9$qSQQYOKwGO;NbMmAF$;Nh_T42jfxfXFH4WTf&k~
z_Bg6nn~vps$)%9;^H=bE8$uu59kupP@Bnv69Bg34fC#xLoIT&Za>;*v0lf#qiL+*2e{{V@k3J`t#pGw{~
zwTveugnWhTiiPqH0I8UlIb=oSl8KDeBr`^F)tmAZ?v7UFEs-3dqX2=DPXeQ7mv+~4
z{^+YpfE%FTm`31z2NV^`La`R<ovbv}a=<
zps8>{K3B@1t0P2a0e2Q(Cc%SP2+a
zC!X}OT0R3vz$%=PnsaSogOWa{`cq$Xs6|`>+Nw^-5fcS+Shh*xoQkNSRA35KO`L6~
zg6DvMR#uxU-9Q>8iyK2R{VKY~RhYAuB=_y~t3FxVmNd~4j%wo@3A>O?7@&?Z$vGyU
zu!RUZ0o3=Yb^+RyjxZ|3K>~}1WI{e(2`2)QRM<$)ed=i95g}On_4lZ-B+d?T)YM8z
zA*9H54CK;<^c6UOd3%8yzq&h7o
z2;2Q$=0C*8r8Z=iTbvFf9Zv2~;g9Q6gi+d!tLaYdnrJoIRQ~OmM{R>2;(uCWWx6lo
zYEq-PpeP-x^eaLYiHYT?spzG2E2HO0?UiB@LQa2(8h4P-Y^+-&(Dtc91{eoFjXWYC
z!RTxAMKk&h#f*kVLxdX24hqq4V3oat47KXW5_g!(nTMX
z=t_>5Zhs?I<+h1%!bVbkPZaisIbkcGm~=VDX>%c68N|lq?Q`^WBmV%`NMjArzf4tz
zvbiHJi?{upllfCkv#7ye%u~2KHDV7W^TDGW^KBi5{2%u8)&Bi^aX;h4#|lb)o|vba2dDlKrkTAC6{vB|Jp^NzsP
zh%OpX7E!!*Bi6G;%+3PH#DUwcDc@<984=^TO3NWWG*MT+&G<
z6ph)e<9O=jLw-KA^}J#*->pZpn&@fm+uX3Kin~zYdQvkwjH`V`Byz;v#Xkr}GAg4c
zw=$nB(9e=n^JDU-#Qsod&&;jVQwHTY3JCuIXl9*|K5r;z)D!%TYNVOX$iU6!6BGT+
ze7HZQF+#?H0c-=t>X7Up?nOR?Q*M8IY-5O(VfRftlG8(@IR^)&ET(A!;$kt=nk-r{
zn{GHb!Kb8g#@F;MIukig3zt>s1qTKa{Kk4up_Dok+4=2Qf%N
z`t$nIaZKH6u3e7xh5>QUOq#PD+lIi%_N;R0Avu>}Q^+G9{c5)z*O;IznP58{ipF%<
z)`rc!(Bm9c=q_XjB}oUku6{eWP+5r1I$(_d0QKtKuBRf5d6@ibl60GQHjBDCJ4?JD
zaC%n7mY@Rt&~h+C41zrmt#a1dkZcw`d)Ae$`t9|{YQf$nuzHaApW&B;d{LlDqMaJS
zVR^jO(`B?K&&>Y%DiI~S=O4U0E3Yzs7HZg5JkRvbphAfXY?$d+VTYV_m~P
zWx9Q_T>+n#7ctF%f7TJ2^hhs3=-}hf@m5P|_ttNw-`d4JyugWInlR|#^#xA^eP~mC
z8NV!9kKoUWpAG&av6I2t(?xL`7_PLfMka#p+F_m0$s2&m!x_N$U^tO;Xm}CDo)uy!Ogj*+9uNL*_-tA|ca7T1d^%gFO}CC{9P?OxO=YPSSO~6U7=*
zYn~g_mip39yF4He#1alRk{DzWxk+DoS3;w4UbQl1
zg*d3LCwP=IAm@RRRSK$4Ln%;{N=A86e6XAkT8?wZ7+~k1IjJVPQ}=)wII5Gn54k2=
zu&tDv(UTP@?nq_^B6o5*0--Yv#|@r()Go)J>Ce4HBmshi!S*##S7Un@P_w=iDfTsD
z*5L%IxomD5gAe|_XUB*68>Tz`fBjWFNC5#s_Q9+6W@<@0qi!jmeh(wD_Ny^YRB$UE
z8@XcO${1$?t)<%|ZFR;v)bdEbXrAVp+(1rUdG@IT#Ov~oKDC;+(SII)3YHtFnBq3;
z_)=-0yjgDnK|#`@c8w1qzY1eb$RVm3<-yKsmc*7)LJs9T4#ZS~`3b`wYb@O|oCe~U
zZdgADJOW6eq{f|%pR@(#X|Hi3kTNmfE6?D2RR}IgJk_OjZicdS%DN;4Uyb-U^`_q0
z>DtzrCWEio3!7(^C09urIiQseW0olthK@x%t0@E;VaQ-<$)7NKAO5{RWUX;IG{@t%IMm_fnrTd3zHLt0VBE!3ltbv3RyACX{fi8V5-PQI*&@vyVG>d
zQ%{#k*6bj?vbj*RTij=F1Q5WERe=D8Uztb*5J0S}ePdGarS-mv;<@Fzw!NKT(!4`6
zkKV}4+bwPY0a9#WB!FUUeAtukXDUNgJDpX&rE?Sci8~`>0np%zp|QLSGxqnY=kH
zhFwQaw+b6ak(jgG!5A568zrTUa!}?)1QrzbkgMRS1P(zMBm62&KJsg8rMQA(tc{U_
zSvVU;ME)bw?=Gc-#l8%CCe^L&2id&)oLlOaq=y#L$h(Ha%SR%DV`eNetm@I|ej%~(
z9iR5DlG6x;%?$9)n`O0~alQ#x_+&zGa7a8AB}gYa>S#O>3tm`4ui?oZ-%_$zer+9&
zNuDfnZAm1_7C&}!>GJMTrK{=p_7`Ku-W0Qj_WEQ-)Gd7A6Iiqh@|AD8XGQ-2Qz7Aj
zF|i8D@sYaFCX>|~2=0FI&(@?Mo>T+%6%LbqrFe@ysK3yM4PfLvVRHV-mO_Yh&epwq4fk4^r5D!sH8m~N3QlaL6X;_&+
z9SuD&dWwKBClsjQF$7Rzl6w-$fZ&cPsDvB?M)BxqSriA!P-Z#JVC%L$NmKtTE@43B`1o?3%bAgh_2#O0kA}cV1QP(bQZ-$HEn^nKm
z_aTxaF37DBvx#SX9ixsA0`J&uS9G`GZwTAyQa-1u>IYNSE)*^9t27}?4nKDiWce_H
zK3Ks%tX<17%}q%c8(fD<@Q$OX-^1d69Zek5Tg~R_dQ>b;wTa^l+2VP){{UFS3H%DA
zvp($7NjqnEC#?ooB&O~=3P1=sB8#HjqKBx!+v`(pNZ6Jb0dmYw09J0d;vG9f@*}$Q
zWVpL+wpLcjx@(361F>=j21ZzrtDLa{t4HC@OT>z@cF5g9H@Gpw=-2AQ5#;r@pFrB1TU3hW88d4EM&yQ=K|dhJoAhnGyPT=V8}x#x1nen
zcZZ>6u@cKXz+JUPzC@T00kUxa+6wb6+nWu4^Po*V2x*BC#*kMji^xArKrUKDL0A%8T9w8^6T7vkP
zlMBsUsyeITNcN{oR$(WplXWr2Em`x(Gm?7Mo6XEfK~$x;k(;f0bk>I?-Pu~BO%;a(`LJqkgRC9C0`0Nk7|NLh0SRN)XGv#SrE-D
zwsPI-q``>JD$!lsR5Hbu#_z(qsl`3cO7dzpMM6e;bL~wIOYKz^W>k$bF`hG3q?vbo
zgX>*Tf;cRmtdzM6jnxdw@-7W37GuJn#-_$b2*qgBY(sEmA!hCOsq1K}!1t?L7t)0Q
zzXG$AqSk{-77Ke~NFa6ZS~m8@R}{-26$xR^c&Ev76;I8I&T8?o(@s%>3!9kZ0x-P~
zIH+X0k7nb?6<&LxwB+(nTvlbx?C&w!aHFP2T6C#B>~mtV8YL2Y-AwK&9&!i-{$3t8hIk)E4es
z31JPN_Nz)8NX7y}tMmX>NwkL*%(XKubj!I{)8BX4t)>6(lCTJ9sZ*c|7mT-0*ilJO8l%ZP4g9&OQWx#K3N
zYEH7oAq1^Wy~TJf
zlb5^v5lO1^+*_#$5)+Q3(KXoU7muY>GbR>894%tr@tRsdV|%PhAhVf1CE5%MCTY&I2(s1niLhu9ZqrW+MMT_
z6|ZG3!p)X|A00pa|MTL}?E=91l}bq7`u~x6DU#(x>v5xXvkd
zE&~DceuAk;n-Gx{awWUmjz>Eq$zR8%Mp{7U4oJ@#QT`QXMp4g}O*8k5aw7^og*%~H
zk!^QBbkg}QKKCD5kV7Hkb}7OFIb->Xq|!mk$N~=Lo8~U@8m399-ZI=^(k;F@#STdi
zlvjo%!QYeKnZWP*RH6~yhDAY|mr)MLuGn+4pUV`kN&p&9paPag3V7$P}Gku}LsK-5pM9QexL({%$dDsZ!3RO=eb@Zmfq7*2c
zXQ`*K&)pq+(iqr`W~H{kV(pKG>Uz?=a*@*ll`X(+#O9?c?bOl&AXOKUnqxNVnW7oR
zAdN*k*e_xb3Fs+yHB1#pK}&!-3TUK4B4bYS3Ga%Mkem$DZpfjYQVt6f^rDVQqO#|Z
zA33D)7WtN?5+iUy_9Rwn&JTK#W?_Po3@{rb-m+7YEy667t)zuMVTC;oX$&q#42K7cMgG9IWg*y4~f42|UdYGv6H?U}LYM>NKSux31MZg~A`MAHPv
z#dQt6sg_VUIKu`w_sBoarSnmhT_#ryG71{3h?|H>RgZPf4Lqv>w1(r<(r`>0NV#ny
zsl=_rbTunF#H9fZ?oUe3-?}d>p7{sTo|i5rQhE`FtlVq~rdv@X4Eaau4rz9=;}nF(
zryvig;+8iOHWc&dO$ju{^$;TtO9CmUappG39jWpL2ZcP-Vk&nj1!Tv_NaH^BMs^xy
z&l2Ycf<-<#YL5iw2w08L57MW_69)wCOfWAUp(j@sTN+I&r>
z!pmf>z}s-Ii7eRiK$roJl0HE_66&*~heCMnrlHlX^#1@61${;Uv!WD=?jf>dQZQ6V
z2I8b}RFb4{L8SSuscK1^R`-?{(2LDB{u$sP5iC-I21Z7GKN^beZ#pv&e1Jg}{g88Uf*=Z#+r{AttL`VA+?u>SyNO?E=r
zY7vhz&h7D*RgroT&Q}}P1hOC$;VxNR8d67RbMwYTv6GH^;}u$FS*{2J0he@mJ$FCosQ&zSBQeR6G1S_Ua?y|VOi59&otJi%AZ`Q@2xU182*@D!
z7&UIjdF=p}Ihk2{GBE^HPCDHh##&g9QH`_>R(}|1(LKa7D3e{Vv>VOJ?F3!IgYQhK
zw6cz6jYcxz18r&7_Bw@(8kU`N6!y0;thTo>IAoO=WHOP#1RCb_`5WQnzuH!}3;lpw
ze75SRIy2;w@qv&)qc=NDos>yaAAE(`GhAs4?>u`qQ@^*i%t0$x(v2ri-QSrn(_g
z#JHA@;x}fZncP0?(*@e?#_9yCAo-68Iu2JY)YIbxwgpC*2PgEW#>aol`PR`f
zB?`dj6(;`xYG`-bRFTaXK8C1GC%HZpj+FACE=?g44F=q
zSr5xnLT+f|@B2PUVtBol*G?Cd8}=(4WJNCup;G2ohF
z?L7N>)8Y!vk8fJ2+Qv?H(XwI&+|Z|K&MBr$4Y=rOJkM&e#p+4sds3``9Fb9m3~(vM
zNaTtNcG#52fNsI&ksV7Dnv4cxv{I_Fw<3cE+*vUBjs+Ch+iPb%srebonhK2ji}Qg_
zgs~?y!o-XkSji2?r3QY{CR?^(q?S0TgQ@7kpp8Q)ApR93agK6nSsFJNsU<@p0~s|n
zSD&?95uDwlaw*6pVAN07@Tny2QNLfpyH<$hy~y9M;ZeU|!lZt`g+~2<3Z=QrKaG*N
zB=9L$?$Xg)EsU?O!9^5c`coTML(}exkv{0C)M0t47oe$@sHo^lh)RP~N6;lPKIo}q
zjDi}u=Is-r8X{<-+yKp5js;a5)G;pH8nVB1R~*}D>WWDZSl3_#bsdd8VokgXHb?-<
z@rsK708`WSdn13UT!^C%5k?7l9P~s{mXXgzMFp7f0X4@a?_;HGTelg^Nf9LU!0Kx<
z-&NGSQpVT9H*0ODVEMZERevz#e735z*udXrN>0Aym2y92<@QP#EFJsMb`xk;`r=LFkn)`}W=fWXKE
zfUA-LcPffV%L>gx#W|=enlL1e#Kwf+b`^yFCDuGun?C@gBHh$)@eSN%t<%0Bk1B3S
z#y197*LHU~Qj{-;bx#zLKaQF|vxWve0@ce~t~wdt0NgiRsRAx>vC4@LL<2i#l5x}0
zpDm2s`PoOqo*L5pG`A918Kt|t+HbWRrw?&-%bXWr0Kmz@s0g_sRYw(~sz)V>{Afs|
z+&)A3Qn1)?$C@+&zF)l?2dysgOF^!jnvuwMuu(=m&*x5P)thl9k$?vRgP!i=N`f&>
zBL@{K44!El88{RUE!>X>9VyB%Flk386)M08puT2C6rPm03G)`|PGu4CP~1wpFe%2(
zl--raUFwSyY(jI6wV!iwB~PeT`iUoMtutVj#S`7K
zoK-tmIXhGq;L??dFu>
zP}Zp>vCla)^dSsDz^1SmCjz9FCWzs>)Gp_C(Opnl9Q7NqR(NJ$&Cnj?)B+;CIjfUn
z7^oyddJoFCjGc}-M&d+`k&*>Wv9olfhTXBy^r?{wu1MmEOG^qgqtt>=kmIcj^0wjY
zO_D`2a^2}fnB`P8LW^c?q{o@$3&T_sO6QX2^QDsM$iWH#^c7m-Vda+&4i7z=wpToVl$G2)l&NAGedZ#|J!~}xbn*#tUlYvoS1{I0B<-I43ff~#}r1CS%pUIF+ACN_o<%cEMoH#
zTr&0pty7x%;QWxv5PL0Gk|(+up7vJ*qMik57$o*NWi2-$ztrz<8b+RC+qw?GRa7h&
zYIfr_aqUqC79c&?=8ft(7F+l=D|nU*~>MbT1=XphLx3RgY15(nrF
zFPDYR-hH}MyI78R_okF~IA=JjQb`!{GP?xbf`z@Q8AtbyJ17-6fM=nm$`qUeN%p3t
zaB;B^8Q=t;UW7Dwpri5}i0DcrZ3zUsfbHx%Oq-C-BRN^vuY??xa
zhyi*YDF-VdF(QzslZ>2Fgj`@p`P8o&jd!u?X`t@t59LbeIYw>Ir5i{0s%YVCXUaqV
z`fj2`Kg37iF-w{yMi(oT8Rm_nu&1(s!Bk|EPPj!@R7?_job;sTjTJON@w*-$1KNQA
z?!?sFRCcE-=YdPx6rhN_oDKy7)W251sR+Tv6)vUC6(wRfg&-i0n9)kdCp1!0xcO2(
zM2hBN!ETju3z8W-fv@8Ik)k<^{Xjs
zGD0t4kUuzd2a#Cbc|)B3bu?G4j51^z##Ga0ila6pxq!s7M)@Rs>P8Bhj9oY*X{u4(
zqmQ5EJ+MwcQ%_fB`;M*n))J=Cq-L3`1;B6_i5|e!sF5x*e9lK=D=G&dVZjw@7b$f2cN_27DnXlX*0W-O{eY;s0=Q`S}6jPdoMesy?beW912
z@9kQ1pb^3=GOLnRuyUkv>q*=aD7MElRAGl=I^v|s#xuv(pEcH@
zscPQ-;y_up5IXH#8nYea=kJq_)teNt!;!d*^#oLlGG{yM2L#^6s#+S5UTOjpG>n7>
zz+KoNdg7&nRFX~yuWHDaUoghX<0^UQsHsx#7%DJ*MMI|+Qkyqp)Mj4fewABZ@UMq_
zOBAW8>G88(LOjMXt1O^&5+eeV0shd)&Qz)9s-^U#VdT#}PtvVJeC1I$YN4*hdn1b#F4q40Xb
zJAW5gOX5u+M0}Z0sJew$0|I4@Be*>DA}LQrj$@WR?}L12;NOT^LDoDcuETH@woPvN
zNurY(F|s&TRvpxUN1&=)=w9O{x3Fw{#2$F7FGCW5c`t-#=X?LlcgD2S8B~3XNLU2w;)}pqY`JhSD+N{TO6L1vpdx{-K
znY)sFd2TPzT^?RtyX>
z(<79Y0h+Bx9)4hQJ*t&SB3u&^-2#DwQ>C##Fh8XcLA!u)&v90vjH>+H{VJ9sOQ}A_
zFJX~JQ>f=2wEX)Y%AO-uJ?Z}dV^VTl`(lSzm|n)jNH-3&sWg`4yhjHZ&MEC^R~RR+
zTD5Ov2*k01KPt`CN=&5=hdC9G{30+qcy~pTeGx|ablX*Dn@l`zlHGYSL)5e*ESbz>
z17&L?#Vj$;XL%g(%z@#W7Yvdq=*mX~9>jI6J4+cYW(^Q3bChA)s@!;2Q?R|8T+kvj
z+RW_N`YNL3Se?y1u5pJaC7~y4la$!`5@kB-+A+E;g$$uuFvWMFUCY#DVyoN8o&b(3
zojTa2fez=E7-HOXf?IKP0B7vkBP;&^EO1C03E8)8VP$)#+v@sV^mjJ5Dohs<6Sc5#
zN2nn1LF66{U(@XFEp8HPcVuvWS3`m;HHcl!sndMAoaAuW_!isk7SrzV?Zm!hvV`24
zG?IMVfn5IpqB}O?-61WWV{hGQUBx_7vP_s|I3qc$_O@5P18=wK&LhzjlRO|Ze`fAH
zvLFXG`6RSpA1`q#3Vf-8meBYu;2Le4fYb>awl*|Xd56!Av_YXo~cQdCNX3ID@&M4#xoZC$qDpY~!!;w#p)D^&N{cB|6&Ua1rDvalYL8}d?$`8sjK(cVT
zb^MJ|Xr4m8);y2pQ$UBD)X-Wk-&0>lvxC58cn995WH=H(v+wY0tHNn
zU#PB#I~h(l)J2Vn=~Ju&Z#3A$D{?VXWCQn&bVVj%INb<{5OGeze-WS@j=8BKgh|@9
zwuV+@cv1IGMM(!!PkRYsUCkc{p~WI5)6?~-QRQVdbUF^CRM9glaC=m6sNGW{4c%(V
zYQ&JfNe9xUPrXq>xRaCGrd9dy4J540X0;_xx+q|{$rPxEZ&6Cou^6TcVTe>XH7-@S
z%|@{}Zj|+ngLLRW#)3Vhg;Z_%T98J~)Ev?#;K1Uhh?3mUQkOE+VnA5@%a34bS`vhu
zcB@fD8wYhgp#**J)`FF+Q2Nu59Oo4|g;4z6DWf^|?LmhvnCo}OMmtrQ^Nf*D!PNaK
z=m16t6dJ|8rD^+AWPmL3T;gw83v<%zlC;Q=NzLh
zd!je%_)}?Q>q$YJ)4ro0?;1*yiU}j9uR%*iX%@gLMd&`29tr3wMF#_e(xz{jSQg~!
zFrut4K~N~*3bPs)01CoR@j4<*y#-lNJfQjfD!AcTo`|_)=sjwUuBmn6XmmClQycej3iv|ZoX
zX_|bxo~L;gy}S{Zo_XX5k*}*U;16?EFLgy}-%RlJzuWI)$eBc&V7FoXGRgyNZl53w
z{DZd#H9fwE1hd6|;t9UnaRi&~kfz9O&tj2+a>M)XS%+e|B9n_intK#v)VxQoGe@Gp
z`el(_jj3K*{{BLGm?Ck22Wcc2&q&;JT3Sw<29q43#saN8o9-reW|DRt3$qeffxyYY
z8~_IuISach(`5-O);Yzj!jA+
zl=n3mgOiiqizn2MQDam9oc^@kqdCPnVmyIP5mD}^VWnY;262j&+l0nO)g@D
z@H~=nP_>h(+-mnE)REio8=AIN8ZvN9$d=nD1oKXc$d&_UYg6p@`}u`Q^)({f1%1_W
ztfLbjWp-DJ!)lM+MBt)|f-qg@A
zybO8@xhO0?RLRdZRJOuN!8qtEwjT-R)VDcSW<>-yIP5C0F_5vA86zZn)w2O|Nr*Ok
zHh-lrlHhDhBn0={jMeCI&zZ7}%8UlygRVQ$!4a6@BPO(?vb?xpB}QO60mv0+>?)%G
zGJ)HQe8ehJY|M@qQ-)ks`+G?yj}R!}bz_Q_Ej!F3G7q6OaWs2^nH~L31utrj=unha
zrgLd-puCeO9hBypk+wf_fsdnfsjn^Ov<#nXhz~$=YMfHevSCX}_~20I7CvaANp$;*8#aE+M;-(1zC-o`VVOpD2uUzR5z`qN)dC^yY1W9?G?
zrf6~=eZ$Zw099+so$jVQt
z6tXgntBm^!g_=y3l#%r{Wu+=Ms*lQ|`D8hkJo_G$`K)DmG6#_xk51IF$8jOVR_uO@
zT6R}DeXs!O%1@~^c{T8=RV>j+>;*1nY^lQ9vki33Qcgb1!|PHmhkE3Bvc{gdYQJ%*
zTu#|(Hva&!rl4t^#t5a%%2A}c5lN<5#J{}04o77b0gl=}RA=i%<=Q}~Ff-PhD&TQi
z#pq`_N1`K^P)-1-UNa-BGjh4dwLQSiCJ$UyT{d9qNKK2}lSULURg4mS>C3mcW9$Vq
zMBZy&DjX=#UVgX
zdbcDg8hzoKhZ*2x({r#9i2nfDrBnd&2hyZOF57nCk-m`XSf&VpI7*e&@=2*};wc_L9Qx6AZ3UrQ$#6pv&MADBCq3$RT%K4`5QD2y
zFO`u9X+mu~KpfM|u?_$xn$kbsNWl8@K}sxXiC2!an@6P#@)4Goub`%2vB)IQU6`&$
za63^(#`L1CSqCZc_nkYOOCRHkkz(D^@%5%$31FLH2M3{|H_ub~*T^5bb3~minT||=
z=A@GrMG?MW-3aYa9jY?N@f75J#1eN8N{Ouk>h~=pMt}PB_`(Mbv{WKk@;iG}io}i+
zKT5(#qVyq@aJdy`*_K1Gl&_}YRITPaM+6_GTaCQs+Rc!AS3Ki&V`xj2+`b`=9b&i`
zEPh^k)TZ7PI0b?F)oA7dH5m=Jrh8N6yLmCV^IUE%Go@8bu&T-TPDMTHwJ*mxaY)|^R2he7tLK~36r4beeMVWE^%yaqBq|lm7hDag@0g1<`q!6Iv6)Rfb
zx6HmS=U9F66UufKVkSPkN?af+bXP8P;f;k?Pf)a8GE06C3%{5(IQlWLRRa7b5
zqn}zjJwYS?0Ifi~Wa>(o=kyyJHtTCA5FjvEK-PV*2F
z0^ot3v<$K&3$#9|Q*%HMl-GPLis;kE}p)B$~E
z?vbcpcIis8E9A!{OaSPtbNG7IQ7KXPsaNN7GQTLC@bOG|dD21%)*ykN-P
zl%1xjEQr7Wn7}v#r6jJl{-RGQRM+r7KVnB-EjMtN2Ts3V~j&T5|z^eelet7tdZT4tdd
zDnof;awC;dSTe=AmDI)=K@qyVYDPi>%{4D_IU9Y+sgmt>{{XDy^yjTER5c~s&C|Xy
zd`9ps(rKRrX4YmEVLaK`K6AYB!X$rIn8)
zaB;_O!o1hRe;cnnK8H*A*Pu!UvU>;0M6T+N|VeyU(@5Bx!dI
z!pjtj8D1v`BmDNP2o4-Fls2I5f<9KG3a|$QF7&u0l>ioj-b?uGXDS&
zKljyV$i$t?NiO43#d3VWfF8X`s!1*%8-_j4HDWokbHzbdXusx78N(`Qh4Vy_+?Z0a%dkkjR^M~IBL5Drx?lNs9U%zjMb=I4*jS#i@Bw0vNH|!rxa?+
z6m`X8#c%)wI49DyEpI%hAU;5@IZsPi>5LM0MdWz65LvF1dFJ#v|G@xAE4C-2Yu-Se`
z{IpU?5zme{^n3+;aOrbSz0-9IMZU9>bY@13hC;0ykG&j0fqjG2XmneChoQYAf&_e^O3F0Rm#%nWC
z@cbGrjldQ{_d#oUKG&u^LPKb*7`a93Bqt=aT~|3wf>Ni_^>cY{%jG;X!G*lCmmY1a
z9N}La5OI*oO0eV;Q^~42p$O$qv8)x6LV?M(%$w{K^VJ
zB!?S`{{WV9e-Of-n+8XK~vNUV@>8T!0+o_bX3>Ga#xz@OwumkLsQae7~pYHh(mquwJ?Ax
z
zwbZHH;&RlQ0z+|D8b{bZYNZ=6J!$@9{{VWEgpm2V6G-fFmNddi>S=}8^U8JIWAg&|bua{>ako{yv2*gS`Pmv?gDT;JUg(#i)x
zzzU1Tb|R{RNybSv$2#phSnO2nifiHBPsCnb--=_73w32^^jKxwhmZMqWjL7fN0-b)
z(n1x-Nd>2$KbbS&cg0h@)NC|lS+z@%5S8}AyGuOJ*J2>cJ7f4%6URztpW+Q!h={(E
zY*-bL?2_fc&mdr=JmaqRBeD@(H7iOsvOB5MQCb(ZOT8aQc;JIP+yupZEl1431hraZl28`wbh+I(#8s
zYJ=?UG6D(zN2ZMC)9$sMk}XDl9E@i)$Nnv$D622xAzI2v8~@^a?lw
z=}Yz$C#Gt`Os~*XpssjY&)T=MF3~gOvP9$`)}bH+f!efg;R-XKdd@tPk6P)cCf24E
zJ>7<}U~)QB;v0rqn*RWHl^BHp8r071q5D*Jry{ON#YPzCB-Hlg6(mqgOjTXS6y!tZ
zV+Nj`Y3cW>B|AF~BkDJd$*udjXg&4`-ZgGrK8$x=FRfo8a6ScXhZZ#OpXUal8wbuvK#%Ux}xUx_%8|zuu
zAS|0lYPmh!&In)9sYeMg)s#ec6H0X_9D$loAj{W^QD+e3lU*t)9I4o2
zT+1gXjP?~9vD=K)-b&Lh1Zj-ZFq3KBjf8ip)s3E^Psn(wppJGm6C7R9VmP-P
zj`a3cUZS6CZ6dDcs`vBB7$gs+M4qL_$|79K6m7+4&gI)`fzQ1~b$2s2%;!GUN?9F`
z%nAC}T^et8aAGR!p>3f8K_gF+4-3>&WLYjBC5Cg~rBf2h&5R$;ty-h8%VxV{vBPd=
zJM6bEc1freU-kwm$rhb#IG%Z1xDkW;nvzd0G4qV^Ngdn(Vi+4Z>$gA3vUf(2rLLnz
z)VSD|ZiCiF{*=k(WKG!SnAdyd!94m?F6AztDEzA6oU|gPqVyV~eZ;H4?c8C*`CK6UYE>&YPs7V(KN1nm!-vE1
z%9*&-5k@)#OrJ{AEs&UX6S#kLPc9h0#spK{{Rg`Ni!+bcO5gAh{zKMb`(@n1|tU?)U6Ef!U4eJni2_PlgyEn
z%MXsaiBsf_XId
zw+YQV*g07M$TfL@i-Kz2_rkWt1?;?#^cm^jT6}@Y&q|G>c5hk-%0(f-#wbyNNEghT%3e)OV8?n_L**l)q%eR#
z`t(uF5am8Ijuey=3aSr5#WjSGuvR?#nifX~B9?GxQMmN!UnS2I_P$9EQ!!tiPCau{
zE?1<3^QMcF6~C57M@;%tglQ6qCXf)v0hjvG%@!_JD0xRinx584i3iOq&m*Wc5QSL$
zulZ7{Nde0p!jp<|pR;-1$s
zC*CHZ&^C@ExzNL?cHLGY)Hud-T*a28^1DW-m0aW2tfs8$Kq=4Gq2bwU5fi3|QZcY~
zQNi`8VAZ2VE99vgiNmfyu4+A2a&hD-_uK_pOVN&0{&fz6Emng%B)Hx;n3o68RI4;4
zdVaNsZL2abm!2xZYN|f)rR?UmAGCusq)5*r6vl~MZO781w$v>o`|d~7W~>8j<8)U5
z{c9Pjk!2?qLH(Zm{_K(Kifon)P=S>`ql(aouoy`{DwJ++TbdPGsFw&x=H;@uR{5kP
z`;$x-)3*qjBD$3p@c{;rBJxhVQDvsU4ED&`Bzjik&%S(cUl=St4SZ22N|R+Mil3-
z6{7lD#<*EGFLBc*qH6?9`CyJa*FsPv~ia1t^~5y8i;7eJhQgOx^A^A+4LB}OwzgCX9d
z1a>sOQV*C&A3X_N8V4VAXXt3MWg);j3}>mvY3UgUBI6y7Re(R3rX=#?V?sT!PIF4(
z!vx>Bus8#$#T=+)i8DlS8$N_n&E%@lw%nMQ6(j;agmM{?fPAOS|JZ}*o$(^UTsG3Fu
zg$EhqK9mm1JCZ;ZBrBZ!pzQ-cH?Ja#FeWdj>UIsMYWj3HF*=niG)maW!N%ZleZ_K6
z{4ViNfG7Ug_#NUUh5&Ong>@&zoVW)9FO`-9*lh}`I?JN7gs{$bOZ~+4xK1@54BSmw_(7;V;nQj{{59
zt^#Tn!+<=gNC|O{epV{o25C5^JG0VEQJIg~{NoYyVzzv732{v;*E
zj*lezeyL?Bhf&dWX#UT2X)ZqNBjADPdLG30>F!~g8BA>DSx-1P&19+TV_3@Qy>BiN
zaX8!#94W{j*ji3W2H-=Qk9LBy$HM0smEVxmOVf*;Nr4ll(*hB
zAbv*HBi|K-@bYZ0Z_3T9O;N-A?0Ct>KA-)1y%w^_GOdH#HO+@%&f}ivG~;t354~LP
z4!abw9%ox^;v9Ukw_gMqD
zHDWDNGy#kA^{zfGW=Hu9dvVTl)}{W`d;kYt4h=)YBSncj81?8+RzHu^aYFUaYd
z;|8xPsVJnL+q+RFr!m3jIPFmk9=fu=vlOpmrq*YQ`gm{S)y<{!M{IY|#v3V;Oj0lW
zw2YIIy+P$lwir~?z9GJ)buN_feTx3xGWQm8Nw_);4>l)YJlKgm90mu53V5t2?k*;M
zvM(O>CH{wPt=l9sNi-9}+wCtcWZNtc({i4KW2&}QhXfF7WrU`dsLEJ~x1rECiKms8
zM<;Tg8Ad;qU~1L^Nbd!;-i5@bm=oVRO!
zG);cm8++Lc8(AV^Aiq@^W{Y<#AnMSZvgaVO@-!@MUKzaDWFwvk?Ok*zQiPq1Ct7yg
z&G7C0`+b*4)GwO)JD^UDtpK|<6zU^wyb06>%J3K=W$58IBZ^qh?-hsx9XeDO{vVD@
zIpa+=FDvCWdk|QOBygQD}lKFc&QD+139R(Y)xFW*Ds6_O%f|MPXzZhR9)tj
z-4G5&cT
z5mC28^sOnhjHfkZMBjFslj>>D_f0ao(@f`bvlKfHLmG0slgk=q86%;kC2>36=$9wd
z(YcqSRCzw8l?ge?spT4O#|iP9_N({}`#fuUABR3Lc
z%-;aNYYzdRhJUm-!&@H_X=>(6DJD}L)}JZI?`@|{NdxdRw1C(FgAOa>twa6^3HwO?
z&werg0EDmhF8!*kbvrVmJAEVKEVEi_cF>NHuBB{}CCYRjT=02de+^9N)oIS0mDfl5
zJ-+)I#xx}xGw=I?5^paUApF4f9P>=Co_}WVh~Kxjgm3SB4f{|2$`jt{nr)bp&&9qE
z@h+Kr_KsK-OJj19$&3cVq7u2pdBWGY_$%TkfW9K$TxlK;@hn$5rJC-((QYl|wZF9(
z1eqK6RnUpVJIW?;KmA$yz3We>37^;`PCbzJ*lKT0|#R~3n
zh*+ORRRES^Z~}~yNCT~KOB*>qbo8m<>Bn|fd;b6o=~~==8k+5HHD|PIsC1il4Ra(a
zc9{Zagxpn2wh%J_SZ2Oh{ki-n{h|9~{0G;3S7~N#?sYVb*u)Eg96Ftg5hD;hz{x2E
zKnsG(Ltr&~?7t3R@du5^#l1&a*K{8g9Wk`IC(>_VVHJ?Ld1JTRG%}z(t2-t)uo=h;
zkCy)c;GzB()_e=`kKuohb&rf*D%N4W{?sbk=h)18N0T<3+o2Yg&}0(ZkD0vum;?ev
zbNfoK;%85@rnWxay^_aBR9mZrggk~TYjqsC0FfMM23X-v6+t*03Z-Q|{;6&*G}zxx
zx0mJ<+{c@T8U7fUVKOj7F((~RX1OabgB~m&67)Zeo-X*it6XZjBo~m!r~Q@~pcdB~
zOpwU1ZAN3|L`z|Y
zuGwzqki4!v3m%Kf`9>6HfGY1niiRa(;DJ^aCM*F{QBm0mY$AQ(Ru}IIYFnMafl_%+
zyw);Nm5ozYEJWc>4NPZM8OKV`EdIRHcTL^N{A!gH_6}salU(XD%1Wu@r9fPc0H**4
zN*XSH_fzRz3K4>_%TiKfU+#)k=qctPMLl6a2NbS`ld#25Mo6h+;Bo8PoUtGSijgBs
zecBsITwxTTX9AvQBegM2o!yomXfwtcyJ8w
zv{3;rx!joa%~y44}QS)4B3Pp=iIwh+KD
zP!o>T6|KC99^_G+Qzz9Vv?p-f!=}?%%1xbg;~H?hkn8DL7i?+(#baMjX6=iaWGDNm
zDf*7RdvL_M4C5IjlT;OMB_>B+J?naOWh9$tnToF$DOO1n4aZt;tUR}qr&f(cm1z*TvRX8zZupa)NQ!k^J#it-L+1UI5n67SBRlDE&=UkwZQZ1MX^6zP+|x(cH2hRW+->E`wIS
zqFwlFTY}&jtlf`qCoTGlvt!}CNpU&Sh$Efz(_tl
z^*P_zHDut~x~oMkWLuW=SyIu*Aw@&amM|)X)$Gy}_KTwb0FG)Iq})8qozCC%)0}>F
z3jra{Fg-DhQ5e|auUa{0OOx-D?^P+T^&s
zo>-m-TvO0(j3FnEywgc-$BYBp`qf3)ZCwp`VPldB$4r_}Ek{az4mwgd-T8X^)u|ac
z>PA&}XB1$13UzBn2#MPGn4JyOD__j9|vheT67>Zjq7i
znovC!lq-5s%#Ha3!0ssv1>+RpjZ7#@9GZzwL8@WqdsAC};Vf){*Y1{0zxx1WVn(xmEPYAx0?cIXLCfRzF{OK{4`InDc
z`95@h*~&3hjzY_i!!+e982$XQH~qW+0EGrA3+{<>p0wtVoCA?ho*~M~Etx}tt_VGV
zspJZJj8%BSUNKWDmKh9b`$R{xLwu0K3m_hpc~XvdgG;pU+n&8Cb}%^4K9qANH5QNt
zaoV07gOks-Gs=*E4L&q^K2=kZ)|gq3x^<2(GTl|!@FNaJ-^Ua}%%}sBg#G0Yk^HJY
z_&tqCHW807W`G>3P~{t~JUe%$2Iv_N>~hWbnD?hE
zZsQcJfWDJ4KP;L2YGt{VPbRX8WalQQhj0fhe>z;KcV@l4@K*VoA46IYYEb|Q;<@y8
z9Ow0^;+^(`gU7vRI#Fo2)Nwi(^*F~ysiW0^;66VZ=N9Cxvn%d0=j9^+W6<@dt;>jf
z=0a3(*gY!;N+ycC?sYM0F*w6Wzlo_*{w>(^1#)8kJY&mmtxXl|1pe|Mexk6R9bDcu
zRFT$wq9+*+DmG*!{Q3GC!-r9cKnwEw16E+Yh`48RKs<`c(6OXuu4!ym;2yOUw!Cfx
zRFKQ0NL7r|DF=~Kr`R@tF4ewc$>8JsX+_EpMpUgm(Vs1&5y;I~Hb{iC0stMoYj4Vl
z+Z%`BOZ}RUoL0YClt^?S&VBUq&z2k&=m$gVL9{&>Z!pS>a?-Xu{?&tcP~sPBJNt6k+_b-##a;5=f?^l}Ng#W5rU@I?mNG%eU&v50ZR}$N9;iwm
zO!H3?`Fk>;Zou3RdS=29I3pP8K!uwOBy16lxC9zp(E`zT&I|-?ZbAKN5Q11U5tjLW
zl^NLWzq#-0Pf>xu@99hn5ZtPuR$Oh(PPcO?-4pOS)hCVJcB>G5YGiS^5}^oO5Idh*
z0E5N;4e)QoSmAFN_oPNg9Lw_z9&mEJatPwGFZFMY-T^X6JH_M8
z9wjn)_SR`@szn&aL&QEw*aN|gg1{W8Jf4(D?n&AXNhILppU$5IvPU3wau{?Xp{-3}
zYZLe<#a}M$Hw|>o+i;FKR<~y8#1?6iU9urkBdbm
zm~|#dvIairr6=x#-0aph^HoR+4^};@UV9u>eHU5KykDk8;x7to7W$2psu?bAW)41#
zI;(oC07r3F7^1i?hw!Y_n1{t70PZ~J(wt`s#SdDA9}ay9IHx-y`PqOyGHT;D)QLv=
zlPvfd1NEmY!z1^Z8Nu(0fgN@bdyE54R0;C9=N-GBT5UynZ
zkU{jRBbo7sf(dPhTQT9(DW>yT-G48u`)^n3=DSv06$98hfNCG
zwwj+$(}0jMIrhg&(o0y!YUGwA>5di2YGqz%)*8K=Y5JY7nG0a!C0SS;hE@a=V0jD(
zAdG?yKT5FFtmPJZxiVXsw%h9Mg$z$2CIf_Q;0ZWaCm2(~=&fu~;}4#_s_YP3i)XZ*
z5#y0c$sB+cV0wZNQI6HaJi=<}GheC26fN|z1cn(CEWL6%Rc%YbwzsC@>e9g&&Qb2{
zjJ{G`GGHu<;DZIifHq7y`F6J(Sv0LJMu86RI$NDT=F0OV_FYau12RGYJOEuY{AyHwVsOgCu>^2Oy=V2>joi`{DaJN?m^v`~MB>JIUPs(xhUP^I^Rl30
z6o{h{Tq2NJGlm0tjiiOSCY!Obx06;cVbgV>(tWNNrzoh&Bgm^dFDi}Y{L0F}GD^R^
ziGV6dERCCM;yA7)8fES5pKpx;^5#$8IQfp&3J5Y_5;o;!P+dAYrjW%{lhGM+Q|wOG
zY&hG;wLx`kLCEYX=+z=WyW5lRQC!@>aps&8(;e%kHENl-QoW4jMqa1$sUTJjz@&S)
zBqc)f4|;r3$HxG1+Pfn*&MJ^mdJitjFa-iksSI&JWX|B+)NV_h?I(_LS}774RbWA<
zn}p~`>r~>DjOBV7lg~fht<t+U##RZ&I`
zmoqC-SCdDUL$Wc?y%T7+*2pBD-wU3+8jq~UxnEY5ib=`jcBxTUo(JVh@JJ5jjN|E7
z?R3PEeA%Z|%7VS~?d??Sr4d4v-IFOM!g0oZDa{`9ouJjbfdT-EN99!}l!KA-@y#e!
zzQs9~OKHfP6pks(7DG8;S7ejcy3JjdJa_-%^bW
z9$8cil$7B{Wl_yv@VCYf4r}`4eh$|nz3}gdT}g|>`uqYJt^hbKZs4?6OP-AjzrHyG
zY%^n$YLALMFX68o=yLdD#+n7LtEbFCdrQdoC}Gjmj1+O%l!hIRO-e4Nr7DU#8t*j4
z%tt%-V`*M<=~Qi6O-oZ3@{(+A8#Y59EzH2Oyxoq)QwbTI+rhd|miW@w3C9+W!Cv
zJXLRh4VV#K>B0jI&YngG)U1pYT?_Ls&Ln4E$4Pc^heIbF9H^Rh0SbBsOyiPBM0r5etvu`MKTz`ug$})o&KT
z;bUPN`C5r#`@6ltztQphN*Pee{3IVN?#zn6F#iC-Vl?aP?}I)I)>=?f9~ez;+562T
z)^e~o{{Xf}^Qm!-v?4O7-R^yLs%i^(F3Ow-k)o9z>+``Mv|!Afw+u=)3C@Ht*H5sQsj!HnXGC}&vMPGl6e(406q;z
zZ^s6t{{WVi%P8F5Ev7k$1CBk-G!`fCXY-|E2*5PnSDcT|qmdNYt&oL?=9s?SJ5sZj
z7|1@<41vvS4KyOxdxbf06q;z@=9P|4Y05V(A$-qjlX>(N2;}0VtW61ohzH)Mq_Kc9
zY5taKnK@rOl}l_eW)nqXMnaEs4&sT2s}<%dezF>694
zS}l*>}LRsS41gEoYI_b51vSM~6s@IB@h^|;VCY2&G9DsPMe8&&JDQZ66%2}Ke)w5@(
z2oM~B$5UC*JHwU(0C%ZBwkDJBOO?(un#yWS=!1h@dU*O)=+v*@`fu>su!5VL3;XV_9ko^GOIybS&Wi09sdstYFJ`
zll{bx1M|%zG_G>ZO)grhlcS@xDv8lY(fCUzdAUp-h