Commit b592f37

[Feat][HPI] Enable MKL-DNN as default and suggest backend configurations more intelligently (#4169)
* HPI supports mkldnn by default and allows finer config selection
* Update HPI prior knowledge
* BEVFusion does not support CPU inference
* Fix missing deps

Co-authored-by: zhang-prog <zhang-prog@users.noreply.github.com>
1 parent c6db316 commit b592f37

File tree: 6 files changed, +852 / -387 lines


paddlex/inference/models/common/static_infer.py

Lines changed: 18 additions & 16 deletions
@@ -33,7 +33,7 @@
     suggest_inference_backend_and_config,
 )
 from ...utils.model_paths import get_model_paths
-from ...utils.pp_option import PaddlePredictorOption
+from ...utils.pp_option import PaddlePredictorOption, get_default_run_mode
 from ...utils.trt_config import DISABLE_TRT_HALF_OPS_CONFIG

 CACHE_DIR = ".cache"
@@ -407,17 +407,10 @@ def _create(
             assert self._option.device_type == "cpu"
             config.disable_gpu()
             if "mkldnn" in self._option.run_mode:
-                if hasattr(config, "set_mkldnn_cache_capacity"):
-                    config.enable_mkldnn()
-                    if "bf16" in self._option.run_mode:
-                        config.enable_mkldnn_bfloat16()
-                    config.set_mkldnn_cache_capacity(
-                        self._option.mkldnn_cache_capacity
-                    )
-                else:
-                    logging.warning(
-                        "MKL-DNN is not available. We will disable MKL-DNN."
-                    )
+                config.enable_mkldnn()
+                if "bf16" in self._option.run_mode:
+                    config.enable_mkldnn_bfloat16()
+                config.set_mkldnn_cache_capacity(self._option.mkldnn_cache_capacity)
             else:
                 if hasattr(config, "disable_mkldnn"):
                     config.disable_mkldnn()
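For reference, the simplified CPU branch maps onto a handful of public `paddle.inference` calls. Below is a minimal standalone sketch of that path; the model file paths, the `run_mode` value, and the cache capacity are illustrative assumptions, not values from this diff:

```python
# Minimal sketch of the CPU/MKL-DNN path above, using the public
# paddle.inference API directly. Paths and values are placeholders.
from paddle.inference import Config, create_predictor

config = Config("inference.pdmodel", "inference.pdiparams")  # hypothetical paths
config.disable_gpu()

run_mode = "mkldnn_bf16"  # PaddleX-style run mode string, e.g. "paddle", "mkldnn"
if "mkldnn" in run_mode:
    config.enable_mkldnn()
    if "bf16" in run_mode:
        config.enable_mkldnn_bfloat16()
    # Cap the number of cached input shapes so memory use stays bounded.
    config.set_mkldnn_cache_capacity(10)

predictor = create_predictor(config)
```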
@@ -641,10 +634,19 @@ def _determine_backend_and_config(self):
         )
         backend_config = self._config.backend_config or {}

-        if backend == "paddle" and not backend_config:
-            logging.warning(
-                "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
-            )
+        if backend == "paddle":
+            if not backend_config:
+                is_default_config = True
+            elif backend_config.keys() != {"run_mode"}:
+                is_default_config = False
+            else:
+                is_default_config = backend_config["run_mode"] == get_default_run_mode(
+                    self._config.pdx_model_name, self._config.device_type
+                )
+            if is_default_config:
+                logging.warning(
+                    "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
+                )

         return backend, backend_config
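`get_default_run_mode` itself lives in `paddlex/inference/utils/pp_option.py` and is not shown in this commit's excerpt. Judging from the call site, it returns the run mode HPI picks by default for a given model and device, so the warning now fires only when the user's config is effectively the default. The name and signature below come from the diff, but the body is a hedged assumption:

```python
# Hypothetical sketch of get_default_run_mode(); the real implementation in
# pp_option.py may differ, e.g. by consulting per-model prior knowledge
# (which is presumably why model_name is a parameter).
def get_default_run_mode(model_name: str, device_type: str) -> str:
    # This commit makes MKL-DNN the default on CPU when the Paddle build
    # supports it; otherwise the plain Paddle run mode is used.
    if device_type == "cpu" and is_mkldnn_available():
        return "mkldnn"
    return "paddle"
```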

paddlex/inference/utils/hpi.py

Lines changed: 20 additions & 16 deletions
@@ -17,6 +17,7 @@
 import importlib.util
 import json
 import platform
+from collections import defaultdict
 from functools import lru_cache
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union

@@ -30,6 +31,7 @@
     get_paddle_version,
 )
 from ...utils.flags import USE_PIR_TRT
+from .misc import is_mkldnn_available
 from .model_paths import ModelPaths
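`is_mkldnn_available` comes from `paddlex/inference/utils/misc.py`, which is not part of this excerpt. The inline probe that the old `static_infer.py` code used, `hasattr(config, "set_mkldnn_cache_capacity")`, hints at one plausible implementation; treat this as a sketch, not the actual helper:

```python
# Plausible sketch of is_mkldnn_available(); the real helper in misc.py
# may probe availability differently.
from paddle.inference import Config

def is_mkldnn_available() -> bool:
    # Paddle builds without MKL-DNN support lack this Config method, which
    # is exactly what the hasattr() check removed from static_infer.py
    # relied on.
    return hasattr(Config, "set_mkldnn_cache_capacity")
```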

@@ -186,24 +188,23 @@ def suggest_inference_backend_and_config(
         hpi_config.pdx_model_name
     ].copy()

+    if not is_mkldnn_available():
+        if "paddle_mkldnn" in supported_pseudo_backends:
+            supported_pseudo_backends.remove("paddle_mkldnn")
+
     # XXX
     if not (
         USE_PIR_TRT
         and importlib.util.find_spec("tensorrt")
         and ctypes.util.find_library("nvinfer")
     ):
-        if (
-            "paddle_tensorrt" in supported_pseudo_backends
-            or "paddle_tensorrt_fp16" in supported_pseudo_backends
-        ):
-            supported_pseudo_backends.append("paddle")
         if "paddle_tensorrt" in supported_pseudo_backends:
             supported_pseudo_backends.remove("paddle_tensorrt")
         if "paddle_tensorrt_fp16" in supported_pseudo_backends:
             supported_pseudo_backends.remove("paddle_tensorrt_fp16")

-    candidate_backends = []
-    backend_to_pseudo_backend = {}
+    supported_backends = []
+    backend_to_pseudo_backends = defaultdict(list)
     for pb in supported_pseudo_backends:
         if pb.startswith("paddle"):
             backend = "paddle"
@@ -213,34 +214,38 @@ def suggest_inference_backend_and_config(
             backend = pb
         if available_backends is not None and backend not in available_backends:
             continue
-        candidate_backends.append(backend)
-        backend_to_pseudo_backend[backend] = pb
+        supported_backends.append(backend)
+        backend_to_pseudo_backends[backend].append(pb)

-    if not candidate_backends:
+    if not supported_backends:
         return None, "No inference backend can be selected."

     if hpi_config.backend is not None:
-        if hpi_config.backend not in candidate_backends:
+        if hpi_config.backend not in supported_backends:
             return (
                 None,
                 f"{repr(hpi_config.backend)} is not a supported inference backend.",
             )
         suggested_backend = hpi_config.backend
+        pseudo_backends = backend_to_pseudo_backends[suggested_backend]
+        pseudo_backend = pseudo_backends[0]
     else:
-        # The first backend is the preferred one.
-        suggested_backend = candidate_backends[0]
+        # Prefer the first one.
+        suggested_backend = supported_backends[0]
+        pseudo_backend = supported_pseudo_backends[0]

     suggested_backend_config = {}
     if suggested_backend == "paddle":
-        pseudo_backend = backend_to_pseudo_backend["paddle"]
         assert pseudo_backend in (
             "paddle",
             "paddle_fp16",
             "paddle_mkldnn",
             "paddle_tensorrt",
             "paddle_tensorrt_fp16",
         ), pseudo_backend
-        if pseudo_backend == "paddle_fp16":
+        if pseudo_backend == "paddle":
+            suggested_backend_config.update({"run_mode": "paddle"})
+        elif pseudo_backend == "paddle_fp16":
             suggested_backend_config.update({"run_mode": "paddle_fp16"})
         elif pseudo_backend == "paddle_mkldnn":
             suggested_backend_config.update({"run_mode": "mkldnn"})
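The move from a plain dict to `defaultdict(list)` is what lets a pinned backend keep its preferred pseudo-backend: with the old `backend_to_pseudo_backend[backend] = pb`, a later pseudo-backend silently overwrote an earlier (preferred) one. A small self-contained illustration with a made-up pseudo-backend list:

```python
from collections import defaultdict

# Illustrative only; the real lists come from HPI's per-model prior knowledge.
supported_pseudo_backends = ["paddle_mkldnn", "paddle", "onnxruntime"]

backend_to_pseudo_backends = defaultdict(list)
for pb in supported_pseudo_backends:
    backend = "paddle" if pb.startswith("paddle") else pb
    backend_to_pseudo_backends[backend].append(pb)

print(dict(backend_to_pseudo_backends))
# {'paddle': ['paddle_mkldnn', 'paddle'], 'onnxruntime': ['onnxruntime']}

# If the user pins backend="paddle", the first entry (the preferred one) wins:
print(backend_to_pseudo_backends["paddle"][0])  # -> 'paddle_mkldnn'
# With the old dict, "paddle" would have overwritten "paddle_mkldnn" here.
```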
@@ -250,7 +255,6 @@
             # TODO: Check if the target device supports FP16.
             suggested_backend_config.update({"run_mode": "trt_fp16"})
     elif suggested_backend == "tensorrt":
-        pseudo_backend = backend_to_pseudo_backend["tensorrt"]
         assert pseudo_backend in ("tensorrt", "tensorrt_fp16"), pseudo_backend
         if pseudo_backend == "tensorrt_fp16":
             suggested_backend_config.update({"precision": "fp16"})
