Commit 5683644

Changed the debug setting (#3551)

1 parent d6aa8a4 commit 5683644

File tree

11 files changed: +183 -36 lines changed


py/torch_tensorrt/dynamo/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -14,9 +14,9 @@
     load_cross_compiled_exported_program,
     save_cross_compiled_exported_program,
 )
-from ._Debugger import Debugger
 from ._exporter import export
 from ._refit import refit_module_weights
 from ._settings import CompilationSettings
 from ._SourceIR import SourceIR
 from ._tracer import trace
+from .debug._Debugger import Debugger
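Since `__init__.py` re-exports the class from its new home in the `debug` subpackage, the public import path is unchanged; only code reaching into the private `torch_tensorrt.dynamo._Debugger` module is affected. A minimal check of what this hunk implies:

from torch_tensorrt.dynamo import Debugger  # public path, still works
from torch_tensorrt.dynamo.debug._Debugger import Debugger as _Debugger  # new private location

# Both names refer to the same class after this commit.
assert Debugger is _Debugger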

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 54 additions & 8 deletions
@@ -2,6 +2,7 @@
 
 import collections.abc
 import logging
+import os
 import platform
 import warnings
 from typing import Any, Collection, List, Optional, Sequence, Set, Tuple, Union
@@ -31,6 +32,8 @@
 from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
     DYNAMO_CONVERTERS as CONVERTERS,
 )
+from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig
+from torch_tensorrt.dynamo.debug._supports_debugger import fn_supports_debugger
 from torch_tensorrt.dynamo.lowering import (
     get_decompositions,
     post_lowering,
@@ -42,7 +45,6 @@
     get_output_metadata,
     parse_graph_io,
     prepare_inputs,
-    set_log_level,
     to_torch_device,
     to_torch_tensorrt_device,
 )
@@ -64,7 +66,7 @@ def cross_compile_for_windows(
         Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]]
     ] = _defaults.ENABLED_PRECISIONS,
     engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
-    debug: bool = _defaults.DEBUG,
+    debug: bool = False,
     num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
     workspace_size: int = _defaults.WORKSPACE_SIZE,
     dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
@@ -186,7 +188,11 @@ def cross_compile_for_windows(
     )
 
     if debug:
-        set_log_level(logger.parent, logging.DEBUG)
+        warnings.warn(
+            "`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` to configure debugging options.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
 
     if "truncate_long_and_double" in kwargs.keys():
         if truncate_double is not _defaults.TRUNCATE_DOUBLE:
@@ -297,7 +303,6 @@ def cross_compile_for_windows(
         "enabled_precisions": (
             enabled_precisions if enabled_precisions else _defaults.ENABLED_PRECISIONS
         ),
-        "debug": debug,
         "device": device,
         "assume_dynamic_shape_support": assume_dynamic_shape_support,
         "workspace_size": workspace_size,
@@ -399,7 +404,7 @@ def compile(
         Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]]
     ] = _defaults.ENABLED_PRECISIONS,
     engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
-    debug: bool = _defaults.DEBUG,
+    debug: bool = False,
     num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
     workspace_size: int = _defaults.WORKSPACE_SIZE,
     dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
@@ -518,6 +523,13 @@ def compile(
         torch.fx.GraphModule: Compiled FX Module, when run it will execute via TensorRT
     """
 
+    if debug:
+        warnings.warn(
+            "`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` for debugging functionality",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
     if "truncate_long_and_double" in kwargs.keys():
         if truncate_double is not _defaults.TRUNCATE_DOUBLE:
             raise ValueError(
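For callers that pass `debug=True` today, the warning points at `torch_tensorrt.dynamo.Debugger`. The Debugger class itself is not part of the hunks shown here, so the keyword arguments in the sketch below (`logging_dir`, `save_engine_profile`, `engine_builder_monitor`) are assumptions mirroring the `DebuggerConfig` fields used later in this commit, and `MyModel` is a hypothetical module:

import torch
import torch_tensorrt

model = MyModel().eval().cuda()  # hypothetical user model
inputs = [torch.randn(1, 3, 224, 224).cuda()]

# Old style: still accepted, but after this commit it only emits a
# DeprecationWarning and no longer changes log levels or CompilationSettings.
trt_model = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs, debug=True)

# New style (sketch): drive debugging through the Debugger context manager.
with torch_tensorrt.dynamo.Debugger(
    logging_dir="/tmp/trt_debug",  # assumed kwarg, mirrors DebuggerConfig.logging_dir
    save_engine_profile=True,      # assumed kwarg, mirrors DebuggerConfig.save_engine_profile
    engine_builder_monitor=True,   # assumed kwarg, mirrors DebuggerConfig.engine_builder_monitor
):
    trt_model = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs)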
@@ -639,7 +651,6 @@ def compile(
         "enabled_precisions": (
             enabled_precisions if enabled_precisions else _defaults.ENABLED_PRECISIONS
         ),
-        "debug": debug,
         "device": device,
         "assume_dynamic_shape_support": assume_dynamic_shape_support,
         "workspace_size": workspace_size,
@@ -713,12 +724,15 @@ def compile(
     return trt_gm
 
 
+@fn_supports_debugger
 def compile_module(
     gm: torch.fx.GraphModule,
     sample_arg_inputs: Sequence[Input],
     sample_kwarg_inputs: Optional[dict[Any, Any]] = None,
     settings: CompilationSettings = CompilationSettings(),
     engine_cache: Optional[BaseEngineCache] = None,
+    *,
+    _debugger_settings: Optional[DebuggerConfig] = None,
 ) -> torch.fx.GraphModule:
     """Compile a traced FX module
 
@@ -921,6 +935,34 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
 
         trt_modules[name] = trt_module
 
+        if _debugger_settings:
+
+            if _debugger_settings.save_engine_profile:
+                if settings.use_python_runtime:
+                    if _debugger_settings.profile_format == "trex":
+                        logger.warning(
+                            "Profiling with TREX can only be enabled when using the C++ runtime. Python runtime profiling only support cudagraph visualization."
+                        )
+                    trt_module.enable_profiling()
+                else:
+                    path = os.path.join(
+                        _debugger_settings.logging_dir, "engine_visualization"
+                    )
+                    os.makedirs(path, exist_ok=True)
+                    trt_module.enable_profiling(
+                        profiling_results_dir=path,
+                        profile_format=_debugger_settings.profile_format,
+                    )
+
+            if _debugger_settings.save_layer_info:
+                with open(
+                    os.path.join(
+                        _debugger_settings.logging_dir, "engine_layer_info.json"
+                    ),
+                    "w",
+                ) as f:
+                    f.write(trt_module.get_layer_info())
+
     # Parse the graph I/O and store it in dryrun tracker
     parse_graph_io(gm, dryrun_tracker)
 
@@ -948,7 +990,7 @@ def convert_exported_program_to_serialized_trt_engine(
     enabled_precisions: (
         Set[torch.dtype | dtype] | Tuple[torch.dtype | dtype]
     ) = _defaults.ENABLED_PRECISIONS,
-    debug: bool = _defaults.DEBUG,
+    debug: bool = False,
     assume_dynamic_shape_support: bool = _defaults.ASSUME_DYNAMIC_SHAPE_SUPPORT,
     workspace_size: int = _defaults.WORKSPACE_SIZE,
     min_block_size: int = _defaults.MIN_BLOCK_SIZE,
@@ -1051,7 +1093,11 @@ def convert_exported_program_to_serialized_trt_engine(
         bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs
     """
     if debug:
-        set_log_level(logger.parent, logging.DEBUG)
+        warnings.warn(
+            "`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` to configure debugging options.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
 
     if "truncate_long_and_double" in kwargs.keys():
         if truncate_double is not _defaults.TRUNCATE_DOUBLE:

py/torch_tensorrt/dynamo/_defaults.py

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@
 from torch_tensorrt._enums import EngineCapability, dtype
 
 ENABLED_PRECISIONS = {dtype.f32}
-DEBUG = False
 DEVICE = None
 DISABLE_TF32 = False
 ASSUME_DYNAMIC_SHAPE_SUPPORT = False

py/torch_tensorrt/dynamo/_settings.py

Lines changed: 0 additions & 2 deletions
@@ -7,7 +7,6 @@
 from torch_tensorrt.dynamo._defaults import (
     ASSUME_DYNAMIC_SHAPE_SUPPORT,
     CACHE_BUILT_ENGINES,
-    DEBUG,
     DISABLE_TF32,
     DLA_GLOBAL_DRAM_SIZE,
     DLA_LOCAL_DRAM_SIZE,
@@ -101,7 +100,6 @@ class CompilationSettings:
     """
 
     enabled_precisions: Set[dtype] = field(default_factory=lambda: ENABLED_PRECISIONS)
-    debug: bool = DEBUG
     workspace_size: int = WORKSPACE_SIZE
     min_block_size: int = MIN_BLOCK_SIZE
     torch_executed_ops: Collection[Target] = field(default_factory=set)
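With the field removed from the dataclass, `CompilationSettings` no longer accepts `debug` at all, which is why `_compiler.py` above also stops forwarding `"debug"` into the settings dict. The expected behavior (inferred from the dataclass change, not from a test in this commit):

from torch_tensorrt.dynamo import CompilationSettings

settings = CompilationSettings(workspace_size=1 << 30)  # unaffected fields still work

try:
    CompilationSettings(debug=True)
except TypeError as err:
    # dataclasses reject unknown keyword arguments once the field is gone
    print(f"debug is no longer a CompilationSettings field: {err}")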

py/torch_tensorrt/dynamo/_tracer.py

Lines changed: 2 additions & 6 deletions
@@ -7,8 +7,8 @@
 import torch
 from torch.export import Dim, export
 from torch_tensorrt._Input import Input
-from torch_tensorrt.dynamo._defaults import DEBUG, default_device
-from torch_tensorrt.dynamo.utils import get_torch_inputs, set_log_level, to_torch_device
+from torch_tensorrt.dynamo._defaults import default_device
+from torch_tensorrt.dynamo.utils import get_torch_inputs, to_torch_device
 
 logger = logging.getLogger(__name__)
 
@@ -70,10 +70,6 @@ def trace(
     if kwarg_inputs is None:
         kwarg_inputs = {}
 
-    debug = kwargs.get("debug", DEBUG)
-    if debug:
-        set_log_level(logger.parent, logging.DEBUG)
-
     device = to_torch_device(kwargs.get("device", default_device()))
     torch_arg_inputs = get_torch_inputs(arg_inputs, device)
     torch_kwarg_inputs = get_torch_inputs(kwarg_inputs, device)
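The deleted block is the last place the `debug` kwarg actually did something in `trace()`: it raised the parent `torch_tensorrt.dynamo` logger to DEBUG. If only that verbosity is wanted, roughly the same effect can be had directly through the standard `logging` module (or through the Debugger context manager):

import logging

# Roughly equivalent to the removed set_log_level(logger.parent, logging.DEBUG)
# call: logger.parent for torch_tensorrt.dynamo._tracer is torch_tensorrt.dynamo.
logging.getLogger("torch_tensorrt.dynamo").setLevel(logging.DEBUG)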

py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py

Lines changed: 9 additions & 3 deletions
@@ -46,6 +46,8 @@
     to_torch,
 )
 from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, deallocate_module, to_torch_device
+from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig
+from torch_tensorrt.dynamo.debug._supports_debugger import cls_supports_debugger
 from torch_tensorrt.fx.observer import Observer
 from torch_tensorrt.logging import TRT_LOGGER
 
@@ -70,6 +72,7 @@ class TRTInterpreterResult(NamedTuple):
     requires_output_allocator: bool
 
 
+@cls_supports_debugger
 class TRTInterpreter(torch.fx.Interpreter):  # type: ignore[misc]
     def __init__(
         self,
@@ -78,12 +81,14 @@ def __init__(
         output_dtypes: Optional[Sequence[dtype]] = None,
         compilation_settings: CompilationSettings = CompilationSettings(),
         engine_cache: Optional[BaseEngineCache] = None,
+        *,
+        _debugger_settings: Optional[DebuggerConfig] = None,
     ):
         super().__init__(module)
 
         self.logger = TRT_LOGGER
         self.builder = trt.Builder(self.logger)
-
+        self._debugger_settings = _debugger_settings
         flag = 0
         if compilation_settings.use_explicit_typing:
             STRONGLY_TYPED = 1 << (int)(
@@ -204,7 +209,7 @@ def _populate_trt_builder_config(
     ) -> trt.IBuilderConfig:
         builder_config = self.builder.create_builder_config()
 
-        if self.compilation_settings.debug:
+        if self._debugger_settings and self._debugger_settings.engine_builder_monitor:
            builder_config.progress_monitor = TRTBulderMonitor()
 
         if self.compilation_settings.workspace_size != 0:
@@ -215,7 +220,8 @@
         if version.parse(trt.__version__) >= version.parse("8.2"):
             builder_config.profiling_verbosity = (
                 trt.ProfilingVerbosity.DETAILED
-                if self.compilation_settings.debug
+                if self._debugger_settings
+                and self._debugger_settings.save_engine_profile
                 else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
             )
 