pytorch
diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/core/runtime/TRTEngineProfiler.cpp b/core/runtime/TRTEngineProfiler.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/core/runtime/TRTEngineProfiler.h b/core/runtime/TRTEngineProfiler.h
Lines changed: 0 additions & 2 deletions
diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py
Lines changed: 29 additions & 29 deletions
diff --git a/py/torch_tensorrt/dynamo/_defaults.py b/py/torch_tensorrt/dynamo/_defaults.py
Lines changed: 1 addition & 0 deletions
diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
Lines changed: 5 additions & 6 deletions
diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py
Lines changed: 23 additions & 50 deletions
diff --git a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py
Lines changed: 3 additions & 2 deletions
@@ -325,7 +325,7 @@ void TRTEngine::set_profiling_paths() {
   output_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_output_profile.trace"}.string();
   enqueue_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_enqueue_profile.trace"}.string();
   trt_engine_profile_path =
-      std::filesystem::path{profile_path_prefix + "/" + name + "_engine_exectuion_profile.trace"}.string();
+      std::filesystem::path{profile_path_prefix + "/" + name + "_engine_execution_profile.trace"}.string();
   cuda_graph_debug_path = std::filesystem::path{profile_path_prefix + "/" + name + "_cudagraph.dot"}.string();
 }
 
 
@@ -62,7 +62,7 @@ void dump_trace(const std::string& path, const TRTEngineProfiler& value) {
     } else { // kTREX
       out << "    \"timeMs\": " << elem.time << "," << std::endl;
       out << "    \"averageMs\": " << elem.time / elem.count << "," << std::endl;
-      out << "    \"percentage\": " << (elem.time * 100.0 / ts) << "," << std::endl;
+      out << "    \"percentage\": " << (elem.time * 100.0 / ts) << std::endl;
     }
     out << "  }," << std::endl;
     running_time += elem.time;
 
@@ -12,8 +12,6 @@ namespace runtime {
 
 enum TraceFormat { kPERFETTO, kTREX };
 
-// Forward declare the function
-
 struct TRTEngineProfiler : public nvinfer1::IProfiler {
   struct Record {
     float time{0};
 
@@ -66,7 +66,6 @@ def cross_compile_for_windows(
         Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]]
     ] = _defaults.ENABLED_PRECISIONS,
     engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
-    debug: bool = False,
     num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
     workspace_size: int = _defaults.WORKSPACE_SIZE,
     dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
@@ -140,7 +139,6 @@ def cross_compile_for_windows(
         assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False
         sparse_weights (bool): Enable sparsity for convolution and fully connected layers.
         enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
-        debug (bool): Enable debuggable engine
         capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
         num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
         workspace_size (int): Maximum size of workspace given to TensorRT
@@ -187,9 +185,9 @@ def cross_compile_for_windows(
             f"Cross compile for windows is only supported on x86-64 Linux architecture, current platform: {platform.system()=}, {platform.architecture()[0]=}"
         )
 
-    if debug:
+    if kwargs.get("debug", False):
         warnings.warn(
-            "`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` to configure debugging options.",
+            "`debug` is deprecated. Please use `with torch_tensorrt.dynamo.Debugger(...)` to wrap your compilation call to enable debugging functionality.",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -404,7 +402,6 @@ def compile(
         Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]]
     ] = _defaults.ENABLED_PRECISIONS,
     engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
-    debug: bool = False,
     num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
     workspace_size: int = _defaults.WORKSPACE_SIZE,
     dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
@@ -480,7 +477,6 @@ def compile(
         assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False
         sparse_weights (bool): Enable sparsity for convolution and fully connected layers.
         enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
-        debug (bool): Enable debuggable engine
         capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
         num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
         workspace_size (int): Maximum size of workspace given to TensorRT
@@ -523,9 +519,9 @@ def compile(
         torch.fx.GraphModule: Compiled FX Module, when run it will execute via TensorRT
     """
 
-    if debug:
+    if kwargs.get("debug", False):
         warnings.warn(
-            "`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` for debugging functionality",
+            "`debug` is deprecated. Please use `with torch_tensorrt.dynamo.Debugger(...)` to wrap your compilation call to enable debugging functionality",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -732,7 +728,7 @@ def compile_module(
     settings: CompilationSettings = CompilationSettings(),
     engine_cache: Optional[BaseEngineCache] = None,
     *,
-    _debugger_settings: Optional[DebuggerConfig] = None,
+    _debugger_config: Optional[DebuggerConfig] = None,
 ) -> torch.fx.GraphModule:
     """Compile a traced FX module
 
@@ -935,29 +931,36 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
 
             trt_modules[name] = trt_module
 
-            if _debugger_settings:
+            if _debugger_config:
 
-                if _debugger_settings.save_engine_profile:
+                if _debugger_config.save_engine_profile:
                     if settings.use_python_runtime:
-                        if _debugger_settings.profile_format == "trex":
-                            logger.warning(
+                        if _debugger_config.profile_format != "cudagraph":
+                            raise ValueError(
                                 "Profiling with TREX can only be enabled when using the C++ runtime. Python runtime profiling only support cudagraph visualization."
                             )
+                        else:
                             trt_module.enable_profiling()
                     else:
-                        path = os.path.join(
-                            _debugger_settings.logging_dir, "engine_visualization"
-                        )
-                        os.makedirs(path, exist_ok=True)
-                        trt_module.enable_profiling(
-                            profiling_results_dir=path,
-                            profile_format=_debugger_settings.profile_format,
-                        )
-
-                if _debugger_settings.save_layer_info:
+                        if _debugger_config.profile_format == "cudagraph":
+                            raise ValueError(
+                                "Profiling with Cudagraph can only be enabled when using the Python runtime. C++ runtime profiling only support TREX/Perfetto visualization."
+                            )
+                        else:
+                            path = os.path.join(
+                                _debugger_config.logging_dir,
+                                "engine_visualization_profile",
+                            )
+                            os.makedirs(path, exist_ok=True)
+                            trt_module.enable_profiling(
+                                profiling_results_dir=path,
+                                profile_format=_debugger_config.profile_format,
+                            )
+
+                if _debugger_config.save_layer_info:
                     with open(
                         os.path.join(
-                            _debugger_settings.logging_dir, "engine_layer_info.json"
+                            _debugger_config.logging_dir, "engine_layer_info.json"
                         ),
                         "w",
                     ) as f:
@@ -990,7 +993,6 @@ def convert_exported_program_to_serialized_trt_engine(
     enabled_precisions: (
         Set[torch.dtype | dtype] | Tuple[torch.dtype | dtype]
     ) = _defaults.ENABLED_PRECISIONS,
-    debug: bool = False,
     assume_dynamic_shape_support: bool = _defaults.ASSUME_DYNAMIC_SHAPE_SUPPORT,
     workspace_size: int = _defaults.WORKSPACE_SIZE,
     min_block_size: int = _defaults.MIN_BLOCK_SIZE,
@@ -1052,7 +1054,6 @@ def convert_exported_program_to_serialized_trt_engine(
                         torch.randn((1, 3, 224, 244)) # Use an example tensor and let torch_tensorrt infer settings
                     ]
         enabled_precisions (Optional[Set[torch.dtype | _enums.dtype]]): The set of datatypes that TensorRT can use
-        debug (bool): Whether to print out verbose debugging information
         workspace_size (int): Workspace TRT is allowed to use for the module (0 is default)
         min_block_size (int): Minimum number of operators per TRT-Engine Block
         torch_executed_ops (Set[str]): Set of operations to run in Torch, regardless of converter coverage
@@ -1092,9 +1093,9 @@ def convert_exported_program_to_serialized_trt_engine(
     Returns:
         bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs
     """
-    if debug:
+    if kwargs.get("debug", False):
         warnings.warn(
-            "`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` to configure debugging options.",
+            "`debug` is deprecated. Please use `with torch_tensorrt.dynamo.Debugger(...)` to wrap your compilation call to enable debugging functionality.",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -1181,7 +1182,6 @@ def convert_exported_program_to_serialized_trt_engine(
     compilation_options = {
         "assume_dynamic_shape_support": assume_dynamic_shape_support,
         "enabled_precisions": enabled_precisions,
-        "debug": debug,
         "workspace_size": workspace_size,
         "min_block_size": min_block_size,
         "torch_executed_ops": torch_executed_ops,
 
@@ -49,6 +49,7 @@
 L2_LIMIT_FOR_TILING = -1
 USE_DISTRIBUTED_MODE_TRACE = False
 OFFLOAD_MODULE_TO_CPU = False
+DEBUG_LOGGING_DIR = os.path.join(tempfile.gettempdir(), "torch_tensorrt/debug_logs")
 
 
 def default_device() -> Device:
 
@@ -45,9 +45,9 @@
     get_trt_tensor,
     to_torch,
 )
-from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, deallocate_module, to_torch_device
 from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig
 from torch_tensorrt.dynamo.debug._supports_debugger import cls_supports_debugger
+from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, deallocate_module, to_torch_device
 from torch_tensorrt.fx.observer import Observer
 from torch_tensorrt.logging import TRT_LOGGER
 
@@ -82,13 +82,13 @@ def __init__(
         compilation_settings: CompilationSettings = CompilationSettings(),
         engine_cache: Optional[BaseEngineCache] = None,
         *,
-        _debugger_settings: Optional[DebuggerConfig] = None,
+        _debugger_config: Optional[DebuggerConfig] = None,
     ):
         super().__init__(module)
 
         self.logger = TRT_LOGGER
         self.builder = trt.Builder(self.logger)
-        self._debugger_settings = _debugger_settings
+        self._debugger_config = _debugger_config
         flag = 0
         if compilation_settings.use_explicit_typing:
             STRONGLY_TYPED = 1 << (int)(
@@ -209,7 +209,7 @@ def _populate_trt_builder_config(
     ) -> trt.IBuilderConfig:
         builder_config = self.builder.create_builder_config()
 
-        if self._debugger_settings and self._debugger_settings.engine_builder_monitor:
+        if self._debugger_config and self._debugger_config.engine_builder_monitor:
             builder_config.progress_monitor = TRTBulderMonitor()
 
         if self.compilation_settings.workspace_size != 0:
@@ -220,8 +220,7 @@ def _populate_trt_builder_config(
         if version.parse(trt.__version__) >= version.parse("8.2"):
             builder_config.profiling_verbosity = (
                 trt.ProfilingVerbosity.DETAILED
-                if self._debugger_settings
-                and self._debugger_settings.save_engine_profile
+                if self._debugger_config and self._debugger_config.save_engine_profile
                 else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
             )
 
 
@@ -8,6 +8,7 @@
 from unittest import mock
 
 import torch
+from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR
 from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig
 from torch_tensorrt.dynamo.debug._supports_debugger import (
     _DEBUG_ENABLED_CLS,
@@ -32,7 +33,7 @@ def __init__(
         save_engine_profile: bool = False,
         profile_format: str = "perfetto",
         engine_builder_monitor: bool = True,
-        logging_dir: str = tempfile.gettempdir(),
+        logging_dir: str = DEBUG_LOGGING_DIR,
         save_layer_info: bool = False,
     ):
         """Initialize a debugger for TensorRT conversion.
@@ -47,8 +48,9 @@ def __init__(
                 after execution of a lowering pass. Defaults to None.
             save_engine_profile (bool): Whether to save TensorRT engine profiling information.
                 Defaults to False.
-            profile_format (str): Format for profiling data. Can be either 'perfetto' or 'trex'.
-                If you need to generate engine graph using the profiling files, set it to 'trex' .
+            profile_format (str): Format for profiling data. Choose from 'perfetto', 'trex', 'cudagraph'.
+                If you need to generate engine graph using the profiling files, set it to 'trex' and use the C++ runtime.
+                If you need to generate cudagraph visualization, set it to 'cudagraph'.
                 Defaults to 'perfetto'.
             engine_builder_monitor (bool): Whether to monitor TensorRT engine building process.
                 Defaults to True.
@@ -92,7 +94,7 @@ def __init__(
     def __enter__(self) -> None:
         self.original_lvl = _LOGGER.getEffectiveLevel()
         self.rt_level = torch.ops.tensorrt.get_logging_level()
-        dictConfig(self.get_customized_logging_config())
+        dictConfig(self.get_logging_config(self.log_level))
 
         if self.capture_fx_graph_before or self.capture_fx_graph_after:
             self.old_pre_passes, self.old_post_passes = (
@@ -126,22 +128,22 @@ def __enter__(self) -> None:
         self._context_stack = contextlib.ExitStack()
 
         for f in _DEBUG_ENABLED_FUNCS:
-            f.__kwdefaults__["_debugger_settings"] = self.cfg
+            f.__kwdefaults__["_debugger_config"] = self.cfg
 
         [
             self._context_stack.enter_context(
                 mock.patch.object(
                     c,
                     "__init__",
-                    functools.partialmethod(c.__init__, _debugger_settings=self.cfg),
+                    functools.partialmethod(c.__init__, _debugger_config=self.cfg),
                 )
             )
             for c in _DEBUG_ENABLED_CLS
         ]
 
     def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None:
 
-        dictConfig(self.get_default_logging_config())
+        dictConfig(self.get_logging_config(None))
         torch.ops.tensorrt.set_logging_level(self.rt_level)
         if self.capture_fx_graph_before or self.capture_fx_graph_after:
             ATEN_PRE_LOWERING_PASSES.passes, ATEN_POST_LOWERING_PASSES.passes = (
@@ -151,50 +153,13 @@ def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None:
         self.debug_file_dir = tempfile.TemporaryDirectory().name
 
         for f in _DEBUG_ENABLED_FUNCS:
-            f.__kwdefaults__["_debugger_settings"] = None
+            f.__kwdefaults__["_debugger_config"] = None
 
         self._context_stack.close()
 
-    def get_customized_logging_config(self) -> dict[str, Any]:
-        config = {
-            "version": 1,
-            "disable_existing_loggers": False,
-            "formatters": {
-                "brief": {
-                    "format": "%(asctime)s - %(levelname)s - %(message)s",
-                    "datefmt": "%H:%M:%S",
-                },
-                "standard": {
-                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-                    "datefmt": "%Y-%m-%d %H:%M:%S",
-                },
-            },
-            "handlers": {
-                "file": {
-                    "level": self.log_level,
-                    "class": "logging.FileHandler",
-                    "filename": f"{self.cfg.logging_dir}/torch_tensorrt_logging.log",
-                    "formatter": "standard",
-                },
-                "console": {
-                    "level": self.log_level,
-                    "class": "logging.StreamHandler",
-                    "formatter": "brief",
-                },
-            },
-            "loggers": {
-                "": {  # root logger
-                    "handlers": ["file", "console"],
-                    "level": self.log_level,
-                    "propagate": True,
-                },
-            },
-            "force": True,
-        }
-        return config
-
-    def get_default_logging_config(self) -> dict[str, Any]:
-        config = {
+    def get_logging_config(self, log_level: Optional[int] = None) -> dict[str, Any]:
+        level = log_level if log_level is not None else self.original_lvl
+        config: dict[str, Any] = {
             "version": 1,
             "disable_existing_loggers": False,
             "formatters": {
@@ -209,18 +174,26 @@ def get_default_logging_config(self) -> dict[str, Any]:
             },
             "handlers": {
                 "console": {
-                    "level": self.original_lvl,
+                    "level": level,
                     "class": "logging.StreamHandler",
                     "formatter": "brief",
                 },
             },
             "loggers": {
                 "": {  # root logger
                     "handlers": ["console"],
-                    "level": self.original_lvl,
+                    "level": level,
                     "propagate": True,
                 },
             },
             "force": True,
         }
+        if log_level is not None:
+            config["handlers"]["file"] = {
+                "level": level,
+                "class": "logging.FileHandler",
+                "filename": f"{self.cfg.logging_dir}/torch_tensorrt_logging.log",
+                "formatter": "standard",
+            }
+            config["loggers"][""]["handlers"].append("file")
         return config
@@ -1,12 +1,13 @@
-import tempfile
 from dataclasses import dataclass
 
+from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR
+
 
 @dataclass
 class DebuggerConfig:
     log_level: str = "debug"
     save_engine_profile: bool = False
     engine_builder_monitor: bool = True
-    logging_dir: str = tempfile.gettempdir()
+    logging_dir: str = DEBUG_LOGGING_DIR
     profile_format: str = "perfetto"
     save_layer_info: bool = False
Original file line number	Diff line number	Diff line change
`@@ -325,7 +325,7 @@ void TRTEngine::set_profiling_paths() {`
`325`	`325`	`output_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_output_profile.trace"}.string();`
`326`	`326`	`enqueue_profile_path = std::filesystem::path{profile_path_prefix + "/" + name + "_enqueue_profile.trace"}.string();`
`327`	`327`	`trt_engine_profile_path =`
`328`		`- std::filesystem::path{profile_path_prefix + "/" + name + "_engine_exectuion_profile.trace"}.string();`
	`328`	`+ std::filesystem::path{profile_path_prefix + "/" + name + "_engine_execution_profile.trace"}.string();`
`329`	`329`	`cuda_graph_debug_path = std::filesystem::path{profile_path_prefix + "/" + name + "_cudagraph.dot"}.string();`
`330`	`330`	`}`
`331`	`331`
Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@ void dump_trace(const std::string& path, const TRTEngineProfiler& value) {`
`62`	`62`	`} else { // kTREX`
`63`	`63`	`out << " \"timeMs\": " << elem.time << "," << std::endl;`
`64`	`64`	`out << " \"averageMs\": " << elem.time / elem.count << "," << std::endl;`
`65`		`- out << " \"percentage\": " << (elem.time * 100.0 / ts) << "," << std::endl;`
	`65`	`+ out << " \"percentage\": " << (elem.time * 100.0 / ts) << std::endl;`
`66`	`66`	`}`
`67`	`67`	`out << " }," << std::endl;`
`68`	`68`	`running_time += elem.time;`