
Commit 1f2b35e

Added torch_tensorrt.dynamo.Debugger; cleaned up settings.debug
1 parent 8de3947

13 files changed: +228 additions, -54 deletions


py/torch_tensorrt/dynamo/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@
     load_cross_compiled_exported_program,
     save_cross_compiled_exported_program,
 )
+from ._debugger import Debugger
 from ._exporter import export
 from ._refit import refit_module_weights
 from ._settings import CompilationSettings
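
Since Debugger is re-exported from the package __init__, it should be importable directly from the dynamo namespace; a minimal sketch:

from torch_tensorrt.dynamo import Debugger  # re-exported by the line added above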

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 16 additions & 9 deletions
@@ -520,7 +520,13 @@ def compile(
     """

     if debug:
-        set_log_level(logger.parent, logging.DEBUG)
+        warnings.warn(
+            "The 'debug' argument is deprecated and will be removed in a future release. "
+            "Please use the torch_tensorrt.dynamo.Debugger context manager for debugging and graph capture.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
     if "truncate_long_and_double" in kwargs.keys():
         if truncate_double is not _defaults.TRUNCATE_DOUBLE:
             raise ValueError(

@@ -642,7 +648,6 @@ def compile(
         "enabled_precisions": (
             enabled_precisions if enabled_precisions else _defaults.ENABLED_PRECISIONS
         ),
-        "debug": debug,
         "device": device,
         "assume_dynamic_shape_support": assume_dynamic_shape_support,
         "workspace_size": workspace_size,

@@ -745,7 +750,7 @@ def compile_module(

     # Check the number of supported operations in the graph
     num_supported_ops, total_ops = partitioning.get_graph_converter_support(
-        gm, settings.debug, settings.torch_executed_ops
+        gm, settings.torch_executed_ops
     )

     dryrun_tracker.total_ops_in_graph = total_ops

@@ -797,7 +802,6 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
         logger.info("Partitioning the graph via the fast partitioner")
         partitioned_module, supported_ops = partitioning.fast_partition(
             gm,
-            verbose=settings.debug,
             min_block_size=settings.min_block_size,
             torch_executed_ops=settings.torch_executed_ops,
             require_full_compilation=settings.require_full_compilation,

@@ -818,7 +822,6 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
         logger.info("Partitioning the graph via the global partitioner")
         partitioned_module, supported_ops = partitioning.global_partition(
             gm,
-            verbose=settings.debug,
             min_block_size=settings.min_block_size,
             torch_executed_ops=settings.torch_executed_ops,
             require_full_compilation=settings.require_full_compilation,

@@ -925,17 +928,21 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
         )

         trt_modules[name] = trt_module
+        from torch_tensorrt.dynamo._debugger import (
+            DEBUG_FILE_DIR,
+            SAVE_ENGINE_PROFILE,
+        )

-        if settings.debug and settings.engine_vis_dir:
+        if SAVE_ENGINE_PROFILE:
             if settings.use_python_runtime:
                 logger.warning(
                     "Profiling can only be enabled when using the C++ runtime"
                 )
             else:
-                if not os.path.exists(settings.engine_vis_dir):
-                    os.makedirs(settings.engine_vis_dir)
+                path = os.path.join(DEBUG_FILE_DIR, "engine_visualization")
+                os.makedirs(path, exist_ok=True)
                 trt_module.enable_profiling(
-                    profiling_results_dir=settings.engine_vis_dir,
+                    profiling_results_dir=path,
                     profile_format="trex",
                 )

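Per the deprecation message above, debugging moves from the debug flag to the new context manager. A minimal migration sketch; exported_program and inputs are placeholders, not values from this commit:

import torch_tensorrt

# Deprecated: now only emits a DeprecationWarning
# trt_gm = torch_tensorrt.dynamo.compile(exported_program, inputs, debug=True)

# Replacement suggested by the warning text
with torch_tensorrt.dynamo.Debugger(level="debug"):
    trt_gm = torch_tensorrt.dynamo.compile(exported_program, inputs)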
py/torch_tensorrt/dynamo/_debugger.py

Lines changed: 177 additions & 0 deletions
@@ -0,0 +1,177 @@
+import logging
+import os
+import tempfile
+from logging.config import dictConfig
+from typing import Any, List, Optional
+
+import torch
+from torch_tensorrt.dynamo.lowering import (
+    ATEN_POST_LOWERING_PASSES,
+    ATEN_PRE_LOWERING_PASSES,
+)
+
+_LOGGER = logging.getLogger("torch_tensorrt [TensorRT Conversion Context]")
+GRAPH_LEVEL = 5
+logging.addLevelName(GRAPH_LEVEL, "GRAPHS")
+
+# Debugger States
+DEBUG_FILE_DIR = tempfile.TemporaryDirectory().name
+SAVE_ENGINE_PROFILE = False
+
+
+class Debugger:
+    def __init__(
+        self,
+        level: str,
+        capture_fx_graph_before: Optional[List[str]] = None,
+        capture_fx_graph_after: Optional[List[str]] = None,
+        save_engine_profile: bool = False,
+        logging_dir: Optional[str] = None,
+    ):
+
+        if level != "graphs" and (capture_fx_graph_after or save_engine_profile):
+            _LOGGER.warning(
+                "Capture FX Graph or Draw Engine Graph is only supported when level is 'graphs'"
+            )
+
+        if level == "debug":
+            self.level = logging.DEBUG
+        elif level == "info":
+            self.level = logging.INFO
+        elif level == "warning":
+            self.level = logging.WARNING
+        elif level == "error":
+            self.level = logging.ERROR
+        elif level == "internal_errors":
+            self.level = logging.CRITICAL
+        elif level == "graphs":
+            self.level = GRAPH_LEVEL
+        else:
+            raise ValueError(
+                f"Invalid level: {level}, allowed levels are: debug, info, warning, error, internal_errors, graphs"
+            )
+
+        self.capture_fx_graph_before = capture_fx_graph_before
+        self.capture_fx_graph_after = capture_fx_graph_after
+        global SAVE_ENGINE_PROFILE
+        SAVE_ENGINE_PROFILE = save_engine_profile
+
+        if logging_dir is not None:
+            global DEBUG_FILE_DIR
+            DEBUG_FILE_DIR = logging_dir
+        os.makedirs(DEBUG_FILE_DIR, exist_ok=True)
+
+    def __enter__(self) -> None:
+        self.original_lvl = _LOGGER.getEffectiveLevel()
+        self.rt_level = torch.ops.tensorrt.get_logging_level()
+        dictConfig(self.get_config())
+
+        if self.level == GRAPH_LEVEL:
+            self.old_pre_passes, self.old_post_passes = (
+                ATEN_PRE_LOWERING_PASSES.passes,
+                ATEN_POST_LOWERING_PASSES.passes,
+            )
+            pre_pass_names = [p.__name__ for p in self.old_pre_passes]
+            post_pass_names = [p.__name__ for p in self.old_post_passes]
+            path = os.path.join(DEBUG_FILE_DIR, "lowering_passes_visualization")
+            if self.capture_fx_graph_before is not None:
+                pre_vis_passes = [
+                    p for p in self.capture_fx_graph_before if p in pre_pass_names
+                ]
+                post_vis_passes = [
+                    p for p in self.capture_fx_graph_before if p in post_pass_names
+                ]
+                ATEN_PRE_LOWERING_PASSES.insert_debug_pass_before(pre_vis_passes, path)
+                ATEN_POST_LOWERING_PASSES.insert_debug_pass_before(
+                    post_vis_passes, path
+                )
+            if self.capture_fx_graph_after is not None:
+                pre_vis_passes = [
+                    p for p in self.capture_fx_graph_after if p in pre_pass_names
+                ]
+                post_vis_passes = [
+                    p for p in self.capture_fx_graph_after if p in post_pass_names
+                ]
+                ATEN_PRE_LOWERING_PASSES.insert_debug_pass_after(pre_vis_passes, path)
+                ATEN_POST_LOWERING_PASSES.insert_debug_pass_after(post_vis_passes, path)
+
+    def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None:
+        dictConfig(self.get_default_config())
+        torch.ops.tensorrt.set_logging_level(self.rt_level)
+        if self.level == GRAPH_LEVEL and self.capture_fx_graph_after:
+            ATEN_PRE_LOWERING_PASSES.passes, ATEN_POST_LOWERING_PASSES.passes = (
+                self.old_pre_passes,
+                self.old_post_passes,
+            )
+
+    def get_config(self) -> dict[str, Any]:
+        config = {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "brief": {
+                    "format": "%(asctime)s - %(levelname)s - %(message)s",
+                    "datefmt": "%H:%M:%S",
+                },
+                "standard": {
+                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                    "datefmt": "%Y-%m-%d %H:%M:%S",
+                },
+            },
+            "handlers": {
+                "file": {
+                    "level": self.level,
+                    "class": "logging.FileHandler",
+                    "filename": f"{DEBUG_FILE_DIR}/torch_tensorrt_logging.log",
+                    "formatter": "standard",
+                },
+                "console": {
+                    "level": self.level,
+                    "class": "logging.StreamHandler",
+                    "formatter": "brief",
+                },
+            },
+            "loggers": {
+                "": {  # root logger
+                    "handlers": ["file", "console"],
+                    "level": self.level,
+                    "propagate": True,
+                },
+            },
+            "force": True,
+        }
+        return config
+
+    def get_default_config(self) -> dict[str, Any]:
+        config = {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "brief": {
+                    "format": "%(asctime)s - %(levelname)s - %(message)s",
+                    "datefmt": "%H:%M:%S",
+                },
+                "standard": {
+                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                    "datefmt": "%Y-%m-%d %H:%M:%S",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "level": self.original_lvl,
+                    "class": "logging.StreamHandler",
+                    "formatter": "brief",
+                },
+            },
+            "loggers": {
+                "": {  # root logger
+                    "handlers": ["console"],
+                    "level": self.original_lvl,
+                    "propagate": True,
+                },
+            },
+            "force": True,
+        }
+        return config
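
A usage sketch for the new context manager, inferred from the constructor above; the pass name, directory, and compile arguments are placeholders rather than values taken from this commit:

import torch_tensorrt
from torch_tensorrt.dynamo import Debugger

# level="graphs" maps to the custom GRAPHS level (5) and unlocks FX-graph
# capture and engine profiling; the other levels map to the standard
# logging levels (debug, info, warning, error, internal_errors).
with Debugger(
    level="graphs",
    capture_fx_graph_before=["example_pass"],  # must match a lowering pass __name__
    save_engine_profile=True,  # sets the module-level SAVE_ENGINE_PROFILE flag
    logging_dir="/tmp/torchtrt_debug",  # overrides the default temporary DEBUG_FILE_DIR
):
    trt_gm = torch_tensorrt.dynamo.compile(exported_program, inputs)

On exit, the context manager restores the previous logging configuration and the runtime logging level; the original lowering-pass lists are restored only when capture_fx_graph_after was given, per the __exit__ condition above.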

py/torch_tensorrt/dynamo/_defaults.py

Lines changed: 0 additions & 1 deletion
@@ -15,7 +15,6 @@
 DLA_SRAM_SIZE = 1048576
 ENGINE_CAPABILITY = EngineCapability.STANDARD
 WORKSPACE_SIZE = 0
-ENGINE_VIS_DIR = None
 MIN_BLOCK_SIZE = 5
 PASS_THROUGH_BUILD_FAILURES = False
 MAX_AUX_STREAMS = None

py/torch_tensorrt/dynamo/_refit.py

Lines changed: 0 additions & 7 deletions
@@ -39,7 +39,6 @@
     check_module_output,
     get_model_device,
     get_torch_inputs,
-    set_log_level,
     to_torch_device,
     to_torch_tensorrt_device,
 )

@@ -72,7 +71,6 @@ def construct_refit_mapping(
     interpreter = TRTInterpreter(
         module,
         inputs,
-        logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING),
         output_dtypes=output_dtypes,
         compilation_settings=settings,
     )

@@ -266,9 +264,6 @@ def refit_module_weights(
         not settings.immutable_weights
     ), "Refitting is not enabled. Please recompile the engine with immutable_weights=False."

-    if settings.debug:
-        set_log_level(logger.parent, logging.DEBUG)
-
     device = to_torch_tensorrt_device(settings.device)
     if arg_inputs:
         if not isinstance(arg_inputs, collections.abc.Sequence):

@@ -304,7 +299,6 @@ def refit_module_weights(
     try:
         new_partitioned_module, supported_ops = partitioning.fast_partition(
             new_gm,
-            verbose=settings.debug,
             min_block_size=settings.min_block_size,
             torch_executed_ops=settings.torch_executed_ops,
         )

@@ -320,7 +314,6 @@ def refit_module_weights(
     if not settings.use_fast_partitioner:
         new_partitioned_module, supported_ops = partitioning.global_partition(
             new_gm,
-            verbose=settings.debug,
             min_block_size=settings.min_block_size,
             torch_executed_ops=settings.torch_executed_ops,
         )

py/torch_tensorrt/dynamo/_settings.py

Lines changed: 2 additions & 3 deletions
@@ -1,3 +1,4 @@
+import logging
 from dataclasses import dataclass, field
 from typing import Collection, Optional, Set, Tuple, Union

@@ -7,7 +8,6 @@
 from torch_tensorrt.dynamo._defaults import (
     ASSUME_DYNAMIC_SHAPE_SUPPORT,
     CACHE_BUILT_ENGINES,
-    DEBUG,
     DISABLE_TF32,
     DLA_GLOBAL_DRAM_SIZE,
     DLA_LOCAL_DRAM_SIZE,

@@ -18,7 +18,6 @@
     ENABLE_WEIGHT_STREAMING,
     ENABLED_PRECISIONS,
     ENGINE_CAPABILITY,
-    ENGINE_VIS_DIR,
     HARDWARE_COMPATIBLE,
     IMMUTABLE_WEIGHTS,
     L2_LIMIT_FOR_TILING,

@@ -102,7 +101,7 @@ class CompilationSettings:
     """

     enabled_precisions: Set[dtype] = field(default_factory=lambda: ENABLED_PRECISIONS)
-    debug: bool = DEBUG
+    debug: bool = logging.root.manager.root.level <= logging.DEBUG
     workspace_size: int = WORKSPACE_SIZE
     min_block_size: int = MIN_BLOCK_SIZE
     torch_executed_ops: Collection[Target] = field(default_factory=set)
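
The new default ties CompilationSettings.debug to the root logger instead of a fixed constant. Note the expression is evaluated when the dataclass is defined, so it captures the root level at import time; a minimal sketch, assuming torch_tensorrt has not yet been imported when the level is set:

import logging

# Raise the root logger to DEBUG before the first torch_tensorrt import so
# the class-level default (root level <= DEBUG) evaluates to True.
logging.getLogger().setLevel(logging.DEBUG)

from torch_tensorrt.dynamo import CompilationSettings

print(CompilationSettings().debug)  # True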

py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py

Lines changed: 0 additions & 1 deletion
@@ -75,7 +75,6 @@ def __init__(
         self,
         module: torch.fx.GraphModule,
         input_specs: Sequence[Input],
-        logger_level: trt.ILogger.Severity = trt.ILogger.Severity.WARNING,
         output_dtypes: Optional[Sequence[dtype]] = None,
         compilation_settings: CompilationSettings = CompilationSettings(),
         engine_cache: Optional[BaseEngineCache] = None,

py/torch_tensorrt/dynamo/conversion/_conversion.py

Lines changed: 0 additions & 2 deletions
@@ -3,7 +3,6 @@
 import logging
 from typing import Any, List, Optional, Sequence

-import tensorrt as trt
 import torch
 from torch_tensorrt._enums import dtype
 from torch_tensorrt._features import ENABLED_FEATURES

@@ -60,7 +59,6 @@ def interpret_module_to_result(
     interpreter = TRTInterpreter(
         module,
         inputs,
-        logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING),
         output_dtypes=output_dtypes,
         compilation_settings=settings,
         engine_cache=engine_cache,

py/torch_tensorrt/dynamo/partitioning/_adjacency_partitioner.py

Lines changed: 4 additions & 5 deletions
@@ -13,14 +13,15 @@
 )
 from torch.fx.passes.tools_common import CALLABLE_NODE_OPS, NodeSet
 from torch_tensorrt.dynamo._defaults import (
-    DEBUG,
     MIN_BLOCK_SIZE,
     REQUIRE_FULL_COMPILATION,
 )
 from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
     DYNAMO_CONVERTERS as CONVERTERS,
 )
-from torch_tensorrt.dynamo.conversion._ConverterRegistry import ConverterRegistry
+from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
+    ConverterRegistry,
+)

 logger = logging.getLogger(__name__)

@@ -250,7 +251,6 @@ def starter_nodes(self) -> Tuple[NodeSet, NodeSet]:

 def partition(
     gm: torch.fx.GraphModule,
-    verbose: bool = DEBUG,
     min_block_size: int = MIN_BLOCK_SIZE,
     torch_executed_ops: Collection[Target] = set(),
     require_full_compilation: bool = REQUIRE_FULL_COMPILATION,

@@ -286,7 +286,6 @@ def partition(

     partitioned_graph = partitioner.partition_graph()

-    if verbose:
-        supported_ops.print_support_overview(partitioner.num_trt_accelerated_subgraphs)
+    supported_ops.print_support_overview(partitioner.num_trt_accelerated_subgraphs)

     return partitioned_graph, supported_ops
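
With the verbose parameter removed, partition() now prints the support overview unconditionally. A hypothetical call after this change; gm stands in for a traced torch.fx.GraphModule:

from torch_tensorrt.dynamo import partitioning

# verbose= is no longer accepted; the support overview prints regardless
partitioned_gm, supported_ops = partitioning.fast_partition(
    gm,
    min_block_size=5,
    torch_executed_ops=set(),
)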
