vllm-project
diff --git a/‎debug_logging.sh
Lines changed: 8 additions & 0 deletions b/‎debug_logging.sh
Lines changed: 8 additions & 0 deletions
diff --git a/‎examples/offline_inference/llm_engine_example.py
Lines changed: 1 addition & 9 deletions b/‎examples/offline_inference/llm_engine_example.py
Lines changed: 1 addition & 9 deletions
diff --git a/‎vllm/v1/engine/core.py
Lines changed: 41 additions & 1 deletion b/‎vllm/v1/engine/core.py
Lines changed: 41 additions & 1 deletion
diff --git a/‎vllm/v1/worker/gpu_model_runner.py
Lines changed: 15 additions & 0 deletions b/‎vllm/v1/worker/gpu_model_runner.py
Lines changed: 15 additions & 0 deletions
diff --git a/‎vllm/v1/worker/il_config.py
Lines changed: 187 additions & 0 deletions b/‎vllm/v1/worker/il_config.py
Lines changed: 187 additions & 0 deletions
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Enable DEBUG-level vLLM logging for the wrapped command.
+export VLLM_LOGGING_LEVEL=DEBUG
+
+# Forward all arguments to python, e.g.: ./debug_logging.sh my_script.py --flag
+# `exec` replaces this shell so signals and the exit status go straight to python.
+exec python "$@"
@@ -17,15 +17,7 @@ def create_test_prompts() -> list[tuple[str, SamplingParams]]:
         (
             "A robot may not injure a human being",
             SamplingParams(temperature=0.0, logprobs=1, prompt_logprobs=1),
-        ),
-        (
-            "To be or not to be,",
-            SamplingParams(temperature=0.8, top_k=5, presence_penalty=0.2),
-        ),
-        (
-            "What is the meaning of life?",
-            SamplingParams(n=2, temperature=0.8, top_p=0.95, frequency_penalty=0.1),
-        ),
+        )
     ]
 
 
 
@@ -73,6 +73,34 @@ def __init__(self,
 
         # Setup Model.
         self.model_executor = executor_class(vllm_config)
+
+        
+        # Hook the dumping logic
+        from vllm.v1.worker.il_config import IntermediateLoggingConfig
+        
+        # Create a configuration for intermediate logging
+        logger.info("Setting up intermediate tensor logging")
+        
+        # Define the regex patterns to match module names
+        # These patterns will match any module with "layers.0" or "embed_tokens" in its name
+        module_patterns = ["layers\\.0", "embed_tokens"]
+        logger.info(f"Using module name regex patterns: {module_patterns}")
+        
+        il_config = IntermediateLoggingConfig(
+            output_dir="/tmp/vllm_intermediates",  # Directory to save intermediates
+            module_name_regex=module_patterns,     # Log layer 0 and embedding modules
+            log_step_ids=[0, 1, 2, 3, 4, 5],       # Log steps 0-5
+            max_tensor_size=1000000,               # Limit to 1M elements
+            enabled=True                           # Enable logging
+        )
+        
+        logger.info(f"Intermediate logging config: {il_config.to_dict()}")
+        
+        # Register hooks for intermediate tensor logging
+        logger.info("Calling register_intermediate_hooks via collective_rpc")
+        self.collective_rpc("register_intermediate_hooks", args=(il_config,))
+        logger.info("Finished setting up intermediate tensor logging")
+
         if executor_fail_callback is not None:
             self.model_executor.register_failure_callback(
                 executor_fail_callback)
@@ -215,7 +243,19 @@ def abort_requests(self, request_ids: list[str]):
 
     def execute_model(self, scheduler_output: SchedulerOutput):
         try:
-            return self.model_executor.execute_model(scheduler_output)
+            # Increment the step counter for intermediate logging
+            try:
+                from vllm.v1.worker.intermediates_logging import increment_step
+                logger.info("Incrementing intermediate logging step counter before model execution")
+                increment_step()
+            except Exception as e:
+                logger.warning(f"Failed to increment intermediate logging step counter: {e}")
+            
+            # Execute the model
+            result = self.model_executor.execute_model(scheduler_output)
+            
+            logger.info(f"Model execution completed for step with {scheduler_output.total_num_scheduled_tokens} tokens")
+            return result
         except Exception as err:
             # We do not want to catch BaseException here since we're only
             # interested in dumping info when the exception is due to an
 
@@ -68,6 +68,9 @@
                     initialize_kv_cache_for_kv_sharing,
                     sanity_check_mm_encoder_outputs, scatter_mm_placeholders)
 
+from vllm.v1.worker.intermediates_logging import (
+    disable_intermediate_logging_decorator
+)
 if TYPE_CHECKING:
     import xgrammar as xgr
     import xgrammar.kernels.apply_token_bitmask_inplace_torch_compile as xgr_torch_compile  # noqa: E501
@@ -2152,7 +2155,19 @@ def _dummy_pooler_run(
                 raise e
         return pooler_output
 
+    @torch.inference_mode()
+    @disable_intermediate_logging_decorator
     def profile_run(self) -> None:
+        """Run a profiling pass of the model with dummy inputs.
+
+        The work itself is delegated to ``_run_profile_internal``. The method
+        is wrapped in ``disable_intermediate_logging_decorator``, which is
+        expected to switch intermediate tensor logging off for the duration
+        of the call (the decorator is imported from
+        ``vllm.v1.worker.intermediates_logging``; confirm its behavior there).
+        """
+        self._run_profile_internal()
+        # NOTE(review): this sets a process-wide torch._dynamo option after
+        # every profile run; the reason is not visible here - confirm it is
+        # intentional and not a debugging leftover.
+        torch._dynamo.config.guard_nn_modules = True
+    
+    def _run_profile_internal(self) -> None:
         # Profile with multimodal encoder & encoder cache.
         # TODO: handle encoder-decoder models once we support them.
         if (self.is_multimodal_model and self.max_num_encoder_input_tokens > 0
 
@@ -0,0 +1,187 @@
+"""
+Configuration for intermediate tensor logging.
+
+This module defines the configuration data class for intermediate tensor logging,
+which controls how intermediate tensors are captured and saved during model execution.
+"""
+
+import dataclasses
+import re
+from pathlib import Path
+from typing import Optional, Pattern, List, Set, Union
+
+
+@dataclasses.dataclass
+class IntermediateLoggingConfig:
+    """Configuration for intermediate tensor logging.
+
+    The public fields are plain settings. The two underscore-prefixed fields
+    are caches derived from ``module_name_regex`` and ``log_step_ids`` in
+    ``__post_init__``; they are not constructor arguments and take no part in
+    ``repr`` or equality. The caches are built once, so reassigning
+    ``module_name_regex`` or ``log_step_ids`` on an existing instance does not
+    refresh them.
+
+    Constructing an instance creates ``output_dir`` on disk.
+    """
+
+    # Directory where to save the intermediate tensors
+    output_dir: str = "/tmp/vllm_intermediates"
+
+    # Regex patterns to filter modules by name (None or empty list means
+    # log all modules). Can be a single string or a list of strings.
+    module_name_regex: Optional[Union[str, List[str]]] = None
+
+    # List of step IDs to log (empty list means log all steps)
+    log_step_ids: List[int] = dataclasses.field(default_factory=lambda: [0, 1])
+
+    # Maximum number of elements in tensors to log (None = no limit)
+    max_tensor_size: Optional[int] = None
+
+    # Whether logging is enabled
+    enabled: bool = True
+
+    # Current step counter (advanced via increment_step)
+    current_step: int = 0
+
+    # List of device names to log (empty list means log all devices)
+    device_names: List[str] = dataclasses.field(default_factory=list)
+
+    # Compiled regex patterns for module filtering (derived, not an argument)
+    _module_name_patterns: List[Pattern] = dataclasses.field(
+        default_factory=list, init=False, repr=False, compare=False)
+
+    # Set of step IDs for faster lookup (derived, not an argument)
+    _step_id_set: Set[int] = dataclasses.field(
+        default_factory=set, init=False, repr=False, compare=False)
+
+    def __post_init__(self):
+        """Build the derived caches and make sure ``output_dir`` exists."""
+        # Keep private copies of the list inputs so that a caller mutating
+        # the list it passed in cannot desynchronise `_step_id_set`.
+        self.log_step_ids = list(self.log_step_ids)
+        self.device_names = list(self.device_names)
+        self._compile_regex_patterns()
+        self._step_id_set = set(self.log_step_ids)
+        Path(self.output_dir).mkdir(exist_ok=True, parents=True)
+
+    @staticmethod
+    def _get_logger():
+        """Return the vLLM logger for this module, importing it on demand."""
+        from vllm.logger import init_logger
+        return init_logger(__name__)
+
+    def _compile_regex_patterns(self):
+        """Compile ``module_name_regex`` into ``_module_name_patterns``.
+
+        Raises:
+            ValueError: If any pattern is not a valid regular expression.
+                The underlying ``re.error`` is attached as the cause.
+        """
+        logger = self._get_logger()
+
+        self._module_name_patterns = []
+
+        if self.module_name_regex is None:
+            logger.info("No module name regex patterns provided, will log all modules")
+            return
+
+        # Convert single string to list for uniform handling
+        patterns = self.module_name_regex
+        if isinstance(patterns, str):
+            patterns = [patterns]
+            logger.info(f"Converting single regex pattern to list: [{patterns[0]}]")
+        else:
+            logger.info(f"Using list of regex patterns: {patterns}")
+
+        # Compile all patterns
+        for pattern in patterns:
+            try:
+                compiled_pattern = re.compile(pattern)
+            except re.error as e:
+                logger.error(f"Invalid regex pattern '{pattern}': {e}")
+                raise ValueError(f"Invalid regex pattern '{pattern}': {e}") from e
+            self._module_name_patterns.append(compiled_pattern)
+            logger.info(f"Successfully compiled regex pattern: '{pattern}'")
+
+        logger.info(f"Compiled {len(self._module_name_patterns)} regex patterns")
+
+    def should_log_step(self) -> bool:
+        """Check if the current step should be logged based on the step IDs.
+
+        Returns:
+            False when logging is disabled; True when ``log_step_ids`` is
+            empty; otherwise whether ``current_step`` is one of the step IDs
+            captured at construction time.
+        """
+        if not self.enabled:
+            return False
+
+        # If log_step_ids is empty, log all steps
+        if not self.log_step_ids:
+            return True
+
+        # Otherwise, check if current step is in the set of step IDs to log
+        return self.current_step in self._step_id_set
+
+    def should_log_device(self, device_name: str) -> bool:
+        """Check if a device should be logged based on the device names.
+
+        Args:
+            device_name: The name of the device to check (e.g., 'cuda:0', 'cpu').
+
+        Returns:
+            True if the device should be logged, False otherwise.
+            If device_names is empty, all devices are logged.
+        """
+        # If device_names is empty, log all devices
+        if not self.device_names:
+            return True
+
+        # Otherwise, check if device_name is in the list of device names to log
+        return device_name in self.device_names
+
+    def should_log_module(self, module_name: str) -> bool:
+        """Check if a module should be logged based on the name regex patterns.
+
+        Args:
+            module_name: The name of the module to check.
+
+        Returns:
+            True if the module should be logged, False otherwise.
+            If no patterns are defined, all modules are logged.
+            If patterns are defined, the module is logged if ``re.search``
+            finds ANY of them anywhere in the name.
+        """
+        logger = self._get_logger()
+
+        # If no patterns are defined, log all modules
+        if not self._module_name_patterns:
+            logger.debug(f"No patterns defined, will log module: {module_name}")
+            return True
+
+        # Check if the module name matches any of the patterns
+        for i, pattern in enumerate(self._module_name_patterns):
+            if pattern.search(module_name):
+                logger.info(f"Module {module_name} matches pattern {i}: '{pattern.pattern}'")
+                return True
+
+        # For debugging, log at a higher level when we're checking layer modules
+        if "layer" in module_name or "embed" in module_name:
+            logger.info(f"Module {module_name} doesn't match any patterns: {[p.pattern for p in self._module_name_patterns]}")
+        else:
+            logger.debug(f"Module {module_name} doesn't match any patterns")
+        return False
+
+    def increment_step(self) -> None:
+        """Increment the current step counter."""
+        self.current_step += 1
+
+    def reset_step(self) -> None:
+        """Reset the current step counter to zero."""
+        self.current_step = 0
+
+    def to_dict(self) -> dict:
+        """Convert the config to a dictionary for serialization.
+
+        List values are copied, so mutating the returned dictionary never
+        changes this config. The derived caches are not included.
+        """
+        regex = self.module_name_regex
+        return {
+            "output_dir": self.output_dir,
+            "module_name_regex":
+            list(regex) if isinstance(regex, list) else regex,
+            "log_step_ids": list(self.log_step_ids),
+            "max_tensor_size": self.max_tensor_size,
+            "enabled": self.enabled,
+            "current_step": self.current_step,
+            "device_names": list(self.device_names),
+        }
+
+    @classmethod
+    def from_dict(cls, config_dict: dict) -> "IntermediateLoggingConfig":
+        """Create a config instance from a dictionary.
+
+        Args:
+            config_dict: Dictionary containing configuration parameters.
+                Unknown keys are ignored. A legacy ``log_step_interval`` key
+                is honoured only when ``log_step_ids`` is absent.
+
+        Returns:
+            An IntermediateLoggingConfig instance.
+        """
+        # Filter out unknown parameters
+        known_params = {"output_dir", "module_name_regex", "log_step_ids",
+                        "max_tensor_size", "enabled", "current_step",
+                        "device_names"}
+        filtered_dict = {k: v for k, v in config_dict.items() if k in known_params}
+
+        # Handle backward compatibility with log_step_interval
+        if "log_step_interval" in config_dict and "log_step_ids" not in filtered_dict:
+            interval = config_dict["log_step_interval"]
+            if interval > 0:
+                # Convert interval to step IDs (the first ten multiples)
+                filtered_dict["log_step_ids"] = list(range(0, 10 * interval, interval))
+
+        return cls(**filtered_dict)