diff --git a/ddtrace/debugging/_function/discovery.py b/ddtrace/debugging/_function/discovery.py index 1c32712d6ba..23429d182ff 100644 --- a/ddtrace/debugging/_function/discovery.py +++ b/ddtrace/debugging/_function/discovery.py @@ -142,6 +142,10 @@ def __init__(self, code: Optional[CodeType] = None, function: Optional[FunctionT self.code = function.__code__ if function is not None else code def resolve(self) -> FullyNamedFunction: + if self.code is None: + msg = "Cannot resolve pair with no code object" + raise ValueError(msg) + if self.function is not None: return cast(FullyNamedFunction, self.function) @@ -159,6 +163,17 @@ def resolve(self) -> FullyNamedFunction: raise ValueError(msg) self.function = _f = functions[0] + try: + # We try to update the code object to the one currently in use with + # the function. This is not necessarily the code object that was + # stored in the pair, which generally comes from the code objects + # generated by module compilation. The benefit of doing this is that + # we can relinquish a reference to the original code object so that + # it can be garbage collected if needed. + self.code = _f.__code__ + except AttributeError: + pass + f = cast(FullyNamedFunction, _f) f.__fullname__ = f"{f.__module__}.{f.__qualname__}" @@ -245,13 +260,13 @@ def __init__(self, module: ModuleType) -> None: return self._module = module - self._fullname_index = _collect_functions(module) if PYTHON_VERSION_INFO < (3, 11): self._name_index: Dict[str, List[_FunctionCodePair]] = defaultdict(list) self._cached: Dict[int, List[FullyNamedFunction]] = {} # Create the line to function mapping if hasattr(module, "__dd_code__"): + self._fullname_index = {} for code in module.__dd_code__: fcp = _FunctionCodePair(code=code) @@ -266,6 +281,7 @@ def __init__(self, module: ModuleType) -> None: for lineno in linenos(code): self[lineno].append(fcp) else: + self._fullname_index = _collect_functions(module) # If the module was already loaded we don't have its code object seen_functions = set() for _, fcp in self._fullname_index.items(): @@ -351,7 +367,7 @@ def from_module(cls, module: ModuleType) -> "FunctionDiscovery": information on the module object itself. Subsequent calls will return the cached information. """ - # Cache the function tree on the module + # Cache the function discovery on the module try: return module.__function_discovery__ except AttributeError: diff --git a/ddtrace/internal/utils/inspection.py b/ddtrace/internal/utils/inspection.py index 15864dd75af..eeec4791a1c 100644 --- a/ddtrace/internal/utils/inspection.py +++ b/ddtrace/internal/utils/inspection.py @@ -8,6 +8,7 @@ from types import FunctionType from typing import Iterator from typing import List +from typing import MutableMapping from typing import Set from typing import cast @@ -126,8 +127,45 @@ def collect_code_objects(code: CodeType) -> Iterator[CodeType]: q.append(new_code) +_CODE_TO_ORIGINAL_FUNCTION_MAPPING: MutableMapping[CodeType, FunctionType] = dict() + + +def link_function_to_code(code: CodeType, function: FunctionType) -> None: + """ + Link a function to a code object. This is used to speed up the search for + the original function from a code object. + """ + global _CODE_TO_ORIGINAL_FUNCTION_MAPPING + + _CODE_TO_ORIGINAL_FUNCTION_MAPPING[code] = function + + @lru_cache(maxsize=(1 << 14)) # 16k entries -def functions_for_code(code: CodeType) -> List[FunctionType]: +def _functions_for_code_gc(code: CodeType) -> List[FunctionType]: import gc return [_ for _ in gc.get_referrers(code) if isinstance(_, FunctionType) and _.__code__ is code] + + +def functions_for_code(code: CodeType) -> List[FunctionType]: + global _CODE_TO_ORIGINAL_FUNCTION_MAPPING + + try: + # Try to get the function from the original code-to-function mapping + return [_CODE_TO_ORIGINAL_FUNCTION_MAPPING[code]] + except KeyError: + # If the code is not in the mapping, we fall back to the garbage + # collector + return _functions_for_code_gc(code) + + +def clear(): + """Clear the inspection state. + + This should be called when modules are reloaded to ensure that the mappings + stay relevant. + """ + global _CODE_TO_ORIGINAL_FUNCTION_MAPPING + + _functions_for_code_gc.cache_clear() + _CODE_TO_ORIGINAL_FUNCTION_MAPPING.clear() diff --git a/ddtrace/internal/wrapping/context.py b/ddtrace/internal/wrapping/context.py index c6b4ee896e2..88d490b47a4 100644 --- a/ddtrace/internal/wrapping/context.py +++ b/ddtrace/internal/wrapping/context.py @@ -17,6 +17,7 @@ from bytecode import Bytecode from ddtrace.internal.assembly import Assembly +from ddtrace.internal.utils.inspection import link_function_to_code T = t.TypeVar("T") @@ -562,7 +563,10 @@ def wrap(self) -> None: # Mark the function as wrapped by a wrapping context t.cast(ContextWrappedFunction, f).__dd_context_wrapped__ = self - # Replace the function code with the wrapped code + # Replace the function code with the wrapped code. We also link + # the function to its original code object so that we can retrieve + # it later if required. + link_function_to_code(f.__code__, f) f.__code__ = bc.to_code() def unwrap(self) -> None: @@ -688,7 +692,10 @@ def wrap(self) -> None: # Mark the function as wrapped by a wrapping context t.cast(ContextWrappedFunction, f).__dd_context_wrapped__ = self - # Replace the function code with the wrapped code + # Replace the function code with the wrapped code. We also link + # the function to its original code object so that we can retrieve + # it later if required. + link_function_to_code(f.__code__, f) f.__code__ = bc.to_code() def unwrap(self) -> None: diff --git a/releasenotes/notes/fix-di-with-external-wrapping-context-e26b46a6fc7f6a47.yaml b/releasenotes/notes/fix-di-with-external-wrapping-context-e26b46a6fc7f6a47.yaml new file mode 100644 index 00000000000..c419905641a --- /dev/null +++ b/releasenotes/notes/fix-di-with-external-wrapping-context-e26b46a6fc7f6a47.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + dynamic instrumentation: fixed an incompatibility issue with code origin + that caused line probes on the entry point functions to fail to instrument. diff --git a/tests/debugging/conftest.py b/tests/debugging/conftest.py index 862f817a055..ba73f4c7b7a 100644 --- a/tests/debugging/conftest.py +++ b/tests/debugging/conftest.py @@ -2,12 +2,13 @@ import pytest -from ddtrace.internal.utils.inspection import functions_for_code +from ddtrace.internal.utils import inspection @pytest.fixture def stuff(): - functions_for_code.cache_clear() + inspection.clear() + was_loaded = False if "tests.submod.stuff" in sys.modules: was_loaded = True diff --git a/tests/debugging/function/test_discovery.py b/tests/debugging/function/test_discovery.py index 6f247bc7fd0..326e58e384f 100644 --- a/tests/debugging/function/test_discovery.py +++ b/tests/debugging/function/test_discovery.py @@ -101,21 +101,27 @@ def test_function_mangled(stuff_discovery): assert original_method is original_func -def test_discovery_after_external_wrapping(stuff): +@pytest.mark.subprocess +def test_discovery_after_external_wrapping(): import wrapt + from ddtrace.debugging._debugger import DebuggerModuleWatchdog + from ddtrace.debugging._function.discovery import FunctionDiscovery + + DebuggerModuleWatchdog.install() + + import tests.submod.stuff as stuff + def wrapper(wrapped, inst, args, kwargs): pass - original_function = stuff.Stuff.instancestuff - wrapt.wrap_function_wrapper(stuff, "Stuff.instancestuff", wrapper) assert isinstance(stuff.Stuff.instancestuff, (wrapt.BoundFunctionWrapper, wrapt.FunctionWrapper)) code = stuff.Stuff.instancestuff.__code__ - f, *_ = FunctionDiscovery(stuff).at_line(36) + f, *_ = FunctionDiscovery.from_module(stuff).at_line(36) - assert f is original_function or isinstance(f, (wrapt.BoundFunctionWrapper, wrapt.FunctionWrapper)), f + assert f is stuff.Stuff.instancestuff.__wrapped__, (f, stuff.Stuff.instancestuff.__wrapped__) assert f.__code__ is code @@ -139,3 +145,24 @@ def transform(self, code, module): assert home.__qualname__ == "home" DiscoveryModuleWatchdog.uninstall() + + +@pytest.mark.subprocess +def test_discovery_after_external_wrapping_context(): + from ddtrace.debugging._debugger import DebuggerModuleWatchdog + from ddtrace.debugging._function.discovery import FunctionDiscovery + from ddtrace.internal.module import origin + from ddtrace.internal.wrapping.context import WrappingContext + + DebuggerModuleWatchdog.install() + + import tests.submod.stuff as stuff + + f = stuff.modulestuff + + def hook(module): + assert FunctionDiscovery.from_module(module).at_line(f.__code__.co_firstlineno + 1) + + WrappingContext(f).wrap() # type: ignore + + DebuggerModuleWatchdog.register_origin_hook(origin(stuff), hook) # type: ignore diff --git a/tests/debugging/test_debugger_span_decoration.py b/tests/debugging/test_debugger_span_decoration.py index 457f6b3a919..b73990317e0 100644 --- a/tests/debugging/test_debugger_span_decoration.py +++ b/tests/debugging/test_debugger_span_decoration.py @@ -7,7 +7,7 @@ from ddtrace.debugging._probe.model import SpanDecorationTag from ddtrace.debugging._probe.model import SpanDecorationTargetSpan from ddtrace.debugging._signal.model import EvaluationError -from ddtrace.internal.utils.inspection import functions_for_code +from ddtrace.internal.utils import inspection from tests.debugging.mocking import debugger from tests.debugging.utils import create_span_decoration_function_probe from tests.debugging.utils import create_span_decoration_line_probe @@ -22,7 +22,7 @@ def setUp(self): import tests.submod.traced_stuff as ts - functions_for_code.cache_clear() + inspection.clear() self.traced_stuff = ts self.backup_tracer = ddtrace.tracer