Skip to content

Commit 85f2421

Browse files
authored
Add SNAPSHOT_REGEX_PLACEHOLDERS (#208)
E.g. depending on where snapshots run, the port may be 80 or 8080. Or the path in error.stack tags will be different.
1 parent d363ffd commit 85f2421

File tree

6 files changed

+129
-32
lines changed

6 files changed

+129
-32
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ metrics incorrectly placed within the meta field, or other type errors related t
185185
that are not relevant to the test case. **Note that removing `span_id` is not permitted to allow span
186186
ordering to be maintained.**
187187

188+
- `SNAPSHOT_REGEX_PLACEHOLDERS` [`""`]: Regular expressions whose matches are replaced by a placeholder in snapshots. Expressed as a comma-separated list of `placeholder:regex` pairs. Specifying `placeholder:ba[rz]` will change any occurrence of `bar` or `baz` to `{placeholder}`: `foobarbazqux` -> `foo{placeholder}{placeholder}qux`. This is particularly useful for stripping path prefixes or other infrastructure-dependent identifiers.
189+
188190
- `DD_POOL_TRACE_CHECK_FAILURES` [`false`]: Set to `"true"` to pool Trace Check failures that occurred within Test-Agent memory. These failures can be queried later using the `/test/trace_check/failures` endpoint. Can also be set using the `--pool-trace-check-failures=true` option.
189191

190192
- `DD_DISABLE_ERROR_RESPONSES` [`false`]: Set to `"true"` to disable Test-Agent `<Response 400>` when a Trace Check fails, instead sending a valid `<Response 200>`. Recommended for use with the `DD_POOL_TRACE_CHECK_FAILURES` env variable. Can also be set using the `--disable-error-responses=true` option.

ddapm_test_agent/agent.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import logging
1010
import os
1111
import pprint
12+
import re
1213
import socket
1314
import sys
1415
from typing import Awaitable
@@ -91,6 +92,15 @@ def _parse_csv(s: str) -> List[str]:
9192
return [s.strip() for s in s.split(",") if s.strip() != ""]
9293

9394

95+
def _parse_map(s: str) -> Dict[str, str]:
96+
"""Return the values of a csv string.
97+
98+
>>> _parse_map("a:b,b:c,c:d")
99+
{'a': 'b', 'b': 'c', 'c': 'd'}
100+
"""
101+
return dict([s.strip().split(":", 2) for s in s.split(",") if s.strip()])
102+
103+
94104
def _session_token(request: Request) -> Optional[str]:
95105
token: Optional[str]
96106
if "X-Datadog-Test-Session-Token" in request.headers:
@@ -794,6 +804,15 @@ async def handle_snapshot(self, request: Request) -> web.Response:
794804
span_removes = list(default_span_removes | overrides)
795805
log.info("using removes %r", span_removes)
796806

807+
# Get the regex placeholders to apply to span attributes for this snapshot:
# the app-wide defaults merged with the per-request overrides from the
# "regex_placeholders" query parameter (overrides win on key clashes).
default_attribute_regex_replaces: Dict[str, str] = request.app["snapshot_regex_placeholders"]
regex_overrides = _parse_map(request.url.query.get("regex_placeholders", ""))
# Map "{placeholder}" -> compiled pattern so the snapshot code can substitute directly.
attribute_regex_replaces = {
    f"{{{key}}}": re.compile(regex)
    for key, regex in (default_attribute_regex_replaces | regex_overrides).items()
}
# Log the placeholders themselves (previously this logged span_removes by mistake).
log.info("using regex placeholders %r", attribute_regex_replaces)
815+
797816
if "span_id" in span_removes:
798817
raise AssertionError("Cannot remove 'span_id' from spans")
799818

@@ -841,7 +860,13 @@ async def handle_snapshot(self, request: Request) -> web.Response:
841860
elif received_traces:
842861
# Create a new snapshot for the data received
843862
with open(trace_snap_file, mode="w") as f:
844-
f.write(trace_snapshot.generate_snapshot(received_traces=received_traces, removed=span_removes))
863+
f.write(
864+
trace_snapshot.generate_snapshot(
865+
received_traces=received_traces,
866+
removed=span_removes,
867+
attribute_regex_replaces=attribute_regex_replaces,
868+
)
869+
)
845870
log.info("wrote new trace snapshot to %r", os.path.abspath(trace_snap_file))
846871

847872
# Get all stats buckets from the payloads since we don't care about the other fields (hostname, env, etc)
@@ -1115,6 +1140,7 @@ def make_app(
11151140
pool_trace_check_failures: bool,
11161141
disable_error_responses: bool,
11171142
snapshot_removed_attrs: List[str],
1143+
snapshot_regex_placeholders: Dict[str, str],
11181144
) -> web.Application:
11191145
agent = Agent()
11201146
app = web.Application(
@@ -1190,6 +1216,7 @@ def make_app(
11901216
app["pool_trace_check_failures"] = pool_trace_check_failures
11911217
app["disable_error_responses"] = disable_error_responses
11921218
app["snapshot_removed_attrs"] = snapshot_removed_attrs
1219+
app["snapshot_regex_placeholders"] = snapshot_regex_placeholders
11931220
return app
11941221

11951222

@@ -1240,6 +1267,14 @@ def main(args: Optional[List[str]] = None) -> None:
12401267
"with meta. or metrics."
12411268
),
12421269
)
1270+
parser.add_argument(
1271+
"--snapshot-regex-placeholders",
1272+
type=_parse_map,
1273+
default=os.environ.get("SNAPSHOT_REGEX_PLACEHOLDERS", ""),
1274+
help=(
1275+
"Comma-separated list of placeholder:regex tuples where to remove the matching regexes with the placeholder."
1276+
),
1277+
)
12431278
parser.add_argument(
12441279
"--enabled-checks",
12451280
type=List[str],
@@ -1341,6 +1376,7 @@ def main(args: Optional[List[str]] = None) -> None:
13411376
pool_trace_check_failures=parsed_args.pool_trace_check_failures,
13421377
disable_error_responses=parsed_args.disable_error_responses,
13431378
snapshot_removed_attrs=parsed_args.snapshot_removed_attrs,
1379+
snapshot_regex_placeholders=parsed_args.snapshot_regex_placeholders,
13441380
)
13451381

13461382
web.run_app(app, sock=apm_sock, port=parsed_args.port)

ddapm_test_agent/trace_snapshot.py

Lines changed: 53 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import logging
44
import operator
55
import pprint
6+
from re import Pattern
67
import textwrap
78
from typing import Any
89
from typing import Dict
@@ -459,40 +460,57 @@ def _ordered_span(s: Span) -> OrderedDictType[str, TopLevelSpanValue]:
459460
return d # type: ignore
460461

461462

462-
def _snapshot_trace_str(trace: Trace, removed: Optional[List[str]] = None) -> str:
463+
def _walk_span_attributes_with_regex_replaces(
464+
dictionary: Dict[str, Any], attribute_regex_replaces: Dict[str, Pattern[str]]
465+
) -> None:
466+
for key, val in dictionary.items():
467+
if isinstance(val, str):
468+
for placeholder, pattern in attribute_regex_replaces.items():
469+
dictionary[key] = pattern.sub(placeholder, dictionary[key])
470+
elif isinstance(val, Dict):
471+
_walk_span_attributes_with_regex_replaces(val, attribute_regex_replaces)
472+
elif isinstance(val, List):
473+
for v in val:
474+
if isinstance(v, Dict):
475+
_walk_span_attributes_with_regex_replaces(v, attribute_regex_replaces)
476+
477+
478+
def _snapshot_trace_str(trace: Trace, removed: List[str], attribute_regex_replaces: Dict[str, Pattern[str]]) -> str:
463479
cmap = child_map(trace)
464480
stack: List[Tuple[int, Span]] = [(0, root_span(trace))]
465481
s = "[\n"
466482
while stack:
467483
prefix, span = stack.pop(0)
468484

469485
# Remove any keys that are not needed for comparison
470-
if removed:
471-
for key in removed:
472-
if key.startswith("meta."):
473-
span["meta"].pop(key[5:], None)
474-
elif key.startswith("metrics."):
475-
span["metrics"].pop(key[8:], None)
476-
elif key.startswith("span_links.attributes."):
477-
if "span_links" in span:
478-
for link in span["span_links"]:
479-
if "attributes" in link:
480-
link["attributes"].pop(key[22:], None)
481-
elif key.startswith("span_links."):
482-
if "span_links" in span:
483-
for link in span["span_links"]:
484-
link.pop(key[11:], None) # type: ignore
485-
elif key.startswith("span_events.attributes."):
486-
if "span_events" in span:
487-
for event in span["span_events"]:
488-
if "attributes" in event:
489-
event["attributes"].pop(key[23:], None)
490-
elif key.startswith("span_events."):
491-
if "span_events" in span:
492-
for event in span["span_events"]:
493-
event.pop(key[12:], None) # type: ignore
494-
else:
495-
span.pop(key, None) # type: ignore
486+
for key in removed:
487+
if key.startswith("meta."):
488+
span["meta"].pop(key[5:], None)
489+
elif key.startswith("metrics."):
490+
span["metrics"].pop(key[8:], None)
491+
elif key.startswith("span_links.attributes."):
492+
if "span_links" in span:
493+
for link in span["span_links"]:
494+
if "attributes" in link:
495+
link["attributes"].pop(key[22:], None)
496+
elif key.startswith("span_links."):
497+
if "span_links" in span:
498+
for link in span["span_links"]:
499+
link.pop(key[11:], None) # type: ignore
500+
elif key.startswith("span_events.attributes."):
501+
if "span_events" in span:
502+
for event in span["span_events"]:
503+
if "attributes" in event:
504+
event["attributes"].pop(key[23:], None)
505+
elif key.startswith("span_events."):
506+
if "span_events" in span:
507+
for event in span["span_events"]:
508+
event.pop(key[12:], None) # type: ignore
509+
else:
510+
span.pop(key, None) # type: ignore
511+
512+
if attribute_regex_replaces: # only walk if we actually have something to replace
513+
_walk_span_attributes_with_regex_replaces(cast(Dict[str, Any], span), attribute_regex_replaces)
496514

497515
for i, child in enumerate(reversed(cmap[span["span_id"]])):
498516
if i == 0:
@@ -507,15 +525,19 @@ def _snapshot_trace_str(trace: Trace, removed: Optional[List[str]] = None) -> st
507525
return s
508526

509527

510-
def _snapshot_json(traces: List[Trace], removed: Optional[List[str]] = None) -> str:
528+
def _snapshot_json(traces: List[Trace], removed: List[str], attribute_regex_replaces: Dict[str, Pattern[str]]) -> str:
    """Render ``traces`` as one snapshot string: a bracketed, ``,\\n``-separated list of traces.

    Each trace is rendered by ``_snapshot_trace_str`` with the given ``removed``
    keys and ``attribute_regex_replaces``. The separator is placed between
    traces by position (via ``join``) rather than by comparing each trace to
    the last one by value, so a trace that happens to equal the final trace
    still gets its separator.
    """
    rendered = [_snapshot_trace_str(t, removed, attribute_regex_replaces) for t in traces]
    return "[" + ",\n".join(rendered) + "]\n"
518536

519537

520-
def generate_snapshot(received_traces: List[Trace], removed: Optional[List[str]] = None) -> str:
521-
return _snapshot_json(_normalize_traces(received_traces), removed)
538+
def generate_snapshot(
    received_traces: List[Trace],
    removed: Optional[List[str]] = None,
    attribute_regex_replaces: Optional[Dict[str, Pattern[str]]] = None,
) -> str:
    """Return the snapshot string for ``received_traces``.

    The traces are passed through ``_normalize_traces`` and then rendered by
    ``_snapshot_json``. A missing (or empty) ``removed`` is treated as an empty
    list and a missing (or empty) ``attribute_regex_replaces`` as an empty dict.
    """
    normalized = _normalize_traces(received_traces)
    removed_keys = removed if removed else []
    regex_replaces = attribute_regex_replaces if attribute_regex_replaces else {}
    return _snapshot_json(normalized, removed_keys, regex_replaces)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
features:
3+
- |
4+
Add a SNAPSHOT_REGEX_PLACEHOLDERS option to allow infrastructure independent
5+
snapshots.

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ def snapshot_removed_attrs() -> Generator[Set[str], None, None]:
9090
yield set()
9191

9292

93+
@pytest.fixture
def snapshot_regex_placeholders() -> Generator[Dict[str, str], None, None]:
    """Yield the default (empty) regex-placeholder mapping; tests override it via parametrize."""
    yield {}
96+
97+
9398
@pytest.fixture
9499
async def agent_app(
95100
aiohttp_server,
@@ -104,6 +109,7 @@ async def agent_app(
104109
pool_trace_check_failures,
105110
disable_error_responses,
106111
snapshot_removed_attrs,
112+
snapshot_regex_placeholders,
107113
):
108114
app = await aiohttp_server(
109115
make_app(
@@ -118,6 +124,7 @@ async def agent_app(
118124
pool_trace_check_failures,
119125
disable_error_responses,
120126
snapshot_removed_attrs,
127+
snapshot_regex_placeholders,
121128
)
122129
)
123130
yield app

tests/test_snapshot.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,31 @@ async def test_removed_attributes_metrics(agent, tmp_path, snapshot_removed_attr
477477
assert "process_id" not in span[0]
478478

479479

480+
@pytest.mark.parametrize("snapshot_regex_placeholders", [{"addr": "localhost:8080", "path": "^/.*"}])
async def test_with_regex_placeholders(agent, tmp_path, snapshot_removed_attrs, do_reference_v04_http_trace):
    """A generated snapshot has configured regex matches replaced by ``{placeholder}`` tokens."""
    trace_resp = await do_reference_v04_http_trace(token="test_case")
    assert trace_resp.status == 200

    custom_dir = tmp_path / "custom"
    custom_dir.mkdir()
    custom_file_name = custom_dir / "custom_snapshot"
    custom_file = custom_dir / "custom_snapshot.json"

    snapshot_resp = await agent.get(
        "/test/session/snapshot",
        params={"test_session_token": "test_case", "file": str(custom_file_name)},
    )
    assert snapshot_resp.status == 200, await snapshot_resp.text()

    assert os.path.exists(custom_file), custom_file
    # Check that the regex matches were replaced by their placeholders in the written snapshot.
    with open(custom_file, mode="r") as f:
        file_content = f.read()
    assert file_content != ""
    root_span = json.loads(file_content)[0][0]
    assert root_span["name"] == "http.request"
    assert root_span["resource"] == "{path}"
    assert root_span["meta"]["http.url"] == "http://{addr}/users"
503+
504+
480505
ONE_SPAN_TRACE_NO_START = random_trace(1, remove_keys=["start"])
481506
TWO_SPAN_TRACE_NO_START = random_trace(2, remove_keys=["start"])
482507
FIVE_SPAN_TRACE_NO_START = random_trace(5, remove_keys=["start"])

0 commit comments

Comments
 (0)