Skip to content

Commit 6732048

Browse files
committed
Allow specifying extra params to scrub
1 parent 56e2478 commit 6732048

File tree

4 files changed

+122
-5
lines changed

4 files changed

+122
-5
lines changed

docs/source/operators/configuring-logging.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,28 @@ A minimal example which logs Jupyter Server output to a file:
5454
redirecting to log files ensure they have appropriate permissions.
5555

5656

57+
.. _configurable_logging.log_scrub:
58+
59+
Configuring Log Scrubbing
60+
^^^^^^^^^^^^^^^^^^^^^^^^^^
61+
62+
By default, Jupyter Server scrubs sensitive URL parameters from log output to prevent
63+
security tokens and other sensitive information from being leaked in log files. Additional
64+
parameters to be scrubbed can be configured using the ``extra_log_scrub_param_keys`` trait.
65+
66+
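Default scrubbed parameter keys include: ``token``, ``auth``, ``key``, ``code``, ``state``, and ``xsrf``. Keys are matched as substrings, so any URL parameter whose name *contains* one of these keys (for example ``api_key`` or ``access_token``) has its value replaced with ``[secret]``.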
67+
68+
Example configuration to add additional parameters to scrub:
69+
70+
.. code-block:: python
71+
72+
# jupyter_server_config.py
73+
74+
# Extra parameter keys to scrub (these are added to the defaults, not a replacement for them)
75+
c.ServerApp.extra_log_scrub_param_keys = [
76+
"password", "secret", "api_key", "jwt-token"
77+
]
78+
5779
.. _configurable_logging.extension_applications:
5880

5981
Jupyter Server Extension Applications (e.g. Jupyter Lab)

jupyter_server/log.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@
1717
# url params to be scrubbed if seen
1818
# any url param that *contains* one of these
1919
# will be scrubbed from logs
20-
_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"}
20+
_DEFAULT_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"}
2121

2222

23-
def _scrub_uri(uri: str) -> str:
23+
def _scrub_uri(uri: str, extra_param_keys=None) -> str:
2424
"""scrub auth info from uri"""
25+
26+
scrub_param_keys = _DEFAULT_SCRUB_PARAM_KEYS.union(set(extra_param_keys or []))
27+
2528
parsed = urlparse(uri)
2629
if parsed.query:
2730
# check for potentially sensitive url params
@@ -31,7 +34,7 @@ def _scrub_uri(uri: str) -> str:
3134
changed = False
3235
for i, s in enumerate(parts):
3336
key, sep, value = s.partition("=")
34-
for substring in _SCRUB_PARAM_KEYS:
37+
for substring in scrub_param_keys:
3538
if substring in key:
3639
parts[i] = f"{key}{sep}[secret]"
3740
changed = True
@@ -59,6 +62,10 @@ def log_request(handler, record_prometheus_metrics=True):
5962
except AttributeError:
6063
logger = access_log
6164

65+
extra_param_keys = []
66+
if hasattr(handler, "serverapp") and hasattr(handler.serverapp, "extra_log_scrub_param_keys"):
67+
extra_param_keys = handler.serverapp.extra_log_scrub_param_keys
68+
6269
if status < 300 or status == 304:
6370
# Successes (or 304 Not Modified) are debug-level
6471
log_method = logger.debug
@@ -74,7 +81,7 @@ def log_request(handler, record_prometheus_metrics=True):
7481
"status": status,
7582
"method": request.method,
7683
"ip": request.remote_ip,
77-
"uri": _scrub_uri(request.uri),
84+
"uri": _scrub_uri(request.uri, extra_param_keys),
7885
"request_time": request_time,
7986
}
8087
# log username
@@ -90,7 +97,7 @@ def log_request(handler, record_prometheus_metrics=True):
9097
msg = "{status} {method} {uri} ({username}@{ip}) {request_time:.2f}ms"
9198
if status >= 400:
9299
# log bad referrers
93-
ns["referer"] = _scrub_uri(request.headers.get("Referer", "None"))
100+
ns["referer"] = _scrub_uri(request.headers.get("Referer", "None"), extra_param_keys)
94101
msg = msg + " referer={referer}"
95102
if status >= 500 and status != 502:
96103
# Log a subset of the headers if it caused an error.

jupyter_server/serverapp.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2006,6 +2006,24 @@ def _default_terminals_enabled(self) -> bool:
20062006
20072007
Set to False to disable recording the http_request_duration_seconds metric.
20082008
""",
2009+
config=True,
2010+
)
2011+
2012+
extra_log_scrub_param_keys = List(
2013+
Unicode(),
2014+
default_value=[],
2015+
config=True,
2016+
help="""
2017+
Additional URL parameter keys to scrub from logs.
2018+
2019+
These will be added to the default list of scrubbed parameter keys.
2020+
Any URL parameter whose key contains one of these substrings will have
2021+
its value replaced with '[secret]' in the logs. This is to prevent
2022+
sensitive information like authentication tokens from being leaked
2023+
in log files.
2024+
2025+
Default scrubbed keys: ["token", "auth", "key", "code", "state", "xsrf"]
2026+
""",
20092027
)
20102028

20112029
static_immutable_cache = List(

tests/test_log.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
"""Tests for log utilities."""
2+
3+
from unittest.mock import Mock
4+
5+
import pytest
6+
7+
from jupyter_server.log import log_request
8+
from jupyter_server.serverapp import ServerApp
9+
10+
11+
@pytest.fixture
12+
def server_app_with_extra_scrub_keys():
13+
"""Fixture that returns a ServerApp with custom extra_log_scrub_param_keys."""
14+
app = ServerApp()
15+
app.extra_log_scrub_param_keys = ["password", "secret"]
16+
return app
17+
18+
19+
@pytest.fixture
20+
def server_app_with_default_scrub_keys():
21+
"""Fixture that returns a ServerApp with default extra_log_scrub_param_keys."""
22+
app = ServerApp()
23+
return app
24+
25+
26+
def test_log_request_scrubs_sensitive_params_default(server_app_with_default_scrub_keys, caplog):
27+
"""Test that log_request scrubs sensitive parameters using default configuration."""
28+
handler = Mock()
29+
handler.get_status.return_value = 200
30+
handler.request.method = "GET"
31+
handler.request.remote_ip = "127.0.0.1"
32+
handler.request.uri = "http://example.com/path?token=secret123&normal=value"
33+
handler.request.request_time.return_value = 0.1
34+
handler.serverapp = server_app_with_default_scrub_keys
35+
handler.log = Mock()
36+
handler.current_user = None
37+
38+
log_request(handler, record_prometheus_metrics=False)
39+
40+
handler.log.debug.assert_called_once()
41+
call_args = handler.log.debug.call_args[0][0]
42+
43+
assert "secret123" not in call_args
44+
assert "[secret]" in call_args
45+
assert "normal=value" in call_args
46+
47+
48+
def test_log_request_scrubs_sensitive_params_extra(server_app_with_extra_scrub_keys, caplog):
49+
"""Test that log_request scrubs sensitive parameters using extra configuration."""
50+
handler = Mock()
51+
handler.get_status.return_value = 200
52+
handler.request.method = "GET"
53+
handler.request.remote_ip = "127.0.0.1"
54+
handler.request.uri = (
55+
"http://example.com/path?password=secret123&token=default_token&normal=value"
56+
)
57+
handler.request.request_time.return_value = 0.1
58+
handler.serverapp = server_app_with_extra_scrub_keys
59+
handler.log = Mock()
60+
handler.current_user = None
61+
62+
log_request(handler, record_prometheus_metrics=False)
63+
64+
handler.log.debug.assert_called_once()
65+
call_args = handler.log.debug.call_args[0][0]
66+
67+
assert "secret123" not in call_args
68+
assert "default_token" not in call_args
69+
assert "[secret]" in call_args
70+
assert "normal=value" in call_args

0 commit comments

Comments
 (0)