From 673204894199c917fa2d6db97049a1937495568d Mon Sep 17 00:00:00 2001 From: Jeremy Tuloup Date: Wed, 25 Jun 2025 13:48:35 +0200 Subject: [PATCH] Allow specifying extra params to scrub --- docs/source/operators/configuring-logging.rst | 22 ++++++ jupyter_server/log.py | 17 +++-- jupyter_server/serverapp.py | 18 +++++ tests/test_log.py | 70 +++++++++++++++++++ 4 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 tests/test_log.py diff --git a/docs/source/operators/configuring-logging.rst b/docs/source/operators/configuring-logging.rst index 1bb382fa73..600e52e9fa 100644 --- a/docs/source/operators/configuring-logging.rst +++ b/docs/source/operators/configuring-logging.rst @@ -54,6 +54,28 @@ A minimal example which logs Jupyter Server output to a file: redirecting to log files ensure they have appropriate permissions. +.. _configurable_logging.log_scrub: + +Configuring Log Scrubbing +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, Jupyter Server scrubs sensitive URL parameters from log output to prevent +security tokens and other sensitive information from being leaked in log files. Additional +parameters to be scrubbed can be configured using the ``extra_log_scrub_param_keys`` trait. + +Default scrubbed parameter keys include: ``token``, ``auth``, ``key``, ``code``, ``state``, and ``xsrf``. + +Example configuration to add additional parameters to scrub: + +.. code-block:: python + + # jupyter_server_config.py + + # Add additional parameter keys to scrub (these will be added to the defaults) + c.ServerApp.extra_log_scrub_param_keys = [ + "password", "secret", "api_key", "jwt-token" + ] + .. _configurable_logging.extension_applications: Jupyter Server Extension Applications (e.g. Jupyter Lab) diff --git a/jupyter_server/log.py b/jupyter_server/log.py index b1211114e9..f268ee0579 100644 --- a/jupyter_server/log.py +++ b/jupyter_server/log.py @@ -17,11 +17,14 @@ # url params to be scrubbed if seen # any url param that *contains* one of these # will be scrubbed from logs -_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"} +_DEFAULT_SCRUB_PARAM_KEYS = {"token", "auth", "key", "code", "state", "xsrf"} -def _scrub_uri(uri: str) -> str: +def _scrub_uri(uri: str, extra_param_keys=None) -> str: """scrub auth info from uri""" + + scrub_param_keys = _DEFAULT_SCRUB_PARAM_KEYS.union(set(extra_param_keys or [])) + parsed = urlparse(uri) if parsed.query: # check for potentially sensitive url params @@ -31,7 +34,7 @@ def _scrub_uri(uri: str) -> str: changed = False for i, s in enumerate(parts): key, sep, value = s.partition("=") - for substring in _SCRUB_PARAM_KEYS: + for substring in scrub_param_keys: if substring in key: parts[i] = f"{key}{sep}[secret]" changed = True @@ -59,6 +62,10 @@ def log_request(handler, record_prometheus_metrics=True): except AttributeError: logger = access_log + extra_param_keys = [] + if hasattr(handler, "serverapp") and hasattr(handler.serverapp, "extra_log_scrub_param_keys"): + extra_param_keys = handler.serverapp.extra_log_scrub_param_keys + if status < 300 or status == 304: # Successes (or 304 FOUND) are debug-level log_method = logger.debug @@ -74,7 +81,7 @@ def log_request(handler, record_prometheus_metrics=True): "status": status, "method": request.method, "ip": request.remote_ip, - "uri": _scrub_uri(request.uri), + "uri": _scrub_uri(request.uri, extra_param_keys), "request_time": request_time, } # log username @@ -90,7 +97,7 @@ def log_request(handler, record_prometheus_metrics=True): msg = "{status} {method} {uri} ({username}@{ip}) {request_time:.2f}ms" if status >= 400: # log bad referrers - ns["referer"] = _scrub_uri(request.headers.get("Referer", "None")) + ns["referer"] = _scrub_uri(request.headers.get("Referer", "None"), extra_param_keys) msg = msg + " referer={referer}" if status >= 500 and status != 502: # Log a subset of the headers if it caused an error. diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 1c70dd60ab..1dfe2b8c33 100644 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -2006,6 +2006,24 @@ def _default_terminals_enabled(self) -> bool: Set to False to disable recording the http_request_duration_seconds metric. """, + config=True, + ) + + extra_log_scrub_param_keys = List( + Unicode(), + default_value=[], + config=True, + help=""" + Additional URL parameter keys to scrub from logs. + + These will be added to the default list of scrubbed parameter keys. + Any URL parameter whose key contains one of these substrings will have + its value replaced with '[secret]' in the logs. This is to prevent + sensitive information like authentication tokens from being leaked + in log files. + + Default scrubbed keys: ["token", "auth", "key", "code", "state", "xsrf"] + """, ) static_immutable_cache = List( diff --git a/tests/test_log.py b/tests/test_log.py new file mode 100644 index 0000000000..bdf2d7c366 --- /dev/null +++ b/tests/test_log.py @@ -0,0 +1,70 @@ +"""Tests for log utilities.""" + +from unittest.mock import Mock + +import pytest + +from jupyter_server.log import log_request +from jupyter_server.serverapp import ServerApp + + +@pytest.fixture +def server_app_with_extra_scrub_keys(): + """Fixture that returns a ServerApp with custom extra_log_scrub_param_keys.""" + app = ServerApp() + app.extra_log_scrub_param_keys = ["password", "secret"] + return app + + +@pytest.fixture +def server_app_with_default_scrub_keys(): + """Fixture that returns a ServerApp with default extra_log_scrub_param_keys.""" + app = ServerApp() + return app + + +def test_log_request_scrubs_sensitive_params_default(server_app_with_default_scrub_keys, caplog): + """Test that log_request scrubs sensitive parameters using default configuration.""" + handler = Mock() + handler.get_status.return_value = 200 + handler.request.method = "GET" + handler.request.remote_ip = "127.0.0.1" + handler.request.uri = "http://example.com/path?token=secret123&normal=value" + handler.request.request_time.return_value = 0.1 + handler.serverapp = server_app_with_default_scrub_keys + handler.log = Mock() + handler.current_user = None + + log_request(handler, record_prometheus_metrics=False) + + handler.log.debug.assert_called_once() + call_args = handler.log.debug.call_args[0][0] + + assert "secret123" not in call_args + assert "[secret]" in call_args + assert "normal=value" in call_args + + +def test_log_request_scrubs_sensitive_params_extra(server_app_with_extra_scrub_keys, caplog): + """Test that log_request scrubs sensitive parameters using extra configuration.""" + handler = Mock() + handler.get_status.return_value = 200 + handler.request.method = "GET" + handler.request.remote_ip = "127.0.0.1" + handler.request.uri = ( + "http://example.com/path?password=secret123&token=default_token&normal=value" + ) + handler.request.request_time.return_value = 0.1 + handler.serverapp = server_app_with_extra_scrub_keys + handler.log = Mock() + handler.current_user = None + + log_request(handler, record_prometheus_metrics=False) + + handler.log.debug.assert_called_once() + call_args = handler.log.debug.call_args[0][0] + + assert "secret123" not in call_args + assert "default_token" not in call_args + assert "[secret]" in call_args + assert "normal=value" in call_args