
MLFlowLogger.save_dir mishandles absolute file: URIs on Windows #20972

@g-sawicki

Bug description

When an absolute file path is passed as tracking_uri on Windows, MLFlowLogger.save_dir does not convert the file: URI into a local path correctly and returns a malformed result.

Lightning then tries to write checkpoints under an invalid path such as ///C:/..., which fails with FileNotFoundError: [WinError 161] The specified path is invalid.

I am aware that using mlflow.set_tracking_uri("http://localhost:8080") or mlflow.set_tracking_uri("file:./mlruns") is a valid workaround.
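
For context, the workarounds in code (a sketch; the HTTP variant assumes a tracking server is already running locally):

import mlflow

mlflow.set_tracking_uri("file:./mlruns")  # relative file: URI avoids the drive-letter handling entirely
# or: mlflow.set_tracking_uri("http://localhost:8080")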

This is not related to #20669. I am willing to contribute a fix and a test case if needed.

# example

import mlflow
from lightning.pytorch import Trainer
from lightning.pytorch.demos import BoringModel
from lightning.pytorch.loggers import MLFlowLogger


model = BoringModel()

mlflow.pytorch.autolog()
with mlflow.start_run() as run:
    logger = MLFlowLogger(
        tracking_uri=mlflow.get_tracking_uri(), # file:///C:/Dev/example/mlruns
        run_id=run.info.run_id,
    )

    trainer = Trainer(
        max_epochs=1,
        logger=logger,
        limit_train_batches=1,
        limit_val_batches=1,
    )

    trainer.fit(model)
# On Windows, MLFlowLogger.save_dir mishandles the absolute tracking URI:
#   tracking_uri:       file:///C:/Dev/example/mlruns
#   resulting save_dir: ///C:/Dev/example/mlruns
#   expected save_dir:  C:/Dev/example/mlruns
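
The mismatch can also be reproduced without a full training run; a minimal sketch (hypothetical path; constructing the logger only requires mlflow to be installed, not a running tracking server):

from lightning.pytorch.loggers import MLFlowLogger

logger = MLFlowLogger(tracking_uri="file:///C:/Dev/example/mlruns")
print(logger.save_dir)  # prints ///C:/Dev/example/mlruns instead of a usable local path

# current implementation of MLFlowLogger.save_dir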

    @property
    @override
    def save_dir(self) -> Optional[str]:
        """The root file directory in which MLflow experiments are saved.

        Return:
            Local path to the root experiment directory if the tracking uri is local.
            Otherwise returns `None`.

        """
        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
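            # BUG: stripping only the "file:" prefix leaves the URI slashes in place,
            # so file:///C:/... becomes ///C:/...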
            return self._tracking_uri[len(LOCAL_FILE_URI_PREFIX) :]
        return None
# suggested fix

    @property
    @override
    def save_dir(self) -> Optional[str]:
        """The root file directory in which MLflow experiments are saved.

        Return:
            Local path to the root experiment directory if the tracking uri is local.
            Otherwise returns `None`.

        """
        from urllib.parse import urlparse
        from urllib.request import url2pathname

        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
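            # parse the file: URI and convert its path component into an OS-native
            # local path (handles the drive letter on Windows)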
            p = urlparse(self._tracking_uri)
            return url2pathname(p.path)
        return None
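
For reference, a minimal stdlib-only sketch of how the two approaches behave on the URI from the example; the commented outputs assume the snippet runs on Windows, and "file:" is used here to match the prefix stripped by the current implementation:

from urllib.parse import urlparse
from urllib.request import url2pathname

LOCAL_FILE_URI_PREFIX = "file:"  # the prefix stripped by the current implementation
tracking_uri = "file:///C:/Dev/example/mlruns"

# current behaviour: stripping the prefix keeps the URI's authority slashes
print(tracking_uri[len(LOCAL_FILE_URI_PREFIX):])   # ///C:/Dev/example/mlruns

# suggested behaviour: parse the URI and convert its path to a local path
print(url2pathname(urlparse(tracking_uri).path))   # C:\Dev\example\mlruns (when run on Windows)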

What version are you seeing the problem on?

v2.5

Reproduced in studio

No response

How to reproduce the bug

Run the example above (under Bug description) on Windows with an absolute file: tracking URI.

Error messages and logs

Traceback (most recent call last):
  File "C:\Dev\example\main.py", line 23, in <module>
    trainer.fit(model)
  File "C:\Dev\example\venv\Lib\site-packages\mlflow\utils\autologging_utils\safety.py", line 484, in safe_patch_function
    patch_function(call_original, *args, **kwargs)
  File "C:\Dev\example\venv\Lib\site-packages\mlflow\utils\autologging_utils\safety.py", line 182, in patch_with_managed_run
    result = patch_function(original, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Dev\example\venv\Lib\site-packages\mlflow\pytorch\_lightning_autolog.py", line 544, in patched_fit
    result = original(self, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Dev\example\venv\Lib\site-packages\mlflow\utils\autologging_utils\safety.py", line 475, in call_original
    return call_original_fn_with_event_logging(_original_fn, og_args, og_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Dev\example\venv\Lib\site-packages\mlflow\utils\autologging_utils\safety.py", line 426, in call_original_fn_with_event_logging
    original_fn_result = original_fn(*og_args, **og_kwargs)
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Dev\example\venv\Lib\site-packages\mlflow\utils\autologging_utils\safety.py", line 472, in _original_fn
    original_result = original(*_og_args, **_og_kwargs)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 561, in fit
    call._call_and_handle_interrupt(
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\trainer\call.py", line 48, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 599, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 1012, in _run
    results = self._run_stage()
              ^^^^^^^^^^^^^^^^^
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 1056, in _run_stage
    self.fit_loop.run()
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py", line 217, in run
    self.on_advance_end()
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py", line 470, in on_advance_end
    call._call_callback_hooks(trainer, "on_train_epoch_end", monitoring_callbacks=True)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\trainer\call.py", line 227, in _call_callback_hooks
    fn(trainer, trainer.lightning_module, *args, **kwargs)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py", line 329, in on_train_epoch_end
    self._save_topk_checkpoint(trainer, monitor_candidates)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py", line 391, in _save_topk_checkpoint
    self._save_none_monitor_checkpoint(trainer, monitor_candidates)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py", line 719, in _save_none_monitor_checkpoint
    self._save_checkpoint(trainer, filepath)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py", line 394, in _save_checkpoint
    trainer.save_checkpoint(filepath, self.save_weights_only)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 1397, in save_checkpoint
    self.strategy.save_checkpoint(checkpoint, filepath, storage_options=storage_options)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\pytorch\strategies\strategy.py", line 491, in save_checkpoint
    self.checkpoint_io.save_checkpoint(checkpoint, filepath, storage_options=storage_options)
  File "C:\Dev\example\venv\Lib\site-packages\lightning\fabric\plugins\io\torch_io.py", line 57, in save_checkpoint
    fs.makedirs(os.path.dirname(path), exist_ok=True)
  File "C:\Dev\example\venv\Lib\site-packages\fsspec\implementations\local.py", line 53, in makedirs
    os.makedirs(path, exist_ok=exist_ok)
  File "<frozen os>", line 215, in makedirs
  File "<frozen os>", line 215, in makedirs
  File "<frozen os>", line 215, in makedirs
  [Previous line repeated 3 more times]
  File "<frozen os>", line 225, in makedirs
FileNotFoundError: [WinError 161] The specified path is invalid: '///C:/'

Environment

- PyTorch Lightning version: 2.5.2
- PyTorch version: 2.7.1
- Python version: 3.12.4
- OS: Windows 10
- CUDA/cuDNN version: N/A
- GPU models and configuration: N/A
- How you installed Lightning (conda, pip, source): pip

More info

No response

cc @lantiga @Borda
