From 10f23e376b6bbb7e18d814948ff240bcd5dd68d9 Mon Sep 17 00:00:00 2001 From: Helena Graf Date: Thu, 16 Feb 2023 11:51:50 +0100 Subject: [PATCH 1/8] Add skeleton for wandb example --- examples/6_advanced_features/1_wandb_logging.py | 9 +++++++++ examples/6_advanced_features/README.rst | 2 ++ 2 files changed, 11 insertions(+) create mode 100644 examples/6_advanced_features/1_wandb_logging.py create mode 100644 examples/6_advanced_features/README.rst diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py new file mode 100644 index 0000000000..3e7101e949 --- /dev/null +++ b/examples/6_advanced_features/1_wandb_logging.py @@ -0,0 +1,9 @@ +""" +Use Weights and Biases for logging +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This example shows how to use Weights and Biases for logging. + +""" + +pass diff --git a/examples/6_advanced_features/README.rst b/examples/6_advanced_features/README.rst new file mode 100644 index 0000000000..ec83a5ca9e --- /dev/null +++ b/examples/6_advanced_features/README.rst @@ -0,0 +1,2 @@ +Advanced Features +======== \ No newline at end of file From 81d80c537187dfb5f930331a7b2428385507d502 Mon Sep 17 00:00:00 2001 From: Helena Graf Date: Thu, 16 Feb 2023 13:31:37 +0100 Subject: [PATCH 2/8] Add minimal example --- .../6_advanced_features/1_wandb_logging.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py index 3e7101e949..053425b213 100644 --- a/examples/6_advanced_features/1_wandb_logging.py +++ b/examples/6_advanced_features/1_wandb_logging.py @@ -6,4 +6,28 @@ """ -pass +from ConfigSpace import Configuration, ConfigurationSpace + +import numpy as np +from smac import HyperparameterOptimizationFacade, Scenario +from sklearn import datasets +from sklearn.svm import SVC +from sklearn.model_selection import cross_val_score + +iris = datasets.load_iris() + + +def train(config: Configuration, seed: int = 0) -> float: + classifier = SVC(C=config["C"], random_state=seed) + scores = cross_val_score(classifier, iris.data, iris.target, cv=5) + return 1 - np.mean(scores) + + +configspace = ConfigurationSpace({"C": (0.100, 1000.0)}) + +# Scenario object specifying the optimization environment +scenario = Scenario(configspace, deterministic=True, n_trials=200) + +# Use SMAC to find the best configuration/hyperparameters +smac = HyperparameterOptimizationFacade(scenario, train) +incumbent = smac.optimize() \ No newline at end of file From 4a5cf75b73482d0f83497658cafcc0ebeac57a5e Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Fri, 2 Jun 2023 14:25:49 +0200 Subject: [PATCH 3/8] Current status --- .../6_advanced_features/1_wandb_logging.py | 65 ++++++++++++++++++- setup.py | 3 + smac/intensifier/abstract_intensifier.py | 11 ++++ 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py index 053425b213..aaeff941e6 100644 --- a/examples/6_advanced_features/1_wandb_logging.py +++ b/examples/6_advanced_features/1_wandb_logging.py @@ -5,6 +5,7 @@ This example shows how to use Weights and Biases for logging. """ +from __future__ import annotations from ConfigSpace import Configuration, ConfigurationSpace @@ -14,6 +15,60 @@ from sklearn.svm import SVC from sklearn.model_selection import cross_val_score +from smac import Callback +import smac +from wandb import Table + + +class WandBCallback(Callback): + def __init__( + self, + project: str, + entity: str, + id: str | None = None, + outdir: str | None = None, + mode: str | None = None, + resume: str = "allow", + job_type: str | None = None, + group: str | None = None, + config: dict | str | None = None, + save_code: bool = True, + **kwargs + ) -> None: + import wandb + self.run = wandb.init( + id=id, + resume=resume, + mode=mode, + project=project, + job_type=job_type, + entity=entity, + group=group, + dir=outdir, + config=config, + save_code=save_code, + **kwargs + ) + super().__init__() + + + def on_end(self, smbo: smac.main.smbo.SMBO) -> None: + intensifier_data = smbo.intensifier.get_data() + trajectory = intensifier_data["trajectory"] + import pandas as pd + df = pd.DataFrame(data=trajectory) + print(df) + # trajectory = Table(dataframe=df, allow_mixed_types=True) + df["costs"] = df["costs"].apply(lambda x: x[0]) # TODO properly log multi costs + for index, row in df.iterrows(): + print(dict(row)) + self.run.log(dict(row)) + self.run.finish() + return super().on_end(smbo) + + + + iris = datasets.load_iris() @@ -26,8 +81,14 @@ def train(config: Configuration, seed: int = 0) -> float: configspace = ConfigurationSpace({"C": (0.100, 1000.0)}) # Scenario object specifying the optimization environment -scenario = Scenario(configspace, deterministic=True, n_trials=200) +scenario = Scenario(configspace, deterministic=True, n_trials=100) + +wandb_callback = WandBCallback( + project="smac-dev", + entity="benjamc", + config=Scenario.make_serializable(scenario), +) # Use SMAC to find the best configuration/hyperparameters -smac = HyperparameterOptimizationFacade(scenario, train) +smac = HyperparameterOptimizationFacade(scenario, train, callbacks=[wandb_callback], overwrite=True) incumbent = smac.optimize() \ No newline at end of file diff --git a/setup.py b/setup.py index f194d36771..804413688a 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,9 @@ def read_file(filepath: str) -> str: "flake8", "pre-commit", ], + "wandb": [ + "wandb", + ] } setuptools.setup( diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py index b944867273..79aae61724 100644 --- a/smac/intensifier/abstract_intensifier.py +++ b/smac/intensifier/abstract_intensifier.py @@ -664,6 +664,17 @@ def save(self, filename: str | Path) -> None: with open(filename, "w") as fp: json.dump(data, fp, indent=2) + def get_data(self): + data = { + "incumbent_ids": [self.runhistory.get_config_id(config) for config in self._incumbents], + "rejected_config_ids": self._rejected_config_ids, + "incumbents_changed": self._incumbents_changed, + "trajectory": [dataclasses.asdict(item) for item in self._trajectory], + "state": self.get_state(), + } + return data + + def load(self, filename: str | Path) -> None: """Loads the latest state of the intensifier including the incumbents and trajectory.""" if isinstance(filename, str): From de68c9350476f856e3c5184587b13e4304472d6d Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Thu, 8 Jun 2023 17:10:07 +0200 Subject: [PATCH 4/8] Log cost per step --- .gitignore | 4 +- .../6_advanced_features/1_wandb_logging.py | 14 ++++- mwe.py | 62 +++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 mwe.py diff --git a/.gitignore b/.gitignore index 679812827c..080a3cd9cf 100644 --- a/.gitignore +++ b/.gitignore @@ -147,4 +147,6 @@ src # Pycharm .idea -.vscode \ No newline at end of file +.vscode +tmp +wandb \ No newline at end of file diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py index aaeff941e6..3a5b73e091 100644 --- a/examples/6_advanced_features/1_wandb_logging.py +++ b/examples/6_advanced_features/1_wandb_logging.py @@ -19,6 +19,9 @@ import smac from wandb import Table +from smac.runhistory import TrialInfo, TrialValue +from dataclasses import asdict + class WandBCallback(Callback): def __init__( @@ -50,6 +53,15 @@ def __init__( **kwargs ) super().__init__() + + def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialValue) -> bool | None: + info_dict = asdict(info) + info_dict["config"] = info_dict["config"].get_dictionary() + value_dict = asdict(value) + log_dict = info_dict | value_dict + log_dict["step"] = smbo.runhistory.finished + self.run.log(data=log_dict) + return super().on_tell_end(smbo, info, value) def on_end(self, smbo: smac.main.smbo.SMBO) -> None: @@ -81,7 +93,7 @@ def train(config: Configuration, seed: int = 0) -> float: configspace = ConfigurationSpace({"C": (0.100, 1000.0)}) # Scenario object specifying the optimization environment -scenario = Scenario(configspace, deterministic=True, n_trials=100) +scenario = Scenario(configspace, deterministic=True, n_trials=100, seed=3) wandb_callback = WandBCallback( project="smac-dev", diff --git a/mwe.py b/mwe.py new file mode 100644 index 0000000000..9199073f96 --- /dev/null +++ b/mwe.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import logging +import time +from pathlib import Path +import random + +from ConfigSpace import Configuration, ConfigurationSpace, Float +from smac import AlgorithmConfigurationFacade, Scenario +from dask_jobqueue import SLURMCluster + +cs = ConfigurationSpace(seed=0) +cs.add_hyperparameters([ + Float("x", [0, 1], default=0.75), +]) + +def run_trial(config: Configuration, seed: int = 0) -> float: + x = config["x"] + path = Path(f"tmp/mwe/logs/log.{x}") + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f"start with {x=}") + return (x-0.5)**2 + + +if __name__ == "__main__": + cluster = SLURMCluster( + cores=1, + memory="4 GB", + queue="cpu_short", + # interface="eth0", + walltime=f"00:10:00", + # job_script_prologue=[ + # "ulimit -c 0", + # ] + log_directory="tmp/mwe/slurm" + + ) + cluster.scale(jobs=10) + + scenario = Scenario( + cs, + deterministic=True, + walltime_limit=600, + n_trials=400, + use_default_config=True, + crash_cost = 2, + trial_walltime_limit=1000, + n_workers=2, + + ) + + smac = AlgorithmConfigurationFacade( + scenario, + run_trial, + overwrite=True, + dask_client=None#cluster.get_client(), + ) + smac.intensifier._retries = 10**6 + time.sleep(10) + incumbent = smac.optimize() + + logging.info(f"Incumbent: {incumbent.get_dictionary()}") \ No newline at end of file From 77b929fb8a96d97627d3e96ab4579833c3f3c8fe Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 12 Jun 2023 11:46:17 +0200 Subject: [PATCH 5/8] Update CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56feda166b..7ff22b7210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ ## Bugfixes - Fix bug in the incumbent selection in the case that multi-fidelity is combined with multi-objective (#1019). +## Features +- Log to WandB (#1037) + # 2.0.1 ## Improvements From ba5a5c1ecf277f2cf2b3cdb69a264301d4f85acd Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 12 Jun 2023 11:47:32 +0200 Subject: [PATCH 6/8] Delete spurious file --- mwe.py | 62 ---------------------------------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 mwe.py diff --git a/mwe.py b/mwe.py deleted file mode 100644 index 9199073f96..0000000000 --- a/mwe.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python3 - -import logging -import time -from pathlib import Path -import random - -from ConfigSpace import Configuration, ConfigurationSpace, Float -from smac import AlgorithmConfigurationFacade, Scenario -from dask_jobqueue import SLURMCluster - -cs = ConfigurationSpace(seed=0) -cs.add_hyperparameters([ - Float("x", [0, 1], default=0.75), -]) - -def run_trial(config: Configuration, seed: int = 0) -> float: - x = config["x"] - path = Path(f"tmp/mwe/logs/log.{x}") - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(f"start with {x=}") - return (x-0.5)**2 - - -if __name__ == "__main__": - cluster = SLURMCluster( - cores=1, - memory="4 GB", - queue="cpu_short", - # interface="eth0", - walltime=f"00:10:00", - # job_script_prologue=[ - # "ulimit -c 0", - # ] - log_directory="tmp/mwe/slurm" - - ) - cluster.scale(jobs=10) - - scenario = Scenario( - cs, - deterministic=True, - walltime_limit=600, - n_trials=400, - use_default_config=True, - crash_cost = 2, - trial_walltime_limit=1000, - n_workers=2, - - ) - - smac = AlgorithmConfigurationFacade( - scenario, - run_trial, - overwrite=True, - dask_client=None#cluster.get_client(), - ) - smac.intensifier._retries = 10**6 - time.sleep(10) - incumbent = smac.optimize() - - logging.info(f"Incumbent: {incumbent.get_dictionary()}") \ No newline at end of file From 48a33661efe0ab3109d5ddeb2c39c61daf933c94 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 12 Jun 2023 12:40:21 +0200 Subject: [PATCH 7/8] Move callback to callback folder --- .../6_advanced_features/1_wandb_logging.py | 74 ++----------------- smac/callback/__init__.py | 2 + smac/callback/wandb_logging.py | 62 ++++++++++++++++ 3 files changed, 69 insertions(+), 69 deletions(-) create mode 100644 smac/callback/wandb_logging.py diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py index 3a5b73e091..9ce045851e 100644 --- a/examples/6_advanced_features/1_wandb_logging.py +++ b/examples/6_advanced_features/1_wandb_logging.py @@ -7,79 +7,15 @@ """ from __future__ import annotations -from ConfigSpace import Configuration, ConfigurationSpace - import numpy as np -from smac import HyperparameterOptimizationFacade, Scenario +from ConfigSpace import Configuration, ConfigurationSpace from sklearn import datasets -from sklearn.svm import SVC from sklearn.model_selection import cross_val_score +from sklearn.svm import SVC -from smac import Callback import smac -from wandb import Table - -from smac.runhistory import TrialInfo, TrialValue -from dataclasses import asdict - - -class WandBCallback(Callback): - def __init__( - self, - project: str, - entity: str, - id: str | None = None, - outdir: str | None = None, - mode: str | None = None, - resume: str = "allow", - job_type: str | None = None, - group: str | None = None, - config: dict | str | None = None, - save_code: bool = True, - **kwargs - ) -> None: - import wandb - self.run = wandb.init( - id=id, - resume=resume, - mode=mode, - project=project, - job_type=job_type, - entity=entity, - group=group, - dir=outdir, - config=config, - save_code=save_code, - **kwargs - ) - super().__init__() - - def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialValue) -> bool | None: - info_dict = asdict(info) - info_dict["config"] = info_dict["config"].get_dictionary() - value_dict = asdict(value) - log_dict = info_dict | value_dict - log_dict["step"] = smbo.runhistory.finished - self.run.log(data=log_dict) - return super().on_tell_end(smbo, info, value) - - - def on_end(self, smbo: smac.main.smbo.SMBO) -> None: - intensifier_data = smbo.intensifier.get_data() - trajectory = intensifier_data["trajectory"] - import pandas as pd - df = pd.DataFrame(data=trajectory) - print(df) - # trajectory = Table(dataframe=df, allow_mixed_types=True) - df["costs"] = df["costs"].apply(lambda x: x[0]) # TODO properly log multi costs - for index, row in df.iterrows(): - print(dict(row)) - self.run.log(dict(row)) - self.run.finish() - return super().on_end(smbo) - - - +from smac import HyperparameterOptimizationFacade, Scenario +from smac.callback import WandBCallback iris = datasets.load_iris() @@ -103,4 +39,4 @@ def train(config: Configuration, seed: int = 0) -> float: # Use SMAC to find the best configuration/hyperparameters smac = HyperparameterOptimizationFacade(scenario, train, callbacks=[wandb_callback], overwrite=True) -incumbent = smac.optimize() \ No newline at end of file +incumbent = smac.optimize() diff --git a/smac/callback/__init__.py b/smac/callback/__init__.py index 73e9dc6e47..c23d3bc291 100644 --- a/smac/callback/__init__.py +++ b/smac/callback/__init__.py @@ -1,7 +1,9 @@ from smac.callback.callback import Callback from smac.callback.metadata_callback import MetadataCallback +from smac.callback.wandb_logging import WandBCallback __all__ = [ "Callback", "MetadataCallback", + "WandBCallback", ] diff --git a/smac/callback/wandb_logging.py b/smac/callback/wandb_logging.py new file mode 100644 index 0000000000..ecb65c09c6 --- /dev/null +++ b/smac/callback/wandb_logging.py @@ -0,0 +1,62 @@ +from dataclasses import asdict + +import smac +from smac import Callback +from smac.runhistory import TrialInfo, TrialValue + + +class WandBCallback(Callback): + def __init__( + self, + project: str, + entity: str, + id: str | None = None, + outdir: str | None = None, + mode: str | None = None, + resume: str = "allow", + job_type: str | None = None, + group: str | None = None, + config: dict | str | None = None, + save_code: bool = True, + **kwargs, + ) -> None: + import wandb + + self.run = wandb.init( + id=id, + resume=resume, + mode=mode, + project=project, + job_type=job_type, + entity=entity, + group=group, + dir=outdir, + config=config, + save_code=save_code, + **kwargs, + ) + super().__init__() + + def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialValue) -> bool | None: + info_dict = asdict(info) + info_dict["config"] = info_dict["config"].get_dictionary() + value_dict = asdict(value) + log_dict = info_dict | value_dict + log_dict["step"] = smbo.runhistory.finished + self.run.log(data=log_dict) + return super().on_tell_end(smbo, info, value) + + def on_end(self, smbo: smac.main.smbo.SMBO) -> None: + intensifier_data = smbo.intensifier.get_data() + trajectory = intensifier_data["trajectory"] + import pandas as pd + + df = pd.DataFrame(data=trajectory) + print(df) + # trajectory = Table(dataframe=df, allow_mixed_types=True) + df["costs"] = df["costs"].apply(lambda x: x[0]) # TODO properly log multi costs + for index, row in df.iterrows(): + print(dict(row)) + self.run.log(dict(row)) + self.run.finish() + return super().on_end(smbo) From 7c5946c0163170ea09849b97226f367989dd8449 Mon Sep 17 00:00:00 2001 From: Helena Graf Date: Tue, 30 Jul 2024 16:15:56 +0200 Subject: [PATCH 8/8] Add docstrings, explain example more, fix circular import --- .../6_advanced_features/1_wandb_logging.py | 11 ++++- smac/callback/wandb_logging.py | 43 +++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/examples/6_advanced_features/1_wandb_logging.py b/examples/6_advanced_features/1_wandb_logging.py index 9ce045851e..994d8f5f00 100644 --- a/examples/6_advanced_features/1_wandb_logging.py +++ b/examples/6_advanced_features/1_wandb_logging.py @@ -2,7 +2,16 @@ Use Weights and Biases for logging ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This example shows how to use Weights and Biases for logging. +This example shows how to use Weights and Biases (WandB) for logging. + +To use WandB, you need to install the package via pip: + +.. code-block:: bash + + pip install wandb + +Then you can use the WandBCallback to log the results of the optimization as well as intermediate information to WandB. +This is done by creating a WandBCallback object and passing it to the used Facade. """ from __future__ import annotations diff --git a/smac/callback/wandb_logging.py b/smac/callback/wandb_logging.py index ecb65c09c6..54e5c35a84 100644 --- a/smac/callback/wandb_logging.py +++ b/smac/callback/wandb_logging.py @@ -1,11 +1,46 @@ +from typing import Any + from dataclasses import asdict import smac -from smac import Callback +from smac.callback import Callback from smac.runhistory import TrialInfo, TrialValue class WandBCallback(Callback): + """ + + Callback to log the results of the optimization as well as intermediate information to WandB. + + Logs TrialInfo, TrialValue and the number of successfully executed trials (as step) to WandB `on_tell_end`. + Upon the end of the run, logs the trajectory of the intensifier to WandB. + + Parameters + ---------- + project : str + The project name of the WandB project. + entity : str + The entity name of the WandB project. + id : str, optional + The id of the run. + outdir : str, optional + The output directory of the WandB run. + mode : str, optional + The mode of the WandB run. + resume : str, optional + The resume mode of the WandB run. + job_type : str, optional + The job type of the WandB run. + group : str, optional + The group of the WandB run. + config : dict or str, optional + The configuration of the WandB run. + save_code : bool, optional + Whether to save the code of the WandB run. + **kwargs : dict + Additional arguments to pass to the WandB run. + """ + def __init__( self, project: str, @@ -18,7 +53,7 @@ def __init__( group: str | None = None, config: dict | str | None = None, save_code: bool = True, - **kwargs, + **kwargs: dict[str, Any], ) -> None: import wandb @@ -37,7 +72,7 @@ def __init__( ) super().__init__() - def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialValue) -> bool | None: + def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialValue) -> bool | None: # noqa: D102 info_dict = asdict(info) info_dict["config"] = info_dict["config"].get_dictionary() value_dict = asdict(value) @@ -46,7 +81,7 @@ def on_tell_end(self, smbo: smac.main.smbo.SMBO, info: TrialInfo, value: TrialVa self.run.log(data=log_dict) return super().on_tell_end(smbo, info, value) - def on_end(self, smbo: smac.main.smbo.SMBO) -> None: + def on_end(self, smbo: smac.main.smbo.SMBO) -> None: # noqa: D102 intensifier_data = smbo.intensifier.get_data() trajectory = intensifier_data["trajectory"] import pandas as pd