Skip to content

Commit 5209fca

Browse files
committed
Implement the 'CMAESOptimizer' public class
1 parent 356281e commit 5209fca

File tree

2 files changed

+261
-0
lines changed

2 files changed

+261
-0
lines changed

src/gradient_free_optimizers/optimizer_search/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# License: MIT License
44

55
from .bayesian_optimization import BayesianOptimizer
6+
from .cma_es import CMAESOptimizer
67
from .differential_evolution import DifferentialEvolutionOptimizer
78
from .direct_algorithm import DirectAlgorithm
89
from .downhill_simplex import DownhillSimplexOptimizer
@@ -26,6 +27,7 @@
2627
from .tree_structured_parzen_estimators import TreeStructuredParzenEstimators
2728

2829
__all__ = [
30+
"CMAESOptimizer",
2931
"HillClimbingOptimizer",
3032
"StochasticHillClimbingOptimizer",
3133
"RepulsingHillClimbingOptimizer",
Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License
"""CMA-ES using covariance matrix adaptation for continuous domains."""

from collections.abc import Callable
from typing import Literal

from .._init_utils import get_default_initialize
from ..optimizers import (
    CMAESOptimizer as _CMAESOptimizer,
)
from ..search import Search
15+
class CMAESOptimizer(_CMAESOptimizer, Search):
    """
    Evolutionary optimizer using covariance matrix adaptation.

    CMA-ES (Covariance Matrix Adaptation Evolution Strategy) is a
    state-of-the-art evolutionary algorithm for difficult continuous
    optimization problems. It adapts a full covariance matrix to learn
    the correlation structure of the fitness landscape, enabling
    efficient search even when parameters are strongly correlated or
    have different sensitivities.

    The algorithm maintains a multivariate normal distribution and
    iteratively:

    1. Samples ``population`` candidate solutions from the distribution
    2. Evaluates and ranks them by fitness
    3. Updates the distribution mean toward the best solutions
    4. Adapts the covariance matrix using evolution paths
    5. Controls the global step size via cumulative step-size adaptation

    CMA-ES is considered the gold standard for continuous black-box
    optimization. For mixed search spaces (discrete, categorical),
    this implementation samples in continuous space and rounds to the
    nearest valid value, which is a pragmatic compromise.

    The algorithm is well-suited for:

    - Continuous optimization with correlated parameters
    - Problems where parameter sensitivities differ strongly
    - Moderate dimensionality (up to ~100 dimensions)
    - Multi-modal landscapes (especially with IPOP restart)

    Parameters
    ----------
    search_space : dict[str, list]
        The search space to explore, defined as a dictionary mapping parameter
        names to arrays of possible values.

        Each key is a parameter name (string), and each value is a numpy array
        or list of discrete values that the parameter can take. The optimizer
        will only evaluate positions that are on this discrete grid.

        Example: A 2D search space with 100 points per dimension::

            search_space = {
                "x": np.linspace(-10, 10, 100),
                "y": np.linspace(-10, 10, 100),
            }

        The resolution of each dimension (number of points in the array)
        directly affects optimization quality and speed. More points give
        finer resolution but increase the search space size exponentially.
    initialize : dict[str, int] or None, default={"vertices": 4, "random": 2}
        Strategy for generating initial positions before the main optimization
        loop begins. Initialization samples are evaluated first, and the best
        one becomes the starting point (mean) for the CMA-ES distribution.

        Supported keys:

        - ``"grid"``: ``int`` -- Number of positions on a regular grid.
        - ``"vertices"``: ``int`` -- Number of corner/edge positions of the
          search space.
        - ``"random"``: ``int`` -- Number of uniformly random positions.
        - ``"warm_start"``: ``list[dict]`` -- Specific positions to evaluate,
          each as a dict mapping parameter names to values.

        Multiple strategies can be combined::

            initialize = {"vertices": 4, "random": 10}
            initialize = {"warm_start": [{"x": 0.5, "y": 1.0}], "random": 5}

        More initialization samples improve the starting point but consume
        iterations from ``n_iter``. For expensive objectives, a few targeted
        warm-start points are often more efficient than many random samples.
    constraints : list[Callable[[dict], bool]] or None, default=[]
        A list of constraint functions that restrict the search space. Each
        constraint is a callable that receives a parameter dictionary and
        returns ``True`` if the position is valid, ``False`` if it should
        be rejected.

        Rejected positions are discarded and regenerated: the optimizer
        resamples a new candidate position (up to 100 retries per step).
        During initialization, positions that violate constraints are
        filtered out entirely.

        Example: Constrain the search to a circular region::

            def circular_constraint(para):
                return para["x"]**2 + para["y"]**2 <= 25

            constraints = [circular_constraint]

        Multiple constraints are combined with AND logic (all must return
        ``True``).
    random_state : int or None, default=None
        Seed for the random number generator to ensure reproducible results.

        - ``None``: Use a new random state each run (non-deterministic).
        - ``int``: Seed the random number generator for reproducibility.

        Setting a fixed seed is recommended for debugging and benchmarking.
        Different seeds may lead to different optimization trajectories,
        especially for stochastic optimizers.
    rand_rest_p : float, default=0
        Probability of performing a random restart instead of the normal
        algorithm step. At each iteration, a uniform random number is drawn;
        if it falls below ``rand_rest_p``, the optimizer jumps to a random
        position instead of following its strategy.

        - ``0.0``: No random restarts (pure algorithm behavior).
        - ``0.01-0.05``: Light diversification, helps escape shallow local
          optima.
        - ``0.1-0.3``: Aggressive restarts, useful for highly multi-modal
          landscapes.
        - ``1.0``: Equivalent to random search.
    nth_process : int or None, default=None
        Index of this optimizer instance when several run in parallel;
        passed through unchanged to the base optimizer. Usually left at
        ``None`` for single-process runs.
    population : int or None, default=None
        Number of candidate solutions sampled per generation (lambda in
        CMA-ES notation). If ``None``, uses the standard heuristic:
        ``4 + floor(3 * ln(n_dimensions))``.

        - ``None``: Auto-compute based on dimensionality (recommended).
        - ``10-20``: Small populations for fast convergence on simple
          problems.
        - ``50-100``: Large populations for better exploration on
          multimodal or high-dimensional problems.

        Each generation requires ``population`` function evaluations,
        so total cost per generation scales linearly with this parameter.
    mu : int or None, default=None
        Number of best solutions selected as parents for the next
        generation. If ``None``, uses ``population // 2``.

        - ``None``: Auto-compute as half the population (recommended).
        - Smaller ``mu``: Stronger selection pressure, faster convergence
          but higher risk of premature convergence.
        - Larger ``mu``: Weaker selection pressure, better exploration.

        Must be less than or equal to ``population``.
    sigma : float, default=0.3
        Initial step size as a fraction of the normalized search space
        range. Controls the initial spread of sampled solutions around
        the mean.

        - ``0.1``: Conservative, tight initial sampling.
        - ``0.3``: Standard starting point (default).
        - ``0.5``: Broad initial exploration.

        CMA-ES adapts sigma automatically during optimization, so the
        initial value is not critical. Values between 0.1 and 0.5
        generally work well.
    ipop_restart : bool, default=False
        Enable IPOP (Increasing Population) restart strategy. When
        stagnation is detected (no improvement for many generations),
        the algorithm restarts with a doubled population size and a
        random starting point.

        - ``False``: No restarts, single run (default).
        - ``True``: Enable IPOP restarts for better global search on
          multimodal landscapes.

        IPOP-CMA-ES is particularly effective for problems with many
        local optima, as it combines the precision of CMA-ES with
        increasingly thorough global search.

    Notes
    -----
    CMA-ES adapts the search distribution using two evolution paths:

    - **Cumulation path for sigma** (p_sigma): Controls global step size
      via Cumulative Step-size Adaptation (CSA). If steps are correlated
      (consistent direction), sigma increases; if anti-correlated
      (oscillating), sigma decreases.
    - **Cumulation path for C** (p_c): Provides the rank-one update to
      the covariance matrix, capturing the dominant search direction.

    The covariance matrix is updated via:

    - **Rank-one update**: Uses p_c to learn the principal search
      direction.
    - **Rank-mu update**: Uses all mu selected solutions to learn the
      local landscape shape.

    For mixed search spaces (discrete/categorical dimensions), the
    algorithm operates in a normalized continuous space and maps back
    to valid values via rounding. This is a standard approach (MI-CMA-ES)
    that preserves the covariance adaptation while supporting non-continuous
    parameters.

    See Also
    --------
    EvolutionStrategyOptimizer : Simpler ES with self-adaptive sigma.
    DifferentialEvolutionOptimizer : DE using vector differences.
    ParticleSwarmOptimizer : Swarm intelligence approach.

    Examples
    --------
    >>> import numpy as np
    >>> from gradient_free_optimizers import CMAESOptimizer

    >>> def rosenbrock(para):
    ...     x, y = para["x"], para["y"]
    ...     return -(100 * (y - x**2)**2 + (1 - x)**2)

    >>> search_space = {
    ...     "x": np.linspace(-5, 5, 1000),
    ...     "y": np.linspace(-5, 5, 1000),
    ... }

    >>> opt = CMAESOptimizer(search_space, population=20, sigma=0.3)
    >>> opt.search(rosenbrock, n_iter=500)
    """

    def __init__(
        self,
        search_space: dict[str, list],
        # PEP 484: defaults of None must be annotated as `X | None`
        # (implicit Optional is disallowed by type checkers).
        initialize: (
            dict[
                Literal["grid", "vertices", "random", "warm_start"],
                int | list[dict],
            ]
            | None
        ) = None,
        constraints: list[Callable[[dict], bool]] | None = None,
        random_state: int | None = None,
        rand_rest_p: float = 0,
        nth_process: int | None = None,
        population: int | None = None,
        mu: int | None = None,
        sigma: float = 0.3,
        ipop_restart: bool = False,
    ):
        # Mutable defaults are materialized per call so instances never
        # share initialization dicts or constraint lists.
        if initialize is None:
            initialize = get_default_initialize()
        if constraints is None:
            constraints = []

        # All configuration is delegated to the core optimizer; this class
        # only adds the public `Search` interface.
        super().__init__(
            search_space=search_space,
            initialize=initialize,
            constraints=constraints,
            random_state=random_state,
            rand_rest_p=rand_rest_p,
            nth_process=nth_process,
            population=population,
            mu=mu,
            sigma=sigma,
            ipop_restart=ipop_restart,
        )

0 commit comments

Comments
 (0)