scikit-learn-contrib · mathurinm · Oct 22, 2022 · Oct 12, 2022 · Oct 14, 2022 · Oct 14, 2022
diff --git a/skglm/solvers/__init__.py b/skglm/solvers/__init__.py
@@ -1,9 +1,14 @@
 from .anderson_cd import AndersonCD
 from .base import BaseSolver
+from .fista import FISTA
 from .gram_cd import GramCD
 from .group_bcd import GroupBCD
 from .multitask_bcd import MultiTaskBCD
 from .prox_newton import ProxNewton
 
 
-__all__ = [AndersonCD, BaseSolver, GramCD, GroupBCD, MultiTaskBCD, ProxNewton]
+# __all__ must list names as strings, or `import *` raises TypeError.
+__all__ = [
+    "AndersonCD", "BaseSolver", "FISTA", "GramCD", "GroupBCD", "MultiTaskBCD",
+    "ProxNewton",
+]
diff --git a/skglm/solvers/fista.py b/skglm/solvers/fista.py
@@ -0,0 +1,123 @@
+import numpy as np
+from numba import njit
+from skglm.solvers.base import BaseSolver
+
+
+@njit
+def _prox_vec(w, z, penalty, lipschitz):
+    """Apply ``penalty.prox_1d`` to each coordinate of ``z``, writing into ``w``.
+
+    ``w`` is overwritten in place and returned; the step passed to the prox is
+    ``1 / lipschitz``.
+    """
+    # XXX: TO DISCUSS: should add a vectorized prox update
+    step = 1 / lipschitz  # loop invariant, computed once
+    n_features = w.shape[0]
+    for j in range(n_features):
+        w[j] = penalty.prox_1d(z[j], step, j)
+    return w
+
+
+class FISTA(BaseSolver):
+    r"""ISTA solver with Nesterov acceleration (FISTA).
+
+    The datafit is hard-coded as the quadratic loss
+    ``||y - X w||^2 / (2 * n_samples)``; only the penalty is pluggable.
+
+    Parameters
+    ----------
+    max_iter : int, default=100
+        Maximum number of iterations.
+
+    tol : float, default=1e-4
+        The solver stops once the stopping criterion falls below ``tol``.
+
+    fit_intercept : bool, default=False
+        Stored on the instance; ``solve`` does not fit an intercept.
+
+    warm_start : bool, default=False
+        Stored on the instance; not read by ``solve``.
+
+    opt_freq : int, default=100
+        The stopping criterion is evaluated when ``n_iter % opt_freq == 0``.
+
+    verbose : bool or int, default=0
+        If truthy, print the objective and stopping criterion at each check.
+    """
+
+    def __init__(self, max_iter=100, tol=1e-4, fit_intercept=False, warm_start=False,
+                 opt_freq=100, verbose=0):
+        self.max_iter = max_iter
+        self.tol = tol
+        self.fit_intercept = fit_intercept
+        self.warm_start = warm_start
+        self.opt_freq = opt_freq
+        self.verbose = verbose
+
+    def solve(self, X, y, penalty, w_init=None, weights=None):
+        """Run FISTA on the quadratic datafit plus ``penalty``.
+
+        Parameters
+        ----------
+        X : array, shape (n_samples, n_features)
+            Data matrix.
+
+        y : array, shape (n_samples,)
+            Target vector.
+
+        penalty : object
+            Must provide ``prox_1d``, ``subdiff_distance`` and ``value``.
+
+        w_init : array, shape (n_features,), optional
+            Starting point (copied, never modified). Defaults to zeros.
+
+        weights : array, optional
+            Currently unused.
+
+        Returns
+        -------
+        w : array, shape (n_features,)
+            Last iterate computed before stopping.
+        """
+        # needs a quadratic datafit, but works with L1, WeightedL1, SLOPE
+        n_samples, n_features = X.shape
+        all_features = np.arange(n_features)
+        t_new = 1
+
+        w = w_init.copy() if w_init is not None else np.zeros(n_features)
+        z = w.copy()
+
+        # FISTA with Gram update
+        G = X.T @ X
+        Xty = X.T @ y
+        lipschitz = np.linalg.norm(X, ord=2) ** 2 / n_samples
+
+        for n_iter in range(self.max_iter):
+            t_old = t_new
+            t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
+            w_old = w.copy()  # _prox_vec overwrites w in place
+            grad = (G @ z - Xty) / n_samples
+            z -= grad / lipschitz
+            w = _prox_vec(w, z, penalty, lipschitz)
+            z = w + (t_old - 1.) / t_new * (w - w_old)
+
+            if n_iter % self.opt_freq == 0:
+                # `grad` was taken at the extrapolated point z; optimality of the
+                # returned iterate must be measured with the gradient at w.
+                grad_w = (G @ w - Xty) / n_samples
+                opt = penalty.subdiff_distance(w, grad_w, all_features)
+                stop_crit = np.max(opt)
+
+                if self.verbose:
+                    p_obj = (np.sum((y - X @ w) ** 2) / (2 * n_samples)
+                             + penalty.value(w))
+                    print(
+                        f"Iteration {n_iter+1}: {p_obj:.10f}, "
+                        f"stopping crit: {stop_crit:.2e}"
+                    )
+
+                if stop_crit < self.tol:
+                    if self.verbose:
+                        print(f"Stopping criterion max violation: {stop_crit:.2e}")
+                    break
+        return w
diff --git a/toy_fista.py b/toy_fista.py
@@ -0,0 +1,29 @@
+import numpy as np
+from numpy.linalg import norm
+from skglm.solvers import FISTA
+from skglm.penalties import L1
+from skglm.estimators import Lasso
+from skglm.utils import make_correlated_data, compiled_clone
+
+
+# Solver settings; TOL is used by both FISTA and the reference estimator.
+TOL = 1e-10
+N_ITER_MAX = 1000
+CHECK_EVERY = 100
+
+X, y, _ = make_correlated_data(n_samples=200, n_features=100, random_state=24)
+
+# Regularization strength: (||X^T y||_inf / n_samples) / 10.
+alpha = norm(X.T @ y, ord=np.inf) / X.shape[0] / 10
+
+# Run FISTA directly on a compiled L1 penalty.
+fista_coef = FISTA(
+    max_iter=N_ITER_MAX, tol=TOL, opt_freq=CHECK_EVERY, verbose=1
+).solve(X, y, compiled_clone(L1(alpha)))
+
+# Reference coefficients from the Lasso estimator on the same problem.
+reference = Lasso(alpha=alpha, tol=TOL, fit_intercept=False)
+reference.fit(X, y)
+
+# Both approaches are expected to produce the same coefficients.
+np.testing.assert_allclose(fista_coef, reference.coef_, rtol=1e-5)