Commit a1ac51b

fix import issue
1 parent 6a411ef commit a1ac51b

File tree

6 files changed (+12, -19 lines)

examples/demo_jax_distributed.py

Lines changed: 2 additions & 3 deletions
@@ -12,7 +12,6 @@
 import jax.numpy as jnp
 import tensorflow as tf # just for tf.data
 import keras # Keras multi-backend
-from flax import nnx
 import numpy as np
 from tqdm import tqdm
 
@@ -264,7 +263,7 @@ def compute_loss(trainable_variables, non_trainable_variables, x, y):
 
 
 # Training step: Keras provides a pure functional optimizer.stateless_apply
-@nnx.jit
+@jax.jit
 def train_step(train_state, x, y):
     (loss_value, non_trainable_variables), grads = compute_gradients(
         train_state.trainable_variables,
@@ -302,7 +301,7 @@ def train_step(train_state, x, y):
 sharded_data = jax.device_put(data.numpy(), data_sharding)
 
 
-@nnx.jit
+@jax.jit
 def predict(data):
     predictions, updated_non_trainable_variables = model.stateless_call(
         device_train_state.trainable_variables,
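
The pattern behind this file's change, sketched below: with Keras 3's stateless APIs the training step is a pure function of its inputs, so plain jax.jit is enough and no flax/nnx dependency is needed. The tiny model, optimizer, and loss here are illustrative stand-ins, not the example's actual code.

import os

os.environ["KERAS_BACKEND"] = "jax"  # must be set before importing keras

import jax
import jax.numpy as jnp
import keras

model = keras.Sequential([keras.layers.Dense(1)])
model.build((None, 4))
optimizer = keras.optimizers.SGD(learning_rate=0.1)
optimizer.build(model.trainable_variables)


def compute_loss(trainable_vars, non_trainable_vars, x, y):
    # stateless_call threads variable values explicitly instead of mutating state
    y_pred, non_trainable_vars = model.stateless_call(
        trainable_vars, non_trainable_vars, x
    )
    return jnp.mean((y_pred - y) ** 2), non_trainable_vars


@jax.jit  # pure function: every piece of state goes in and comes back out
def train_step(trainable_vars, non_trainable_vars, optimizer_vars, x, y):
    (loss, non_trainable_vars), grads = jax.value_and_grad(
        compute_loss, has_aux=True
    )(trainable_vars, non_trainable_vars, x, y)
    trainable_vars, optimizer_vars = optimizer.stateless_apply(
        optimizer_vars, grads, trainable_vars
    )
    return loss, trainable_vars, non_trainable_vars, optimizer_vars


# Callers pass variable *values* (e.g. [v.value for v in model.trainable_variables])
# and feed the returned values back in on the next step.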

guides/distributed_training_with_jax.py

Lines changed: 2 additions & 2 deletions
@@ -48,7 +48,7 @@
 import numpy as np
 import tensorflow as tf
 import keras
-from flax import nnx
+import flax
 from jax.experimental import mesh_utils
 from jax.sharding import Mesh
 from jax.sharding import NamedSharding
@@ -186,7 +186,7 @@ def compute_loss(trainable_variables, non_trainable_variables, x, y):
 
 
 # Training step, Keras provides a pure functional optimizer.stateless_apply
-@nnx.jit
+@flax.jax.jit
 def train_step(train_state, x, y):
     (
         trainable_variables,
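
For reference, a minimal sketch of how the sharding utilities imported above are usually combined to shard a data batch across all local devices. The mesh shape, axis name, and batch shape are illustrative and need not match the guide's actual values.

import jax
import numpy as np
from jax.experimental import mesh_utils
from jax.sharding import Mesh, NamedSharding, PartitionSpec

num_devices = jax.device_count()
devices = mesh_utils.create_device_mesh((num_devices,))
mesh = Mesh(devices, axis_names=("batch",))

# Split the leading (batch) dimension of each array across the "batch" axis.
data_sharding = NamedSharding(mesh, PartitionSpec("batch"))

batch = np.zeros((num_devices * 8, 28, 28, 1), dtype="float32")
sharded_batch = jax.device_put(batch, data_sharding)  # one shard per device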

keras/src/backend/jax/layer.py

Lines changed: 0 additions & 3 deletions
@@ -1,6 +1,3 @@
-from flax import nnx
-
-
 class JaxLayer(nnx.Module):
     def __init_subclass__(cls):
         super().__init_subclass__()

keras/src/backend/jax/trainer.py

Lines changed: 5 additions & 6 deletions
@@ -5,7 +5,6 @@
 
 import jax
 import numpy as np
-from flax import nnx
 
 from keras.src import backend
 from keras.src import callbacks as callbacks_module
@@ -234,7 +233,7 @@ def concatenate(outputs):
                 return output
 
             if not self.run_eagerly and self.jit_compile:
-                concatenate = nnx.jit(concatenate)
+                concatenate = jax.jit(concatenate)
 
             def iterator_step(state, iterator):
                 data = next(iterator)
@@ -278,7 +277,7 @@ def make_train_function(self, force=False):
             # so that jax will reuse the memory buffer for outputs.
             # This will reduce the memory usage of the training function by
             # half.
-            train_step = nnx.jit(self.train_step, donate_argnums=0)
+            train_step = jax.jit(self.train_step, donate_argnums=0)
         else:
             train_step = self.train_step
 
@@ -294,7 +293,7 @@ def make_test_function(self, force=False):
             # so that jax will reuse the memory buffer for outputs.
             # This will reduce the memory usage of the training function by
             # half.
-            test_step = nnx.jit(self.test_step, donate_argnums=0)
+            test_step = jax.jit(self.test_step, donate_argnums=0)
         else:
             test_step = self.test_step
 
@@ -311,7 +310,7 @@ def predict_step(state, data):
             return outputs, (state[0], non_trainable_variables)
 
         if not self.run_eagerly and self.jit_compile:
-            predict_step = nnx.jit(predict_step, donate_argnums=0)
+            predict_step = jax.jit(predict_step, donate_argnums=0)
 
         _step_function = self._make_function(
             predict_step, concatenate_outputs=True
@@ -905,7 +904,7 @@ def _enforce_jax_state_sharding(
 
         Since the output of the train/eval step will be used as inputs to next
         step, we need to ensure that they have the same sharding spec, so that
-        nnx.jit won't have to recompile the train/eval function.
+        jax.jit won't have to recompile the train/eval function.
 
         Note that this function will also rely on the recorded sharding spec
         for each of states.
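
A toy sketch of the donate_argnums=0 pattern in the hunks above: donating the first argument lets XLA reuse that input's buffers for the outputs, which is what allows the jitted train/test/predict steps to roughly halve their peak memory. The function below is illustrative, not the trainer's actual step.

import jax
import jax.numpy as jnp


def step(state, x):
    # stand-in for a real step that returns an updated state of the same shape
    return state + x


jitted_step = jax.jit(step, donate_argnums=0)

state = jnp.zeros((1024, 1024))
x = jnp.ones((1024, 1024))
new_state = jitted_step(state, x)
# The old `state` buffer may now have been donated to `new_state`; touching it
# again can raise a "buffer has been donated/deleted" error on some backends.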

keras/src/random/random_test.py

Lines changed: 2 additions & 3 deletions
@@ -1,7 +1,6 @@
 import numpy as np
 import pytest
 from absl.testing import parameterized
-from flax import nnx
 
 import keras
 from keras.src import backend
@@ -385,7 +384,7 @@ def test_dropout_jax_jit_stateless(self):
 
         x = ops.ones(3)
 
-        @nnx.jit
+        @jax.jit
         def train_step(x):
             with keras.src.backend.StatelessScope():
                 x = keras.layers.Dropout(rate=0.1)(x, training=True)
@@ -414,7 +413,7 @@ def test_jax_rngkey_seed(self):
         reason="This test requires `jax` as the backend.",
     )
     def test_jax_unseed_disallowed_during_tracing(self):
-        @nnx.jit
+        @jax.jit
         def jit_fn():
             return random.randint((2, 2), 0, 10, seed=None)
 
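
Mirroring the tests above, a small sketch (assumed usage, not the test code itself) of what does and does not work under jax.jit with Keras random ops: an explicit seed is traceable, while seed=None is the case the last test asserts is rejected during tracing.

import os

os.environ["KERAS_BACKEND"] = "jax"  # must be set before importing keras

import jax
import keras


@jax.jit
def sample_seeded():
    # explicit seed: fine to trace and compile
    return keras.random.randint((2, 2), minval=0, maxval=10, seed=42)


@jax.jit
def sample_unseeded():
    # seed=None inside a trace is what test_jax_unseed_disallowed_during_tracing
    # expects to be rejected
    return keras.random.randint((2, 2), minval=0, maxval=10, seed=None)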

keras/src/random/seed_generator_test.py

Lines changed: 1 addition & 2 deletions
@@ -1,6 +1,5 @@
 import numpy as np
 import pytest
-from flax import nnx
 
 from keras.src import backend
 from keras.src import ops
@@ -79,7 +78,7 @@ def test_seed_generator_unexpected_kwargs(self):
         backend.backend() != "jax", reason="This test requires the JAX backend"
     )
     def test_jax_tracing_with_global_seed_generator(self):
-        @nnx.jit
+        @jax.jit
         def traced_function():
             return seed_generator.global_seed_generator().next()
 
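
For context on the object under test, a brief sketch (assumed usage, not this test's code) of keras.random.SeedGenerator, the stateful seed source that global_seed_generator() returns an instance of: passing a generator as the seed makes successive draws reproducible without hard-coding a seed per call.

import keras

seed_gen = keras.random.SeedGenerator(seed=1337)

a = keras.random.normal((2, 2), seed=seed_gen)
b = keras.random.normal((2, 2), seed=seed_gen)  # different draw, same reproducible stream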
