Revert "test: skip HF API live integration tests (#8889)" (#8914)

julian-risch · davidsbatista · web-flow · commit 6652dd7550bc · 2025-02-25T09:03:20.000+01:00
* Revert "test: skip HF API live integration tests (#8889)"

  This reverts commit 56a3a9b.

* Replace zephyr-7b-beta model with SmolLM2-1.7B-Instruct

* Use zephyr-7b-beta model but extend instructions

---------

Co-authored-by: David S. Batista <dsbatista@gmail.com>
diff --git a/test/components/embedders/test_hugging_face_api_document_embedder.py b/test/components/embedders/test_hugging_face_api_document_embedder.py
@@ -358,7 +358,10 @@ def test_run_custom_batch_size(self, mock_check_valid_model):
 
     @pytest.mark.flaky(reruns=5, reruns_delay=5)
     @pytest.mark.integration
-    @pytest.mark.skip(reason="Temporarily skipped due to limits on HF API requests.")
+    @pytest.mark.skipif(
+        not os.environ.get("HF_API_TOKEN", None),
+        reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
+    )
     def test_live_run_serverless(self):
         docs = [
             Document(content="I love cheese", meta={"topic": "Cuisine"}),
diff --git a/test/components/embedders/test_hugging_face_api_text_embedder.py b/test/components/embedders/test_hugging_face_api_text_embedder.py
@@ -187,7 +187,10 @@ def test_run_wrong_embedding_shape(self, mock_check_valid_model):
 
     @pytest.mark.flaky(reruns=5, reruns_delay=5)
     @pytest.mark.integration
-    @pytest.mark.skip(reason="Temporarily skipped due to limits on HF API requests.")
+    @pytest.mark.skipif(
+        not os.environ.get("HF_API_TOKEN", None),
+        reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
+    )
     def test_live_run_serverless(self):
         embedder = HuggingFaceAPITextEmbedder(
             api_type=HFEmbeddingAPIType.SERVERLESS_INFERENCE_API,
diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py
@@ -526,7 +526,10 @@ def test_run_with_tools(self, mock_check_valid_model, tools):
         }
 
     @pytest.mark.integration
-    @pytest.mark.skip(reason="Temporarily skipped due to limits on HF API requests.")
+    @pytest.mark.skipif(
+        not os.environ.get("HF_API_TOKEN", None),
+        reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
+    )
     @pytest.mark.flaky(reruns=3, reruns_delay=10)
     def test_live_run_serverless(self):
         generator = HuggingFaceAPIChatGenerator(
@@ -547,7 +550,10 @@ def test_live_run_serverless(self):
         assert "completion_tokens" in response["replies"][0].meta["usage"]
 
     @pytest.mark.integration
-    @pytest.mark.skip(reason="Temporarily skipped due to limits on HF API requests.")
+    @pytest.mark.skipif(
+        not os.environ.get("HF_API_TOKEN", None),
+        reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
+    )
     @pytest.mark.flaky(reruns=3, reruns_delay=10)
     def test_live_run_serverless_streaming(self):
         generator = HuggingFaceAPIChatGenerator(
@@ -573,7 +579,13 @@ def test_live_run_serverless_streaming(self):
         assert "completion_tokens" in response_meta["usage"]
 
     @pytest.mark.integration
-    @pytest.mark.skip(reason="Temporarily skipped due to limits on HF API requests.")
+    @pytest.mark.skipif(
+        not os.environ.get("HF_API_TOKEN", None),
+        reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
+    )
+    @pytest.mark.xfail(
+        reason="The Hugging Face API can be unstable and this test may fail intermittently", strict=False
+    )
     def test_live_run_with_tools(self, tools):
         """
         We test the round trip: generate tool call, pass tool message, generate response.
diff --git a/test/components/generators/test_hugging_face_api.py b/test/components/generators/test_hugging_face_api.py
@@ -291,7 +291,10 @@ def mock_iter(self):
 
     @pytest.mark.flaky(reruns=5, reruns_delay=5)
     @pytest.mark.integration
-    @pytest.mark.skip(reason="Temporarily skipped due to limits on HF API requests.")
+    @pytest.mark.skipif(
+        not os.environ.get("HF_API_TOKEN", None),
+        reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
+    )
     def test_run_serverless(self):
         generator = HuggingFaceAPIGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -312,16 +315,21 @@ def test_run_serverless(self):
         assert len(response["meta"]) > 0
         assert [isinstance(meta, dict) for meta in response["meta"]]
 
+    @pytest.mark.flaky(reruns=5, reruns_delay=5)
     @pytest.mark.integration
-    @pytest.mark.skip(reason="Temporarily skipped due to limits on HF API requests.")
+    @pytest.mark.skipif(
+        not os.environ.get("HF_API_TOKEN", None),
+        reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
+    )
     def test_live_run_streaming_check_completion_start_time(self):
         generator = HuggingFaceAPIGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
             api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
+            generation_kwargs={"max_new_tokens": 30},
             streaming_callback=streaming_callback_handler,
         )
 
-        results = generator.run("What is the capital of France?")
+        results = generator.run("You are a helpful agent that answers questions. What is the capital of France?")
 
         assert len(results["replies"]) == 1
         assert "Paris" in results["replies"][0]