huggingface
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
Lines changed: 1 addition & 0 deletions
diff --git a/tests/test_common_gpu.py b/tests/test_common_gpu.py
Lines changed: 2 additions & 3 deletions
diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py
Lines changed: 3 additions & 3 deletions
diff --git a/tests/test_gptqmodel.py b/tests/test_gptqmodel.py
Lines changed: 4 additions & 4 deletions
@@ -11,6 +11,7 @@ on:
 
 env:
   HF_HOME: .cache/huggingface
+  TRANSFORMERS_IS_CI: 1
 
 permissions: {}
 
 
@@ -18,7 +18,6 @@
 import pytest
 import torch
 import torch.nn.functional as F
-from datasets import load_dataset
 from parameterized import parameterized
 from torch import nn
 from transformers import (
@@ -59,6 +58,7 @@
 
 from .testing_utils import (
     device_count,
+    load_cat_image,
     require_bitsandbytes,
     require_multi_accelerator,
     require_non_cpu,
@@ -1416,8 +1416,7 @@ def test_apply_GS_hra_conv2d_inference(self):
         # check for different result with and without apply_GS
         model_id = "microsoft/resnet-18"
         image_processor = AutoImageProcessor.from_pretrained(model_id)
-        dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
-        image = dataset["test"]["image"][0]
+        image = load_cat_image()
         data = image_processor(image, return_tensors="pt")
 
         model = AutoModelForImageClassification.from_pretrained(model_id).eval()
 
@@ -17,7 +17,6 @@
 
 import pytest
 import torch
-from datasets import load_dataset
 from parameterized import parameterized
 from transformers import (
     AutoModelForCausalLM,
@@ -43,6 +42,7 @@
 
 from .testing_common import PeftCommonTester
 from .testing_common import PeftTestConfigManagerForDecoderModels as PeftTestConfigManager
+from .testing_utils import load_dataset_english_quotes
 
 
 PEFT_DECODER_MODELS_TO_TEST = [
@@ -526,7 +526,7 @@ def process(samples):
             tokenized = tokenizer(samples["quote"], truncation=True, max_length=128)
             return tokenized
 
-        data = load_dataset("ybelkada/english_quotes_copy")
+        data = load_dataset_english_quotes()
         data = data.map(process, batched=True)
 
         with tempfile.TemporaryDirectory() as tmp_dirname:
@@ -579,7 +579,7 @@ def process(samples):
             tokenized = tokenizer(samples["quote"], truncation=True, max_length=128)
             return tokenized
 
-        data = load_dataset("ybelkada/english_quotes_copy")
+        data = load_dataset_english_quotes()
         data = data.map(process, batched=True)
 
         with tempfile.TemporaryDirectory() as tmp_dirname:
 
@@ -20,7 +20,6 @@
 
 import pytest
 import torch
-from datasets import load_dataset
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
@@ -40,6 +39,7 @@
 from peft.utils import SAFETENSORS_WEIGHTS_NAME, infer_device
 
 from .testing_utils import (
+    load_dataset_english_quotes,
     require_gptqmodel,
     require_optimum,
     require_torch_multi_gpu,
@@ -158,7 +158,7 @@ def test_causal_lm_training(self):
             )
             model = get_peft_model(model, config)
 
-            data = load_dataset("ybelkada/english_quotes_copy")
+            data = load_dataset_english_quotes()
             data = data.map(lambda samples: self.tokenizer(samples["quote"]), batched=True)
 
             trainer = Trainer(
@@ -221,7 +221,7 @@ def test_adalora_causalLM(self):
 
         model = get_peft_model(model, peft_config)
 
-        data = load_dataset("ybelkada/english_quotes_copy")
+        data = load_dataset_english_quotes()
         data = data.map(lambda samples: self.tokenizer(samples["quote"]), batched=True)
         batch = tokenizer(data["train"][:3]["quote"], return_tensors="pt", padding=True)
         self._check_inference_finite(model, batch)
@@ -287,7 +287,7 @@ def test_causal_lm_training_multi_gpu(self):
 
             model = get_peft_model(model, config)
 
-            data = load_dataset("Abirate/english_quotes")
+            data = load_dataset_english_quotes()
             data = data.map(lambda samples: self.tokenizer(samples["quote"]), batched=True)
 
             trainer = Trainer(