
Commit d386fc9

Update import of ids_for_prompt and fix some formatting
Signed-off-by: Andrea Fasoli <andrea.fasoli@ibm.com>
1 parent f7c458e

File tree

2 files changed: +49 −38 lines changed

aiu_fms_testing_utils/utils/__init__.py

Lines changed: 13 additions & 15 deletions
@@ -37,17 +37,17 @@ def __download_file(url, filename):
     try:
         response = requests.get(url, stream=True)
         response.raise_for_status()
-        
+
         with open(filename, 'wb') as file:
             for chunk in response.iter_content(chunk_size=8192):
                 file.write(chunk)
         print(f"Successfully downloaded {filename}")
-        
+
     except requests.exceptions.RequestException as e:
         print(f"An error occurred: {e}")
 
 def __sample_requests(
-        prompt_list: List[str],
+    prompt_list: List[str],
     num_requests: int,
     tokenizer: BaseTokenizer,
     prompt_length_min: int = 32,
@@ -67,16 +67,14 @@ def __sample_requests(
         # Tokenize the prompts and completions.
         prompt = prompt_list[i]
         prompt_token_ids = ids_for_prompt(prompt, tokenizer)
-        
+
         prompt_len = len(prompt_token_ids)
         if prompt_len < prompt_length_min or prompt_len > prompt_length_max:
             # Prune too short or too long sequences.
             continue
         filtered_dataset.append((prompt, prompt_len))
-        
-    return filtered_dataset
-        
 
+    return filtered_dataset
 
 def sample_sharegpt_requests(
     dataset_path: str,
@@ -96,15 +94,15 @@ def sample_sharegpt_requests(
    # Filter out the conversations with less than 2 turns.
    dataset = [data for data in dataset if len(data["conversations"]) >= 2]
    dataset = [data["conversations"][0]["value"] for data in dataset]
-    
+
    return __sample_requests(dataset, num_requests, tokenizer, prompt_length_min, prompt_length_max, seed)
 
 def sample_squad_v2_qa_requests(
     dataset_path: str,
-        num_requests: int,
-        tokenizer: BaseTokenizer,
-        prompt_length_min: int = 32,
-        prompt_length_max: int = 64,
+    num_requests: int,
+    tokenizer: BaseTokenizer,
+    prompt_length_min: int = 32,
+    prompt_length_max: int = 64,
     seed: Optional[int] = None
 ) -> List[Tuple[str, int]]:
     from datasets import load_dataset
@@ -113,10 +111,10 @@ def sample_squad_v2_qa_requests(
         ds = load_dataset(dataset_path)['train']
     else:
         ds = load_dataset("rajpurkar/squad_v2", cache_dir=dataset_path)['train']
-    
-    
+
+
     ds = [f"{data['context']}\n{data['question']}" for data in ds]
 
     return __sample_requests(ds, num_requests, tokenizer, prompt_length_min, prompt_length_max, seed)
-    
+
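For context, both public helpers in this file delegate to __sample_requests, which tokenizes each candidate prompt with ids_for_prompt(prompt, tokenizer) and keeps only prompts whose token count falls within [prompt_length_min, prompt_length_max]. A minimal sketch of driving sample_squad_v2_qa_requests; the tokenizer choice and cache path are illustrative assumptions, not part of this commit:

from fms.utils import tokenizers
from aiu_fms_testing_utils.utils import sample_squad_v2_qa_requests

# Illustrative tokenizer; any FMS BaseTokenizer is accepted.
tokenizer = tokenizers.get_tokenizer("char_tokenizer")

# First use downloads SQuAD v2 into the (hypothetical) cache directory,
# then returns up to num_requests (prompt, token_count) pairs whose
# tokenized length fits the requested window.
sampled = sample_squad_v2_qa_requests(
    dataset_path="/tmp/squad_v2_cache",  # hypothetical cache location
    num_requests=8,
    tokenizer=tokenizer,
    prompt_length_min=32,
    prompt_length_max=64,
    seed=0,
)
for prompt, prompt_len in sampled:
    print(prompt_len, prompt[:60].replace("\n", " "))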

aiu_fms_testing_utils/utils/decoders_utils.py

Lines changed: 36 additions & 23 deletions
@@ -15,7 +15,7 @@
 import torch
 
 # Local Packages
-from aiu_fms_testing_utils.utils import warmup_model
+from aiu_fms_testing_utils.utils import ids_for_prompt, warmup_model
 from aiu_fms_testing_utils.utils.aiu_setup import dprint, local_rank
 
 
@@ -34,12 +34,10 @@ def __init__(
         self.args = args
         self.device = device
 
-        self.add_special_tokens = False
         self.has_padding = True
         self.max_len = 0
         self.extra_generation_kwargs = {}
 
-        # !!! Inference arguments (hardcoded, as in the original script)
         self.do_sample = [False]
         self.use_cache = [args.no_use_cache]  # True/False identical with greedy iff `torch.use_deterministic_algorithms(True)`
 
@@ -83,10 +81,6 @@ def process_eval_set(self):
         """
 
         args = self.args
-        self.add_special_tokens = (
-            self.tokenizer.bos_token_id != self.tokenizer.eos_token_id
-        )
-
         if args.prompt_path != "":
             # Before creating the Path object, check if prompt_path has a glob pattern
             if isinstance(args.prompt_path, str):
@@ -114,50 +108,69 @@ def process_eval_set(self):
                 prompt_file_paths = [prompt_path]
 
             # Check if we found some files
-            assert len(prompt_file_paths) > 0, f"Can't find any prompt files at {prompt_path}"
+            assert len(prompt_file_paths) > 0, (
+                f"Can't find any prompt files at {prompt_path}"
+            )
 
             # Check if we have enough files
-            assert (
-                len(prompt_file_paths) >= args.batch_size
-            ), f"Not enough prompt files at {prompt_path} for a batch size of {args.batch_size}"
+            assert len(prompt_file_paths) >= args.batch_size, (
+                f"Not enough prompt files at {prompt_path} "
+                f"for a batch size of {args.batch_size}"
+            )
 
             prompts = []
             for i, prompt_file_path in enumerate(prompt_file_paths):
                 if i == args.batch_size:
                     break
-                prompts.append(self.ids_for_prompt(prompt_file_path.read_text(encoding="utf-8")))
+                prompts.append(
+                    ids_for_prompt(
+                        prompt_file_path.read_text(encoding="utf-8"),
+                        self.tokenizer,
+                    )
+                )
         else:
             if args.prompt_type == "chat":
-                template = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{}\n\n### Response:"
-
+                template = (
+                    "Below is an instruction that describes a task. Write a response "
+                    "that appropriately completes the request.\n\n### Instruction:"
+                    "\n{}\n\n### Response:"
+                )
                 prompt1 = template.format(
                     "Provide a list of instructions for preparing chicken soup."
                 )
                 prompt2 = template.format("Explain some popular greetings in Spanish.")
                 prompt3 = template.format("Explain to me why ignorance is bliss.")
                 prompt4 = template.format(
-                    "I have just come into a very large sum of money. Provide me a list of things that I can do with my new found wealth."
+                    "I have just come into a very large sum of money. Provide me a "
+                    "list of things that I can do with my new found wealth."
                 )
             elif args.prompt_type == "code":
-                template = "[INST] Write code to solve the following coding problem that obeys the constraints and passes the example test cases. Please wrap your code answer using ```:\n{}\n[/INST]"
+                template = (
+                    "[INST] Write code to solve the following coding problem that "
+                    "obeys the constraints and passes the example test cases. "
+                    "Please wrap your code answer using ```:\n{}\n[/INST]"
+                )
                 prompt1 = template.format("Write a bubble sort function in python.")
                 prompt2 = template.format(
-                    "Using the Java streams API, write a simple function which will get the cumulative sum of a list of integers."
+                    "Using the Java streams API, write a simple function which will "
+                    "get the cumulative sum of a list of integers."
                )
                 prompt3 = template.format(
-                    "In bash, how do I list all directories and sub-directories which contain a .py file."
+                    "In bash, how do I list all directories and sub-directories which "
+                    "contain a .py file."
                )
                 prompt4 = template.format(
-                    "Write a simple decorator in python which will modify all string inputs to ints if possible."
+                    "Write a simple decorator in python which will modify all string "
+                    "inputs to ints if possible."
                )
             else:
                 dprint("prompt_type must be one of chat or code")
                 exit()
 
-            prompt1 = self.ids_for_prompt(prompt1)
-            prompt2 = self.ids_for_prompt(prompt2)
-            prompt3 = self.ids_for_prompt(prompt3)
-            prompt4 = self.ids_for_prompt(prompt4)
+            prompt1 = ids_for_prompt(prompt1, self.tokenizer)
+            prompt2 = ids_for_prompt(prompt2, self.tokenizer)
+            prompt3 = ids_for_prompt(prompt3, self.tokenizer)
+            prompt4 = ids_for_prompt(prompt4, self.tokenizer)
             prompts = [prompt1, prompt2, prompt3, prompt4]
             prompts = prompts * ((args.batch_size // 4) + 1)
             prompts = prompts[: args.batch_size]
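The substantive change in this file is replacing the decoder-local self.ids_for_prompt(...) method with the shared ids_for_prompt helper imported from aiu_fms_testing_utils.utils, which takes the tokenizer explicitly. A minimal sketch of the new call pattern; the tokenizer choice is an illustrative assumption, not part of this commit:

from fms.utils import tokenizers
from aiu_fms_testing_utils.utils import ids_for_prompt

# Illustrative tokenizer; inside the decoder class, self.tokenizer is
# passed here instead.
tokenizer = tokenizers.get_tokenizer("char_tokenizer")

# Old call site (method form, removed by this commit):
#     ids = self.ids_for_prompt(prompt)
# New call site (shared helper, tokenizer explicit):
ids = ids_for_prompt("Explain some popular greetings in Spanish.", tokenizer)
print(len(ids))  # token count, as used by __sample_requests for filtering

Passing the tokenizer as an argument is what lets __sample_requests in utils/__init__.py reuse the same helper, as seen in the first file of this commit.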
