Merge pull request #24 from armingh2000/fix/fact_scorer_knowledge

armingh2000 · web-flow · commit d52fa7eba7aa · 2024-04-22T11:53:05.000-04:00
Fix/fact scorer knowledge
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -80,6 +80,12 @@ All notable changes to this project will be documented in this file.
 
 - Fix knowledge source in fact scorer prompt (used to send all knowledge sources in one prompt)
 
+## v 1.3.0 - 2024-04-22
+
+- Refactor knowledge source passing in FactScore
+- Update FactScorer prompt (improve performance)
+- Update README.md to contain the new prompt
+
 <!--
 ### Added
 
diff --git a/FactScoreLite/fact_scorer.py b/FactScoreLite/fact_scorer.py
@@ -34,8 +34,8 @@ def get_instructions(self) -> str:
             str: The instructions for the prompt generation.
         """
 
-        instructions = "Evaluate the truthfulness of the statement based solely on the provided context and provide the reason for your decision.\n\n"
-        instructions += "Instruction:\nOnly consider the statement true if it can be directly verified by the information in the context. If the information in the statement cannot be found in the context or differs from it, label it as false.\n\n"
+        # instructions = "Evaluate the truthfulness of the statement based solely on the provided context.\n\n"
+        instructions = "Instruction:\nOnly consider the statement true if it can be directly verified by the information in the context. If the information in the statement cannot be found in the context or differs from it, label it as false.\n\n"
         true_example = self.demons[0]
         false_example = random.choice(self.demons[1:])
 
diff --git a/FactScoreLite/factscore.py b/FactScoreLite/factscore.py
@@ -98,18 +98,27 @@ def get_decisions(
         decisions = self.decisions_handler.load()
         scores = []
         init_scores = []
-        cur_knw_idx = 0
 
         for entry in decisions:
             score, init_score = self.calculate_score(entry["decision"])
             init_scores.append(init_score)
             scores.append(score)
-            cur_knw_idx += 1
 
-        for entry in tqdm(generation_facts_pairs[len(decisions) :]):
+        assert len(generation_facts_pairs) == len(
+            knowledge_sources
+        ), "Number of generation-facts pairs and knowledge sources should be the same."
+
+        current_index = len(decisions)
+
+        for entry, knowledge_source in tqdm(
+            zip(
+                generation_facts_pairs[current_index:],
+                knowledge_sources[current_index:],
+            )
+        ):
             generation, facts = entry["generation"], entry["facts"]
 
-            decision = self.fact_scorer.get_score(facts, knowledge_sources[cur_knw_idx])
+            decision = self.fact_scorer.get_score(facts, knowledge_source)
             score, init_score = self.calculate_score(decision)
 
             init_scores.append(init_score)
diff --git a/README.md b/README.md
@@ -170,13 +170,9 @@ The following prompt template is used to instruct GPT for scoring facts:
 ```
 # fact_scorer.py
 
-Evaluate the truthfulness of the statement based solely on the provided context and provide the reason for your decision.
-
-
 Instruction:
 Only consider the statement true if it can be directly verified by the information in the context. If the information in the statement cannot be found in the context or differs from it, label it as false.
 
-
 Context:
 knw 1
 Statement:
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = FactScoreLite
-version = 1.2.0
+version = 1.3.0
 author = armingh2000
 author_email = 
 license = MIT
diff --git a/tests/test_fact_scorer.py b/tests/test_fact_scorer.py
@@ -122,7 +122,6 @@ def test_get_instructions_true_false_demons(fact_scorer):
     # Test case for a single demon in self.demons
     fact_scorer.demons = mock_demons_data
     expected_instructions = (
-        "Evaluate the truthfulness of the statement based solely on the provided context and provide the reason for your decision.\n\n"
         "Instruction:\nOnly consider the statement true if it can be directly verified by the information in the context. If the information in the statement cannot be found in the context or differs from it, label it as false.\n\n"
         "Context:\nknw 1\n"
         "Statement:\nfact 1 True or False?\n"