Merge pull request #22 from armingh2000/feature/fact_scorer_demons

armingh2000 · web-flow · commit 49748e648eed · 2024-04-18T22:24:26.000-04:00
Feature/fact scorer demons
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -67,6 +67,15 @@ All notable changes to this project will be documented in this file.
 - Add tests for the fix.
 - Remove unnecessary code.
 
+## v 1.1.0 - 2024-04-18
+
+- Update fact scorer prompt
+- Add tests for fact scorer demon load
+- Fix demon format in atomic facts tests
+- Rename demon files
+- Add fact scorer demons json file
+- Add CONTRIBUTING.md guidelines
+
 <!--
 ### Added
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -0,0 +1,55 @@
+# Contributing to FactScoreLite
+
+We love your input! We want to make contributing to this project as easy and transparent as possible, whether it's:
+
+- Reporting a bug
+- Discussing the current state of the code
+- Submitting a fix
+- Proposing new features
+- Becoming a maintainer
+
+## We Develop with GitHub
+
+We use GitHub to host code, to track issues and feature requests, as well as accept pull requests.
+
+## We Use [GitHub Flow](https://guides.github.com/introduction/flow/index.html), So All Code Changes Happen Through Pull Requests
+
+Pull requests are the best way to propose changes to the codebase (we use [GitHub Flow](https://guides.github.com/introduction/flow/index.html)). We actively welcome your pull requests:
+
+1. Fork the repo and create your branch from `main`.
+2. If you've added code that should be tested, add tests.
+3. If you've changed APIs, update the documentation.
+4. Ensure the test suite passes.
+5. Make sure your code lints.
+6. Issue that pull request!
+
+## Any contributions you make will be under the MIT Software License
+
+In short, when you submit code changes, your submissions are understood to be under the same [MIT License](LICENSE.md) that covers the project. Feel free to contact the maintainers if that's a concern.
+
+## Report bugs using GitHub's [issues](https://github.com/armingh2000/FactScoreLite/issues)
+
+We use GitHub issues to track public bugs. Report a bug by [opening a new issue](https://github.com/armingh2000/FactScoreLite/issues/new); it's that easy!
+
+## Write bug reports with detail, background, and sample code
+
+**Great Bug Reports** tend to have:
+
+- A quick summary and/or background
+- Steps to reproduce
+  - Be specific!
+  - Give sample code if you can.
+- What you expected would happen
+- What actually happens
+- Notes (possibly including why you think this might be happening, or stuff you tried that didn't work)
+
+## Use a Consistent Coding Style
+
+- You can autoformat code using [black](https://github.com/psf/black) for Python
+- Include comments in your code where necessary
+- Write meaningful commit messages
+- If you are creating new functions/methods, make sure you add docstrings.
+
+## License
+
+By contributing, you agree that your contributions will be licensed under the project's MIT License.
diff --git a/FactScoreLite/atomic_facts.py b/FactScoreLite/atomic_facts.py
@@ -45,15 +45,15 @@ def load_demons(self):
         Returns:
             list: A list of examples (demonstrations).
         """
-        with open(configs.demons_path, "r") as file:
+        with open(configs.atomic_facts_demons_path, "r") as file:
             demons = json.load(file)
 
         return demons
 
     def get_instructions(self) -> str:
         """
         Prepare instructions for the prompt generation.
-        Instructions include the examples given in the demons.json file.
+        Instructions include the examples given in the atomic_facts_demons.json file.
 
         Returns:
             str: The instructions for the prompt generation.
diff --git a/FactScoreLite/configs.py b/FactScoreLite/configs.py
@@ -4,7 +4,8 @@
 data_path = importlib.resources.files("FactScoreLite") / "data"
 
 # Path to the data file within the package
-demons_path = data_path / "demons.json"
+atomic_facts_demons_path = data_path / "atomic_facts_demons.json"
+fact_scorer_demons_path = data_path / "fact_scorer_demons.json"
 
 # OpenAI API
 max_tokens = 1024
diff --git a/FactScoreLite/data/atomic_facts_demons.json b/FactScoreLite/data/atomic_facts_demons.json
@@ -1,4 +1,3 @@
-
 [
   {
     "Sentence": "The Turbo V6 engine boasts an impressive horsepower of 450 and a peak torque of 510 lb-ft, achieved between 2,500 and 5,500 rpm, equipped with a 10-speed automatic transmission and a dual-exhaust system, enhancing both performance and sound.",
diff --git a/FactScoreLite/data/fact_scorer_demons.json b/FactScoreLite/data/fact_scorer_demons.json
@@ -0,0 +1,33 @@
+[
+    {
+      "knowledge_source": "For the optimal operation of your 2022 Honda Accord, the engine oil should be replaced with 0W-20 synthetic oil every 7,500 miles under normal driving conditions.",
+      "fact": "The 2022 Honda Accord requires an oil change every 7,500 miles.",
+      "is_supported": true,
+      "reason": "The fact is directly supported as the manual specifies the oil change interval and the type of oil to use."
+    },
+    {
+      "knowledge_source": "Tire maintenance is crucial for the longevity of your tires and vehicle handling. Rotating your tires at recommended intervals helps distribute wear evenly and extends tire life.",
+      "fact": "Tire rotation for vehicles should be done every 10,000 miles.",
+      "is_supported": false,
+      "reason": "The knowledge source suggests the importance of regular tire rotation but does not specify the 10,000 miles interval."
+    },
+    {
+      "knowledge_source": "Ensure your vehicle's compatibility with connected services. The 2021 Ford Mustang supports Apple CarPlay, enabling a seamless integration with your Apple devices.",
+      "fact": "The 2021 Ford Mustang is compatible with Android Auto.",
+      "is_supported": false,
+      "reason": "The fact is not supported as the manual only mentions compatibility with Apple CarPlay."
+    },
+    {
+      "knowledge_source": "Your vehicle's climate control system is designed to maintain the cabin temperature for comfort.",
+      "fact": "The vehicle includes a heated steering wheel.",
+      "is_supported": false,
+      "reason": "This fact is irrelevant as the manual does not mention steering wheel heating."
+    },
+    {
+      "knowledge_source": "The fuel system should only be filled with unleaded gasoline, as specified in the manual to avoid engine damage.",
+      "fact": "The vehicle can be filled with both gasoline and diesel.",
+      "is_supported": false,
+      "reason": "This fact contradicts the knowledge source which clearly states that only unleaded gasoline should be used."
+    }
+  ]
+  
diff --git a/FactScoreLite/fact_scorer.py b/FactScoreLite/fact_scorer.py
@@ -1,12 +1,53 @@
 import string
 from .openai_agent import OpenAIAgent
+from . import configs
+import json
+import random
 
 
 class FactScorer:
     def __init__(self):
+        # Examples (demonstrations) that are used in prompt generation;
+        # loaded from the JSON file at configs.fact_scorer_demons_path
+        self.demons = self.load_demons()
         # To interact with OpenAI APIs
         self.openai_agent = OpenAIAgent()
 
+    def load_demons(self) -> list:
+        """
+        Load examples (demonstrations) from a JSON file.
+        This will be used in the prompt generation.
+
+        The file location is read from configs.fact_scorer_demons_path.
+
+        Returns:
+            list: A list of examples (demonstrations).
+        """
+        # JSON text is UTF-8; pin the encoding so decoding does not depend
+        # on the platform's locale default.
+        with open(configs.fact_scorer_demons_path, "r", encoding="utf-8") as file:
+            demons = json.load(file)
+
+        return demons
+
+    def get_instructions(self) -> str:
+        """
+        Prepare instructions for the prompt generation.
+        Instructions include the examples given in the fact_scorer_demons.json file.
+
+        The first demon is always included (the supported example in the bundled
+        file), followed by one demon picked at random from the remaining ones.
+        If fewer than two demons are available, only the available ones are used.
+
+        Returns:
+            str: The instructions for the prompt generation.
+        """
+
+        instructions = "Evaluate the truthfulness of the statement based solely on the provided context and provide the reason for your decision.\n\n"
+        instructions += "Instruction:\nOnly consider the statement true if it can be directly verified by the information in the context. If the information in the statement cannot be found in the context or differs from it, label it as false.\n\n"
+
+        # Slicing copies, so self.demons itself is never mutated.
+        examples = self.demons[:1]
+        remaining = self.demons[1:]
+        # random.choice raises IndexError on an empty sequence, so only draw
+        # a second example when there is one to draw from.
+        if remaining:
+            examples.append(random.choice(remaining))
+
+        for demon in examples:
+            instructions += f"Context:\n{demon['knowledge_source']}\n"
+            instructions += f"Statement:\n{demon['fact']} True or False?\n"
+            instructions += f"Output:\n{demon['is_supported']}\n\n"
+            # TODO: add reason (+change parsing)
+            # instructions += f"Reason:\n{demon['reason']}\n\n"
+
+        return instructions
+
     def get_score(self, facts: list, knowledge_source: str) -> list:
         """
         Calculates the score of each atomic fact based on the knowledge source.
@@ -26,15 +67,10 @@ def get_score(self, facts: list, knowledge_source: str) -> list:
             atom = atom.strip()
 
             # Prompt that will be sent to GPT
-            prompt = "Answer the question based on the given context.\n\n"
-            prompt += f"Context:\n{knowledge_source}"
-
-            if not prompt[-1] in string.punctuation:
-                prompt += "."
-
-            prompt += "\n\n"
-
-            prompt += f"Input: {atom} True or False?\nOutput:\n"
+            prompt = self.get_instructions()
+            prompt += f"Context:\n{knowledge_source}\n"
+            prompt += f"Statement:\n{atom} True or False?\n"
+            prompt += "Output:\n"
 
             output = self.openai_agent.generate(prompt)
 
diff --git a/README.md b/README.md
@@ -108,7 +108,7 @@ from FactScoreLite import FactScorer
 scores = FactScorer.get_scores(facts, knowledge_sources)
 ```
 
-## Prompt Engineering
+## Fact Extraction Prompt Engineering
 
 To instruct GPT on how to break each sentence into facts, we provide [examples](FactScoreLite/data/atomic_facts_demons.json) (demonstrations, i.e., demons) that are embedded in the prompt. These demons are currently for the vehicle domain. However, you might want to create your own domain-specific demons. To do this, you can use GPT to create demons based on your requirements. We prompted GPT with [instructions](FactScoreLite/data/demons_generation_prompt.txt) on how to generate the demons required for the vehicle domain. However, you can alter it based on your needs.
 
@@ -117,7 +117,7 @@ Once you have your own demons.json file, you can include it in the program by se
 ```python
 import FactScoreLite
 
-FactScoreLite.configs.demons_path = "/path/to/your/json/file"
+FactScoreLite.configs.atomic_facts_demons_path = "/path/to/your/json/file"
 
 # rest of your code
 ```
@@ -149,21 +149,54 @@ target_sentence
 Independent Facts:
 ```
 
-### Facts Scoring Prompt
+### Facts Scoring Prompt Engineering
 
-The prompt used for scoring facts:
+We also use [example demonstrations](/FactScoreLite/data/fact_scorer_demons.json) in the scoring prompt. The file contains one positive example (which must be the first entry) followed by multiple negative examples. Each prompt includes the positive example plus one randomly selected negative example, which helps GPT score more accurately. The file also contains a reason for each label; these reasons are not yet used in prompt generation, but they could be added in the future to further improve scoring accuracy.
+
+You can also supply your own domain-specific examples by pointing the config at your own file:
+
+```python
+import FactScoreLite
+
+FactScoreLite.configs.fact_scorer_demons_path = "/path/to/your/json/file"
+
+# rest of your code
+```
+
+### Fact Scoring Prompt
+
+The following prompt template is used to instruct GPT for scoring facts:
 
 ```
 # fact_scorer.py
 
-Answer the question based on the given context.
+Evaluate the truthfulness of the statement based solely on the provided context and provide the reason for your decision.
+
+
+Instruction:
+Only consider the statement true if it can be directly verified by the information in the context. If the information in the statement cannot be found in the context or differs from it, label it as false.
+
 
 Context:
-knowledge_source
+knw 1
+Statement:
+fact 1 True or False?
+Output:
+True
 
-Input:
-fact True or False?
+Context:
+knw 2
+Statement:
+fact 2 True or False?
 Output:
+False
+
+Context:
+target_knowledge_source
+Statement:
+target_fact True or False?
+Output:
+
 ```
 
 ## Running the Tests
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = FactScoreLite
-version = 1.0.1
+version = 1.1.0
 author = armingh2000
 author_email = 
 license = MIT
diff --git a/tests/test_atomic_facts.py b/tests/test_atomic_facts.py
@@ -24,12 +24,10 @@ def generator(monkeypatch):
 
 
 # Sample data to be returned by the mock
-mock_demons_data = {
-    "demons": [
-        {"Sentence": "Example sentence 1.", "Independent Facts": ["Fact 1", "Fact 2"]},
-        {"Sentence": "Example sentence 2.", "Independent Facts": ["Fact 3", "Fact 4"]},
-    ]
-}
+mock_demons_data = [
+    {"Sentence": "Example sentence 1.", "Independent Facts": ["Fact 1", "Fact 2"]},
+    {"Sentence": "Example sentence 2.", "Independent Facts": ["Fact 3", "Fact 4"]},
+]
 
 
 # Test for the load_demons method
@@ -39,7 +37,9 @@ def test_load_demons(generator):
     # Use patch to mock open function within the context of your test
     with patch("builtins.open", mock_open(read_data=mock_json_str)):
         # Also mock configs.demons_path to avoid dependency on external config files
-        with patch.object(configs, "demons_path", "fake/path/to/demons.json"):
+        with patch.object(
+            configs, "atomic_facts_demons_path", "fake/path/to/atomic_facts_demons.json"
+        ):
             demons = generator.load_demons()
             # Assert that the returned data matches your mock data
             assert (
diff --git a/tests/test_fact_scorer.py b/tests/test_fact_scorer.py
@@ -1,6 +1,8 @@
 import pytest
-from unittest.mock import patch
+from unittest.mock import mock_open, patch
 from FactScoreLite.fact_scorer import FactScorer
+import json
+from FactScoreLite import configs
 
 
 @pytest.fixture
@@ -82,3 +84,51 @@ def test_complex_knowledge_source_and_atomic_facts(fact_scorer, mock_openai_agen
     assert all(
         isinstance(decision, dict) for decision in result
     ), "Each item in the returned list should be a dictionary."
+
+
+# Sample demons returned by the mocked file and assigned to
+# fact_scorer.demons in the tests below: one supported example followed by
+# one unsupported example ("reason" is omitted)
+mock_demons_data = [
+    {
+        "knowledge_source": "knw 1",
+        "fact": "fact 1",
+        "is_supported": True,
+    },
+    {
+        "knowledge_source": "knw 2",
+        "fact": "fact 2",
+        "is_supported": False,
+    },
+]
+
+
+# Test for the load_demons method
+def test_load_demons(fact_scorer):
+    """load_demons returns the parsed content of the demons file."""
+    # Serialize the sample data so the mocked file yields valid JSON
+    mock_json_str = json.dumps(mock_demons_data)
+    # Mock open so that no real file is read
+    with patch("builtins.open", mock_open(read_data=mock_json_str)):
+        # Patch the path FactScorer.load_demons actually reads
+        # (configs.fact_scorer_demons_path) to avoid depending on packaged data
+        with patch.object(
+            configs, "fact_scorer_demons_path", "fake/path/to/fact_scorer_demons.json"
+        ):
+            demons = fact_scorer.load_demons()
+            # Assert that the returned data matches the mock data
+            assert (
+                demons == mock_demons_data
+            ), "The method should load and return the demons correctly."
+
+
+def test_get_instructions_true_false_demons(fact_scorer):
+    # The mock data holds exactly one supported and one unsupported demon, so
+    # the random pick among the negative examples has a single candidate and
+    # the rendered instructions are deterministic.
+    fact_scorer.demons = mock_demons_data
+
+    header = "".join(
+        [
+            "Evaluate the truthfulness of the statement based solely on the provided context and provide the reason for your decision.\n\n",
+            "Instruction:\nOnly consider the statement true if it can be directly verified by the information in the context. If the information in the statement cannot be found in the context or differs from it, label it as false.\n\n",
+        ]
+    )
+    supported_example = "".join(
+        [
+            "Context:\nknw 1\n",
+            "Statement:\nfact 1 True or False?\n",
+            "Output:\nTrue\n\n",
+        ]
+    )
+    unsupported_example = "".join(
+        [
+            "Context:\nknw 2\n",
+            "Statement:\nfact 2 True or False?\n",
+            "Output:\nFalse\n\n",
+        ]
+    )
+
+    rendered = fact_scorer.get_instructions()
+    assert rendered == header + supported_example + unsupported_example

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-`
`2`	`1`	`[`
`3`	`2`	`{`
`4`	`3`	`"Sentence": "The Turbo V6 engine boasts an impressive horsepower of 450 and a peak torque of 510 lb-ft, achieved between 2,500 and 5,500 rpm, equipped with a 10-speed automatic transmission and a dual-exhaust system, enhancing both performance and sound.",`