|
2 | 2 | import os
|
3 | 3 | import time
|
4 | 4 | from pathlib import Path
|
| 5 | +from typing import Any |
5 | 6 |
|
6 | 7 | import pytest
|
7 | 8 |
|
|
10 | 11 | from onyx.connectors.salesforce.connector import SalesforceConnector
|
11 | 12 |
|
12 | 13 |
|
13 |
| -def load_test_data(file_name: str = "test_salesforce_data.json") -> dict[str, dict]: |
| 14 | +def extract_key_value_pairs_to_set( |
| 15 | + list_of_unparsed_key_value_strings: list[str], |
| 16 | +) -> set[str]: |
| 17 | + set_of_key_value_pairs = set() |
| 18 | + for string_key_value_pairs in list_of_unparsed_key_value_strings: |
| 19 | + list_of_parsed_key_values = string_key_value_pairs.split("\n") |
| 20 | + for key_value_pair in list_of_parsed_key_values: |
| 21 | + set_of_key_value_pairs.add(key_value_pair.strip()) |
| 22 | + return set_of_key_value_pairs |
| 23 | + |
| 24 | + |
| 25 | +def load_test_data( |
| 26 | + file_name: str = "test_salesforce_data.json", |
| 27 | +) -> dict[str, list[str] | dict[str, Any]]: |
14 | 28 | current_dir = Path(__file__).parent
|
15 | 29 | with open(current_dir / file_name, "r") as f:
|
16 | 30 | return json.load(f)
|
@@ -51,17 +65,34 @@ def test_salesforce_connector_basic(salesforce_connector: SalesforceConnector) -
|
51 | 65 | assert len(all_docs) == 6
|
52 | 66 | assert target_test_doc is not None
|
53 | 67 |
|
54 |
| - # The order of the sections and of the content of the text fields is not deterministic, |
55 |
| - # so we check the links are present and the text isn't empty |
| 68 | + # Set of received links |
56 | 69 | received_links: set[str] = set()
|
| 70 | + # List of received text fields, which contain key-value pairs separated by newlines |
| 71 | + recieved_text: list[str] = [] |
| 72 | + |
| 73 | + # Iterate over the sections of the target test doc to extract the links and text |
57 | 74 | for section in target_test_doc.sections:
|
58 | 75 | assert section.link
|
59 | 76 | assert section.text
|
60 | 77 | received_links.add(section.link)
|
| 78 | + recieved_text.append(section.text) |
61 | 79 |
|
| 80 | + # Check that the received links match the expected links from the test data json |
62 | 81 | expected_links = set(test_data["expected_links"])
|
63 | 82 | assert received_links == expected_links
|
64 | 83 |
|
| 84 | + # Check that the received key-value pairs from the text fields match the expected key-value pairs from the test data json |
| 85 | + expected_text = test_data["expected_text"] |
| 86 | + if not isinstance(expected_text, list): |
| 87 | + raise ValueError("Expected text is not a list") |
| 88 | + unparsed_expected_key_value_pairs: list[str] = expected_text |
| 89 | + received_key_value_pairs = extract_key_value_pairs_to_set(recieved_text) |
| 90 | + expected_key_value_pairs = extract_key_value_pairs_to_set( |
| 91 | + unparsed_expected_key_value_pairs |
| 92 | + ) |
| 93 | + assert received_key_value_pairs == expected_key_value_pairs |
| 94 | + |
| 95 | + # Check that the remaining fields match the expected fields from the test data json |
65 | 96 | assert target_test_doc.source == DocumentSource.SALESFORCE
|
66 | 97 | assert target_test_doc.semantic_identifier == test_data["semantic_identifier"]
|
67 | 98 | assert target_test_doc.metadata == test_data["metadata"]
|
|
0 commit comments