Fix: Avoid concurrent dialect patching in model testing (#4266)

VaggelisD · web-flow · commit f3dd3d67605d · 2025-05-01T19:01:40.000+03:00
diff --git a/sqlmesh/core/test/definition.py b/sqlmesh/core/test/definition.py
@@ -1,10 +1,11 @@
 from __future__ import annotations
 
 import datetime
+import threading
 import typing as t
 import unittest
 from collections import Counter
-from contextlib import AbstractContextManager, nullcontext
+from contextlib import nullcontext, contextmanager, AbstractContextManager
 from itertools import chain
 from pathlib import Path
 from unittest.mock import patch
@@ -46,6 +47,8 @@
 class ModelTest(unittest.TestCase):
     __test__ = False
 
+    CONCURRENT_RENDER_LOCK = threading.Lock()
+
     def __init__(
         self,
         body: t.Dict[str, t.Any],
@@ -57,6 +60,7 @@ def __init__(
         path: Path | None = None,
         preserve_fixtures: bool = False,
         default_catalog: str | None = None,
+        concurrency: bool = False,
     ) -> None:
         """ModelTest encapsulates a unit test for a model.
 
@@ -79,6 +83,7 @@ def __init__(
         self.preserve_fixtures = preserve_fixtures
         self.default_catalog = default_catalog
         self.dialect = dialect
+        self.concurrency = concurrency
 
         self._fixture_table_cache: t.Dict[str, exp.Table] = {}
         self._normalized_column_name_cache: t.Dict[str, str] = {}
@@ -310,6 +315,7 @@ def create_test(
         path: Path | None,
         preserve_fixtures: bool = False,
         default_catalog: str | None = None,
+        concurrency: bool = False,
     ) -> t.Optional[ModelTest]:
         """Create a SqlModelTest or a PythonModelTest.
 
@@ -353,6 +359,7 @@ def create_test(
             path,
             preserve_fixtures,
             default_catalog,
+            concurrency,
         )
 
     def __str__(self) -> str:
@@ -512,10 +519,34 @@ def _normalize_column_name(self, name: str) -> str:
 
         return normalized_name
 
-    def _execute(self, query: exp.Query) -> pd.DataFrame:
+    @contextmanager
+    def _concurrent_render_context(self) -> t.Iterator[None]:
+        """
+        Context manager that serializes rendering across test threads when `concurrency` is set.
+        This is needed because, whenever `execution_time` is set, the context also has to:
+        - Freeze time through `time_machine` (not thread safe)
+        - Globally patch the SQLGlot dialect so that any date/time nodes are evaluated at the `execution_time` during generation
+        """
+        import time_machine
+
+        lock_ctx: AbstractContextManager = (
+            self.CONCURRENT_RENDER_LOCK if self.concurrency else nullcontext()
+        )
+        time_ctx: AbstractContextManager = nullcontext()
+        dialect_patch_ctx: AbstractContextManager = nullcontext()
+
+        if self._execution_time:
+            time_ctx = time_machine.travel(self._execution_time, tick=False)
+            dialect_patch_ctx = patch.dict(
+                self._test_adapter_dialect.generator_class.TRANSFORMS, self._transforms
+            )
+
+        with lock_ctx, time_ctx, dialect_patch_ctx:
+            yield
+
+    def _execute(self, query: exp.Query | str) -> pd.DataFrame:
         """Executes the given query using the testing engine adapter and returns a DataFrame."""
-        with patch.dict(self._test_adapter_dialect.generator_class.TRANSFORMS, self._transforms):
-            return self.engine_adapter.fetchdf(query)
+        return self.engine_adapter.fetchdf(query)
 
     def _create_df(
         self,
@@ -570,13 +601,25 @@ def test_ctes(self, ctes: t.Dict[str, exp.Expression], recursive: bool = False)
                 for alias, cte in ctes.items():
                     cte_query = cte_query.with_(alias, cte.this, recursive=recursive)
 
-                actual = self._execute(cte_query)
+                with self._concurrent_render_context():
+                    # As with the model's query, only SQL generation for the CTE happens inside the
+                    # locked context; execution (fetchdf) then runs concurrently across threads
+                    sql = cte_query.sql(
+                        self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql
+                    )
+
+                actual = self._execute(sql)
                 expected = self._create_df(values, columns=cte_query.named_selects, partial=partial)
 
                 self.assert_equal(expected, actual, sort=sort, partial=partial)
 
     def runTest(self) -> None:
-        query = self._render_model_query()
+        with self._concurrent_render_context():
+            # Render the model's query and generate its SQL inside the locked context, so that
+            # only rendering is serialized and execution (fetchdf) can overlap across threads
+            query = self._render_model_query()
+            sql = query.sql(self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql)
+
         with_clause = query.args.get("with")
 
         if with_clause:
@@ -593,7 +636,7 @@ def runTest(self) -> None:
             partial = values.get("partial")
             sort = query.args.get("order") is None
 
-            actual = self._execute(query)
+            actual = self._execute(sql)
             expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)
 
             self.assert_equal(expected, actual, sort=sort, partial=partial)
@@ -626,6 +669,7 @@ def __init__(
         path: Path | None = None,
         preserve_fixtures: bool = False,
         default_catalog: str | None = None,
+        concurrency: bool = False,
     ) -> None:
         """PythonModelTest encapsulates a unit test for a Python model.
 
@@ -651,6 +695,7 @@ def __init__(
             path,
             preserve_fixtures,
             default_catalog,
+            concurrency,
         )
 
         self.context = TestExecutionContext(
@@ -674,22 +719,13 @@ def runTest(self) -> None:
 
     def _execute_model(self) -> pd.DataFrame:
         """Executes the python model and returns a DataFrame."""
-        if self._execution_time:
-            import time_machine
-
-            time_ctx: AbstractContextManager = time_machine.travel(self._execution_time, tick=False)
-        else:
-            time_ctx = nullcontext()
+        with self._concurrent_render_context():
+            variables = self.body.get("vars", {}).copy()
+            time_kwargs = {key: variables.pop(key) for key in TIME_KWARG_KEYS if key in variables}
+            df = next(self.model.render(context=self.context, **time_kwargs, **variables))
 
-        with patch.dict(self._test_adapter_dialect.generator_class.TRANSFORMS, self._transforms):
-            with time_ctx:
-                variables = self.body.get("vars", {}).copy()
-                time_kwargs = {
-                    key: variables.pop(key) for key in TIME_KWARG_KEYS if key in variables
-                }
-                df = next(self.model.render(context=self.context, **time_kwargs, **variables))
-                assert not isinstance(df, exp.Expression)
-                return df if isinstance(df, pd.DataFrame) else df.toPandas()
+        assert not isinstance(df, exp.Expression)
+        return df if isinstance(df, pd.DataFrame) else df.toPandas()
 
 
 def generate_test(
diff --git a/sqlmesh/core/test/result.py b/sqlmesh/core/test/result.py
@@ -100,7 +100,8 @@ def log_test_report(self, test_duration: float) -> None:
         for test_case, failure in failures:
             stream.writeln(unittest.TextTestResult.separator1)
             stream.writeln(f"FAIL: {test_case}")
-            stream.writeln(f"{test_case.shortDescription()}")
+            if test_description := test_case.shortDescription():
+                stream.writeln(test_description)
             stream.writeln(unittest.TextTestResult.separator2)
             stream.writeln(failure)
 
diff --git a/sqlmesh/core/test/runner.py b/sqlmesh/core/test/runner.py
@@ -120,6 +120,9 @@ def run_tests(
         default_catalog_dialect=default_catalog_dialect,
     )
 
+    # Cap the number of workers so it never exceeds the number of tests
+    num_workers = min(len(model_test_metadata) or 1, default_test_connection.concurrent_tasks)
+
     def _run_single_test(
         metadata: ModelTestMetadata, engine_adapter: EngineAdapter
     ) -> t.Optional[ModelTextTestResult]:
@@ -132,6 +135,7 @@ def _run_single_test(
             path=metadata.path,
             default_catalog=default_catalog,
             preserve_fixtures=preserve_fixtures,
+            concurrency=num_workers > 1,
         )
 
         if not test:
@@ -159,9 +163,6 @@ def _run_single_test(
 
     test_results = []
 
-    # Ensure workers are not greater than the number of tests
-    num_workers = min(len(model_test_metadata) or 1, default_test_connection.concurrent_tasks)
-
     start_time = time.perf_counter()
     try:
         with ThreadPoolExecutor(max_workers=num_workers) as pool:
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
@@ -2370,3 +2370,103 @@ def test_number_of_tests_found(tmp_path: Path) -> None:
     # Case 3: The "new_test.yaml::test_example_full_model2" should amount to a single subtest
     results = context.test(tests=[f"{test_file}::test_example_full_model2"])
     assert len(results.successes) == 1
+
+
+def test_freeze_time_concurrent(tmp_path: Path) -> None:
+    tests_dir = tmp_path / "tests"
+    tests_dir.mkdir()
+
+    macros_dir = tmp_path / "macros"
+    macros_dir.mkdir()
+
+    macro_file = macros_dir / "test_datetime_now.py"
+    macro_file.write_text(
+        """
+from sqlglot import exp
+import datetime
+from sqlmesh.core.macros import macro
+
+@macro()
+def test_datetime_now(evaluator):
+  return exp.cast(exp.Literal.string(datetime.datetime.now(tz=datetime.timezone.utc)), exp.DataType.Type.DATE)
+  
+@macro()
+def test_sqlglot_expr(evaluator):
+  return exp.CurrentDate().sql(evaluator.dialect)
+    """
+    )
+
+    models_dir = tmp_path / "models"
+    models_dir.mkdir()
+    sql_model1 = models_dir / "sql_model1.sql"
+    sql_model1.write_text(
+        """
+        MODEL(NAME sql_model1);
+        SELECT @test_datetime_now() AS col_exec_ds_time, @test_sqlglot_expr() AS col_current_date;
+        """
+    )
+
+    for model_name in ["sql_model1", "sql_model2", "py_model"]:
+        for i in range(5):
+            test_2019 = tmp_path / "tests" / f"test_2019_{model_name}_{i}.yaml"
+            test_2019.write_text(
+                f"""
+    test_2019_{model_name}_{i}:
+      model: {model_name}
+      vars:
+        execution_time: '2019-12-01'
+      outputs:
+        query:
+          rows:
+            - col_exec_ds_time: '2019-12-01'
+              col_current_date: '2019-12-01'
+              """
+            )
+
+            test_2025 = tmp_path / "tests" / f"test_2025_{model_name}_{i}.yaml"
+            test_2025.write_text(
+                f"""
+    test_2025_{model_name}_{i}:
+      model: {model_name}
+      vars:
+        execution_time: '2025-12-01'
+      outputs:
+        query:
+          rows:
+            - col_exec_ds_time: '2025-12-01'
+              col_current_date: '2025-12-01'
+              """
+            )
+
+    ctx = Context(
+        paths=tmp_path,
+        config=Config(default_test_connection=DuckDBConnectionConfig(concurrent_tasks=8)),
+    )
+
+    @model(
+        "py_model",
+        columns={"col_exec_ds_time": "timestamp_ntz", "col_current_date": "timestamp_ntz"},
+    )
+    def execute(context, start, end, execution_time, **kwargs):
+        datetime_now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
+
+        context.engine_adapter.execute(exp.select("CURRENT_DATE()"))
+        current_date = context.engine_adapter.cursor.fetchone()[0]
+
+        return pd.DataFrame(
+            [{"col_exec_ds_time": datetime_now_utc, "col_current_date": current_date}]
+        )
+
+    python_model = model.get_registry()["py_model"].model(module_path=Path("."), path=Path("."))
+    ctx.upsert_model(python_model)
+
+    ctx.upsert_model(
+        _create_model(
+            meta="MODEL(NAME sql_model2)",
+            query="SELECT @execution_ds::timestamp_ntz AS col_exec_ds_time, current_date()::date AS col_current_date",
+            default_catalog=ctx.default_catalog,
+        )
+    )
+
+    results = ctx.test()
+    assert len(results.successes) == 30