|
46 | 46 | from pathlib import Path
|
47 | 47 | from shutil import rmtree
|
48 | 48 | from types import MappingProxyType
|
| 49 | +from datetime import datetime |
49 | 50 |
|
50 | 51 | from sqlglot import Dialect, exp
|
51 | 52 | from sqlglot.helper import first
|
|
125 | 126 | format_tz_datetime,
|
126 | 127 | now_timestamp,
|
127 | 128 | now,
|
| 129 | + to_datetime, |
| 130 | + make_exclusive, |
128 | 131 | )
|
129 | 132 | from sqlmesh.utils.errors import (
|
130 | 133 | CircuitBreakerError,
|
@@ -1222,6 +1225,7 @@ def plan(
|
1222 | 1225 | diff_rendered: t.Optional[bool] = None,
|
1223 | 1226 | skip_linter: t.Optional[bool] = None,
|
1224 | 1227 | explain: t.Optional[bool] = None,
|
| 1228 | + min_intervals: t.Optional[int] = None, |
1225 | 1229 | ) -> Plan:
|
1226 | 1230 | """Interactively creates a plan.
|
1227 | 1231 |
|
@@ -1268,6 +1272,8 @@ def plan(
|
1268 | 1272 | diff_rendered: Whether the diff should compare raw vs rendered models
|
1269 | 1273 | skip_linter: Linter runs by default so this will skip it if enabled
|
1270 | 1274 | explain: Whether to explain the plan instead of applying it.
|
| 1275 | + min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered |
| 1276 | + on every model when checking for missing intervals |
1271 | 1277 |
|
1272 | 1278 | Returns:
|
1273 | 1279 | The populated Plan object.
|
@@ -1296,6 +1302,7 @@ def plan(
|
1296 | 1302 | diff_rendered=diff_rendered,
|
1297 | 1303 | skip_linter=skip_linter,
|
1298 | 1304 | explain=explain,
|
| 1305 | + min_intervals=min_intervals, |
1299 | 1306 | )
|
1300 | 1307 |
|
1301 | 1308 | plan = plan_builder.build()
|
@@ -1345,6 +1352,7 @@ def plan_builder(
|
1345 | 1352 | diff_rendered: t.Optional[bool] = None,
|
1346 | 1353 | skip_linter: t.Optional[bool] = None,
|
1347 | 1354 | explain: t.Optional[bool] = None,
|
| 1355 | + min_intervals: t.Optional[int] = None, |
1348 | 1356 | ) -> PlanBuilder:
|
1349 | 1357 | """Creates a plan builder.
|
1350 | 1358 |
|
@@ -1381,6 +1389,8 @@ def plan_builder(
|
1381 | 1389 | enable_preview: Indicates whether to enable preview for forward-only models in development environments.
|
1382 | 1390 | run: Whether to run latest intervals as part of the plan application.
|
1383 | 1391 | diff_rendered: Whether the diff should compare raw vs rendered models
|
| 1392 | + min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered |
| 1393 | + on every model when checking for missing intervals |
1384 | 1394 |
|
1385 | 1395 | Returns:
|
1386 | 1396 | The plan builder.
|
@@ -1408,6 +1418,7 @@ def plan_builder(
|
1408 | 1418 | "run": run,
|
1409 | 1419 | "diff_rendered": diff_rendered,
|
1410 | 1420 | "skip_linter": skip_linter,
|
| 1421 | + "min_intervals": min_intervals, |
1411 | 1422 | }
|
1412 | 1423 | user_provided_flags: t.Dict[str, UserProvidedFlags] = {
|
1413 | 1424 | k: v for k, v in kwargs.items() if v is not None
|
@@ -1530,6 +1541,16 @@ def plan_builder(
|
1530 | 1541 | # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model.
|
1531 | 1542 | self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values())
|
1532 | 1543 |
|
| 1544 | + start_override_per_model = self._calculate_start_override_per_model( |
| 1545 | + min_intervals, |
| 1546 | + start or default_start, |
| 1547 | + end or default_end, |
| 1548 | + execution_time or now(), |
| 1549 | + backfill_models, |
| 1550 | + snapshots, |
| 1551 | + max_interval_end_per_model, |
| 1552 | + ) |
| 1553 | + |
1533 | 1554 | return self.PLAN_BUILDER_TYPE(
|
1534 | 1555 | context_diff=context_diff,
|
1535 | 1556 | start=start,
|
@@ -1560,7 +1581,8 @@ def plan_builder(
|
1560 | 1581 | ),
|
1561 | 1582 | end_bounded=not run,
|
1562 | 1583 | ensure_finalized_snapshots=self.config.plan.use_finalized_state,
|
1563 |
| - interval_end_per_model=max_interval_end_per_model, |
| 1584 | + start_override_per_model=start_override_per_model, |
| 1585 | + end_override_per_model=max_interval_end_per_model, |
1564 | 1586 | console=self.console,
|
1565 | 1587 | user_provided_flags=user_provided_flags,
|
1566 | 1588 | explain=explain or False,
|
@@ -2850,15 +2872,15 @@ def _plan_preview_enabled(self) -> bool:
|
2850 | 2872 | def _get_plan_default_start_end(
|
2851 | 2873 | self,
|
2852 | 2874 | snapshots: t.Dict[str, Snapshot],
|
2853 |
| - max_interval_end_per_model: t.Dict[str, int], |
| 2875 | + max_interval_end_per_model: t.Dict[str, datetime], |
2854 | 2876 | backfill_models: t.Optional[t.Set[str]],
|
2855 | 2877 | modified_model_names: t.Set[str],
|
2856 | 2878 | execution_time: t.Optional[TimeLike] = None,
|
2857 | 2879 | ) -> t.Tuple[t.Optional[int], t.Optional[int]]:
|
2858 | 2880 | if not max_interval_end_per_model:
|
2859 | 2881 | return None, None
|
2860 | 2882 |
|
2861 |
| - default_end = max(max_interval_end_per_model.values()) |
| 2883 | + default_end = to_timestamp(max(max_interval_end_per_model.values())) |
2862 | 2884 | default_start: t.Optional[int] = None
|
2863 | 2885 | # Infer the default start by finding the smallest interval start that corresponds to the default end.
|
2864 | 2886 | for model_name in backfill_models or modified_model_names or max_interval_end_per_model:
|
@@ -2887,19 +2909,101 @@ def _get_plan_default_start_end(
|
2887 | 2909 |
|
2888 | 2910 | return default_start, default_end
|
2889 | 2911 |
|
| 2912 | + def _calculate_start_override_per_model( |
| 2913 | + self, |
| 2914 | + min_intervals: t.Optional[int], |
| 2915 | + plan_start: t.Optional[TimeLike], |
| 2916 | + plan_end: t.Optional[TimeLike], |
| 2917 | + plan_execution_time: TimeLike, |
| 2918 | + backfill_model_fqns: t.Optional[t.Set[str]], |
| 2919 | + snapshots_by_model_fqn: t.Dict[str, Snapshot], |
| 2920 | + end_override_per_model: t.Optional[t.Dict[str, datetime]], |
| 2921 | + ) -> t.Dict[str, datetime]: |
| 2922 | + if not min_intervals or not backfill_model_fqns or not plan_start: |
| 2923 | + # If there are no models to backfill, there are no intervals to consider for backfill, so we dont need to consider a minimum number |
| 2924 | + # If the plan doesnt have a start date, all intervals are considered already so we dont need to consider a minimum number |
| 2925 | + # If we dont have a minimum number of intervals to consider, then we dont need to adjust the start date on a per-model basis |
| 2926 | + return {} |
| 2927 | + |
| 2928 | + start_overrides: t.Dict[str, datetime] = {} |
| 2929 | + end_override_per_model = end_override_per_model or {} |
| 2930 | + |
| 2931 | + plan_execution_time_dt = to_datetime(plan_execution_time) |
| 2932 | + plan_start_dt = to_datetime(plan_start, relative_base=plan_execution_time_dt) |
| 2933 | + plan_end_dt = to_datetime( |
| 2934 | + plan_end or plan_execution_time_dt, relative_base=plan_execution_time_dt |
| 2935 | + ) |
| 2936 | + |
| 2937 | + # we need to take the DAG into account so that parent models can be expanded to cover at least as much as their children |
| 2938 | + # for example, A(hourly) <- B(daily) |
| 2939 | + # if min_intervals=1, A would have 1 hour and B would have 1 day |
| 2940 | + # but B depends on A so in order for B to have 1 valid day, A needs to be expanded to 24 hours |
| 2941 | + backfill_dag: DAG[str] = DAG() |
| 2942 | + for fqn in backfill_model_fqns: |
| 2943 | + backfill_dag.add( |
| 2944 | + fqn, |
| 2945 | + [ |
| 2946 | + p.name |
| 2947 | + for p in snapshots_by_model_fqn[fqn].parents |
| 2948 | + if p.name in backfill_model_fqns |
| 2949 | + ], |
| 2950 | + ) |
| 2951 | + |
| 2952 | + # start from the leaf nodes and work back towards the root because the min_start at the root node is determined by the calculated starts in the leaf nodes |
| 2953 | + reversed_dag = backfill_dag.reversed |
| 2954 | + graph = reversed_dag.graph |
| 2955 | + |
| 2956 | + for model_fqn in reversed_dag: |
| 2957 | + # Get the earliest start from all immediate children of this snapshot |
| 2958 | + # this works because topological ordering guarantees that they've already been visited |
| 2959 | + # and we always set a start override |
| 2960 | + min_child_start = min( |
| 2961 | + [start_overrides[immediate_child_fqn] for immediate_child_fqn in graph[model_fqn]], |
| 2962 | + default=plan_start_dt, |
| 2963 | + ) |
| 2964 | + |
| 2965 | + snapshot = snapshots_by_model_fqn.get(model_fqn) |
| 2966 | + |
| 2967 | + if not snapshot: |
| 2968 | + continue |
| 2969 | + |
| 2970 | + starting_point = end_override_per_model.get(model_fqn, plan_end_dt) |
| 2971 | + if node_end := snapshot.node.end: |
| 2972 | + # if we dont do this, if the node end is a *date* (as opposed to a timestamp) |
| 2973 | + # we end up incorrectly winding back an extra day |
| 2974 | + node_end_dt = make_exclusive(node_end) |
| 2975 | + |
| 2976 | + if node_end_dt < plan_end_dt: |
| 2977 | + # if the model has an end date that has already elapsed, use that as a starting point for calculating min_intervals |
| 2978 | + # instead of the plan end. If we use the plan end, we will return intervals in the future which are invalid |
| 2979 | + starting_point = node_end_dt |
| 2980 | + |
| 2981 | + snapshot_start = snapshot.node.cron_floor(starting_point) |
| 2982 | + |
| 2983 | + for _ in range(min_intervals): |
| 2984 | + # wind back the starting point by :min_intervals intervals to arrive at the minimum snapshot start date |
| 2985 | + snapshot_start = snapshot.node.cron_prev(snapshot_start) |
| 2986 | + |
| 2987 | + start_overrides[model_fqn] = min(min_child_start, snapshot_start) |
| 2988 | + |
| 2989 | + return start_overrides |
| 2990 | + |
2890 | 2991 | def _get_max_interval_end_per_model(
|
2891 | 2992 | self, snapshots: t.Dict[str, Snapshot], backfill_models: t.Optional[t.Set[str]]
|
2892 |
| - ) -> t.Dict[str, int]: |
| 2993 | + ) -> t.Dict[str, datetime]: |
2893 | 2994 | models_for_interval_end = (
|
2894 | 2995 | self._get_models_for_interval_end(snapshots, backfill_models)
|
2895 | 2996 | if backfill_models is not None
|
2896 | 2997 | else None
|
2897 | 2998 | )
|
2898 |
| - return self.state_sync.max_interval_end_per_model( |
2899 |
| - c.PROD, |
2900 |
| - models=models_for_interval_end, |
2901 |
| - ensure_finalized_snapshots=self.config.plan.use_finalized_state, |
2902 |
| - ) |
| 2999 | + return { |
| 3000 | + model_fqn: to_datetime(ts) |
| 3001 | + for model_fqn, ts in self.state_sync.max_interval_end_per_model( |
| 3002 | + c.PROD, |
| 3003 | + models=models_for_interval_end, |
| 3004 | + ensure_finalized_snapshots=self.config.plan.use_finalized_state, |
| 3005 | + ).items() |
| 3006 | + } |
2903 | 3007 |
|
2904 | 3008 | @staticmethod
|
2905 | 3009 | def _get_models_for_interval_end(
|
|
0 commit comments