From 282d70c59934b1e115c24d362bd131b1fbc64439 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Sat, 19 Apr 2025 18:28:46 -0400 Subject: [PATCH 01/20] Add preliminary support for ISO-8601 timestamps (no timezones at the moment) --- src/borg/helpers/time.py | 97 ++++++++++++++++++++++++++++++++++++++++ src/borg/manifest.py | 9 +++- 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 5e2e85ee6d..4907ef0f6f 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -185,3 +185,100 @@ def isoformat(self): def archive_ts_now(): """return tz-aware datetime obj for current time for usage as archive timestamp""" return datetime.now(timezone.utc) # utc time / utc timezone + +class DatePatternError(ValueError): + """Raised when a date: archive pattern cannot be parsed.""" + + +def local(dt: datetime) -> datetime: + """Attach the system local timezone to naive dt without converting.""" + if dt.tzinfo is None: + dt = dt.replace(tzinfo=datetime.now().astimezone().tzinfo) + return dt + + +def exact_predicate(dt: datetime): + """Return predicate matching archives whose ts equals dt (UTC).""" + dt_utc = local(dt).astimezone(timezone.utc) + return lambda ts: ts == dt_utc + + +def interval_predicate(start: datetime, end: datetime): + start_utc = local(start).astimezone(timezone.utc) + end_utc = local(end).astimezone(timezone.utc) + return lambda ts: start_utc <= ts < end_utc + + +def compile_date_pattern(expr: str): + """ + Turn a date: expression into a predicate ts->bool. + Supports: + 1) Full ISO‑8601 timestamps with minute (and optional seconds/fraction) + 2) Hour-only: YYYY‑MM‑DDTHH -> interval of 1 hour + 3) Minute-only: YYYY‑MM‑DDTHH:MM -> interval of 1 minute + 4) YYYY, YYYY‑MM, YYYY‑MM‑DD -> day/month/year intervals + 5) Unix epoch (@123456789) -> exact match + Naive inputs are assumed local, then converted into UTC. + TODO: verify working for fractional seconds; add timezone support. 
+ """ + expr = expr.strip() + + # 1) Full timestamp (with fraction) + full_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+") + if full_re.match(expr): + dt = parse_local_timestamp(expr, tzinfo=timezone.utc) + return exact_predicate(dt) # no interval, since we have a fractional timestamp + + # 2) Seconds-only + second_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$") + if second_re.match(expr): + start = parse_local_timestamp(expr, tzinfo=timezone.utc) + return interval_predicate(start, start + timedelta(seconds=1)) + + # 3) Minute-only + minute_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$") + if minute_re.match(expr): + start = parse_local_timestamp(expr + ":00", tzinfo=timezone.utc) + return interval_predicate(start, start + timedelta(minutes=1)) + + # 4) Hour-only + hour_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}$") + if hour_re.match(expr): + start = parse_local_timestamp(expr + ":00:00", tzinfo=timezone.utc) + return interval_predicate(start, start + timedelta(hours=1)) + + + # Unix epoch (@123456789) - Note: We don't support fractional seconds here, since Unix epochs are almost always whole numbers. 
+ if expr.startswith("@"): + try: + epoch = int(expr[1:]) + except ValueError: + raise DatePatternError(f"invalid epoch: {expr!r}") + start = datetime.fromtimestamp(epoch, tz=timezone.utc) + return interval_predicate(start, start + timedelta(seconds=1)) # match within the second + + # Year/Year-month/Year-month-day + parts = expr.split("-") + try: + if len(parts) == 1: # YYYY + year = int(parts[0]) + start = datetime(year, 1, 1) + end = datetime(year + 1, 1, 1) + + elif len(parts) == 2: # YYYY‑MM + year, month = map(int, parts) + start = datetime(year, month, 1) + end = offset_n_months(start, 1) + + elif len(parts) == 3: # YYYY‑MM‑DD + year, month, day = map(int, parts) + start = datetime(year, month, day) + end = start + timedelta(days=1) + + else: + raise DatePatternError(f"unrecognised date: {expr!r}") + + except ValueError as e: + raise DatePatternError(str(e)) from None + + return interval_predicate(start, end) diff --git a/src/borg/manifest.py b/src/borg/manifest.py index 608bfcaab4..d928b70c4a 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -14,7 +14,7 @@ from .constants import * # NOQA from .helpers.datastruct import StableDict from .helpers.parseformat import bin_to_hex, hex_to_bin -from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now +from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now, compile_date_pattern, DatePatternError from .helpers.errors import Error, CommandError from .item import ArchiveItem from .patterns import get_regex_from_pattern @@ -198,6 +198,13 @@ def _matching_info_tuples(self, match_patterns, match_end, *, deleted=False): elif match.startswith("host:"): wanted_host = match.removeprefix("host:") archive_infos = [x for x in archive_infos if x.host == wanted_host] + elif match.startswith("date:"): + wanted_date = match.removeprefix("date:") + try: + pred = compile_date_pattern(wanted_date) + except DatePatternError as e: + raise CommandError(f"Invalid date 
pattern: {match} ({e})") + archive_infos = [x for x in archive_infos if pred(x.ts)] else: # do a match on the name match = match.removeprefix("name:") # accept optional name: prefix regex = get_regex_from_pattern(match) From db46cdb4318d3c9a29a1f57487686c680069c7d1 Mon Sep 17 00:00:00 2001 From: Gabe De Almeida Date: Sun, 20 Apr 2025 21:49:56 -0400 Subject: [PATCH 02/20] reformatted to pass style checks --- src/borg/helpers/time.py | 20 ++++++++++---------- src/borg/manifest.py | 8 +++++++- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 4907ef0f6f..dfedc92f96 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -186,6 +186,7 @@ def archive_ts_now(): """return tz-aware datetime obj for current time for usage as archive timestamp""" return datetime.now(timezone.utc) # utc time / utc timezone + class DatePatternError(ValueError): """Raised when a date: archive pattern cannot be parsed.""" @@ -205,7 +206,7 @@ def exact_predicate(dt: datetime): def interval_predicate(start: datetime, end: datetime): start_utc = local(start).astimezone(timezone.utc) - end_utc = local(end).astimezone(timezone.utc) + end_utc = local(end).astimezone(timezone.utc) return lambda ts: start_utc <= ts < end_utc @@ -227,7 +228,7 @@ def compile_date_pattern(expr: str): full_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+") if full_re.match(expr): dt = parse_local_timestamp(expr, tzinfo=timezone.utc) - return exact_predicate(dt) # no interval, since we have a fractional timestamp + return exact_predicate(dt) # no interval, since we have a fractional timestamp # 2) Seconds-only second_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$") @@ -247,7 +248,6 @@ def compile_date_pattern(expr: str): start = parse_local_timestamp(expr + ":00:00", tzinfo=timezone.utc) return interval_predicate(start, start + timedelta(hours=1)) - # Unix epoch (@123456789) - Note: We don't support fractional seconds 
here, since Unix epochs are almost always whole numbers. if expr.startswith("@"): try: @@ -255,25 +255,25 @@ def compile_date_pattern(expr: str): except ValueError: raise DatePatternError(f"invalid epoch: {expr!r}") start = datetime.fromtimestamp(epoch, tz=timezone.utc) - return interval_predicate(start, start + timedelta(seconds=1)) # match within the second + return interval_predicate(start, start + timedelta(seconds=1)) # match within the second # Year/Year-month/Year-month-day parts = expr.split("-") try: - if len(parts) == 1: # YYYY + if len(parts) == 1: # YYYY year = int(parts[0]) start = datetime(year, 1, 1) - end = datetime(year + 1, 1, 1) + end = datetime(year + 1, 1, 1) - elif len(parts) == 2: # YYYY‑MM + elif len(parts) == 2: # YYYY‑MM year, month = map(int, parts) start = datetime(year, month, 1) - end = offset_n_months(start, 1) + end = offset_n_months(start, 1) - elif len(parts) == 3: # YYYY‑MM‑DD + elif len(parts) == 3: # YYYY‑MM‑DD year, month, day = map(int, parts) start = datetime(year, month, day) - end = start + timedelta(days=1) + end = start + timedelta(days=1) else: raise DatePatternError(f"unrecognised date: {expr!r}") diff --git a/src/borg/manifest.py b/src/borg/manifest.py index d928b70c4a..d9ee7288bf 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -14,7 +14,13 @@ from .constants import * # NOQA from .helpers.datastruct import StableDict from .helpers.parseformat import bin_to_hex, hex_to_bin -from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now, compile_date_pattern, DatePatternError +from .helpers.time import ( + parse_timestamp, + calculate_relative_offset, + archive_ts_now, + compile_date_pattern, + DatePatternError, +) from .helpers.errors import Error, CommandError from .item import ArchiveItem from .patterns import get_regex_from_pattern From 4363bf789d2a2c23eae6d3a052ec9b60f4406df3 Mon Sep 17 00:00:00 2001 From: Gabe De Almeida Date: Sun, 20 Apr 2025 22:36:55 -0400 Subject: [PATCH 
03/20] Applied recommended changes from ThomasWald, still working as intended. Working on testing now --- src/borg/helpers/time.py | 127 ++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index dfedc92f96..f4087b674f 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -212,73 +212,74 @@ def interval_predicate(start: datetime, end: datetime): def compile_date_pattern(expr: str): """ - Turn a date: expression into a predicate ts->bool. - Supports: - 1) Full ISO‑8601 timestamps with minute (and optional seconds/fraction) - 2) Hour-only: YYYY‑MM‑DDTHH -> interval of 1 hour - 3) Minute-only: YYYY‑MM‑DDTHH:MM -> interval of 1 minute - 4) YYYY, YYYY‑MM, YYYY‑MM‑DD -> day/month/year intervals - 5) Unix epoch (@123456789) -> exact match - Naive inputs are assumed local, then converted into UTC. - TODO: verify working for fractional seconds; add timezone support. + Accepts any of: + YYYY + YYYY-MM + YYYY-MM-DD + YYYY-MM-DDTHH + YYYY-MM-DDTHH:MM + YYYY-MM-DDTHH:MM:SS + and returns a predicate that is True for timestamps in that interval. 
""" expr = expr.strip() - - # 1) Full timestamp (with fraction) - full_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+") - if full_re.match(expr): - dt = parse_local_timestamp(expr, tzinfo=timezone.utc) - return exact_predicate(dt) # no interval, since we have a fractional timestamp - - # 2) Seconds-only - second_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$") - if second_re.match(expr): - start = parse_local_timestamp(expr, tzinfo=timezone.utc) + pattern = r""" + ^ + (?: + (?P<fraction>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+) # full timestamp with fraction + | (?P<second> \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}) # no fraction + | (?P<minute> \d{4}-\d{2}-\d{2}T\d{2}:\d{2}) # minute precision + | (?P<hour> \d{4}-\d{2}-\d{2}T\d{2}) # hour precision + | (?P<day> \d{4}-\d{2}-\d{2}) # day precision + | (?P<month> \d{4}-\d{2}) # month precision + | (?P<year> \d{4}) # year precision + | @(?P<epoch>\d+) # unix epoch + ) + $ + """ + m = re.match(pattern, expr, re.VERBOSE) + if not m: + raise DatePatternError(f"unrecognised date: {expr!r}") + + gd = m.groupdict() + # 1) fractional‐second exact match + if gd["fraction"]: + dt = parse_local_timestamp(gd["fraction"], tzinfo=timezone.utc) + return exact_predicate(dt) + # 2) second‐precision interval + if gd["second"]: + start = parse_local_timestamp(gd["second"], tzinfo=timezone.utc) return interval_predicate(start, start + timedelta(seconds=1)) - - # 3) Minute-only - minute_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$") - if minute_re.match(expr): - start = parse_local_timestamp(expr + ":00", tzinfo=timezone.utc) + # 3) minute‐precision interval + if gd["minute"]: + start = parse_local_timestamp(gd["minute"] + ":00", tzinfo=timezone.utc) return interval_predicate(start, start + timedelta(minutes=1)) - - # 4) Hour-only - hour_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}$") - if hour_re.match(expr): - start = parse_local_timestamp(expr + ":00:00", tzinfo=timezone.utc) + # 4) hour‐precision interval + if 
gd["hour"]: + start = parse_local_timestamp(gd["hour"] + 
":00:00", tzinfo=timezone.utc) return interval_predicate(start, start + timedelta(hours=1)) - - # Unix epoch (@123456789) - Note: We don't support fractional seconds here, since Unix epochs are almost always whole numbers. - if expr.startswith("@"): - try: - epoch = int(expr[1:]) - except ValueError: - raise DatePatternError(f"invalid epoch: {expr!r}") + # 5a) day‐precision interval + if gd["day"]: + y, mo, d = map(int, gd["day"].split("-")) + start = datetime(y, mo, d) + end = start + timedelta(days=1) + return interval_predicate(start, end) + # 5b) month‐precision interval + if gd["month"]: + y, mo = map(int, gd["month"].split("-")) + start = datetime(y, mo, 1) + end = offset_n_months(start, 1) + return interval_predicate(start, end) + # 5c) year‐precision interval + if gd["year"]: + y = int(gd["year"]) + start = datetime(y, 1, 1) + end = datetime(y + 1, 1, 1) + return interval_predicate(start, end) + # 6) unix‐epoch exact‐second match + if gd["epoch"]: + epoch = int(gd["epoch"]) start = datetime.fromtimestamp(epoch, tz=timezone.utc) - return interval_predicate(start, start + timedelta(seconds=1)) # match within the second - - # Year/Year-month/Year-month-day - parts = expr.split("-") - try: - if len(parts) == 1: # YYYY - year = int(parts[0]) - start = datetime(year, 1, 1) - end = datetime(year + 1, 1, 1) - - elif len(parts) == 2: # YYYY‑MM - year, month = map(int, parts) - start = datetime(year, month, 1) - end = offset_n_months(start, 1) - - elif len(parts) == 3: # YYYY‑MM‑DD - year, month, day = map(int, parts) - start = datetime(year, month, day) - end = start + timedelta(days=1) - - else: - raise DatePatternError(f"unrecognised date: {expr!r}") - - except ValueError as e: - raise DatePatternError(str(e)) from None + return interval_predicate(start, start + timedelta(seconds=1)) - return interval_predicate(start, end) + # should never get here + raise DatePatternError(f"unrecognised date: {expr!r}") From 69e8608ae47e0bda15093ef11099eb54f87b4271 Mon Sep 17 
00:00:00 2001 From: Charlie Herz Date: Sun, 20 Apr 2025 23:54:42 -0400 Subject: [PATCH 04/20] fix bug with local timezone attachment not correctly respecting DST --- src/borg/helpers/time.py | 8 ++++---- src/borg/testsuite/archiver/match_archives_date_test.py | 0 2 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 src/borg/testsuite/archiver/match_archives_date_test.py diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 4907ef0f6f..392da2aa9d 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -191,22 +191,22 @@ class DatePatternError(ValueError): def local(dt: datetime) -> datetime: - """Attach the system local timezone to naive dt without converting.""" + """Interpret naive dt as local time, attach timezone info from the local tz.""" if dt.tzinfo is None: - dt = dt.replace(tzinfo=datetime.now().astimezone().tzinfo) + dt = dt.astimezone() return dt def exact_predicate(dt: datetime): """Return predicate matching archives whose ts equals dt (UTC).""" dt_utc = local(dt).astimezone(timezone.utc) - return lambda ts: ts == dt_utc + return lambda ts: ts.astimezone(timezone.utc) == dt_utc def interval_predicate(start: datetime, end: datetime): start_utc = local(start).astimezone(timezone.utc) end_utc = local(end).astimezone(timezone.utc) - return lambda ts: start_utc <= ts < end_utc + return lambda ts: start_utc <= ts.astimezone(timezone.utc) < end_utc def compile_date_pattern(expr: str): diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py new file mode 100644 index 0000000000..e69de29bb2 From 5c20d8f5e68ff4d1c9e56d0e7d30f8676ffc5b4e Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Tue, 22 Apr 2025 01:39:09 -0400 Subject: [PATCH 05/20] Reformatted for consistency with code style guide --- src/borg/helpers/time.py | 24 +++++++++++++----------- src/borg/manifest.py | 8 +++++++- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git 
a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 392da2aa9d..1c3359690a 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -186,6 +186,7 @@ def archive_ts_now(): """return tz-aware datetime obj for current time for usage as archive timestamp""" return datetime.now(timezone.utc) # utc time / utc timezone + class DatePatternError(ValueError): """Raised when a date: archive pattern cannot be parsed.""" @@ -205,7 +206,7 @@ def exact_predicate(dt: datetime): def interval_predicate(start: datetime, end: datetime): start_utc = local(start).astimezone(timezone.utc) - end_utc = local(end).astimezone(timezone.utc) + end_utc = local(end).astimezone(timezone.utc) return lambda ts: start_utc <= ts.astimezone(timezone.utc) < end_utc @@ -227,7 +228,7 @@ def compile_date_pattern(expr: str): full_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+") if full_re.match(expr): dt = parse_local_timestamp(expr, tzinfo=timezone.utc) - return exact_predicate(dt) # no interval, since we have a fractional timestamp + return exact_predicate(dt) # no interval, since we have a fractional timestamp # 2) Seconds-only second_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$") @@ -247,33 +248,34 @@ def compile_date_pattern(expr: str): start = parse_local_timestamp(expr + ":00:00", tzinfo=timezone.utc) return interval_predicate(start, start + timedelta(hours=1)) - - # Unix epoch (@123456789) - Note: We don't support fractional seconds here, since Unix epochs are almost always whole numbers. + # Unix epoch (@123456789) - Note: We don't support fractional seconds here, + # since Unix epochs are almost always whole numbers. 
if expr.startswith("@"): try: epoch = int(expr[1:]) except ValueError: raise DatePatternError(f"invalid epoch: {expr!r}") start = datetime.fromtimestamp(epoch, tz=timezone.utc) - return interval_predicate(start, start + timedelta(seconds=1)) # match within the second + # match within the second + return interval_predicate(start, start + timedelta(seconds=1)) # Year/Year-month/Year-month-day parts = expr.split("-") try: - if len(parts) == 1: # YYYY + if len(parts) == 1: # YYYY year = int(parts[0]) start = datetime(year, 1, 1) - end = datetime(year + 1, 1, 1) + end = datetime(year + 1, 1, 1) - elif len(parts) == 2: # YYYY‑MM + elif len(parts) == 2: # YYYY‑MM year, month = map(int, parts) start = datetime(year, month, 1) - end = offset_n_months(start, 1) + end = offset_n_months(start, 1) - elif len(parts) == 3: # YYYY‑MM‑DD + elif len(parts) == 3: # YYYY‑MM‑DD year, month, day = map(int, parts) start = datetime(year, month, day) - end = start + timedelta(days=1) + end = start + timedelta(days=1) else: raise DatePatternError(f"unrecognised date: {expr!r}") diff --git a/src/borg/manifest.py b/src/borg/manifest.py index d928b70c4a..d9ee7288bf 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -14,7 +14,13 @@ from .constants import * # NOQA from .helpers.datastruct import StableDict from .helpers.parseformat import bin_to_hex, hex_to_bin -from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now, compile_date_pattern, DatePatternError +from .helpers.time import ( + parse_timestamp, + calculate_relative_offset, + archive_ts_now, + compile_date_pattern, + DatePatternError, +) from .helpers.errors import Error, CommandError from .item import ArchiveItem from .patterns import get_regex_from_pattern From 6f1bcd408d2db701b3f9e93058834345aa00a138 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Tue, 22 Apr 2025 01:40:51 -0400 Subject: [PATCH 06/20] Added basic test suite for ISO-8601 and Unix timestamp matching --- 
.../archiver/match_archives_date_test.py | 145 ++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index e69de29bb2..46d43eabee 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -0,0 +1,145 @@ +from datetime import datetime, timezone + +from ...constants import * # NOQA +from . import cmd, create_src_archive, generate_archiver_tests, RK_ENCRYPTION + +pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA + + +# (archive_name, timestamp) +YEAR_ARCHIVES = [ + ("archive-year-start", "2025-01-01T00:00:00"), + ("archive-year-same", "2025-12-31T23:59:59"), + ("archive-year-diff", "2024-12-31T23:59:59"), +] + +MONTH_ARCHIVES = [ + ("archive-mon-start", "2025-02-01T00:00:00"), + ("archive-mon-same", "2025-02-28T23:59:59"), + ("archive-mon-diff", "2025-01-31T23:59:59"), +] + +HOUR_ARCHIVES = [ + ("archive-hour-start", "2025-01-01T14:00:00"), + ("archive-hour-same", "2025-01-01T14:59:59"), + ("archive-hour-diff", "2025-01-01T13:59:59"), +] + +MINUTE_ARCHIVES = [ + ("archive-min-start", "2025-01-01T13:31:00"), + ("archive-min-same", "2025-01-01T13:31:59"), + ("archive-min-diff", "2025-01-01T13:30:59"), +] + +SECOND_ARCHIVES = [ + ("archive-sec-target", "2025-01-01T13:30:45"), + ("archive-sec-before", "2025-01-01T13:30:44"), + ("archive-sec-after", "2025-01-01T13:30:46"), +] + + +def test_match_archives_year(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in YEAR_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # older‐year should only hit the 2024 filter + out_2024 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2024", exit_code=0) + assert "archive-year-diff" in out_2024 + assert "archive-year-start" not in 
out_2024 + assert "archive-year-same" not in out_2024 + + # 2025 filter should hit both minimum and maximum possible days in 2025 + out_2025 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025", exit_code=0) + assert "archive-year-start" in out_2025 + assert "archive-year-same" in out_2025 + assert "archive-year-diff" not in out_2025 + + +def test_match_archives_month(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in MONTH_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # January only includes January + out_jan = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01", exit_code=0) + assert "archive-mon-diff" in out_jan + assert "archive-mon-start" not in out_jan + assert "archive-mon-same" not in out_jan + + # February includes minimum and maximum possible days in February + out_feb = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-02", exit_code=0) + assert "archive-mon-start" in out_feb + assert "archive-mon-same" in out_feb + assert "archive-mon-diff" not in out_feb + + +def test_match_archives_hour(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in HOUR_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # 13:00‐range only matches 13:00 hour + out_13 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T13", exit_code=0) + assert "archive-hour-diff" in out_13 + assert "archive-hour-start" not in out_13 + assert "archive-hour-same" not in out_13 + + # 14:00‐range matches both beginning and end of the hour + out_14 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T14", exit_code=0) + assert "archive-hour-start" in out_14 + assert "archive-hour-same" in out_14 + assert "archive-hour-diff" not in out_14 + + +def test_match_archives_minute(archivers, request): + archiver = request.getfixturevalue(archivers) + 
cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in MINUTE_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # 13:30 only matches 13:30 minute + out_1330 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T13:30", exit_code=0) + assert "archive-min-diff" in out_1330 + assert "archive-min-start" not in out_1330 + assert "archive-min-same" not in out_1330 + + # 13:31 matches both beginning and end of the minute + out_1331 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T13:31", exit_code=0) + assert "archive-min-start" in out_1331 + assert "archive-min-same" in out_1331 + assert "archive-min-diff" not in out_1331 + + +def test_match_archives_second(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in SECOND_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # exact‐second match only + out_exact = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T13:30:45", exit_code=0) + assert "archive-sec-target" in out_exact + assert "archive-sec-before" not in out_exact + assert "archive-sec-after" not in out_exact + + +def test_unix_timestamps(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + create_src_archive(archiver, "archive-sec-before", ts="2025-01-01T13:30:44") + create_src_archive(archiver, "archive-sec-target", ts="2025-01-01T13:30:45") + create_src_archive(archiver, "archive-sec-after", ts="2025-01-01T13:30:46") + # localize the datetime, since the archive creation time will be local + dt_target = datetime.fromisoformat("2025-01-01T13:30:45").astimezone() + + utc_ts_target = int(dt_target.astimezone(timezone.utc).timestamp()) + + output = cmd(archiver, "repo-list", "-v", f"--match-archives=date:@{utc_ts_target}", exit_code=0) + + assert "archive-sec-target" in output + assert "archive-sec-before" not in output + assert "archive-sec-after" not in 
output From e9a8c5f48cd7dadc10fa5d40fef6c8b1cb0fb708 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Tue, 22 Apr 2025 02:52:54 -0400 Subject: [PATCH 07/20] add day-precision filter test for `date:YYYY-MM-DD` --- .../archiver/match_archives_date_test.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index 46d43eabee..75a6d62bcc 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -19,6 +19,12 @@ ("archive-mon-diff", "2025-01-31T23:59:59"), ] +DAY_ARCHIVES = [ + ("archive-day-start", "2025-01-02T00:00:00"), + ("archive-day-same", "2025-01-02T23:59:59"), + ("archive-day-diff", "2025-01-01T23:59:59"), +] + HOUR_ARCHIVES = [ ("archive-hour-start", "2025-01-01T14:00:00"), ("archive-hour-same", "2025-01-01T14:59:59"), @@ -76,6 +82,25 @@ def test_match_archives_month(archivers, request): assert "archive-mon-diff" not in out_feb +def test_match_archives_day(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in DAY_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # 2025-01-01 only includes 2025-01-01 + out_01 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01", exit_code=0) + assert "archive-day-diff" in out_01 + assert "archive-day-start" not in out_01 + assert "archive-day-same" not in out_01 + + # 2025-01-02 includes minimum and maximum possible times in 2025-01-02 + out_02 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-02", exit_code=0) + assert "archive-day-start" in out_02 + assert "archive-day-same" in out_02 + assert "archive-day-diff" not in out_02 + + def test_match_archives_hour(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) From 
470758d860263ccc97a737b50533bbc8df849b2d Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Tue, 22 Apr 2025 04:56:23 -0400 Subject: [PATCH 08/20] support timezone suffixes in date: patterns and add tests --- src/borg/helpers/time.py | 96 +++++++++++++------ .../archiver/match_archives_date_test.py | 93 ++++++++++++++++++ 2 files changed, 162 insertions(+), 27 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 65e6b73e64..30087af788 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -1,6 +1,7 @@ import os import re from datetime import datetime, timezone, timedelta +from zoneinfo import ZoneInfo def parse_timestamp(timestamp, tzinfo=timezone.utc): @@ -191,25 +192,48 @@ class DatePatternError(ValueError): """Raised when a date: archive pattern cannot be parsed.""" -def local(dt: datetime) -> datetime: - """Interpret naive dt as local time, attach timezone info from the local tz.""" - if dt.tzinfo is None: - dt = dt.astimezone() - return dt - - def exact_predicate(dt: datetime): """Return predicate matching archives whose ts equals dt (UTC).""" - dt_utc = local(dt).astimezone(timezone.utc) + dt_utc = dt.astimezone(timezone.utc) return lambda ts: ts.astimezone(timezone.utc) == dt_utc def interval_predicate(start: datetime, end: datetime): - start_utc = local(start).astimezone(timezone.utc) - end_utc = local(end).astimezone(timezone.utc) + start_utc = start.astimezone(timezone.utc) + end_utc = end.astimezone(timezone.utc) return lambda ts: start_utc <= ts.astimezone(timezone.utc) < end_utc +def parse_tz(tzstr: str): + """ + Parses a UTC offset like +08:00 or [Region/Name] into a timezone object. 
+ """ + if not tzstr: + return None + if tzstr == "Z": + return timezone.utc + if tzstr[0] in "+-": + sign = 1 if tzstr[0] == "+" else -1 + try: + hh, mm = map(int, tzstr[1:].split(":")) + if not (0 <= mm < 60): + raise ValueError + except Exception: + raise DatePatternError("invalid UTC offset format") + # we do it this way so that, for example, -8:30 is + # -8 hours and -30 minutes, not -8 hours and +30 minutes + total_minutes = sign * (hh * 60 + mm) + # enforce ISO-8601 bounds (-12:00 to +14:00) + if not (-12 * 60 <= total_minutes <= 14 * 60): + raise DatePatternError("UTC offset outside ISO-8601 bounds") + return timezone(timedelta(minutes=total_minutes)) + # [Region/Name] + try: + return ZoneInfo(tzstr.strip("[]")) + except Exception: + raise DatePatternError("invalid timezone format") + + def compile_date_pattern(expr: str): """ Accepts any of: @@ -219,7 +243,9 @@ def compile_date_pattern(expr: str): YYYY-MM-DDTHH YYYY-MM-DDTHH:MM YYYY-MM-DDTHH:MM:SS - and returns a predicate that is True for timestamps in that interval. + Unix epoch (@123456789) + …with an optional trailing timezone (Z or ±HH:MM or [Region/City]). + Returns a predicate that is True for timestamps in that interval. """ expr = expr.strip() pattern = r""" @@ -234,6 +260,7 @@ def compile_date_pattern(expr: str): | (?P<year> \d{4}) # year precision | @(?P<epoch>\d+) # unix epoch ) + (?P<tz>Z|[+\-]\d{2}:\d{2}|\[[^\]]+\])? 
# optional timezone or [Region/City] $ """ m = re.match(pattern, expr, re.VERBOSE) @@ -241,40 +268,55 @@ def compile_date_pattern(expr: str): raise DatePatternError(f"unrecognised date: {expr!r}") gd = m.groupdict() + tz = parse_tz(gd.get("tz")) # will be None if tzstr is empty -> local timezone + + # unix epoch and user-specified timezone are mutually exclusive + if gd["epoch"] and tz is not None: + raise DatePatternError("unix‐epoch patterns (@123456789) are UTC and must not include a timezone suffix") + # 1) fractional‐second exact match if gd["fraction"]: - dt = parse_local_timestamp(gd["fraction"], tzinfo=timezone.utc) + ts = gd["fraction"] + dt = parse_timestamp(ts, tzinfo=tz) return exact_predicate(dt) + # 2) second‐precision interval if gd["second"]: - start = parse_local_timestamp(gd["second"], tzinfo=timezone.utc) + ts = gd["second"] + start = parse_timestamp(ts, tzinfo=tz) + # within one second return interval_predicate(start, start + timedelta(seconds=1)) + # 3) minute‐precision interval if gd["minute"]: - start = parse_local_timestamp(gd["minute"] + ":00", tzinfo=timezone.utc) + ts = gd["minute"] + ":00" + start = parse_timestamp(ts, tzinfo=tz) return interval_predicate(start, start + timedelta(minutes=1)) + # 4) hour‐precision interval if gd["hour"]: - start = parse_local_timestamp(gd["hour"] + ":00:00", tzinfo=timezone.utc) + ts = gd["hour"] + ":00:00" + start = parse_timestamp(ts, tzinfo=tz) return interval_predicate(start, start + timedelta(hours=1)) + # 5a) day‐precision interval if gd["day"]: - y, mo, d = map(int, gd["day"].split("-")) - start = datetime(y, mo, d) - end = start + timedelta(days=1) - return interval_predicate(start, end) + ts = gd["day"] + "T00:00:00" + start = parse_timestamp(ts, tzinfo=tz) + return interval_predicate(start, start + timedelta(days=1)) + # 5b) month‐precision interval if gd["month"]: - y, mo = map(int, gd["month"].split("-")) - start = datetime(y, mo, 1) - end = offset_n_months(start, 1) - return 
interval_predicate(start, end) + ts = gd["month"] + "-01T00:00:00" + start = parse_timestamp(ts, tzinfo=tz) + return interval_predicate(start, offset_n_months(start, 1)) + # 5c) year‐precision interval if gd["year"]: - y = int(gd["year"]) - start = datetime(y, 1, 1) - end = datetime(y + 1, 1, 1) - return interval_predicate(start, end) + ts = gd["year"] + "-01-01T00:00:00" + start = parse_timestamp(ts, tzinfo=tz) + return interval_predicate(start, offset_n_months(start, 12)) + # 6) unix‐epoch exact‐second match if gd["epoch"]: epoch = int(gd["epoch"]) diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index 75a6d62bcc..e598c8f475 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -1,7 +1,9 @@ +import pytest from datetime import datetime, timezone from ...constants import * # NOQA from . import cmd, create_src_archive, generate_archiver_tests, RK_ENCRYPTION +from ...helpers.errors import CommandError pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA @@ -168,3 +170,94 @@ def test_unix_timestamps(archivers, request): assert "archive-sec-target" in output assert "archive-sec-before" not in output assert "archive-sec-after" not in output + + +TIMEZONE_ARCHIVES = [("archive-la", "2025-01-01T12:01:00-08:00"), ("archive-utc", "2025-01-02T12:01:00+00:00")] + + +@pytest.mark.parametrize("timezone_variant", ["2025-01-01T12:01:00-08:00", "2025-01-01T12:01:00[America/Los_Angeles]"]) +def test_match_la_equivalents(archivers, request, timezone_variant): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in TIMEZONE_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + output = cmd(archiver, "repo-list", "-v", f"--match-archives=date:{timezone_variant}", exit_code=0) + assert "archive-la" in output + 
assert "archive-utc" not in output + + +@pytest.mark.parametrize( + "timezone_variant", ["2025-01-02T12:01:00+00:00", "2025-01-02T12:01:00Z", "2025-01-02T12:01:00[Etc/UTC]"] +) +def test_match_utc_equivalents(archivers, request, timezone_variant): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in TIMEZONE_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + output = cmd(archiver, "repo-list", "-v", f"--match-archives=date:{timezone_variant}", exit_code=0) + assert "archive-utc" in output + assert "archive-la" not in output + + +HOUR_TZ_ARCHIVES = [ + ("archive-hour-diff", "2025-01-01T09:59:00Z"), + ("archive-hour-start", "2025-01-01T10:00:00Z"), + ("archive-hour-same", "2025-01-01T10:59:59Z"), +] + + +def test_match_hour_from_different_tz(archivers, request): + """ + Test that the date filter works for hours with archives created in a different timezone. + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in HOUR_TZ_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # We're filtering “local 11:00” in +01:00 zone, which is 10:00–10:59:59 UTC + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T11+01:00", exit_code=0) + assert "archive-hour-start" in out + assert "archive-hour-same" in out + assert "archive-hour-diff" not in out + + +def test_match_day_from_different_tz(archivers, request): + """ + Test that the date filter works for days with archives created in a different timezone. 
+ """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + # Local 2025‑03‑02T00:30:00+02:00 → UTC 2025‑03‑01T22:30:00Z + create_src_archive(archiver, "archive-utc-bound", ts="2025-03-02T00:30:00+02:00") + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-03-01[Etc/UTC]", exit_code=0) + assert "archive-utc-bound" in out + + +@pytest.mark.parametrize( + "invalid_expr", + [ + "2025-01-01T00:00:00+14:01", # beyond +14:00 (ISO 8601 boundary) + "2025-01-01T00:00:00-12:01", # beyond -12:00 (ISO 8601 boundary) + "2025-01-01T00:00:00+09:99", # invalid minutes + "2025-01-01T00:00:00[garbage]", # invalid region + "2025-01-01T00:00:00[Not/AZone]", # structured but nonexistent + ], +) +def test_invalid_timezones_rejected(archivers, request, invalid_expr): + """ + Test that invalid timezone expressions are rejected. + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + with pytest.raises(CommandError) as excinfo: + cmd(archiver, "repo-list", "-v", f"--match-archives=date:{invalid_expr}") + + msg = str(excinfo.value) + assert "Invalid date pattern" in msg + assert invalid_expr in msg From df2d33d72b14656a50e8f86ee7d830549e586160 Mon Sep 17 00:00:00 2001 From: Gabe De Almeida Date: Wed, 23 Apr 2025 01:42:30 -0400 Subject: [PATCH 09/20] Wildcard working. Done some manual testing, will focus on more rigorous testing later today --- src/borg/helpers/time.py | 115 ++++++++++++++++++++++++++------------- testfile.txt | 1 + 2 files changed, 77 insertions(+), 39 deletions(-) create mode 100644 testfile.txt diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 30087af788..c4acb25cb4 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -245,6 +245,10 @@ def compile_date_pattern(expr: str): YYYY-MM-DDTHH:MM:SS Unix epoch (@123456789) …with an optional trailing timezone (Z or ±HH:MM or [Region/City]). 
+ Additionally supports wildcards (`*`) in year, month, or day (or any combination), e.g.: + "*-04-22" # April 22 of any year + "2025-*-01" # 1st day of any month in 2025 + "*-*-15" # 15th of every month, any year Returns a predicate that is True for timestamps in that interval. """ expr = expr.strip() @@ -252,15 +256,20 @@ def compile_date_pattern(expr: str): ^ (?: (?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+) # full timestamp with fraction - | (?P \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}) # no fraction - | (?P \d{4}-\d{2}-\d{2}T\d{2}:\d{2}) # minute precision - | (?P \d{4}-\d{2}-\d{2}T\d{2}) # hour precision - | (?P \d{4}-\d{2}-\d{2}) # day precision - | (?P \d{4}-\d{2}) # month precision - | (?P \d{4}) # year precision - | @(?P\d+) # unix epoch + | (?P \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}) # no fraction + | (?P \d{4}-\d{2}-\d{2}T\d{2}:\d{2}) # minute precision + | (?P \d{4}-\d{2}-\d{2}T\d{2}) # hour precision + | (?P \d{4}-\d{2}-\d{2}) # day precision + | (?P \d{4}-\d{2}) # month precision + | (?P \d{4}) # year precision + | @(?P \d+) # unix epoch + | (?P + (?:\d{4}|\*) # year or * + (?:-(?:\d{2}|\*)){0,2} # optional month/day or wildcards + (?:T(?:\d{2}|\*)(?::(?:\d{2}|\*)){0,2})? # optional time with wildcards + ) ) - (?PZ|[+\-]\d{2}:\d{2}|\[[^\]]+\])? # optional timezone or [Region/City] + (?PZ|[+\-]\d{2}:\d{2}|\[[^\]]+\])? 
# optional timezone or [Region/City] $ """ m = re.match(pattern, expr, re.VERBOSE) @@ -268,60 +277,88 @@ def compile_date_pattern(expr: str): raise DatePatternError(f"unrecognised date: {expr!r}") gd = m.groupdict() - tz = parse_tz(gd.get("tz")) # will be None if tzstr is empty -> local timezone - - # unix epoch and user-specified timezone are mutually exclusive - if gd["epoch"] and tz is not None: - raise DatePatternError("unix‐epoch patterns (@123456789) are UTC and must not include a timezone suffix") - - # 1) fractional‐second exact match + tz = parse_tz(gd.get("tz")) # None => local timezone + + # Wildcard branch: match each specified component + if gd["wild"]: + part = gd["wild"] + date_part, *time_rest = part.split('T', 1) + time_part = time_rest[0] if time_rest else '' + + dfields = date_part.split('-') + y_pat = dfields[0] + m_pat = dfields[1] if len(dfields) > 1 else '*' + d_pat = dfields[2] if len(dfields) > 2 else '*' + + tfields = time_part.split(':') if time_part else [] + h_pat = tfields[0] if len(tfields) > 0 else '*' + M_pat = tfields[1] if len(tfields) > 1 else '*' + S_pat = tfields[2] if len(tfields) > 2 else '*' + + to_int = lambda p: None if p == '*' else int(p) + to_float = lambda p: None if p == '*' else float(p) + + yi = to_int(y_pat) + mi = to_int(m_pat) + di = to_int(d_pat) + hi = to_int(h_pat) + ni = to_int(M_pat) + si = to_float(S_pat) + + def wildcard_pred(ts: datetime): + dt = ts.astimezone(timezone.utc) + if yi is not None and dt.year != yi: return False + if mi is not None and dt.month != mi: return False + if di is not None and dt.day != di: return False + if hi is not None and dt.hour != hi: return False + if ni is not None and dt.minute != ni: return False + if si is not None: + sec = dt.second + dt.microsecond/1e6 + if not (si <= sec < si + 1): return False + return True + + return wildcard_pred + + # 1) fractional-second exact match if gd["fraction"]: - ts = gd["fraction"] - dt = parse_timestamp(ts, tzinfo=tz) + dt = 
parse_timestamp(gd["fraction"], tzinfo=tz) return exact_predicate(dt) - # 2) second‐precision interval + # 2) second-precision interval if gd["second"]: - ts = gd["second"] - start = parse_timestamp(ts, tzinfo=tz) - # within one second + start = parse_timestamp(gd["second"], tzinfo=tz) return interval_predicate(start, start + timedelta(seconds=1)) - # 3) minute‐precision interval + # 3) minute-precision interval if gd["minute"]: - ts = gd["minute"] + ":00" - start = parse_timestamp(ts, tzinfo=tz) + start = parse_timestamp(gd["minute"] + ":00", tzinfo=tz) return interval_predicate(start, start + timedelta(minutes=1)) - # 4) hour‐precision interval + # 4) hour-precision interval if gd["hour"]: - ts = gd["hour"] + ":00:00" - start = parse_timestamp(ts, tzinfo=tz) + start = parse_timestamp(gd["hour"] + ":00:00", tzinfo=tz) return interval_predicate(start, start + timedelta(hours=1)) - # 5a) day‐precision interval + # 5a) day-precision interval if gd["day"]: - ts = gd["day"] + "T00:00:00" - start = parse_timestamp(ts, tzinfo=tz) + start = parse_timestamp(gd["day"] + "T00:00:00", tzinfo=tz) return interval_predicate(start, start + timedelta(days=1)) - # 5b) month‐precision interval + # 5b) month-precision interval if gd["month"]: - ts = gd["month"] + "-01T00:00:00" - start = parse_timestamp(ts, tzinfo=tz) + start = parse_timestamp(gd["month"] + "-01T00:00:00", tzinfo=tz) return interval_predicate(start, offset_n_months(start, 1)) - # 5c) year‐precision interval + # 5c) year-precision interval if gd["year"]: - ts = gd["year"] + "-01-01T00:00:00" - start = parse_timestamp(ts, tzinfo=tz) + start = parse_timestamp(gd["year"] + "-01-01T00:00:00", tzinfo=tz) return interval_predicate(start, offset_n_months(start, 12)) - # 6) unix‐epoch exact‐second match + # 6) unix-epoch exact-second match if gd["epoch"]: epoch = int(gd["epoch"]) start = datetime.fromtimestamp(epoch, tz=timezone.utc) return interval_predicate(start, start + timedelta(seconds=1)) - # should never get here - 
raise DatePatternError(f"unrecognised date: {expr!r}") + # unreachable + raise DatePatternError(f"unrecognised date: {expr!r}") \ No newline at end of file diff --git a/testfile.txt b/testfile.txt new file mode 100644 index 0000000000..e1d60b712d --- /dev/null +++ b/testfile.txt @@ -0,0 +1 @@ +Hello from Borg! From 870bf7a133c21a804299b7f026ed24e605733e03 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Thu, 24 Apr 2025 23:09:30 -0400 Subject: [PATCH 10/20] add tests for wildcard support in date: archive match patterns; reformat to pass style checks --- src/borg/helpers/time.py | 49 ++++---- .../archiver/match_archives_date_test.py | 111 ++++++++++++++++++ 2 files changed, 140 insertions(+), 20 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index c4acb25cb4..4c4401a7e2 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -245,7 +245,7 @@ def compile_date_pattern(expr: str): YYYY-MM-DDTHH:MM:SS Unix epoch (@123456789) …with an optional trailing timezone (Z or ±HH:MM or [Region/City]). 
- Additionally supports wildcards (`*`) in year, month, or day (or any combination), e.g.: + Additionally supports wildcards (`*`) in year, month, or day (or any combination), e.g.: "*-04-22" # April 22 of any year "2025-*-01" # 1st day of any month in 2025 "*-*-15" # 15th of every month, any year @@ -282,21 +282,24 @@ def compile_date_pattern(expr: str): # Wildcard branch: match each specified component if gd["wild"]: part = gd["wild"] - date_part, *time_rest = part.split('T', 1) - time_part = time_rest[0] if time_rest else '' + date_part, *time_rest = part.split("T", 1) + time_part = time_rest[0] if time_rest else "" - dfields = date_part.split('-') + dfields = date_part.split("-") y_pat = dfields[0] - m_pat = dfields[1] if len(dfields) > 1 else '*' - d_pat = dfields[2] if len(dfields) > 2 else '*' + m_pat = dfields[1] if len(dfields) > 1 else "*" + d_pat = dfields[2] if len(dfields) > 2 else "*" - tfields = time_part.split(':') if time_part else [] - h_pat = tfields[0] if len(tfields) > 0 else '*' - M_pat = tfields[1] if len(tfields) > 1 else '*' - S_pat = tfields[2] if len(tfields) > 2 else '*' + tfields = time_part.split(":") if time_part else [] + h_pat = tfields[0] if len(tfields) > 0 else "*" + M_pat = tfields[1] if len(tfields) > 1 else "*" + S_pat = tfields[2] if len(tfields) > 2 else "*" - to_int = lambda p: None if p == '*' else int(p) - to_float = lambda p: None if p == '*' else float(p) + def to_int(p): + return None if p == "*" else int(p) + + def to_float(p): + return None if p == "*" else float(p) yi = to_int(y_pat) mi = to_int(m_pat) @@ -307,14 +310,20 @@ def compile_date_pattern(expr: str): def wildcard_pred(ts: datetime): dt = ts.astimezone(timezone.utc) - if yi is not None and dt.year != yi: return False - if mi is not None and dt.month != mi: return False - if di is not None and dt.day != di: return False - if hi is not None and dt.hour != hi: return False - if ni is not None and dt.minute != ni: return False + if yi is not None and dt.year != 
yi: + return False + if mi is not None and dt.month != mi: + return False + if di is not None and dt.day != di: + return False + if hi is not None and dt.hour != hi: + return False + if ni is not None and dt.minute != ni: + return False if si is not None: - sec = dt.second + dt.microsecond/1e6 - if not (si <= sec < si + 1): return False + sec = dt.second + dt.microsecond / 1e6 + if not (si <= sec < si + 1): + return False return True return wildcard_pred @@ -361,4 +370,4 @@ def wildcard_pred(ts: datetime): return interval_predicate(start, start + timedelta(seconds=1)) # unreachable - raise DatePatternError(f"unrecognised date: {expr!r}") \ No newline at end of file + raise DatePatternError(f"unrecognised date: {expr!r}") diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index e598c8f475..e859a8e98f 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -261,3 +261,114 @@ def test_invalid_timezones_rejected(archivers, request, invalid_expr): msg = str(excinfo.value) assert "Invalid date pattern" in msg assert invalid_expr in msg + + +WILDCARD_DAY_ARCHIVES = [ + ("wd-jan12", "2025-01-12T00:00:00"), + ("wd-feb12", "2025-02-12T23:59:59"), + ("wd-jan13", "2025-01-13T00:00:00"), +] + + +# Day-only wildcard: *-*-12 +def test_match_wildcard_specific_day(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in WILDCARD_DAY_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:*-*-12", exit_code=0) + assert "wd-jan12" in out + assert "wd-feb12" in out + assert "wd-jan13" not in out + + +WILDCARD_MONTH_ARCHIVES = [ + ("wm-apr1", "2025-04-01T00:00:00"), + ("wm-apr30", "2025-04-30T23:59:59"), + ("wm-mar31", "2025-03-31T23:59:59"), +] + + +# Month-only wildcard: *-04 +def 
test_match_wildcard_every_april(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in WILDCARD_MONTH_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:*-04", exit_code=0) + assert "wm-apr1" in out + assert "wm-apr30" in out + assert "wm-mar31" not in out + + +WILDCARD_MINUTE_ARCHIVES = [ + ("w-min-a", "2025-01-01T12:10:00"), + ("w-min-b", "2025-01-01T12:59:00"), + ("w-min-c", "2025-01-01T12:10:01"), # should not match +] + + +# Time-of-day wildcard (minute‐level): 2025-01-01T12:*:00 +def test_match_wildcard_any_minute_at_second_zero(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in WILDCARD_MINUTE_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T12:*:00", exit_code=0) + assert "w-min-a" in out + assert "w-min-b" in out + assert "w-min-c" not in out + + +# Wildcard plus timezone: day in America/Detroit +WILDCARD_TZ_ARCHIVES = [ + # UTC 2025-04-12T03:59:59Z -> local EDT = 2025-04-11T23:59:59 (before - should not match) + ("w-tz-before", "2025-04-12T03:59:59Z"), + # UTC 2025-04-12T04:00:00Z -> local EDT = 2025-04-12T00:00:00 (start - should match) + ("w-tz-start", "2025-04-12T04:00:00Z"), + # UTC 2025-04-12T16:30:00Z -> local EDT = 2025-04-12T12:30:00 (halfway - should match) + ("w-tz-mid", "2025-04-12T16:30:00Z"), + # UTC 2025-04-13T03:59:59Z -> local EDT = 2025-04-12T23:59:59 (inclusive end - should still match) + ("w-tz-same", "2025-04-13T03:59:59Z"), + # UTC 2025-04-13T04:00:00Z -> local EDT = 2025-04-13T00:00:00 (after) + ("w-tz-after", "2025-04-13T04:00:00Z"), +] + + +def test_match_wildcard_day_with_tz(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in 
WILDCARD_TZ_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-04-12T*:*:*[America/Detroit]", exit_code=0) + # only the three in the EDT-local-Apr-12 window with second=0 should match + assert "w-tz-start" in out + assert "w-tz-mid" in out + assert "w-tz-same" in out + assert "w-tz-before" not in out + assert "w-tz-after" not in out + + +WILDCARD_MIXED_ARCHIVES = [ + ("wmix-hit1", "2025-01-01T12:00:00"), # matches: 01-01 12:00 + ("wmix-hit2", "2025-01-01T12:59:59"), # matches: 01-01 12:* + ("wmix-miss1", "2025-01-01T13:00:00"), # wrong hour + ("wmix-miss2", "2025-01-02T12:00:00"), # wrong day +] + + +def test_match_wildcard_mixed_day_and_hour(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + for name, ts in WILDCARD_MIXED_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:*-01-01T12:*", exit_code=0) + assert "wmix-hit1" in out + assert "wmix-hit2" in out + assert "wmix-miss1" not in out + assert "wmix-miss2" not in out From 461df75afd18693d794072f27924a803422b2f28 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Thu, 24 Apr 2025 23:16:16 -0400 Subject: [PATCH 11/20] fix bug with wildcards in date: match patterns not respecting supplied timezones; added mutual exclusion enforcement to wildcard+epoch --- src/borg/helpers/time.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 4c4401a7e2..6feac1ae38 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -281,6 +281,8 @@ def compile_date_pattern(expr: str): # Wildcard branch: match each specified component if gd["wild"]: + if gd["epoch"]: + raise DatePatternError("wildcards and epoch cannot be used together") part = gd["wild"] date_part, *time_rest = part.split("T", 1) time_part = time_rest[0] if time_rest else "" @@ 
-309,7 +311,7 @@ def to_float(p): si = to_float(S_pat) def wildcard_pred(ts: datetime): - dt = ts.astimezone(timezone.utc) + dt = ts.astimezone(tz) if yi is not None and dt.year != yi: return False if mi is not None and dt.month != mi: From 9553c35fe2a3182304f10d72341c7e38c7df0bff Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Thu, 24 Apr 2025 23:28:48 -0400 Subject: [PATCH 12/20] remove stray testfile.txt --- testfile.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 testfile.txt diff --git a/testfile.txt b/testfile.txt deleted file mode 100644 index e1d60b712d..0000000000 --- a/testfile.txt +++ /dev/null @@ -1 +0,0 @@ -Hello from Borg! From 409733b4561c086ba456c4ec12bf2704501f5124 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Thu, 24 Apr 2025 23:59:10 -0400 Subject: [PATCH 13/20] refactor date: pattern parser to use structured bottom-up regex, per maintainer recommendation (all tests passing) --- src/borg/helpers/time.py | 183 ++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 91 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 6feac1ae38..d8e7b02a77 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -246,130 +246,131 @@ def compile_date_pattern(expr: str): Unix epoch (@123456789) …with an optional trailing timezone (Z or ±HH:MM or [Region/City]). Additionally supports wildcards (`*`) in year, month, or day (or any combination), e.g.: - "*-04-22" # April 22 of any year + "*-04-22" # April 22 of any year "2025-*-01" # 1st day of any month in 2025 "*-*-15" # 15th of every month, any year Returns a predicate that is True for timestamps in that interval. 
""" expr = expr.strip() pattern = r""" - ^ - (?: - (?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+) # full timestamp with fraction - | (?P \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}) # no fraction - | (?P \d{4}-\d{2}-\d{2}T\d{2}:\d{2}) # minute precision - | (?P \d{4}-\d{2}-\d{2}T\d{2}) # hour precision - | (?P \d{4}-\d{2}-\d{2}) # day precision - | (?P \d{4}-\d{2}) # month precision - | (?P \d{4}) # year precision - | @(?P \d+) # unix epoch - | (?P - (?:\d{4}|\*) # year or * - (?:-(?:\d{2}|\*)){0,2} # optional month/day or wildcards - (?:T(?:\d{2}|\*)(?::(?:\d{2}|\*)){0,2})? # optional time with wildcards - ) - ) - (?PZ|[+\-]\d{2}:\d{2}|\[[^\]]+\])? # optional timezone or [Region/City] - $ + ^ + (?: + @(?P\d+) # unix epoch + | (?P \d{4}|\*) # year (YYYY or *) + (?:-(?P \d{2}|\*) # month (MM or *) + (?:-(?P \d{2}|\*) # day (DD or *) + (?:[T ](?P \d{2}|\*) # hour (HH or *) + (?::(?P\d{2}|\*) # minute (MM or *) + (?::(?P\d{2}(?:\.\d+)?|\*))? # second (SS or SS.fff or *) + )? + )? + )? + )? + ) + (?PZ|[+\-]\d\d:\d\d|\[[^\]]+\])? 
# optional timezone suffix + $ """ m = re.match(pattern, expr, re.VERBOSE) if not m: raise DatePatternError(f"unrecognised date: {expr!r}") gd = m.groupdict() - tz = parse_tz(gd.get("tz")) # None => local timezone - - # Wildcard branch: match each specified component - if gd["wild"]: - if gd["epoch"]: - raise DatePatternError("wildcards and epoch cannot be used together") - part = gd["wild"] - date_part, *time_rest = part.split("T", 1) - time_part = time_rest[0] if time_rest else "" - - dfields = date_part.split("-") - y_pat = dfields[0] - m_pat = dfields[1] if len(dfields) > 1 else "*" - d_pat = dfields[2] if len(dfields) > 2 else "*" - - tfields = time_part.split(":") if time_part else [] - h_pat = tfields[0] if len(tfields) > 0 else "*" - M_pat = tfields[1] if len(tfields) > 1 else "*" - S_pat = tfields[2] if len(tfields) > 2 else "*" - - def to_int(p): - return None if p == "*" else int(p) - - def to_float(p): - return None if p == "*" else float(p) - - yi = to_int(y_pat) - mi = to_int(m_pat) - di = to_int(d_pat) - hi = to_int(h_pat) - ni = to_int(M_pat) - si = to_float(S_pat) - - def wildcard_pred(ts: datetime): + tz = parse_tz(gd["tz"]) + + # 1) epoch is checked first because it is syntactically and semantically incompatible + # with other datetime components (e.g., year/month/day/wildcards). 
+ if gd["epoch"]: + e = int(gd["epoch"]) + start = datetime.fromtimestamp(e, tz=timezone.utc) + return interval_predicate(start, start + timedelta(seconds=1)) + + # 2) detect explicit wildcards (*) in any named group + wildcard_fields = ("year", "month", "day", "hour", "minute", "second") + if any(gd[f] == "*" for f in wildcard_fields if f in gd): + # build a discrete‐match predicate + yi = None if gd["year"] == "*" else int(gd["year"]) + mi = None if gd["month"] == "*" else int(gd["month"]) if gd["month"] else None + di = None if gd["day"] == "*" else int(gd["day"]) if gd["day"] else None + hi = None if gd["hour"] == "*" else int(gd["hour"]) if gd["hour"] else None + ni = None if gd["minute"] == "*" else int(gd["minute"]) if gd["minute"] else None + si = None + if gd["second"]: + if gd["second"] != "*": + si = float(gd["second"]) + + def wildcard_pred(ts): dt = ts.astimezone(tz) - if yi is not None and dt.year != yi: - return False - if mi is not None and dt.month != mi: - return False - if di is not None and dt.day != di: - return False - if hi is not None and dt.hour != hi: - return False - if ni is not None and dt.minute != ni: - return False - if si is not None: - sec = dt.second + dt.microsecond / 1e6 - if not (si <= sec < si + 1): - return False - return True + return ( + (yi is None or dt.year == yi) + and (mi is None or dt.month == mi) + and (di is None or dt.day == di) + and (hi is None or dt.hour == hi) + and (ni is None or dt.minute == ni) + and (si is None or (si <= dt.second + dt.microsecond / 1e6 < si + 1)) + ) return wildcard_pred - # 1) fractional-second exact match - if gd["fraction"]: - dt = parse_timestamp(gd["fraction"], tzinfo=tz) - return exact_predicate(dt) + # 3) fraction‐precision exact match + if gd["second"] and "." 
in gd["second"]: + start = datetime( + int(gd["year"]), + int(gd["month"]), + int(gd["day"]), + int(gd["hour"]), + int(gd["minute"]), + int(float(gd["second"])), + tzinfo=tz, + ) + return exact_predicate(start) - # 2) second-precision interval + # 4) second‐precision interval if gd["second"]: - start = parse_timestamp(gd["second"], tzinfo=tz) + start = datetime( + int(gd["year"]), + int(gd["month"]), + int(gd["day"]), + int(gd["hour"] or 0), + int(gd["minute"] or 0), + int(gd["second"]), + tzinfo=tz, + ) return interval_predicate(start, start + timedelta(seconds=1)) - # 3) minute-precision interval + # 5) minute‐precision if gd["minute"]: - start = parse_timestamp(gd["minute"] + ":00", tzinfo=tz) + start = datetime( + int(gd["year"]), + int(gd["month"]), + int(gd["day"]), + int(gd["hour"] or 0), + int(gd["minute"]), + second=0, + tzinfo=tz, + ) return interval_predicate(start, start + timedelta(minutes=1)) - # 4) hour-precision interval + # 6) hour‐precision if gd["hour"]: - start = parse_timestamp(gd["hour"] + ":00:00", tzinfo=tz) + start = datetime( + int(gd["year"]), int(gd["month"]), int(gd["day"]), int(gd["hour"]), minute=0, second=0, tzinfo=tz + ) return interval_predicate(start, start + timedelta(hours=1)) - # 5a) day-precision interval + # 7) day‐precision if gd["day"]: - start = parse_timestamp(gd["day"] + "T00:00:00", tzinfo=tz) + start = datetime(int(gd["year"]), int(gd["month"]), int(gd["day"]), hour=0, minute=0, second=0, tzinfo=tz) return interval_predicate(start, start + timedelta(days=1)) - # 5b) month-precision interval + # 8) month‐precision if gd["month"]: - start = parse_timestamp(gd["month"] + "-01T00:00:00", tzinfo=tz) + start = datetime(int(gd["year"]), int(gd["month"]), day=1, hour=0, minute=0, second=0, tzinfo=tz) return interval_predicate(start, offset_n_months(start, 1)) - # 5c) year-precision interval + # 9) year‐precision if gd["year"]: - start = parse_timestamp(gd["year"] + "-01-01T00:00:00", tzinfo=tz) + start = 
datetime(int(gd["year"]), month=1, day=1, hour=0, minute=0, second=0, tzinfo=tz) return interval_predicate(start, offset_n_months(start, 12)) - # 6) unix-epoch exact-second match - if gd["epoch"]: - epoch = int(gd["epoch"]) - start = datetime.fromtimestamp(epoch, tz=timezone.utc) - return interval_predicate(start, start + timedelta(seconds=1)) - - # unreachable + # fallback raise DatePatternError(f"unrecognised date: {expr!r}") From de038065d2788116b5031b7ba12f9d1969d6a962 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Fri, 25 Apr 2025 01:06:31 -0400 Subject: [PATCH 14/20] refactor date: pattern parsing to use helper functions for datetime construction and interval parsing --- src/borg/helpers/time.py | 182 ++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 88 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index d8e7b02a77..fb3b0feade 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -234,6 +234,92 @@ def parse_tz(tzstr: str): raise DatePatternError("invalid timezone format") +def _build_datetime_from_groups(gd: dict, tz: timezone) -> datetime: + """ + Construct a datetime from partial ISO groups, filling missing fields with + the earliest valid value, and attaching tzinfo. + """ + year = int(gd["year"]) + month = int(gd.get("month") or 1) + day = int(gd.get("day") or 1) + hour = int(gd.get("hour") or 0) + minute = int(gd.get("minute") or 0) + # handle fractional seconds + microsecond = 0 + second = 0 + sec_str = gd.get("second") + if sec_str: + if "." 
in sec_str: + whole, frac = sec_str.split(".", 1) + second = int(whole) + # pad or trim frac to microseconds + microsecond = int(float(f"0.{frac}") * 1_000_000) + else: + second = int(sec_str) + return datetime(year, month, day, hour, minute, second, microsecond, tzinfo=tz) + + +pattern = r""" + ^ + (?: + @(?P\d+) # unix epoch + | (?P \d{4}|\*) # year (YYYY or *) + (?:-(?P \d{2}|\*) # month (MM or *) + (?:-(?P \d{2}|\*) # day (DD or *) + (?:[T ](?P \d{2}|\*) # hour (HH or *) + (?::(?P\d{2}|\*) # minute (MM or *) + (?::(?P\d{2}(?:\.\d+)?|\*))? # second (SS or SS.fff or *) + )? + )? + )? + )? + ) + (?PZ|[+\-]\d\d:\d\d|\[[^\]]+\])? # optional timezone suffix + $ +""" + + +def _parse_to_interval(expr: str) -> tuple[datetime, datetime]: + """ + Parse a possibly incomplete ISO-8601 timestamp (with optional timezone) into + a start and end datetime representing the full interval. + """ + # note: we match the same pattern that supports wildcards, but at the point this function is called, + # we know that the pattern contains no wildcards. This is to allow us to reuse the same regex. 
+ m = re.match(pattern, expr, re.VERBOSE) + if not m: + raise DatePatternError(f"unrecognised date: {expr!r}") + gd = m.groupdict() + # handle unix-epoch forms directly + if gd["epoch"]: + epoch = int(gd["epoch"]) + start = datetime.fromtimestamp(epoch, tz=timezone.utc) + end = start + timedelta(seconds=1) + return start, end + + tz = parse_tz(gd["tz"]) + # build the start moment + start = _build_datetime_from_groups(gd, tz) + # determine the end moment based on the highest precision present + if gd["second"]: + # fractional or whole second precision + end = start + timedelta(seconds=1) + elif gd["minute"]: + end = start + timedelta(minutes=1) + elif gd["hour"]: + end = start + timedelta(hours=1) + elif gd["day"]: + end = start + timedelta(days=1) + elif gd["month"]: + end = offset_n_months(start, 1) + elif gd["year"]: + end = offset_n_months(start, 12) + else: + # fallback to one-second window (shouldn't occur) + end = start + timedelta(seconds=1) + return start, end + + def compile_date_pattern(expr: str): """ Accepts any of: @@ -252,24 +338,6 @@ def compile_date_pattern(expr: str): Returns a predicate that is True for timestamps in that interval. """ expr = expr.strip() - pattern = r""" - ^ - (?: - @(?P\d+) # unix epoch - | (?P \d{4}|\*) # year (YYYY or *) - (?:-(?P \d{2}|\*) # month (MM or *) - (?:-(?P \d{2}|\*) # day (DD or *) - (?:[T ](?P \d{2}|\*) # hour (HH or *) - (?::(?P\d{2}|\*) # minute (MM or *) - (?::(?P\d{2}(?:\.\d+)?|\*))? # second (SS or SS.fff or *) - )? - )? - )? - )? - ) - (?PZ|[+\-]\d\d:\d\d|\[[^\]]+\])? # optional timezone suffix - $ - """ m = re.match(pattern, expr, re.VERBOSE) if not m: raise DatePatternError(f"unrecognised date: {expr!r}") @@ -277,14 +345,7 @@ def compile_date_pattern(expr: str): gd = m.groupdict() tz = parse_tz(gd["tz"]) - # 1) epoch is checked first because it is syntactically and semantically incompatible - # with other datetime components (e.g., year/month/day/wildcards). 
- if gd["epoch"]: - e = int(gd["epoch"]) - start = datetime.fromtimestamp(e, tz=timezone.utc) - return interval_predicate(start, start + timedelta(seconds=1)) - - # 2) detect explicit wildcards (*) in any named group + # 1) detect explicit wildcards (*) in any named group wildcard_fields = ("year", "month", "day", "hour", "minute", "second") if any(gd[f] == "*" for f in wildcard_fields if f in gd): # build a discrete‐match predicate @@ -311,66 +372,11 @@ def wildcard_pred(ts): return wildcard_pred - # 3) fraction‐precision exact match + # 2) fraction‐precision exact match if gd["second"] and "." in gd["second"]: - start = datetime( - int(gd["year"]), - int(gd["month"]), - int(gd["day"]), - int(gd["hour"]), - int(gd["minute"]), - int(float(gd["second"])), - tzinfo=tz, - ) - return exact_predicate(start) - - # 4) second‐precision interval - if gd["second"]: - start = datetime( - int(gd["year"]), - int(gd["month"]), - int(gd["day"]), - int(gd["hour"] or 0), - int(gd["minute"] or 0), - int(gd["second"]), - tzinfo=tz, - ) - return interval_predicate(start, start + timedelta(seconds=1)) - - # 5) minute‐precision - if gd["minute"]: - start = datetime( - int(gd["year"]), - int(gd["month"]), - int(gd["day"]), - int(gd["hour"] or 0), - int(gd["minute"]), - second=0, - tzinfo=tz, - ) - return interval_predicate(start, start + timedelta(minutes=1)) - - # 6) hour‐precision - if gd["hour"]: - start = datetime( - int(gd["year"]), int(gd["month"]), int(gd["day"]), int(gd["hour"]), minute=0, second=0, tzinfo=tz - ) - return interval_predicate(start, start + timedelta(hours=1)) - - # 7) day‐precision - if gd["day"]: - start = datetime(int(gd["year"]), int(gd["month"]), int(gd["day"]), hour=0, minute=0, second=0, tzinfo=tz) - return interval_predicate(start, start + timedelta(days=1)) - - # 8) month‐precision - if gd["month"]: - start = datetime(int(gd["year"]), int(gd["month"]), day=1, hour=0, minute=0, second=0, tzinfo=tz) - return interval_predicate(start, offset_n_months(start, 
1)) - - # 9) year‐precision - if gd["year"]: - start = datetime(int(gd["year"]), month=1, day=1, hour=0, minute=0, second=0, tzinfo=tz) - return interval_predicate(start, offset_n_months(start, 12)) - - # fallback - raise DatePatternError(f"unrecognised date: {expr!r}") + dt = _build_datetime_from_groups(gd, tz) + return exact_predicate(dt) + + # 3) remaining precisions: use _parse_to_interval to get start/end + start, end = _parse_to_interval(expr) + return interval_predicate(start, end) From 796981c2e464a65b02a15464a06ecbece05af0f4 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Fri, 25 Apr 2025 01:27:22 -0400 Subject: [PATCH 15/20] add explicit time interval matching in date: archive match pattern (with basic tests) --- src/borg/helpers/time.py | 16 +++++- .../archiver/match_archives_date_test.py | 55 +++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index fb3b0feade..0ed58b3047 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -338,6 +338,16 @@ def compile_date_pattern(expr: str): Returns a predicate that is True for timestamps in that interval. 
""" expr = expr.strip() + + # 1) detect explicit user-defined intervals (split slash outside brackets to allow for [Region/Name]) + parts = re.split(r"/(?![^\[]*\])", expr, maxsplit=1) + if len(parts) == 2: + left, right = parts + start_left, _ = _parse_to_interval(left) + # use the start here to make it an exclusive upper bound, for behavior consistent with + # the rest of the date pattern matching + start_right, _ = _parse_to_interval(right) + return interval_predicate(start_left, start_right) m = re.match(pattern, expr, re.VERBOSE) if not m: raise DatePatternError(f"unrecognised date: {expr!r}") @@ -345,7 +355,7 @@ def compile_date_pattern(expr: str): gd = m.groupdict() tz = parse_tz(gd["tz"]) - # 1) detect explicit wildcards (*) in any named group + # 2) detect explicit wildcards (*) in any named group wildcard_fields = ("year", "month", "day", "hour", "minute", "second") if any(gd[f] == "*" for f in wildcard_fields if f in gd): # build a discrete‐match predicate @@ -372,11 +382,11 @@ def wildcard_pred(ts): return wildcard_pred - # 2) fraction‐precision exact match + # 3) fraction‐precision exact match if gd["second"] and "." 
in gd["second"]: dt = _build_datetime_from_groups(gd, tz) return exact_predicate(dt) - # 3) remaining precisions: use _parse_to_interval to get start/end + # 4) remaining precisions: use _parse_to_interval to get start/end start, end = _parse_to_interval(expr) return interval_predicate(start, end) diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index e859a8e98f..7e09704f3e 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -372,3 +372,58 @@ def test_match_wildcard_mixed_day_and_hour(archivers, request): assert "wmix-hit2" in out assert "wmix-miss1" not in out assert "wmix-miss2" not in out + + +# Interval matching tests + +INTERVAL_ARCHIVES = [ + ("int-before", "2025-03-31T23:59:59"), + ("int-start", "2025-04-01T00:00:00"), + ("int-mid", "2025-04-15T12:00:00"), + ("int-end", "2025-05-01T00:00:00"), + ("int-after", "2025-05-01T00:00:01"), +] + + +# Explicit interval match tests +def test_match_explicit_interval(archivers, request): + """ + Test matching archives between two explicit, fully-specified timestamps. + The interval is inclusive of the start and exclusive of the end. + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + for name, ts in INTERVAL_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-04-01T00:00:00/2025-05-01T00:00:00", exit_code=0) + assert "int-start" in out + assert "int-mid" in out + assert "int-before" not in out + assert "int-end" not in out # exclusive end + assert "int-after" not in out + + +def test_match_explicit_interval_with_timezone(archivers, request): + """ + Test matching archives between two explicit timestamps with timezone offsets. + Interval is inclusive of the start and exclusive of the end. 
+ """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + TZ_INTERVAL_ARCHIVES = [ + ("tz-start", "2025-06-01T00:00:00+02:00"), # UTC 2025-05-31T22:00:00Z + ("tz-mid", "2025-06-01T12:00:00+02:00"), # UTC 2025-06-01T10:00:00Z + ("tz-end", "2025-06-02T00:00:00+02:00"), # UTC 2025-06-01T22:00:00Z + ] + for name, ts in TZ_INTERVAL_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # Express the interval in UTC, matching the UTC equivalents. + out = cmd( + archiver, "repo-list", "-v", "--match-archives=date:2025-05-31T22:00:00Z/2025-06-01T22:00:00Z", exit_code=0 + ) + assert "tz-start" in out + assert "tz-mid" in out + assert "tz-end" not in out From 7b8a194d6f318f7e19f1cd20b1dfba269adc8a3c Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Fri, 25 Apr 2025 02:20:06 -0400 Subject: [PATCH 16/20] add duration-based interval support for date: archive match patterns; preserve time-of-day in offset_n_months --- src/borg/helpers/time.py | 61 +++++- .../archiver/match_archives_date_test.py | 175 ++++++++++++++++++ 2 files changed, 232 insertions(+), 4 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 0ed58b3047..527ae42a92 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -160,8 +160,15 @@ def get_month_and_year_from_total(total_completed_months): following_month, year_of_following_month = get_month_and_year_from_total(total_months + 1) max_days_in_month = (datetime(year_of_following_month, following_month, 1) - timedelta(1)).day - return datetime(day=min(from_ts.day, max_days_in_month), month=target_month, year=target_year).replace( - tzinfo=from_ts.tzinfo + return datetime( + year=target_year, + month=target_month, + day=min(from_ts.day, max_days_in_month), + hour=from_ts.hour, + minute=from_ts.minute, + second=from_ts.second, + microsecond=from_ts.microsecond, + tzinfo=from_ts.tzinfo, ) @@ -279,6 +286,38 @@ def _build_datetime_from_groups(gd: dict, tz: timezone) -> 
datetime: """ +# +# duration parsing for strings like D1Y2M3W4D5h6m7s +_DURATION_RE = re.compile( + r"^D" + r"(?:(?P\d+)Y)?" + r"(?:(?P\d+)M)?" + r"(?:(?P\d+)W)?" + r"(?:(?P\d+)D)?" + r"(?:(?P\d+)h)?" + r"(?:(?P\d+)m)?" + r"(?:(?P\d+)s)?" + r"$" +) + + +def _parse_duration(expr: str) -> tuple[int, timedelta]: + """ + Parse D… duration into (months, timedelta of days/weeks/hours/minutes/seconds). + """ + m = _DURATION_RE.match(expr) + if not m: + raise DatePatternError(f"invalid duration: {expr!r}") + gd = m.groupdict(default="0") + total_months = int(gd["years"]) * 12 + int(gd["months"]) + days = int(gd["weeks"]) * 7 + int(gd["days"]) + hours = int(gd["hours"]) + minutes = int(gd["minutes"]) + seconds = int(gd["seconds"]) + td = timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds) + return total_months, td + + def _parse_to_interval(expr: str) -> tuple[datetime, datetime]: """ Parse a possibly incomplete ISO-8601 timestamp (with optional timezone) into @@ -343,9 +382,23 @@ def compile_date_pattern(expr: str): parts = re.split(r"/(?![^\[]*\])", expr, maxsplit=1) if len(parts) == 2: left, right = parts + # duration / timestamp + if left.startswith("D") and not right.startswith("D"): + # months are handled separately via offset_n_months() because month lengths vary + months, td = _parse_duration(left) + end_dt, _ = _parse_to_interval(right) + start_dt = offset_n_months(end_dt, -months) - td + return interval_predicate(start_dt, end_dt) + # timestamp / duration + if right.startswith("D") and not left.startswith("D"): + start_dt, _ = _parse_to_interval(left) + # months are handled separately via offset_n_months() because month lengths vary + months, td = _parse_duration(right) + mid_dt = offset_n_months(start_dt, months) + end_dt = mid_dt + td + return interval_predicate(start_dt, end_dt) + # timestamp / timestamp start_left, _ = _parse_to_interval(left) - # use the start here to make it an exclusive upper bound, for behavior consistent with - # the rest 
of the date pattern matching start_right, _ = _parse_to_interval(right) return interval_predicate(start_left, start_right) m = re.match(pattern, expr, re.VERBOSE) diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index 7e09704f3e..caa48bdd8b 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -427,3 +427,178 @@ def test_match_explicit_interval_with_timezone(archivers, request): assert "tz-start" in out assert "tz-mid" in out assert "tz-end" not in out + + +# Duration-based interval tests + + +# Test duration prefix (duration/timestamp): 1-day before midnight +def test_match_duration_prefix_day(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + DURATION_ARCHIVES = [ + ("dur-start", "2025-04-01T00:00:00"), + ("dur-mid", "2025-04-01T12:00:00"), + ("dur-end", "2025-04-02T00:00:00"), + ("dur-after", "2025-04-02T00:00:01"), + ] + for name, ts in DURATION_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # D1D/2025-04-02T00:00:00 should cover 2025-04-01 inclusive to 2025-04-02 exclusive + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:D1D/2025-04-02T00:00:00", exit_code=0) + assert "dur-start" in out + assert "dur-mid" in out + assert "dur-end" not in out + assert "dur-after" not in out + + +# Test duration suffix (timestamp/duration): 1-day after midnight +def test_match_duration_suffix_day(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + DURATION_ARCHIVES = [ + ("dur2-before", "2025-03-31T23:59:59"), + ("dur2-start", "2025-04-01T00:00:00"), + ("dur2-mid", "2025-04-01T12:00:00"), + ("dur2-end", "2025-04-02T00:00:00"), + ] + for name, ts in DURATION_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # 2025-04-01T00:00:00/D1D should cover 2025-04-01 00:00 
inclusive to 2025-04-02 exclusive + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-04-01T00:00:00/D1D", exit_code=0) + assert "dur2-before" not in out + assert "dur2-start" in out + assert "dur2-mid" in out + assert "dur2-end" not in out + + +# Test duration prefix for 1-month +def test_match_duration_prefix_month(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + MONTH_DUR_ARCHIVES = [ + ("dpm-start", "2025-01-01T00:00:00"), + ("dpm-mid", "2025-01-15T12:00:00"), + ("dpm-end", "2025-02-01T00:00:00"), + ("dpm-after", "2025-02-01T00:00:01"), + ] + for name, ts in MONTH_DUR_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # D1M/2025-02-01T00:00:00 should cover entire January 2025 + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:D1M/2025-02-01T00:00:00", exit_code=0) + assert "dpm-start" in out + assert "dpm-mid" in out + assert "dpm-end" not in out + assert "dpm-after" not in out + + +# Test duration suffix for 1-week +def test_match_duration_suffix_week(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + WEEK_DUR_ARCHIVES = [ + ("dw-before", "2025-01-01T00:00:00"), + ("dw-start", "2025-01-08T00:00:00"), + ("dw-mid", "2025-01-10T12:00:00"), + ("dw-end", "2025-01-15T00:00:00"), + ] + for name, ts in WEEK_DUR_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # 2025-01-08T00:00:00/D1W should cover 2025-01-08 to 2025-01-15 + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-08T00:00:00/D1W", exit_code=0) + assert "dw-before" not in out + assert "dw-start" in out + assert "dw-mid" in out + assert "dw-end" not in out + + +# Test composite duration prefix (1 month + 1 day) +def test_match_duration_composite_prefix(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + COMP_ARCHIVES = [ + ("cp-start", 
"2025-01-01T00:00:00"), + ("cp-mid", "2025-02-01T00:00:00"), + ("cp-end", "2025-02-02T00:00:00"), + ("cp-after", "2025-02-02T00:00:01"), + ] + for name, ts in COMP_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # D1M1D/2025-02-02T00:00:00 should cover 2025-01-01 to 2025-02-02 + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:D1M1D/2025-02-02T00:00:00", exit_code=0) + assert "cp-start" in out + assert "cp-mid" in out + assert "cp-end" not in out + assert "cp-after" not in out + + +# Test duration suffix for hours (timestamp/D3h) +def test_match_duration_suffix_hours(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + HOUR_DUR_ARCHIVES = [ + ("dh-before", "2025-04-01T09:59:59"), + ("dh-start", "2025-04-01T10:00:00"), + ("dh-mid", "2025-04-01T11:30:00"), + ("dh-end", "2025-04-01T12:59:59"), + ("dh-after", "2025-04-01T13:00:00"), + ] + for name, ts in HOUR_DUR_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # 2025-04-01T10:00:00/D3h should cover 10:00 to 13:00 exclusive + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-04-01T10:00:00/D3h", exit_code=0) + assert "dh-before" not in out + assert "dh-start" in out + assert "dh-mid" in out + assert "dh-end" in out + assert "dh-after" not in out + + +# Test duration prefix for minutes (D30m/timestamp) +def test_match_duration_prefix_minutes(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + MIN_DUR_ARCHIVES = [ + ("dm-before", "2025-04-01T00:29:59"), + ("dm-start", "2025-04-01T00:30:00"), + ("dm-end", "2025-04-01T00:59:59"), + ("dm-after", "2025-04-01T01:00:00"), + ] + for name, ts in MIN_DUR_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # D30m/2025-04-01T01:00:00 should cover 00:30 to 01:00 exclusive + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:D30m/2025-04-01T01:00:00", exit_code=0) + assert "dm-before" not in 
out + assert "dm-start" in out + assert "dm-end" in out + assert "dm-after" not in out + + +# Test composite duration suffix (timestamp/D1h30m) +def test_match_duration_suffix_composite_h_m(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + COMP_HM_ARCHIVES = [ + ("chm-before", "2025-04-01T00:59:59"), + ("chm-start", "2025-04-01T01:00:00"), + ("chm-mid", "2025-04-01T02:15:00"), + ("chm-end", "2025-04-01T02:29:59"), + ("chm-after", "2025-04-01T02:30:00"), + ] + for name, ts in COMP_HM_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # 2025-04-01T01:00:00/D1h30m should cover 01:00 to 02:30 exclusive + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-04-01T01:00:00/D1h30m", exit_code=0) + assert "chm-before" not in out + assert "chm-start" in out + assert "chm-mid" in out + assert "chm-end" in out + assert "chm-after" not in out From 8e3f1e4e52289bb9c561a81ad8b48597d7c0c73c Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Fri, 25 Apr 2025 03:24:34 -0400 Subject: [PATCH 17/20] add support for keyword-based date intervals in archive date: matching; include tests for oldest/newest scenarios --- src/borg/helpers/time.py | 2 + src/borg/manifest.py | 39 +++++- .../archiver/match_archives_date_test.py | 122 ++++++++++++++++++ 3 files changed, 162 insertions(+), 1 deletion(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 527ae42a92..8a03923f7b 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -208,6 +208,8 @@ def exact_predicate(dt: datetime): def interval_predicate(start: datetime, end: datetime): start_utc = start.astimezone(timezone.utc) end_utc = end.astimezone(timezone.utc) + if start_utc > end_utc: + raise DatePatternError("start date must be before end date") return lambda ts: start_utc <= ts.astimezone(timezone.utc) < end_utc diff --git a/src/borg/manifest.py b/src/borg/manifest.py index d9ee7288bf..036c8a4bbf 100644 --- 
a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -206,11 +206,48 @@ def _matching_info_tuples(self, match_patterns, match_end, *, deleted=False): archive_infos = [x for x in archive_infos if x.host == wanted_host] elif match.startswith("date:"): wanted_date = match.removeprefix("date:") + # resolve keyword tokens for oldest, newest, now + parts = re.split(r"/(?![^\[]*\])", wanted_date, maxsplit=1) + orig_left = parts[0] + orig_right = parts[1] if len(parts) == 2 else None + + def resolve_kw(token): + if token == "oldest": + return min(x.ts for x in archive_infos).isoformat(timespec="seconds") + if token == "newest": + return max(x.ts for x in archive_infos).isoformat(timespec="seconds") + if token == "now": + return archive_ts_now().isoformat(timespec="seconds") + return token # token is not a keyword, return it as is + + left = resolve_kw(orig_left) + if orig_right is not None: + # interval keyword/keyword or keyword/timestamp or timestamp/keyword + right = resolve_kw(orig_right) + wanted_date = f"{left}/{right}" + elif orig_left in ("oldest", "newest", "now"): + # single keyword: exact match only for that timestamp + dt = parse_timestamp(left) + archive_infos = [x for x in archive_infos if x.ts == dt] + continue + else: + wanted_date = orig_left + # compile and filter try: pred = compile_date_pattern(wanted_date) except DatePatternError as e: raise CommandError(f"Invalid date pattern: {match} ({e})") - archive_infos = [x for x in archive_infos if pred(x.ts)] + # filter by predicate, but include newest timestamp if it was requested + # This is a bit of a hack to get around the fact that compile_date_pattern + # returns a predicate that is not inclusive of the end date. However, + # oldest/newest should intuitively include the newest archive, hence this hack. 
+ had_newest = "newest" in (orig_left, orig_right) + base_infos = archive_infos + if had_newest and base_infos: + newest_ts = max(x.ts for x in base_infos) + archive_infos = [x for x in archive_infos if pred(x.ts) or x.ts == newest_ts] + else: + archive_infos = [x for x in archive_infos if pred(x.ts)] else: # do a match on the name match = match.removeprefix("name:") # accept optional name: prefix regex = get_regex_from_pattern(match) diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index caa48bdd8b..59e592ff6c 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -602,3 +602,125 @@ def test_match_duration_suffix_composite_h_m(archivers, request): assert "chm-mid" in out assert "chm-end" in out assert "chm-after" not in out + + +# Keyword-based interval tests (oldest/newest) + + +def test_match_keyword_oldest_to_timestamp(archivers, request): + """ + Test 'oldest/TIMESTAMP' selects from the earliest archive up to the given timestamp (exclusive). + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + KEYWORD_ARCHIVES = [ + ("arch1", "2025-01-01T00:00:00"), + ("arch2", "2025-01-02T00:00:00"), + ("arch3", "2025-01-03T00:00:00"), + ] + for name, ts in KEYWORD_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # oldest is arch1; oldest/arch2 => interval [arch1, arch2) + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:oldest/2025-01-02T00:00:00", exit_code=0) + assert "arch1" in out + assert "arch2" not in out + assert "arch3" not in out + + +def test_match_keyword_timestamp_to_newest(archivers, request): + """ + Test 'TIMESTAMP/newest' selects from the given timestamp up to the latest archive (inclusive). 
+ """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + KEYWORD_ARCHIVES = [ + ("arch1", "2025-01-01T00:00:00"), + ("arch2", "2025-01-02T00:00:00"), + ("arch3", "2025-01-03T00:00:00"), + ] + for name, ts in KEYWORD_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + # newest is arch3; arch2/newest => interval [arch2, arch3) + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-02T00:00:00/newest", exit_code=0) + assert "arch1" not in out + assert "arch2" in out + assert "arch3" in out + + +def test_match_keyword_oldest_to_newest(archivers, request): + """ + Test 'oldest/newest' selects from the earliest archive up to the latest (exclusive). + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + KEYWORD_ARCHIVES = [ + ("arch1", "2025-01-01T00:00:00"), + ("arch2", "2025-01-02T00:00:00"), + ("arch3", "2025-01-03T00:00:00"), + ] + for name, ts in KEYWORD_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:oldest/newest", exit_code=0) + assert "arch1" in out + assert "arch2" in out + assert "arch3" in out + + +# Keyword permutations tests: oldest/now and now/newest + + +def test_match_keyword_oldest_to_now(archivers, request): + """ + Test 'oldest/now' selects all archives since the earliest up to now. 
+ """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + KEYWORD_ARCHIVES = [("k1", "2025-01-01T00:00:00"), ("k2", "2025-02-01T00:00:00"), ("k3", "2025-03-01T00:00:00")] + for name, ts in KEYWORD_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:oldest/now", exit_code=0) + # all created archives are before 'now', so should all match + assert "k1" in out + assert "k2" in out + assert "k3" in out + + +def test_match_keyword_now_to_newest_invalid(archivers, request): + """ + Test 'now/newest' should error, since newest will always be before 'now'. + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + KEYWORD_ARCHIVES = [("kA", "2025-01-01T00:00:00"), ("kB", "2025-02-01T00:00:00"), ("kC", "2025-03-01T00:00:00")] + for name, ts in KEYWORD_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + with pytest.raises(CommandError) as excinfo: + cmd(archiver, "repo-list", "-v", "--match-archives=date:now/newest") + + msg = str(excinfo.value) + assert "Invalid date pattern" in msg + + +def test_match_keyword_exact(archivers, request): + """ + Test date:oldest returns the oldest archive, and date:newest returns the newest archive. 
+ """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + KEYWORD_ARCHIVES = [("k1", "2025-01-01T00:00:00"), ("k2", "2025-02-01T00:00:00"), ("k3", "2025-03-01T00:00:00")] + for name, ts in KEYWORD_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:oldest", exit_code=0) + assert "k1" in out + assert "k2" not in out + assert "k3" not in out + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:newest", exit_code=0) + assert "k3" in out + assert "k2" not in out + assert "k1" not in out From 904853dd2f8701ed36118d61c92985c34f312072 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Fri, 25 Apr 2025 03:55:08 -0400 Subject: [PATCH 18/20] refactor time.py: rename internal functions for clarity and consistency --- src/borg/helpers/time.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 8a03923f7b..9c39f8b5b3 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -243,7 +243,7 @@ def parse_tz(tzstr: str): raise DatePatternError("invalid timezone format") -def _build_datetime_from_groups(gd: dict, tz: timezone) -> datetime: +def build_datetime_from_groups(gd: dict, tz: timezone) -> datetime: """ Construct a datetime from partial ISO groups, filling missing fields with the earliest valid value, and attaching tzinfo. @@ -268,7 +268,7 @@ def _build_datetime_from_groups(gd: dict, tz: timezone) -> datetime: return datetime(year, month, day, hour, minute, second, microsecond, tzinfo=tz) -pattern = r""" +MAIN_RE = r""" ^ (?: @(?P\d+) # unix epoch @@ -288,9 +288,7 @@ def _build_datetime_from_groups(gd: dict, tz: timezone) -> datetime: """ -# -# duration parsing for strings like D1Y2M3W4D5h6m7s -_DURATION_RE = re.compile( +DURATION_RE = re.compile( r"^D" r"(?:(?P\d+)Y)?" r"(?:(?P\d+)M)?" 
@@ -303,11 +301,11 @@ def _build_datetime_from_groups(gd: dict, tz: timezone) -> datetime: ) -def _parse_duration(expr: str) -> tuple[int, timedelta]: +def parse_duration(expr: str) -> tuple[int, timedelta]: """ Parse D… duration into (months, timedelta of days/weeks/hours/minutes/seconds). """ - m = _DURATION_RE.match(expr) + m = DURATION_RE.match(expr) if not m: raise DatePatternError(f"invalid duration: {expr!r}") gd = m.groupdict(default="0") @@ -320,14 +318,14 @@ def _parse_duration(expr: str) -> tuple[int, timedelta]: return total_months, td -def _parse_to_interval(expr: str) -> tuple[datetime, datetime]: +def parse_to_interval(expr: str) -> tuple[datetime, datetime]: """ Parse a possibly incomplete ISO-8601 timestamp (with optional timezone) into a start and end datetime representing the full interval. """ # note: we match the same pattern that supports wildcards, but at the point this function is called, # we know that the pattern contains no wildcards. This is to allow us to reuse the same regex. 
- m = re.match(pattern, expr, re.VERBOSE) + m = re.match(MAIN_RE, expr, re.VERBOSE) if not m: raise DatePatternError(f"unrecognised date: {expr!r}") gd = m.groupdict() @@ -340,7 +338,7 @@ def _parse_to_interval(expr: str) -> tuple[datetime, datetime]: tz = parse_tz(gd["tz"]) # build the start moment - start = _build_datetime_from_groups(gd, tz) + start = build_datetime_from_groups(gd, tz) # determine the end moment based on the highest precision present if gd["second"]: # fractional or whole second precision @@ -387,23 +385,23 @@ def compile_date_pattern(expr: str): # duration / timestamp if left.startswith("D") and not right.startswith("D"): # months are handled separately via offset_n_months() because month lengths vary - months, td = _parse_duration(left) - end_dt, _ = _parse_to_interval(right) + months, td = parse_duration(left) + end_dt, _ = parse_to_interval(right) start_dt = offset_n_months(end_dt, -months) - td return interval_predicate(start_dt, end_dt) # timestamp / duration if right.startswith("D") and not left.startswith("D"): - start_dt, _ = _parse_to_interval(left) + start_dt, _ = parse_to_interval(left) # months are handled separately via offset_n_months() because month lengths vary - months, td = _parse_duration(right) + months, td = parse_duration(right) mid_dt = offset_n_months(start_dt, months) end_dt = mid_dt + td return interval_predicate(start_dt, end_dt) # timestamp / timestamp - start_left, _ = _parse_to_interval(left) - start_right, _ = _parse_to_interval(right) + start_left, _ = parse_to_interval(left) + start_right, _ = parse_to_interval(right) return interval_predicate(start_left, start_right) - m = re.match(pattern, expr, re.VERBOSE) + m = re.match(MAIN_RE, expr, re.VERBOSE) if not m: raise DatePatternError(f"unrecognised date: {expr!r}") @@ -439,9 +437,9 @@ def wildcard_pred(ts): # 3) fraction‐precision exact match if gd["second"] and "." 
in gd["second"]: - dt = _build_datetime_from_groups(gd, tz) + dt = build_datetime_from_groups(gd, tz) return exact_predicate(dt) # 4) remaining precisions: use _parse_to_interval to get start/end - start, end = _parse_to_interval(expr) + start, end = parse_to_interval(expr) return interval_predicate(start, end) From 6032c4a058eb5352252f06d7e223a506ae556aa8 Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Fri, 25 Apr 2025 04:27:21 -0400 Subject: [PATCH 19/20] add support for ISO week-date and ordinal-date matching in date: archive patterns; include tests --- src/borg/helpers/time.py | 69 +++++++++---- .../archiver/match_archives_date_test.py | 96 +++++++++++++++++++ 2 files changed, 149 insertions(+), 16 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 9c39f8b5b3..7026141092 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -1,6 +1,6 @@ import os import re -from datetime import datetime, timezone, timedelta +from datetime import datetime, timezone, timedelta, date from zoneinfo import ZoneInfo @@ -268,26 +268,39 @@ def build_datetime_from_groups(gd: dict, tz: timezone) -> datetime: return datetime(year, month, day, hour, minute, second, microsecond, tzinfo=tz) +# Regex for ISO-8601 timestamps: +# Accepts both 'T' and space as separators between date and time per RFC-3339/IXDTF. MAIN_RE = r""" ^ (?: - @(?P\d+) # unix epoch - | (?P \d{4}|\*) # year (YYYY or *) - (?:-(?P \d{2}|\*) # month (MM or *) - (?:-(?P \d{2}|\*) # day (DD or *) - (?:[T ](?P \d{2}|\*) # hour (HH or *) - (?::(?P\d{2}|\*) # minute (MM or *) - (?::(?P\d{2}(?:\.\d+)?|\*))? # second (SS or SS.fff or *) + # ISO week date: YYYY-Www or YYYY-Www-D + (?P\d{4})-W(?P\d{2})(?:-(?P\d))? 
+ | # Ordinal date: YYYY-DDD + (?P\d{4})-(?P\d{3}) + | # Unix epoch + @(?P\d+) + | # Calendar date + (?P\d{4}|\*) # year (YYYY or *) + (?:- # start month/day/time block + (?P\d{2}|\*) # month (MM or *) + (?:- # start day/time block + (?P\d{2}|\*) # day (DD or *) + (?:[T ] # date/time separator (T or space) + (?P\d{2}|\*) # hour (HH or *) + (?: + :(?P\d{2}|\*) # minute (MM or *) + (?: + :(?P\d{2}(?:\.\d+)?|\*) # second (SS or SS.fff or *) + )? + )? )? - )? - )? + )? )? ) - (?PZ|[+\-]\d\d:\d\d|\[[^\]]+\])? # optional timezone suffix + (?PZ|[+\-]\d\d:\d\d|\[[^\]]+\])? # optional timezone suffix (Z, ±HH:MM or [Zone]) $ """ - DURATION_RE = re.compile( r"^D" r"(?:(?P\d+)Y)?" @@ -328,7 +341,33 @@ def parse_to_interval(expr: str) -> tuple[datetime, datetime]: m = re.match(MAIN_RE, expr, re.VERBOSE) if not m: raise DatePatternError(f"unrecognised date: {expr!r}") + gd = m.groupdict() + tz = parse_tz(gd["tz"]) + # ISO week-date support (YYYY-Www or YYYY-Www-D) + if gd.get("isoweek_year"): + y = int(gd["isoweek_year"]) + w = int(gd["isoweek_week"]) + d = int(gd.get("isoweek_day") or 1) + # fromisocalendar returns a date + iso_date = date.fromisocalendar(y, w, d) + start = datetime(iso_date.year, iso_date.month, iso_date.day, tzinfo=tz) + if gd.get("isoweek_day"): + # if we have a day, we want to end at the next day + end = start + timedelta(days=1) + else: + # match the whole week + end = start + timedelta(weeks=1) + return start, end + + # Ordinal date support (YYYY-DDD) + if gd.get("ordinal_year"): + y = int(gd["ordinal_year"]) + doy = int(gd["ordinal_day"]) + start = datetime(y, 1, 1, tzinfo=tz) + timedelta(days=doy - 1) + end = start + timedelta(days=1) + return start, end + # handle unix-epoch forms directly if gd["epoch"]: epoch = int(gd["epoch"]) @@ -336,7 +375,6 @@ def parse_to_interval(expr: str) -> tuple[datetime, datetime]: end = start + timedelta(seconds=1) return start, end - tz = parse_tz(gd["tz"]) # build the start moment start = build_datetime_from_groups(gd, 
tz) # determine the end moment based on the highest precision present @@ -365,9 +403,8 @@ def compile_date_pattern(expr: str): YYYY YYYY-MM YYYY-MM-DD - YYYY-MM-DDTHH - YYYY-MM-DDTHH:MM - YYYY-MM-DDTHH:MM:SS + YYYY-MM-DDTHH (with 'T') or YYYY-MM-DD HH:MM (with space) + YYYY-MM-DD HH:MM:SS (RFC-3339 space-separated) Unix epoch (@123456789) …with an optional trailing timezone (Z or ±HH:MM or [Region/City]). Additionally supports wildcards (`*`) in year, month, or day (or any combination), e.g.: diff --git a/src/borg/testsuite/archiver/match_archives_date_test.py b/src/borg/testsuite/archiver/match_archives_date_test.py index 59e592ff6c..3548fc9d02 100644 --- a/src/borg/testsuite/archiver/match_archives_date_test.py +++ b/src/borg/testsuite/archiver/match_archives_date_test.py @@ -724,3 +724,99 @@ def test_match_keyword_exact(archivers, request): assert "k3" in out assert "k2" not in out assert "k1" not in out + + +# ISO week-date and ordinal-date support tests + + +def test_match_iso_week(archivers, request): + """ + Test matching archives by ISO week number (YYYY-Www). + Week 10 of 2025 runs from 2025-03-03 to 2025-03-09 inclusive. + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + WEEK10_ARCHIVES = [ + ("iso-week-before", "2025-03-02T23:59:59"), + ("iso-week-start", "2025-03-03T00:00:00"), + ("iso-week-mid", "2025-03-05T12:00:00"), + ("iso-week-end", "2025-03-09T23:59:59"), + ("iso-week-after", "2025-03-10T00:00:00"), + ] + for name, ts in WEEK10_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-W10", exit_code=0) + assert "iso-week-before" not in out + assert "iso-week-start" in out + assert "iso-week-mid" in out + assert "iso-week-end" in out + assert "iso-week-after" not in out + + +def test_match_iso_weekday(archivers, request): + """ + Test matching archives by ISO week and weekday (YYYY-Www-D). 
+ Week 10 Day 3 of 2025 is Wednesday 2025-03-05. + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + WEEKDAY_ARCHIVES = [ + ("iso-wed", "2025-03-05T08:00:00"), + ("iso-tue", "2025-03-04T12:00:00"), + ("iso-thu", "2025-03-06T18:00:00"), + ] + for name, ts in WEEKDAY_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-W10-3", exit_code=0) + assert "iso-wed" in out + assert "iso-tue" not in out + assert "iso-thu" not in out + + +def test_match_ordinal_date(archivers, request): + """ + Test matching archives by ordinal day of year (YYYY-DDD). + Day 032 of 2025 is 2025-02-01. + """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + ORDINAL_ARCHIVES = [ + ("ord-jan31", "2025-01-31T23:59:59"), # day 031 + ("ord-feb1", "2025-02-01T00:00:00"), # day 032 + ("ord-feb1-end", "2025-02-01T23:59:59"), + ("ord-feb2", "2025-02-02T00:00:00"), # day 033 + ] + for name, ts in ORDINAL_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-032", exit_code=0) + assert "ord-jan31" not in out + assert "ord-feb1" in out + assert "ord-feb1-end" in out + assert "ord-feb2" not in out + + +def test_match_rfc3339(archivers, request): + """ + Test matching archives by RFC 3339 date format (use ' ' as delimiter rather than 'T'). 
+ """ + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + RFC_ARCHIVES = [ + ("rfc-start", "2025-01-01T00:00:00Z"), + ("rfc-mid", "2025-01-01T12:00:00Z"), + ("rfc-max", "2025-01-01T23:59:59Z"), + ("rfc-after", "2025-01-02T00:00:00Z"), + ] + for name, ts in RFC_ARCHIVES: + create_src_archive(archiver, name, ts=ts) + + out = cmd( + archiver, "repo-list", "-v", "--match-archives=date:2025-01-01 00:00:00Z/2025-01-02 00:00:00Z", exit_code=0 + ) + assert "rfc-start" in out + assert "rfc-mid" in out + assert "rfc-max" in out + assert "rfc-after" not in out From 9cb5e5f1b9bc03c1f5f5e8e2b082273f2c43335c Mon Sep 17 00:00:00 2001 From: Charlie Herz Date: Fri, 25 Apr 2025 04:39:25 -0400 Subject: [PATCH 20/20] enhance compile_date_pattern docstring: clarify TIMESTAMP and DURATION formats --- src/borg/helpers/time.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 7026141092..2568d3f76d 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -399,7 +399,7 @@ def parse_to_interval(expr: str) -> tuple[datetime, datetime]: def compile_date_pattern(expr: str): """ - Accepts any of: + Accepts any TIMESTAMP of: YYYY YYYY-MM YYYY-MM-DD @@ -407,11 +407,19 @@ def compile_date_pattern(expr: str): YYYY-MM-DD HH:MM:SS (RFC-3339 space-separated) Unix epoch (@123456789) …with an optional trailing timezone (Z or ±HH:MM or [Region/City]). + + Also supports: + TIMESTAMP/TIMESTAMP + TIMESTAMP/DURATION + DURATION/TIMESTAMP. + DURATION is a string of the form: + D[years]Y[months]M[weeks]W[days]D[hours]h[minutes]m[seconds]s (any combination). + Additionally supports wildcards (`*`) in year, month, or day (or any combination), e.g.: "*-04-22" # April 22 of any year "2025-*-01" # 1st day of any month in 2025 "*-*-15" # 15th of every month, any year - Returns a predicate that is True for timestamps in that interval. 
+ Returns a predicate that is True for timestamps in that interval (start inclusive, end exclusive). """ expr = expr.strip()