Skip to main content

kopiur_api/
retention.rs

1//! Grandfather-father-son (GFS) retention selection (ADR §4.4).
2//!
3//! `BackupConfig.spec.retention` is the **only** successful-retention driver
4//! (SKILL "Retention is GFS-only"). The operator periodically runs this selection
5//! over the `Backup` CRs for one `(identity, source)` tuple and deletes the CRs
6//! that fall outside the kept set; each deleted CR's `deletionPolicy` then governs
7//! the snapshot (§4.5). This module is the pure selection kernel — no kube types,
8//! no clock — so it's unit-testable with lightweight fakes.
9//!
10//! ## Algorithm (ADR-0001 §4.4, steps 2–4)
11//!
12//! 1. Sort candidates by end time, newest first.
13//! 2. Apply buckets in order: `keepLatest`, `keepHourly`, `keepDaily`,
14//!    `keepWeekly`, `keepMonthly`, `keepAnnual`.
15//!    - `keepLatest: N` keeps the N newest backups outright.
16//!    - Each time bucket keeps the **most recent** backup within each distinct
17//!      period (hour / day / ISO-week / month / year), up to its count `N`,
18//!      walking newest→oldest.
19//! 3. A backup kept by **any** bucket survives (union). Everything else is deleted.
20//!
21//! This is deliberately *not* a flat count: a backup that is the newest of its
22//! year is held by `keepAnnual` even if hundreds of newer dailies exist — the
23//! exact case a flat cap would silently drop (ADR §4.4 "Why not flat-count").
24//!
25//! ## Empty-policy semantics
26//!
27//! An all-`None` [`Retention`] selects **no** buckets, so the kept set is empty and
28//! every backup is marked for deletion. The caller (controller) is responsible for
29//! only invoking GFS when a retention policy is actually configured; this function
30//! reports faithfully what the given policy implies. This is documented and tested.
31
32use crate::common::Retention;
33use chrono::{DateTime, Datelike, Utc};
34use std::collections::BTreeSet;
35
36/// Anything that can stand in for a `Backup` during retention selection. Kept tiny
37/// so tests use trivial fakes instead of constructing full `Backup` CRs.
38pub trait BackupLike {
39    /// The snapshot's completion time — the GFS bucketing key (ADR §4.4 step 2).
40    fn end_time(&self) -> DateTime<Utc>;
41    /// A stable identifier (kopia snapshot ID or CR name) used in the result sets.
42    fn id(&self) -> &str;
43}
44
45/// The outcome of a GFS selection: which ids to keep and which to delete. Both are
46/// returned explicitly so callers never have to recompute the complement.
47#[derive(Debug, Clone, PartialEq, Eq, Default)]
48pub struct KeptSet {
49    /// Ids retained by at least one bucket.
50    pub keep: Vec<String>,
51    /// Ids selected by no bucket — eligible for pruning.
52    pub delete: Vec<String>,
53}
54
55/// Calendar period a timestamp falls in, used to deduplicate "one per period."
56/// Distinct values mean distinct periods; comparing these is how each bucket keeps
57/// the newest entry per period.
58fn hour_key(t: DateTime<Utc>) -> (i32, u32, u32) {
59    (t.year(), t.ordinal(), t.hour())
60}
61fn day_key(t: DateTime<Utc>) -> (i32, u32) {
62    (t.year(), t.ordinal())
63}
64fn week_key(t: DateTime<Utc>) -> (i32, u32) {
65    let iso = t.iso_week();
66    (iso.year(), iso.week())
67}
68fn month_key(t: DateTime<Utc>) -> (i32, u32) {
69    (t.year(), t.month())
70}
71fn year_key(t: DateTime<Utc>) -> i32 {
72    t.year()
73}
74
75use chrono::Timelike;
76
77/// Walk `sorted` (newest→oldest) and collect the index of the newest entry in each
78/// distinct period, stopping once `count` periods have been kept.
79fn keep_per_period<K, F>(
80    sorted: &[usize],
81    times: &[DateTime<Utc>],
82    count: usize,
83    key: F,
84) -> Vec<usize>
85where
86    K: Ord,
87    F: Fn(DateTime<Utc>) -> K,
88{
89    let mut kept = Vec::new();
90    let mut seen: BTreeSet<K> = BTreeSet::new();
91    for &idx in sorted {
92        if kept.len() >= count {
93            break;
94        }
95        let k = key(times[idx]);
96        if seen.insert(k) {
97            // First (= newest, since sorted desc) entry in this period.
98            kept.push(idx);
99        }
100    }
101    kept
102}
103
104/// Select the GFS-kept set from `backups` under `policy` (ADR §4.4).
105///
106/// Returns a [`KeptSet`] partitioning every input id into `keep`/`delete`. Input
107/// order is irrelevant; `keep` is returned newest-first, `delete` newest-first too.
108/// Ties on `end_time` are broken by id for determinism.
109///
110/// ```
111/// use chrono::{DateTime, TimeZone, Utc};
112/// use kopiur_api::{select_kept, BackupLike};
113/// use kopiur_api::common::Retention;
114///
115/// // A trivial fake honoring BackupLike — no kube CRs needed for selection.
116/// struct Snap { id: String, end: DateTime<Utc> }
117/// impl BackupLike for Snap {
118///     fn end_time(&self) -> DateTime<Utc> { self.end }
119///     fn id(&self) -> &str { &self.id }
120/// }
121/// let day = |d: u32| Utc.with_ymd_and_hms(2026, 5, d, 2, 0, 0).single().unwrap();
122/// let snaps = vec![
123///     Snap { id: "d24".into(), end: day(24) },
124///     Snap { id: "d23".into(), end: day(23) },
125///     Snap { id: "d22".into(), end: day(22) },
126/// ];
127///
128/// // keepDaily: 2 — keep the newest per day for the 2 newest days; prune the rest.
129/// let policy: Retention =
130///     serde_json::from_value(serde_json::json!({ "keepDaily": 2 })).unwrap();
131/// let kept = select_kept(&snaps, &policy);
132/// assert_eq!(kept.keep, vec!["d24", "d23"]); // newest-first
133/// assert_eq!(kept.delete, vec!["d22"]);
134/// ```
135pub fn select_kept<T: BackupLike>(backups: &[T], policy: &Retention) -> KeptSet {
136    if backups.is_empty() {
137        return KeptSet::default();
138    }
139
140    let times: Vec<DateTime<Utc>> = backups.iter().map(|b| b.end_time()).collect();
141
142    // Indices sorted by end_time descending; id as a deterministic tiebreaker.
143    let mut order: Vec<usize> = (0..backups.len()).collect();
144    order.sort_by(|&a, &b| {
145        times[b]
146            .cmp(&times[a])
147            .then_with(|| backups[a].id().cmp(backups[b].id()))
148    });
149
150    let mut keep_idx: BTreeSet<usize> = BTreeSet::new();
151
152    // keepLatest: the N newest outright.
153    if let Some(n) = policy.keep_latest {
154        for &idx in order.iter().take(n as usize) {
155            keep_idx.insert(idx);
156        }
157    }
158    if let Some(n) = policy.keep_hourly {
159        keep_idx.extend(keep_per_period(&order, &times, n as usize, hour_key));
160    }
161    if let Some(n) = policy.keep_daily {
162        keep_idx.extend(keep_per_period(&order, &times, n as usize, day_key));
163    }
164    if let Some(n) = policy.keep_weekly {
165        keep_idx.extend(keep_per_period(&order, &times, n as usize, week_key));
166    }
167    if let Some(n) = policy.keep_monthly {
168        keep_idx.extend(keep_per_period(&order, &times, n as usize, month_key));
169    }
170    if let Some(n) = policy.keep_annual {
171        keep_idx.extend(keep_per_period(&order, &times, n as usize, year_key));
172    }
173
174    let mut keep = Vec::new();
175    let mut delete = Vec::new();
176    for &idx in &order {
177        if keep_idx.contains(&idx) {
178            keep.push(backups[idx].id().to_string());
179        } else {
180            delete.push(backups[idx].id().to_string());
181        }
182    }
183    KeptSet { keep, delete }
184}
185
186#[cfg(test)]
187mod tests {
188    use super::*;
189    use chrono::TimeZone;
190
191    /// Minimal fake honoring `BackupLike` — no kube CRs in retention tests.
192    struct Fake {
193        id: String,
194        end: DateTime<Utc>,
195    }
196    impl BackupLike for Fake {
197        fn end_time(&self) -> DateTime<Utc> {
198            self.end
199        }
200        fn id(&self) -> &str {
201            &self.id
202        }
203    }
204
205    fn at(y: i32, mo: u32, d: u32, h: u32, mi: u32) -> DateTime<Utc> {
206        Utc.with_ymd_and_hms(y, mo, d, h, mi, 0).single().unwrap()
207    }
208    fn fake(id: &str, t: DateTime<Utc>) -> Fake {
209        Fake {
210            id: id.into(),
211            end: t,
212        }
213    }
214
215    fn policy(
216        latest: Option<u32>,
217        hourly: Option<u32>,
218        daily: Option<u32>,
219        weekly: Option<u32>,
220        monthly: Option<u32>,
221        annual: Option<u32>,
222    ) -> Retention {
223        Retention {
224            keep_latest: latest,
225            keep_hourly: hourly,
226            keep_daily: daily,
227            keep_weekly: weekly,
228            keep_monthly: monthly,
229            keep_annual: annual,
230        }
231    }
232
233    fn as_set(v: &[String]) -> BTreeSet<&str> {
234        v.iter().map(String::as_str).collect()
235    }
236
237    #[test]
238    fn empty_input_yields_empty_sets() {
239        let got = select_kept::<Fake>(&[], &policy(Some(5), None, None, None, None, None));
240        assert!(got.keep.is_empty());
241        assert!(got.delete.is_empty());
242    }
243
244    #[test]
245    fn empty_policy_keeps_nothing() {
246        // All-None policy → no buckets selected → everything deleted.
247        let backups = vec![
248            fake("a", at(2026, 5, 24, 2, 0)),
249            fake("b", at(2026, 5, 23, 2, 0)),
250        ];
251        let got = select_kept(&backups, &Retention::default());
252        assert!(got.keep.is_empty(), "empty policy keeps nothing");
253        assert_eq!(as_set(&got.delete), ["a", "b"].into_iter().collect());
254    }
255
256    #[test]
257    fn keep_latest_keeps_n_newest() {
258        let backups = vec![
259            fake("d1", at(2026, 5, 24, 2, 0)),
260            fake("d2", at(2026, 5, 23, 2, 0)),
261            fake("d3", at(2026, 5, 22, 2, 0)),
262            fake("d4", at(2026, 5, 21, 2, 0)),
263        ];
264        let got = select_kept(&backups, &policy(Some(2), None, None, None, None, None));
265        assert_eq!(as_set(&got.keep), ["d1", "d2"].into_iter().collect());
266        assert_eq!(as_set(&got.delete), ["d3", "d4"].into_iter().collect());
267    }
268
269    #[test]
270    fn keep_daily_keeps_one_newest_per_day() {
271        // Three backups on day 24 (keep the 02:00 one), one each on 23 and 22.
272        let backups = vec![
273            fake("a", at(2026, 5, 24, 0, 5)),
274            fake("b", at(2026, 5, 24, 1, 30)),
275            fake("c", at(2026, 5, 24, 2, 0)), // newest on the 24th
276            fake("d", at(2026, 5, 23, 2, 0)),
277            fake("e", at(2026, 5, 22, 2, 0)),
278        ];
279        let got = select_kept(&backups, &policy(None, None, Some(14), None, None, None));
280        // One per distinct day, newest within the day.
281        assert_eq!(as_set(&got.keep), ["c", "d", "e"].into_iter().collect());
282        assert_eq!(as_set(&got.delete), ["a", "b"].into_iter().collect());
283    }
284
285    #[test]
286    fn keep_daily_count_caps_number_of_days() {
287        let backups = vec![
288            fake("d24", at(2026, 5, 24, 2, 0)),
289            fake("d23", at(2026, 5, 23, 2, 0)),
290            fake("d22", at(2026, 5, 22, 2, 0)),
291            fake("d21", at(2026, 5, 21, 2, 0)),
292        ];
293        let got = select_kept(&backups, &policy(None, None, Some(2), None, None, None));
294        // Only the 2 newest days kept.
295        assert_eq!(as_set(&got.keep), ["d24", "d23"].into_iter().collect());
296        assert_eq!(as_set(&got.delete), ["d22", "d21"].into_iter().collect());
297    }
298
299    #[test]
300    fn keep_latest_unions_with_keep_daily() {
301        // Two backups same day: keepDaily keeps the newest (c), keepLatest:2 also
302        // pulls in the second-newest overall (b) even though it shares c's day.
303        let backups = vec![
304            fake("c", at(2026, 5, 24, 6, 0)),
305            fake("b", at(2026, 5, 24, 5, 0)),
306            fake("a", at(2026, 5, 23, 5, 0)),
307        ];
308        let got = select_kept(&backups, &policy(Some(2), None, Some(7), None, None, None));
309        // c kept by both; b kept by keepLatest; a kept by keepDaily (day 23).
310        assert_eq!(as_set(&got.keep), ["a", "b", "c"].into_iter().collect());
311        assert!(got.delete.is_empty());
312    }
313
314    #[test]
315    fn annual_snapshot_survives_flood_of_newer_dailies() {
316        // The §4.4 "why not flat-count" case. One old end-of-2024 snapshot plus a
317        // pile of 2026 dailies. keepDaily:3 + keepAnnual:2 must retain the 2024
318        // snapshot as the newest-of-its-year even though it's far down the list.
319        let mut backups = vec![fake("y2024", at(2024, 12, 31, 23, 0))];
320        for d in 1..=10u32 {
321            backups.push(fake(&format!("y2026-{d:02}"), at(2026, 5, d, 2, 0)));
322        }
323        // Newest 2026 daily is day 10; year 2026's representative is day 10,
324        // year 2024's representative is y2024.
325        let got = select_kept(&backups, &policy(None, None, Some(3), None, None, Some(2)));
326        let keep = as_set(&got.keep);
327        assert!(
328            keep.contains("y2024"),
329            "annual snapshot must not be dropped by daily flood; kept={keep:?}"
330        );
331        // keepDaily:3 keeps the 3 newest days of 2026.
332        assert!(keep.contains("y2026-10"));
333        assert!(keep.contains("y2026-09"));
334        assert!(keep.contains("y2026-08"));
335        // Older 2026 dailies not covered by any bucket are deleted.
336        assert!(got.delete.contains(&"y2026-01".to_string()));
337    }
338
339    #[test]
340    fn monthly_and_weekly_pick_newest_in_period() {
341        let backups = vec![
342            fake("may-late", at(2026, 5, 28, 2, 0)),
343            fake("may-early", at(2026, 5, 2, 2, 0)),
344            fake("apr", at(2026, 4, 15, 2, 0)),
345            fake("mar", at(2026, 3, 15, 2, 0)),
346        ];
347        let got = select_kept(&backups, &policy(None, None, None, None, Some(2), None));
348        // keepMonthly:2 → newest of May (may-late) and newest of April (apr).
349        assert_eq!(as_set(&got.keep), ["may-late", "apr"].into_iter().collect());
350    }
351
352    #[test]
353    fn every_backup_kept_by_any_bucket_survives() {
354        // Mixed policy; assert the kept set is exactly the union and no kept id
355        // appears in delete.
356        let backups = vec![
357            fake("now", at(2026, 5, 24, 12, 0)),
358            fake("earlier-today", at(2026, 5, 24, 1, 0)),
359            fake("yesterday", at(2026, 5, 23, 1, 0)),
360            fake("last-week", at(2026, 5, 16, 1, 0)),
361        ];
362        let got = select_kept(
363            &backups,
364            &policy(Some(1), None, Some(2), Some(2), None, None),
365        );
366        let keep = as_set(&got.keep);
367        let del = as_set(&got.delete);
368        for id in keep.iter() {
369            assert!(!del.contains(id), "id {id} in both keep and delete");
370        }
371        // Every input is accounted for exactly once.
372        assert_eq!(keep.len() + del.len(), 4);
373    }
374}