// kopiur_api/maintenance.rs

//! The `Maintenance` CRD — schedules `kopia maintenance run` quick + full and
//! manages the ownership lease. At most one per repository. ADR-0001 §3.7.
3
4use crate::common::{CronSpec, FailurePolicy, MoverSpec, RepositoryRef};
5use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
6use kube::CustomResource;
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9
10/// The schedule an operator-managed `Maintenance` uses when the owning
11/// `Repository`/`ClusterRepository` does not override it: quick every 6h (30m
12/// jitter), full daily at 03:00 (1h jitter). Shared by the webhook (defaulting),
13/// the controller (projection), and tests, so the default lives in exactly one
14/// place. ADR §3.7.
15///
16/// ```
17/// use kopiur_api::default_maintenance_schedule;
18///
19/// let s = default_maintenance_schedule();
20/// assert_eq!(s.quick.cron, "0 */6 * * *");
21/// assert_eq!(s.quick.jitter.as_deref(), Some("30m"));
22/// assert_eq!(s.full.cron, "0 3 * * *");
23/// assert_eq!(s.full.jitter.as_deref(), Some("1h"));
24/// assert!(s.timezone.is_none());
25/// ```
26pub fn default_maintenance_schedule() -> MaintenanceSchedule {
27    MaintenanceSchedule {
28        quick: CronSpec {
29            cron: "0 */6 * * *".to_string(),
30            jitter: Some("30m".to_string()),
31        },
32        full: CronSpec {
33            cron: "0 3 * * *".to_string(),
34            jitter: Some("1h".to_string()),
35        },
36        timezone: None,
37    }
38}
39
40/// Maintenance schedule + ownership lease for one `Repository`/`ClusterRepository`. ADR §3.7.
41///
42/// Not `Eq`: `mover` transitively embeds k8s-openapi types.
43#[derive(CustomResource, Serialize, Deserialize, Clone, Debug, PartialEq, JsonSchema)]
44#[kube(
45    group = "kopiur.home-operations.com",
46    version = "v1alpha1",
47    kind = "Maintenance",
48    namespaced,
49    status = "MaintenanceStatus",
50    shortname = "kopiamaint",
51    category = "kopiur",
52    printcolumn = r#"{"name":"Repository","type":"string","jsonPath":".spec.repository.name"}"#,
53    printcolumn = r#"{"name":"Owner","type":"string","jsonPath":".status.ownership.owner"}"#,
54    printcolumn = r#"{"name":"Age","type":"date","jsonPath":".metadata.creationTimestamp"}"#
55)]
56#[serde(rename_all = "camelCase")]
57pub struct MaintenanceSpec {
58    /// Discriminated reference to a `Repository` or `ClusterRepository`. ADR §3.2.
59    pub repository: RepositoryRef,
60    /// Quick + full cron schedules (with a shared timezone) for `kopia
61    /// maintenance run`. ADR §3.7.
62    pub schedule: MaintenanceSchedule,
63    /// Ownership-lease configuration; at most one `Maintenance` may own a
64    /// repository at a time. ADR §3.7.
65    pub ownership: Ownership,
66    /// Mover (Job pod) overrides for the maintenance run — resources, scheduling,
67    /// etc. Object-store repositories typically tune this. ADR §3.7.
68    #[serde(default, skip_serializing_if = "Option::is_none")]
69    pub mover: Option<MoverSpec>,
70    /// How a failed maintenance run is retried/bounded (backoff, deadline). ADR §3.7.
71    #[serde(default, skip_serializing_if = "Option::is_none")]
72    pub failure_policy: Option<FailurePolicy>,
73}
74
75/// Quick + full cron schedules plus a shared timezone. ADR §3.7.
76#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema)]
77#[serde(rename_all = "camelCase")]
78pub struct MaintenanceSchedule {
79    /// Cron + jitter for `kopia maintenance run` (quick = cheap index/log work).
80    pub quick: CronSpec,
81    /// Cron + jitter for `kopia maintenance run --full` (content reclamation).
82    pub full: CronSpec,
83    /// IANA timezone both crons are evaluated in; absent means controller default.
84    #[serde(default, skip_serializing_if = "Option::is_none")]
85    pub timezone: Option<String>,
86}
87
88/// Ownership-lease configuration. At most one `Maintenance` may own a repository. ADR §3.7.
89#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema)]
90#[serde(rename_all = "camelCase")]
91pub struct Ownership {
92    /// Stable lease holder identity (e.g. `kopia-operator/nas-primary`). Two
93    /// `Maintenance` CRs claiming the same repository compare this. ADR §3.7.
94    pub owner: String,
95    /// What to do if the lease is already held by a different `owner`. ADR §3.7.
96    #[serde(default)]
97    pub takeover_policy: TakeoverPolicy,
98}
99
100/// What to do when another owner already holds the lease. Closed enum. ADR §3.7.
101///
102/// ```
103/// use kopiur_api::TakeoverPolicy;
104///
105/// // The safest default: never seize a lease another owner holds.
106/// assert_eq!(TakeoverPolicy::default(), TakeoverPolicy::Never);
107/// assert_eq!(
108///     serde_json::to_value(TakeoverPolicy::PromptCondition).unwrap(),
109///     serde_json::json!("PromptCondition"),
110/// );
111/// ```
112#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default, JsonSchema)]
113pub enum TakeoverPolicy {
114    /// Never take over an existing lease (default — safest).
115    #[default]
116    Never,
117    /// Surface a condition prompting an operator to decide.
118    PromptCondition,
119    /// Forcibly claim the lease.
120    Force,
121}
122
123/// Inline maintenance control on a `Repository`/`ClusterRepository`
124/// (`spec.maintenance`). ADR §3.1/§3.7.
125///
126/// Maintenance is **default-managed**: when this is absent (or `enabled: true`),
127/// the repository reconciler projects it into an *owned* `Maintenance` child CR,
128/// so kopia storage is reclaimed without the user remembering to author a
129/// separate `Maintenance`. The reconciler honors an externally-authored
130/// `Maintenance` referencing the repository regardless of `enabled` — setting
131/// `enabled: false` only tells the operator not to create its own; it never
132/// deletes, ignores, or warns about a user-managed one.
133///
134/// Not `Eq`: `mover` transitively embeds k8s-openapi types.
135#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, JsonSchema)]
136#[serde(rename_all = "camelCase")]
137pub struct RepositoryMaintenanceSpec {
138    /// Whether the operator manages a `Maintenance` CR for this repository.
139    /// Defaults to `true` (default-on). When `false`, the operator does not
140    /// create or manage one — but an externally-authored `Maintenance` is still
141    /// honored.
142    #[serde(default = "crate::common::default_true")]
143    pub enabled: bool,
144    /// Schedule override. When absent, the operator uses
145    /// [`default_maintenance_schedule`] (quick 6h / full daily).
146    #[serde(default, skip_serializing_if = "Option::is_none")]
147    pub schedule: Option<MaintenanceSchedule>,
148    /// Mover overrides for the managed `Maintenance` (object-store repositories).
149    #[serde(default, skip_serializing_if = "Option::is_none")]
150    pub mover: Option<MoverSpec>,
151    /// Failure handling (backoff/deadline) for the managed `Maintenance` run.
152    #[serde(default, skip_serializing_if = "Option::is_none")]
153    pub failure_policy: Option<FailurePolicy>,
154    /// Lease takeover policy for the managed `Maintenance`. Defaults to
155    /// [`TakeoverPolicy::Never`].
156    #[serde(default, skip_serializing_if = "Option::is_none")]
157    pub takeover_policy: Option<TakeoverPolicy>,
158    /// **ClusterRepository only** — namespace the managed (namespaced)
159    /// `Maintenance` CR is created in. Defaults to the operator's own namespace.
160    /// Forbidden on a namespaced `Repository` (its `Maintenance` always lives in
161    /// the repository's namespace), rejected by the admission webhook.
162    #[serde(default, skip_serializing_if = "Option::is_none")]
163    pub namespace: Option<String>,
164}
165
166impl Default for RepositoryMaintenanceSpec {
167    /// Default-on with no overrides. `enabled` is `true` here to match the serde
168    /// `default_true` so a constructed default and a deserialized `{}` agree.
169    fn default() -> Self {
170        Self {
171            enabled: true,
172            schedule: None,
173            mover: None,
174            failure_policy: None,
175            takeover_policy: None,
176            namespace: None,
177        }
178    }
179}
180
181/// Observed maintenance state: lease holder plus per-kind run results. ADR §3.7.
182#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Default, JsonSchema)]
183#[serde(rename_all = "camelCase")]
184pub struct MaintenanceStatus {
185    /// The `metadata.generation` this status reflects, for staleness detection.
186    #[serde(default, skip_serializing_if = "Option::is_none")]
187    pub observed_generation: Option<i64>,
188    /// Current lease holder, if the lease has been claimed. ADR §3.7.
189    #[serde(default, skip_serializing_if = "Option::is_none")]
190    pub ownership: Option<OwnershipStatus>,
191    /// Last/next-run state for the quick maintenance schedule. ADR §3.7.
192    #[serde(default, skip_serializing_if = "Option::is_none")]
193    pub quick: Option<RunStatus>,
194    /// Last/next-run state for the full maintenance schedule. ADR §3.7.
195    #[serde(default, skip_serializing_if = "Option::is_none")]
196    pub full: Option<RunStatus>,
197    /// Standard Kubernetes conditions surfacing maintenance health. ADR §5.
198    #[serde(default, skip_serializing_if = "Vec::is_empty")]
199    pub conditions: Vec<Condition>,
200}
201
202/// Observed ownership-lease state: who holds it and since when. ADR §3.7.
203#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Default, JsonSchema)]
204#[serde(rename_all = "camelCase")]
205pub struct OwnershipStatus {
206    /// The current lease holder's identity (matches `Ownership.owner`).
207    #[serde(default, skip_serializing_if = "Option::is_none")]
208    pub owner: Option<String>,
209    /// RFC3339 instant the lease was claimed.
210    #[serde(default, skip_serializing_if = "Option::is_none")]
211    pub claimed_at: Option<String>,
212}
213
214/// Per-kind (quick/full) run status. ADR §3.7.
215#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Default, JsonSchema)]
216#[serde(rename_all = "camelCase")]
217pub struct RunStatus {
218    /// RFC3339 instant of the most recent run of this kind.
219    #[serde(default, skip_serializing_if = "Option::is_none")]
220    pub last_run_at: Option<String>,
221    /// RFC3339 instant of the next scheduled run of this kind (cron + jitter, pinned).
222    #[serde(default, skip_serializing_if = "Option::is_none")]
223    pub next_scheduled_at: Option<String>,
224    /// Count of back-to-back failed runs of this kind; resets on success.
225    #[serde(default, skip_serializing_if = "Option::is_none")]
226    pub consecutive_failures: Option<i64>,
227    /// The ONLY place storage reclamation is surfaced (ADR §3.7/§4.5).
228    #[serde(default, skip_serializing_if = "Option::is_none")]
229    pub last_content_reclaimed_bytes: Option<i64>,
230}
231
// Unit tests for the maintenance API types: CRD metadata, YAML/JSON round-trips
// of spec and status, and the defaults shared across the operator.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::common::RepositoryKind;
    use crate::testutil::from_yaml;
    use kube::core::CustomResourceExt;

    // The generated CRD carries the group/kind/scope/version declared in `#[kube(...)]`.
    #[test]
    fn maintenance_crd_metadata_is_correct() {
        let crd = Maintenance::crd();
        assert_eq!(crd.spec.group, "kopiur.home-operations.com");
        assert_eq!(crd.spec.names.kind, "Maintenance");
        assert_eq!(crd.spec.scope, "Namespaced");
        assert_eq!(crd.spec.versions[0].name, "v1alpha1");
    }

    // A full spec parses from YAML and survives a JSON serialize/deserialize cycle.
    #[test]
    fn maintenance_roundtrip_matches_adr_shape() {
        // Mirrors ADR-0001 §3.7.
        let yaml = r#"
repository:
  kind: Repository
  name: nas-primary
schedule:
  quick: { cron: "0 */6 * * *", jitter: 30m }
  full:  { cron: "0 3 * * 0", jitter: 1h }
  timezone: UTC
ownership:
  owner: "kopia-operator/nas-primary"
  takeoverPolicy: PromptCondition
mover:
  resources: { requests: { cpu: 250m, memory: 1Gi }, limits: { cpu: "2", memory: 4Gi } }
failurePolicy:
  backoffLimit: 1
  activeDeadlineSeconds: 14400
"#;
        let spec: MaintenanceSpec = from_yaml(yaml);
        assert_eq!(spec.repository.kind, RepositoryKind::Repository);
        assert_eq!(spec.schedule.quick.cron, "0 */6 * * *");
        assert_eq!(spec.schedule.quick.jitter.as_deref(), Some("30m"));
        assert_eq!(spec.schedule.full.cron, "0 3 * * 0");
        assert_eq!(spec.schedule.timezone.as_deref(), Some("UTC"));
        assert_eq!(spec.ownership.owner, "kopia-operator/nas-primary");
        assert_eq!(
            spec.ownership.takeover_policy,
            TakeoverPolicy::PromptCondition
        );
        assert_eq!(
            spec.failure_policy
                .as_ref()
                .unwrap()
                .active_deadline_seconds,
            Some(14400)
        );

        let json = serde_json::to_value(&spec).expect("serialize");
        let reparsed: MaintenanceSpec = serde_json::from_value(json).expect("reparse");
        assert_eq!(spec, reparsed);
    }

    // A populated status parses from YAML and survives a JSON round-trip.
    #[test]
    fn maintenance_status_roundtrips() {
        // Mirrors ADR-0001 §3.7 status block.
        let yaml = r#"
ownership:
  owner: "kopia-operator/nas-primary"
  claimedAt: 2026-05-12T08:14:02Z
quick:
  lastRunAt: 2026-05-24T12:00:11Z
  nextScheduledAt: 2026-05-24T18:00:00Z
  consecutiveFailures: 0
  lastContentReclaimedBytes: 1234567
full:
  lastRunAt: 2026-05-19T03:01:42Z
  nextScheduledAt: 2026-05-26T03:00:00Z
  consecutiveFailures: 0
  lastContentReclaimedBytes: 89456789012
"#;
        let status: MaintenanceStatus = from_yaml(yaml);
        assert_eq!(
            status.ownership.as_ref().unwrap().owner.as_deref(),
            Some("kopia-operator/nas-primary")
        );
        assert_eq!(
            status.quick.as_ref().unwrap().last_content_reclaimed_bytes,
            Some(1234567)
        );
        assert_eq!(
            status.full.as_ref().unwrap().last_content_reclaimed_bytes,
            Some(89456789012)
        );

        // `expect` labels match the other round-trip tests so a failure names its stage.
        let json = serde_json::to_value(&status).expect("serialize");
        let reparsed: MaintenanceStatus = serde_json::from_value(json).expect("reparse");
        assert_eq!(status, reparsed);
    }

    #[test]
    fn repository_maintenance_defaults_to_enabled() {
        // An empty `spec.maintenance: {}` is default-on with no overrides.
        let m: RepositoryMaintenanceSpec = from_yaml("{}\n");
        assert!(
            m.enabled,
            "absent `enabled` must default to true (default-on)"
        );
        assert!(m.schedule.is_none());
        assert!(m.namespace.is_none());
        assert!(m.takeover_policy.is_none());
        // The constructed Default agrees with the deserialized `{}`.
        assert_eq!(m, RepositoryMaintenanceSpec::default());
    }

    // Every override field parses from YAML and survives a JSON round-trip.
    #[test]
    fn repository_maintenance_roundtrip_with_overrides() {
        let yaml = r#"
enabled: false
schedule:
  quick: { cron: "0 */4 * * *", jitter: 20m }
  full:  { cron: "30 2 * * *", jitter: 45m }
  timezone: America/Chicago
takeoverPolicy: Force
namespace: kopia-system
failurePolicy:
  backoffLimit: 2
"#;
        let m: RepositoryMaintenanceSpec = from_yaml(yaml);
        assert!(!m.enabled);
        let s = m.schedule.as_ref().expect("schedule");
        assert_eq!(s.quick.cron, "0 */4 * * *");
        assert_eq!(s.full.jitter.as_deref(), Some("45m"));
        assert_eq!(s.timezone.as_deref(), Some("America/Chicago"));
        assert_eq!(m.takeover_policy, Some(TakeoverPolicy::Force));
        assert_eq!(m.namespace.as_deref(), Some("kopia-system"));
        assert_eq!(m.failure_policy.as_ref().unwrap().backoff_limit, Some(2));

        let json = serde_json::to_value(&m).expect("serialize");
        let reparsed: RepositoryMaintenanceSpec = serde_json::from_value(json).expect("reparse");
        assert_eq!(m, reparsed);
    }

    // Pins the shared default schedule values.
    #[test]
    fn default_maintenance_schedule_is_quick_6h_full_daily() {
        let s = default_maintenance_schedule();
        assert_eq!(s.quick.cron, "0 */6 * * *");
        assert_eq!(s.quick.jitter.as_deref(), Some("30m"));
        assert_eq!(s.full.cron, "0 3 * * *");
        assert_eq!(s.full.jitter.as_deref(), Some("1h"));
        assert!(s.timezone.is_none());
    }

    // Pins the wire strings of each variant and the `Never` default.
    #[test]
    fn takeover_policy_serializes_to_expected_strings() {
        assert_eq!(
            serde_json::to_value(TakeoverPolicy::Never).unwrap(),
            "Never"
        );
        assert_eq!(
            serde_json::to_value(TakeoverPolicy::PromptCondition).unwrap(),
            "PromptCondition"
        );
        assert_eq!(
            serde_json::to_value(TakeoverPolicy::Force).unwrap(),
            "Force"
        );
        assert_eq!(TakeoverPolicy::default(), TakeoverPolicy::Never);
    }
}