Skip to main content

kopiur_api/
jitter.rs

1//! Deterministic schedule jitter and Jenkins-style `H` substitution (ADR §4.1).
2//!
3//! Jitter MUST be **deterministic**: HA operator replicas compute identical fire
4//! times without coordinating, and a controller restart re-derives the same value
5//! without persisting it (ADR §4.1, SKILL "Scheduling"). That rules out any RNG
6//! and any reliance on the wall clock. The offset is a pure function of
7//! `(seed, slot_start)`.
8//!
9//! ### Why a hand-rolled hash
10//!
11//! `std::collections::hash_map::DefaultHasher` is explicitly **not** guaranteed
12//! stable across Rust releases, so a value persisted/expected by one build could
13//! differ after a toolchain bump — fatal for "identical across replicas/restarts".
14//! We therefore inline **FNV-1a (64-bit)**, whose constants are fixed by the
15//! algorithm and will never change. No external dependency is added.
16//!
17//! ### Cron `H`
18//!
19//! `croner` 2.x does not implement Jenkins-style `H`. Kopiur treats `H` as
20//! "deterministic hashed jitter within the field's range," resolved *here* (not in
21//! the cron parser). [`substitute_h`] rewrites each `H` in a 5-field cron to a
22//! concrete value derived from the same FNV hash of the seed, so the resolved
23//! expression is stable per `BackupSchedule` and parseable by `croner`.
24//! [`crate::validate::validate_cron`] validates the *shape* by substituting a fixed
25//! placeholder; this function produces the *spread*.
26
27use std::time::Duration;
28
29/// FNV-1a 64-bit offset basis.
30const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
31/// FNV-1a 64-bit prime.
32const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
33
34/// Stable 64-bit FNV-1a hash of `(seed, slot_start)`. Fixed forever by the
35/// algorithm — safe to rely on across builds and replicas.
36fn fnv1a(seed: &str, slot_start_unix: i64) -> u64 {
37    let mut h = FNV_OFFSET;
38    for b in seed.as_bytes() {
39        h ^= u64::from(*b);
40        h = h.wrapping_mul(FNV_PRIME);
41    }
42    // Mix the slot as 8 little-endian bytes so adjacent slots diverge widely.
43    for b in slot_start_unix.to_le_bytes() {
44        h ^= u64::from(b);
45        h = h.wrapping_mul(FNV_PRIME);
46    }
47    h
48}
49
50/// Deterministic jitter offset in `[0, max)`, derived from a stable hash of
51/// `(seed, slot_start_unix)`. NO RNG, NO clock.
52///
53/// `seed` should be a stable per-schedule key — the ADR specifies
54/// `BackupSchedule.UID` (combined here with the slot's wall-clock start). The same
55/// `(seed, slot_start, max)` always yields the same `Duration`; different seeds or
56/// slots spread across the window. `max == 0` yields `Duration::ZERO`.
57///
58/// Resolution is whole seconds: jitter windows are minutes-to-hours, sub-second
59/// precision is meaningless for cron slots, and integer seconds keep the value
60/// trivially reproducible across languages if the mover ever re-derives it.
61///
62/// ```
63/// use std::time::Duration;
64/// // Re-exported from the crate root as `jitter_offset`.
65/// use kopiur_api::jitter_offset;
66///
67/// let max = Duration::from_secs(1800); // 30m window
68/// // Deterministic: the same (seed, slot, max) always yields the same offset —
69/// // HA replicas and restarts agree without coordination (ADR §4.1).
70/// let a = jitter_offset("schedule-uid", 1_700_000_000, max);
71/// let b = jitter_offset("schedule-uid", 1_700_000_000, max);
72/// assert_eq!(a, b);
73/// assert!(a < max);
74/// // A zero window means no jitter.
75/// assert_eq!(jitter_offset("uid", 123, Duration::ZERO), Duration::ZERO);
76/// ```
77pub fn offset(seed: &str, slot_start_unix: i64, max: Duration) -> Duration {
78    let max_secs = max.as_secs();
79    if max_secs == 0 {
80        return Duration::ZERO;
81    }
82    let h = fnv1a(seed, slot_start_unix);
83    // Modulo a u64 hash by the window width in seconds → uniform-enough offset.
84    let offset_secs = h % max_secs;
85    Duration::from_secs(offset_secs)
86}
87
88/// Resolve Jenkins-style `H` tokens in a 5-field cron expression to concrete,
89/// deterministic values derived from `seed`. Each field's `H` maps into that
90/// field's natural range (minute 0–59, hour 0–23, day-of-month 1–28, month 1–12,
91/// day-of-week 0–6) using a distinct mix of the seed hash, so two `H`s in the same
92/// expression don't collapse to the same number.
93///
94/// Returns the rewritten expression. Non-`H` fields pass through unchanged. If the
95/// expression isn't 5 whitespace-separated fields it is returned unchanged (shape
96/// validation is [`crate::validate::validate_cron`]'s job).
97///
98/// ```
99/// use kopiur_api::substitute_h;
100///
101/// // Each `H` resolves to a concrete, deterministic value in the field's range;
102/// // non-`H` fields are untouched.
103/// let out = substitute_h("H 2 * * *", "schedule-uid");
104/// let fields: Vec<&str> = out.split_whitespace().collect();
105/// let minute: u64 = fields[0].parse().expect("H -> a minute");
106/// assert!(minute < 60);
107/// assert_eq!(&fields[1..], &["2", "*", "*", "*"]);
108///
109/// // Deterministic per seed; different schedules land in different minutes.
110/// assert_eq!(substitute_h("H 2 * * *", "uid"), substitute_h("H 2 * * *", "uid"));
111/// assert_ne!(substitute_h("H 2 * * *", "uid-a"), substitute_h("H 2 * * *", "uid-b"));
112/// ```
113pub fn substitute_h(expr: &str, seed: &str) -> String {
114    let fields: Vec<&str> = expr.split_whitespace().collect();
115    if fields.len() != 5 {
116        return expr.to_string();
117    }
118    // (min_inclusive, range_width) per standard cron field.
119    const RANGES: [(u64, u64); 5] = [
120        (0, 60), // minute 0-59
121        (0, 24), // hour 0-23
122        (1, 28), // day-of-month 1-28 (28 keeps it valid in every month)
123        (1, 12), // month 1-12
124        (0, 7),  // day-of-week 0-6
125    ];
126    let base = fnv1a(seed, 0);
127    let resolved: Vec<String> = fields
128        .iter()
129        .enumerate()
130        .map(|(i, f)| {
131            if *f == "H" {
132                let (lo, width) = RANGES[i];
133                // Distinct rotation per field index so multiple H's diverge.
134                let mixed = base.rotate_left((i as u32 + 1) * 7);
135                (lo + (mixed % width)).to_string()
136            } else {
137                (*f).to_string()
138            }
139        })
140        .collect();
141    resolved.join(" ")
142}
143
144#[cfg(test)]
145mod tests {
146    use super::*;
147
148    const MAX: Duration = Duration::from_secs(1800); // 30m
149
150    #[test]
151    fn offset_is_deterministic_across_calls() {
152        for seed in ["uid-a", "uid-b", "00000000-1111-2222", "schedule/billing"] {
153            let a = offset(seed, 1_700_000_000, MAX);
154            let b = offset(seed, 1_700_000_000, MAX);
155            assert_eq!(a, b, "same (seed, slot, max) must give identical offset");
156        }
157    }
158
159    #[test]
160    fn offset_is_always_within_range() {
161        for slot in 0..2000i64 {
162            let o = offset("some-uid", slot * 37, MAX);
163            assert!(o < MAX, "offset {o:?} must be < max {MAX:?}");
164        }
165    }
166
167    #[test]
168    fn offset_zero_max_is_zero() {
169        assert_eq!(offset("uid", 123, Duration::ZERO), Duration::ZERO);
170    }
171
172    #[test]
173    fn offset_spreads_across_seeds_not_all_identical() {
174        let slot = 1_700_000_000;
175        let mut seen = std::collections::BTreeSet::new();
176        for i in 0..500 {
177            let seed = format!("uid-{i}");
178            seen.insert(offset(&seed, slot, MAX).as_secs());
179        }
180        // With 500 seeds over a 1800s window we expect a wide spread, certainly
181        // far more than a handful of distinct values (proves it's not constant).
182        assert!(
183            seen.len() > 100,
184            "expected a wide spread of offsets, got {} distinct",
185            seen.len()
186        );
187    }
188
189    #[test]
190    fn offset_spreads_across_slots_for_one_seed() {
191        let mut seen = std::collections::BTreeSet::new();
192        for slot in 0..500i64 {
193            seen.insert(offset("fixed-uid", slot * 3600, MAX).as_secs());
194        }
195        assert!(
196            seen.len() > 100,
197            "adjacent slots must diverge, got {}",
198            seen.len()
199        );
200    }
201
202    #[test]
203    fn substitute_h_is_deterministic_and_in_range() {
204        let a = substitute_h("H 2 * * *", "uid-1");
205        let b = substitute_h("H 2 * * *", "uid-1");
206        assert_eq!(a, b);
207        // minute field resolved to a number 0-59; rest unchanged.
208        let fields: Vec<&str> = a.split_whitespace().collect();
209        let minute: u64 = fields[0].parse().expect("H resolved to a number");
210        assert!(minute < 60);
211        assert_eq!(&fields[1..], &["2", "*", "*", "*"]);
212    }
213
214    #[test]
215    fn substitute_h_different_seeds_differ() {
216        let a = substitute_h("H 2 * * *", "uid-1");
217        let b = substitute_h("H 2 * * *", "uid-2");
218        assert_ne!(a, b, "different schedules should land in different minutes");
219    }
220
221    #[test]
222    fn substitute_h_multiple_h_diverge() {
223        // Two H's in one expression must not collapse to the same number-space.
224        let out = substitute_h("H H * * *", "uid-x");
225        let fields: Vec<&str> = out.split_whitespace().collect();
226        let minute: u64 = fields[0].parse().unwrap();
227        let hour: u64 = fields[1].parse().unwrap();
228        assert!(minute < 60);
229        assert!(hour < 24);
230    }
231
232    #[test]
233    fn substitute_h_passes_through_non_h_and_malformed() {
234        assert_eq!(substitute_h("0 2 * * *", "s"), "0 2 * * *");
235        // Not 5 fields → returned unchanged (validate_cron handles shape).
236        assert_eq!(substitute_h("0 2 * *", "s"), "0 2 * *");
237    }
238}