fleetforge_policy/
budget.rs

1use anyhow::{anyhow, Result};
2use async_trait::async_trait;
3use serde_json::Value;
4
5use crate::{Decision, DecisionEffect, PolicyEngine, PolicyRequest};
6
/// Policy that enforces token and cost budget caps.
///
/// Each cap is optional; a cap that is `None` is simply not checked during evaluation.
#[derive(Clone, Debug, Default)]
pub struct BudgetCapsPolicy {
    /// Upper bound on token counts (reserved or actual); `None` disables the token check.
    max_tokens: Option<i64>,
    /// Upper bound on cost (reserved or actual); `None` disables the cost check.
    max_cost: Option<f64>,
    /// Usage-to-limit ratio at or above which a watermark warning is attached to an
    /// otherwise allowed post-phase decision; `None` disables warnings.
    warn_ratio: Option<f64>,
}
13
14impl BudgetCapsPolicy {
15    pub fn from_config(config: &Value) -> Result<Self> {
16        let max_tokens = config
17            .get("max_tokens")
18            .and_then(Value::as_i64)
19            .map(|value| value.max(0));
20
21        let max_cost = config
22            .get("max_cost")
23            .and_then(Value::as_f64)
24            .map(|value| value.max(0.0));
25
26        let warn_ratio = config
27            .get("warn_ratio")
28            .and_then(Value::as_f64)
29            .map(|value| {
30                if value.is_finite() {
31                    value.clamp(0.0, 1.0)
32                } else {
33                    0.0
34                }
35            });
36
37        if max_tokens.is_none() && max_cost.is_none() {
38            return Err(anyhow!(
39                "budget_caps pack requires at least one of 'max_tokens' or 'max_cost'"
40            ));
41        }
42
43        Ok(Self {
44            max_tokens,
45            max_cost,
46            warn_ratio,
47        })
48    }
49}
50
51fn read_reserved_tokens(budget: &Value) -> Option<i64> {
52    budget
53        .get("reserved")
54        .and_then(|reserved| reserved.get("tokens"))
55        .and_then(Value::as_i64)
56}
57
58fn read_reserved_cost(budget: &Value) -> Option<f64> {
59    budget
60        .get("reserved")
61        .and_then(|reserved| reserved.get("cost"))
62        .and_then(Value::as_f64)
63}
64
65fn read_actual_tokens(payload: &Value, budget: &Value) -> Option<i64> {
66    payload
67        .get("usage")
68        .and_then(|usage| usage.get("tokens"))
69        .and_then(Value::as_i64)
70        .or_else(|| {
71            budget
72                .get("actual")
73                .and_then(|actual| actual.get("tokens"))
74                .and_then(Value::as_i64)
75        })
76}
77
78fn read_actual_cost(payload: &Value, budget: &Value) -> Option<f64> {
79    payload
80        .get("usage")
81        .and_then(|usage| usage.get("cost"))
82        .and_then(Value::as_f64)
83        .or_else(|| {
84            budget
85                .get("actual")
86                .and_then(|actual| actual.get("cost"))
87                .and_then(Value::as_f64)
88        })
89}
90
/// Formats a warning reason as `<prefix>:<value>`, with `value` rendered to two decimals.
fn warn_reason(prefix: &str, value: f64) -> String {
    format!("{}:{:.2}", prefix, value)
}
94
95#[async_trait]
96impl PolicyEngine for BudgetCapsPolicy {
97    async fn evaluate(&self, request: &PolicyRequest) -> Result<Decision> {
98        let payload = request
99            .context()
100            .get("payload")
101            .cloned()
102            .unwrap_or(Value::Null);
103        let boundary = request
104            .context()
105            .get("boundary")
106            .and_then(Value::as_str)
107            .unwrap_or_default()
108            .to_ascii_lowercase();
109
110        let budget = payload.get("budget").cloned().unwrap_or(Value::Null);
111
112        let is_pre_phase = matches!(
113            boundary.as_str(),
114            "ingress_prompt" | "ingress_tool" | "scheduler"
115        );
116        let is_post_phase = matches!(boundary.as_str(), "egress_prompt" | "egress_tool");
117
118        if !is_pre_phase && !is_post_phase {
119            return Ok(Decision::allow());
120        }
121
122        if is_pre_phase {
123            if let (Some(limit), Some(reserved)) = (self.max_tokens, read_reserved_tokens(&budget))
124            {
125                if reserved > limit {
126                    return Ok(Decision {
127                        effect: DecisionEffect::Deny,
128                        reason: Some("budget_tokens_reserved_exceeded".into()),
129                        patches: Vec::new(),
130                    });
131                }
132            }
133            if let (Some(limit), Some(reserved)) = (self.max_cost, read_reserved_cost(&budget)) {
134                if reserved > limit + f64::EPSILON {
135                    return Ok(Decision {
136                        effect: DecisionEffect::Deny,
137                        reason: Some("budget_cost_reserved_exceeded".into()),
138                        patches: Vec::new(),
139                    });
140                }
141            }
142            return Ok(Decision::allow());
143        }
144
145        // Post phase – evaluate actual usage and warnings.
146        let mut warnings: Vec<String> = Vec::new();
147
148        if let (Some(limit), Some(actual)) =
149            (self.max_tokens, read_actual_tokens(&payload, &budget))
150        {
151            if actual > limit {
152                return Ok(Decision {
153                    effect: DecisionEffect::Deny,
154                    reason: Some("budget_tokens_actual_exceeded".into()),
155                    patches: Vec::new(),
156                });
157            }
158            if let Some(ratio) = self.warn_ratio {
159                if limit > 0 && (actual as f64) / (limit as f64) >= ratio {
160                    warnings.push(warn_reason(
161                        "budget_tokens_watermark",
162                        (actual as f64) / (limit as f64),
163                    ));
164                }
165            }
166        }
167
168        if let (Some(limit), Some(actual)) = (self.max_cost, read_actual_cost(&payload, &budget)) {
169            if actual > limit + f64::EPSILON {
170                return Ok(Decision {
171                    effect: DecisionEffect::Deny,
172                    reason: Some("budget_cost_actual_exceeded".into()),
173                    patches: Vec::new(),
174                });
175            }
176            if let Some(ratio) = self.warn_ratio {
177                if limit > 0.0 && actual / limit >= ratio {
178                    warnings.push(warn_reason("budget_cost_watermark", actual / limit));
179                }
180            }
181        }
182
183        if warnings.is_empty() {
184            Ok(Decision::allow())
185        } else {
186            Ok(Decision {
187                effect: DecisionEffect::Allow,
188                reason: Some(warnings.join(";")),
189                patches: Vec::new(),
190            })
191        }
192    }
193}