fleetforge_policy/
budget.rs1use anyhow::{anyhow, Result};
2use async_trait::async_trait;
3use serde_json::Value;
4
5use crate::{Decision, DecisionEffect, PolicyEngine, PolicyRequest};
6
/// Budget-cap policy configuration: optional ceilings on token usage and cost,
/// plus an optional warning threshold expressed as a fraction of a ceiling.
///
/// Each field is `None` when the corresponding setting is not configured.
#[derive(Clone, Debug, Default)]
pub struct BudgetCapsPolicy {
    /// Upper bound that token counts are compared against.
    max_tokens: Option<i64>,
    /// Upper bound that cost values are compared against.
    max_cost: Option<f64>,
    /// Usage-to-limit ratio at or above which a watermark warning is attached.
    warn_ratio: Option<f64>,
}
13
14impl BudgetCapsPolicy {
15 pub fn from_config(config: &Value) -> Result<Self> {
16 let max_tokens = config
17 .get("max_tokens")
18 .and_then(Value::as_i64)
19 .map(|value| value.max(0));
20
21 let max_cost = config
22 .get("max_cost")
23 .and_then(Value::as_f64)
24 .map(|value| value.max(0.0));
25
26 let warn_ratio = config
27 .get("warn_ratio")
28 .and_then(Value::as_f64)
29 .map(|value| {
30 if value.is_finite() {
31 value.clamp(0.0, 1.0)
32 } else {
33 0.0
34 }
35 });
36
37 if max_tokens.is_none() && max_cost.is_none() {
38 return Err(anyhow!(
39 "budget_caps pack requires at least one of 'max_tokens' or 'max_cost'"
40 ));
41 }
42
43 Ok(Self {
44 max_tokens,
45 max_cost,
46 warn_ratio,
47 })
48 }
49}
50
51fn read_reserved_tokens(budget: &Value) -> Option<i64> {
52 budget
53 .get("reserved")
54 .and_then(|reserved| reserved.get("tokens"))
55 .and_then(Value::as_i64)
56}
57
58fn read_reserved_cost(budget: &Value) -> Option<f64> {
59 budget
60 .get("reserved")
61 .and_then(|reserved| reserved.get("cost"))
62 .and_then(Value::as_f64)
63}
64
65fn read_actual_tokens(payload: &Value, budget: &Value) -> Option<i64> {
66 payload
67 .get("usage")
68 .and_then(|usage| usage.get("tokens"))
69 .and_then(Value::as_i64)
70 .or_else(|| {
71 budget
72 .get("actual")
73 .and_then(|actual| actual.get("tokens"))
74 .and_then(Value::as_i64)
75 })
76}
77
78fn read_actual_cost(payload: &Value, budget: &Value) -> Option<f64> {
79 payload
80 .get("usage")
81 .and_then(|usage| usage.get("cost"))
82 .and_then(Value::as_f64)
83 .or_else(|| {
84 budget
85 .get("actual")
86 .and_then(|actual| actual.get("cost"))
87 .and_then(Value::as_f64)
88 })
89}
90
/// Formats a warning reason as `<prefix>:<value>`, with `value` rendered to
/// two decimal places.
fn warn_reason(prefix: &str, value: f64) -> String {
    format!("{prefix}:{value:.2}")
}
94
95#[async_trait]
96impl PolicyEngine for BudgetCapsPolicy {
97 async fn evaluate(&self, request: &PolicyRequest) -> Result<Decision> {
98 let payload = request
99 .context()
100 .get("payload")
101 .cloned()
102 .unwrap_or(Value::Null);
103 let boundary = request
104 .context()
105 .get("boundary")
106 .and_then(Value::as_str)
107 .unwrap_or_default()
108 .to_ascii_lowercase();
109
110 let budget = payload.get("budget").cloned().unwrap_or(Value::Null);
111
112 let is_pre_phase = matches!(
113 boundary.as_str(),
114 "ingress_prompt" | "ingress_tool" | "scheduler"
115 );
116 let is_post_phase = matches!(boundary.as_str(), "egress_prompt" | "egress_tool");
117
118 if !is_pre_phase && !is_post_phase {
119 return Ok(Decision::allow());
120 }
121
122 if is_pre_phase {
123 if let (Some(limit), Some(reserved)) = (self.max_tokens, read_reserved_tokens(&budget))
124 {
125 if reserved > limit {
126 return Ok(Decision {
127 effect: DecisionEffect::Deny,
128 reason: Some("budget_tokens_reserved_exceeded".into()),
129 patches: Vec::new(),
130 });
131 }
132 }
133 if let (Some(limit), Some(reserved)) = (self.max_cost, read_reserved_cost(&budget)) {
134 if reserved > limit + f64::EPSILON {
135 return Ok(Decision {
136 effect: DecisionEffect::Deny,
137 reason: Some("budget_cost_reserved_exceeded".into()),
138 patches: Vec::new(),
139 });
140 }
141 }
142 return Ok(Decision::allow());
143 }
144
145 let mut warnings: Vec<String> = Vec::new();
147
148 if let (Some(limit), Some(actual)) =
149 (self.max_tokens, read_actual_tokens(&payload, &budget))
150 {
151 if actual > limit {
152 return Ok(Decision {
153 effect: DecisionEffect::Deny,
154 reason: Some("budget_tokens_actual_exceeded".into()),
155 patches: Vec::new(),
156 });
157 }
158 if let Some(ratio) = self.warn_ratio {
159 if limit > 0 && (actual as f64) / (limit as f64) >= ratio {
160 warnings.push(warn_reason(
161 "budget_tokens_watermark",
162 (actual as f64) / (limit as f64),
163 ));
164 }
165 }
166 }
167
168 if let (Some(limit), Some(actual)) = (self.max_cost, read_actual_cost(&payload, &budget)) {
169 if actual > limit + f64::EPSILON {
170 return Ok(Decision {
171 effect: DecisionEffect::Deny,
172 reason: Some("budget_cost_actual_exceeded".into()),
173 patches: Vec::new(),
174 });
175 }
176 if let Some(ratio) = self.warn_ratio {
177 if limit > 0.0 && actual / limit >= ratio {
178 warnings.push(warn_reason("budget_cost_watermark", actual / limit));
179 }
180 }
181 }
182
183 if warnings.is_empty() {
184 Ok(Decision::allow())
185 } else {
186 Ok(Decision {
187 effect: DecisionEffect::Allow,
188 reason: Some(warnings.join(";")),
189 patches: Vec::new(),
190 })
191 }
192 }
193}