1use std::collections::{BTreeSet, HashMap, HashSet};
2
3use anyhow::{Context, Result};
4use chrono::{DateTime, Utc};
5use fleetforge_storage::models;
6use serde::Serialize;
7use serde_json::{json, Value};
8use tracing::warn;
9use uuid::Uuid;
10
11use crate::model::{RunStatus, StepSpec, StepType};
12use fleetforge_trust::digest_json;
13
14pub struct BomBuildOutput {
15 pub bom: Value,
16 pub summary: Value,
17 pub attestation_ids: Vec<Uuid>,
18 pub trust_subjects: Vec<String>,
19}
20
21pub fn build_aibom(
22 run: &models::Run,
23 steps: &[models::Step],
24 policy_pack_id: &str,
25) -> Result<BomBuildOutput> {
26 let status: RunStatus = run.status.into();
27 let metadata_time = run.created_at;
28 let dag_digest = digest_json(&run.dag_json);
29 let input_digest = digest_json(&extract_inputs(&run.input_ctx));
30
31 let mut model_components = Vec::new();
32 let mut tool_components = Vec::new();
33 let mut guardrails = BTreeSet::new();
34 let mut attestation_ids: HashSet<Uuid> = HashSet::new();
35 let mut trust_subjects: HashSet<String> = HashSet::new();
36
37 trust_subjects.insert(format!("run:{}", run.run_id));
38
39 for step in steps {
40 trust_subjects.insert(format!("step:{}:{}", run.run_id, step.step_id));
41 let spec: StepSpec = match serde_json::from_value(step.spec_json.clone()) {
42 Ok(spec) => spec,
43 Err(err) => {
44 warn!(
45 run_id = %run.run_id,
46 step_id = %step.step_id,
47 error = %err,
48 "failed to parse step spec for AIBOM; skipping component"
49 );
50 continue;
51 }
52 };
53
54 match spec.r#type {
55 StepType::Llm => {
56 if let Some(component) =
57 build_model_component(step, &spec, policy_pack_id, dag_digest.clone())
58 {
59 model_components.push(component);
60 }
61 }
62 _ => {
63 if let Some(component) =
64 build_tool_component(step, &spec, policy_pack_id, dag_digest.clone())
65 {
66 tool_components.push(component);
67 }
68 }
69 }
70
71 collect_guardrails(&spec.policy, &mut guardrails);
72 if let Some(output) = &step.output_json {
73 collect_attestation_ids(output, &mut attestation_ids);
74 collect_trust_subjects(output, &mut trust_subjects);
75 }
76 if let Some(error) = &step.error_json {
77 collect_attestation_ids(error, &mut attestation_ids);
78 collect_trust_subjects(error, &mut trust_subjects);
79 }
80 }
81
82 let (pack_name, pack_version) = split_pack_version(policy_pack_id);
83
84 let mut guardrail_vec: Vec<String> = guardrails.into_iter().collect();
85 guardrail_vec.sort();
86
87 let mut attestation_vec: Vec<Uuid> = attestation_ids.into_iter().collect();
88 attestation_vec.sort();
89
90 let mut trust_subject_vec: Vec<String> = trust_subjects.into_iter().collect();
91 trust_subject_vec.sort();
92
93 let bom = MlBom {
94 bom_format: "CycloneDX",
95 spec_version: "1.5",
96 serial_number: format!("urn:uuid:{}", run.run_id),
97 version: 1,
98 metadata: MlBomMetadata {
99 run_id: run.run_id.to_string(),
100 status: status.as_str().to_string(),
101 generated_at: metadata_time,
102 policy_pack: pack_name.to_string(),
103 policy_pack_version: pack_version.map(|v| v.to_string()),
104 dag_digest,
105 input_digest,
106 },
107 components: MlBomComponents {
108 models: model_components,
109 tools: tool_components,
110 policies: vec![MlBomPolicy {
111 pack: pack_name.to_string(),
112 version: pack_version.map(|v| v.to_string()),
113 guardrails: guardrail_vec.clone(),
114 }],
115 },
116 trust: MlBomTrust {
117 subjects: trust_subject_vec.clone(),
118 attestation_ids: attestation_vec.iter().map(|id| id.to_string()).collect(),
119 },
120 };
121
122 let summary = json!({
123 "components": {
124 "models": bom.components.models.len(),
125 "tools": bom.components.tools.len(),
126 "guardrails": guardrail_vec.len(),
127 },
128 "policy_pack": {
129 "id": pack_name,
130 "version": pack_version,
131 },
132 "attestation_ids": attestation_vec.iter().map(|id| id.to_string()).collect::<Vec<_>>(),
133 });
134
135 Ok(BomBuildOutput {
136 bom: serde_json::to_value(bom)?,
137 summary,
138 attestation_ids: attestation_vec,
139 trust_subjects: trust_subject_vec,
140 })
141}
142
143#[derive(Serialize)]
144struct MlBom {
145 bom_format: &'static str,
146 spec_version: &'static str,
147 serial_number: String,
148 version: u32,
149 metadata: MlBomMetadata,
150 components: MlBomComponents,
151 trust: MlBomTrust,
152}
153
154#[derive(Serialize)]
155struct MlBomMetadata {
156 run_id: String,
157 status: String,
158 generated_at: DateTime<Utc>,
159 policy_pack: String,
160 #[serde(skip_serializing_if = "Option::is_none")]
161 policy_pack_version: Option<String>,
162 dag_digest: String,
163 input_digest: String,
164}
165
166#[derive(Serialize)]
167struct MlBomComponents {
168 models: Vec<MlBomComponent>,
169 tools: Vec<MlBomComponent>,
170 policies: Vec<MlBomPolicy>,
171}
172
173#[derive(Serialize)]
174struct MlBomComponent {
175 id: String,
176 #[serde(rename = "type")]
177 component_type: String,
178 name: String,
179 #[serde(skip_serializing_if = "Option::is_none")]
180 version: Option<String>,
181 #[serde(skip_serializing_if = "Option::is_none")]
182 provider: Option<String>,
183 digests: Vec<MlBomHash>,
184 properties: HashMap<String, Value>,
185}
186
187#[derive(Serialize)]
188struct MlBomPolicy {
189 pack: String,
190 #[serde(skip_serializing_if = "Option::is_none")]
191 version: Option<String>,
192 guardrails: Vec<String>,
193}
194
195#[derive(Serialize)]
196struct MlBomHash {
197 alg: &'static str,
198 content: String,
199}
200
201#[derive(Serialize)]
202struct MlBomTrust {
203 subjects: Vec<String>,
204 attestation_ids: Vec<String>,
205}
206
207fn build_model_component(
208 step: &models::Step,
209 spec: &StepSpec,
210 policy_pack_id: &str,
211 dag_digest: String,
212) -> Option<MlBomComponent> {
213 let name = spec
214 .inputs
215 .get("model")
216 .and_then(Value::as_str)
217 .map(|s| s.to_string())
218 .or_else(|| spec.slug.clone())
219 .unwrap_or_else(|| format!("llm-{}", step.step_id));
220 let provider = step.provider.clone().or_else(|| {
221 spec.inputs
222 .get("provider")
223 .and_then(Value::as_str)
224 .map(|s| s.to_string())
225 });
226 let version = step.provider_version.clone().or_else(|| {
227 spec.inputs
228 .get("model_version")
229 .and_then(Value::as_str)
230 .map(|s| s.to_string())
231 });
232
233 let mut properties = HashMap::new();
234 properties.insert(
235 "step_id".to_string(),
236 Value::String(step.step_id.to_string()),
237 );
238 properties.insert(
239 "policy_pack".to_string(),
240 Value::String(policy_pack_id.to_string()),
241 );
242 if let Some(slug) = spec.slug.clone() {
243 properties.insert("slug".to_string(), Value::String(slug));
244 }
245 if let Some(temp) = spec.inputs.get("temperature") {
246 properties.insert("temperature".to_string(), temp.clone());
247 }
248 if let Some(top_p) = spec.inputs.get("top_p") {
249 properties.insert("top_p".to_string(), top_p.clone());
250 }
251 properties.insert("dag_digest".to_string(), Value::String(dag_digest));
252
253 Some(MlBomComponent {
254 id: step.step_id.to_string(),
255 component_type: "model".to_string(),
256 name,
257 version,
258 provider,
259 digests: vec![MlBomHash {
260 alg: "SHA-256",
261 content: digest_json(&spec.inputs),
262 }],
263 properties,
264 })
265}
266
267fn build_tool_component(
268 step: &models::Step,
269 spec: &StepSpec,
270 policy_pack_id: &str,
271 dag_digest: String,
272) -> Option<MlBomComponent> {
273 let tool_name = spec
274 .inputs
275 .get("tool")
276 .or_else(|| spec.inputs.get("tool_name"))
277 .and_then(Value::as_str)
278 .map(|s| s.to_string())
279 .or_else(|| spec.slug.clone())
280 .unwrap_or_else(|| format!("step-{}", step.step_id));
281
282 let mut properties = HashMap::new();
283 properties.insert(
284 "step_id".to_string(),
285 Value::String(step.step_id.to_string()),
286 );
287 properties.insert(
288 "policy_pack".to_string(),
289 Value::String(policy_pack_id.to_string()),
290 );
291 properties.insert("dag_digest".to_string(), Value::String(dag_digest));
292 if let Some(slug) = spec.slug.clone() {
293 properties.insert("slug".to_string(), Value::String(slug));
294 }
295 if let Some(tool_type) = spec.inputs.get("type").and_then(Value::as_str) {
296 properties.insert(
297 "tool_type".to_string(),
298 Value::String(tool_type.to_string()),
299 );
300 }
301 if let Some(command) = spec.inputs.get("command") {
302 properties.insert("command".to_string(), command.clone());
303 }
304
305 Some(MlBomComponent {
306 id: step.step_id.to_string(),
307 component_type: "tool".to_string(),
308 name: tool_name,
309 version: step.provider_version.clone(),
310 provider: step.provider.clone(),
311 digests: vec![MlBomHash {
312 alg: "SHA-256",
313 content: digest_json(&spec.inputs),
314 }],
315 properties,
316 })
317}
318
319fn collect_guardrails(policy: &Value, guardrails: &mut BTreeSet<String>) {
320 if let Some(array) = policy.get("guardrails").and_then(Value::as_array) {
321 for value in array {
322 if let Some(name) = value.as_str() {
323 guardrails.insert(name.to_string());
324 }
325 }
326 }
327}
328
329fn collect_attestation_ids(value: &Value, set: &mut HashSet<Uuid>) {
330 match value {
331 Value::Object(map) => {
332 if let Some(array) = map.get("attestation_ids").and_then(Value::as_array) {
333 for entry in array {
334 if let Some(id_str) = entry.as_str() {
335 if let Ok(id) = Uuid::parse_str(id_str) {
336 set.insert(id);
337 }
338 }
339 }
340 }
341 for child in map.values() {
342 collect_attestation_ids(child, set);
343 }
344 }
345 Value::Array(items) => {
346 for item in items {
347 collect_attestation_ids(item, set);
348 }
349 }
350 _ => {}
351 }
352}
353
354fn collect_trust_subjects(value: &Value, set: &mut HashSet<String>) {
355 match value {
356 Value::Object(map) => {
357 if let Some(subject) = map.get("subject_id").and_then(Value::as_str) {
358 set.insert(subject.to_string());
359 }
360 if let Some(trust) = map.get("trust") {
361 match trust {
362 Value::String(subject) => {
363 set.insert(subject.to_string());
364 }
365 Value::Object(obj) => {
366 if let Some(subject_value) = obj.get("subject") {
367 match subject_value {
368 Value::String(subject) => {
369 set.insert(subject.to_string());
370 }
371 other => {
372 if let Ok(serialised) = serde_json::to_string(other) {
373 set.insert(serialised);
374 }
375 }
376 }
377 }
378 }
379 _ => {}
380 }
381 }
382 for child in map.values() {
383 collect_trust_subjects(child, set);
384 }
385 }
386 Value::Array(items) => {
387 for item in items {
388 collect_trust_subjects(item, set);
389 }
390 }
391 _ => {}
392 }
393}
394
395fn extract_inputs(ctx: &Value) -> Value {
396 ctx.as_object()
397 .and_then(|map| map.get("inputs"))
398 .cloned()
399 .unwrap_or_else(|| Value::Null)
400}
401
/// Splits a policy pack id of the form `name@version` at the first `@`.
///
/// Returns `(id, None)` when there is no `@`. Everything after the first `@` — further `@`
/// characters included, and possibly the empty string — is returned as the version.
fn split_pack_version(id: &str) -> (&str, Option<&str>) {
    match id.find('@') {
        // `@` is a single byte, so `at + 1` is always a valid char boundary.
        Some(at) => (&id[..at], Some(&id[at + 1..])),
        None => (id, None),
    }
}