Skip to content

Commit 8fe66d4

Browse files
docs(lunar-lake): harden SLM258V-005 dense SLM receipt identity
Harden dense Qwen SLM answer and phase receipts with explicit top-level backend, runtime, fallback, model, tokenizer, prompt-template, and selected-runtime identity. Preserve the child/case backend fields and the existing claim boundary: no quality, speed, Arc/NPU, acceleration, or BitNet QK256/I2_S claims.
1 parent 22391a5 commit 8fe66d4

14 files changed

Lines changed: 223 additions & 14 deletions

File tree

ci/hardware/intel-258v/2026-05-08/slm-answer-corpus-qwen25-cpu-clean-provenance.json

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
{
22
"artifact_kind": "slm_cpu_answer_corpus",
3+
"backend_lane": "dense_slm_cpu",
34
"backend": {
45
"fallback_used": false,
56
"requested_backend": "cpu",
67
"runtime_api": "cpu",
7-
"selected_backend": "cpu"
8+
"selected_backend": "cpu-rust"
89
},
910
"cases": [
1011
{
@@ -633,6 +634,7 @@
633634
]
634635
},
635636
"execution_plan": null,
637+
"fallback_used": false,
636638
"generation": {
637639
"default_max_new_tokens": 16,
638640
"deterministic": true,
@@ -657,17 +659,26 @@
657659
"tokenizer": "gguf_metadata",
658660
"tokenizer_path": null
659661
},
660-
"prompt_template": {
662+
"model_architecture": "qwen2",
663+
"model_family": "qwen",
664+
"prompt_template": "qwen2.5",
665+
"prompt_template_policy": {
661666
"family": "qwen2.5"
662667
},
668+
"quantization": "Q8_0",
663669
"quality_summary": {
664670
"failed": 0,
665671
"not_run": 0,
666672
"passed": 3,
667673
"timeout": 0,
668674
"total": 3
669675
},
676+
"requested_backend": "cpu",
670677
"schema_version": "1.0.0",
678+
"selected_backend": "cpu-rust",
679+
"selected_kernel_or_runtime": "dense-qwen-cpu-reference",
671680
"speedup_claim": false,
681+
"runtime_api": "cpu",
682+
"tokenizer_source": "gguf_metadata",
672683
"timestamp": "2026-05-12T21:48:18.738703300+00:00"
673-
}
684+
}

ci/hardware/intel-258v/2026-05-08/slm-phase-warm-session-qwen25-cpu-profiles/decode_128.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"arc140v_claim": false,
33
"artifact_kind": "dense_slm_cpu_phase_profile",
44
"artifact_path": "ci\\hardware\\intel-258v\\2026-05-08\\slm-phase-warm-session-qwen25-cpu-profiles\\decode_128.json",
5+
"backend_lane": "dense_slm_cpu",
56
"bitnet_qk256_i2s_claim": false,
67
"counts": {
78
"n_kv": 26,
@@ -92,6 +93,8 @@
9293
"tokenizer": "gguf_metadata",
9394
"vocab_size": 151936
9495
},
96+
"model_architecture": "qwen2",
97+
"model_family": "qwen",
9598
"profile": {
9699
"backend": {
97100
"fallback_reason": null,
@@ -197,6 +200,8 @@
197200
},
198201
"profile_id": "decode_128",
199202
"prompt": "Answer with a deterministic continuation: one two three",
203+
"prompt_template": "qwen2.5",
204+
"quantization": "Q8_0",
200205
"prompt_render": {
201206
"add_bos": false,
202207
"parse_special": true,
@@ -208,6 +213,7 @@
208213
"runtime_api": "cpu",
209214
"schema_version": "1.0.0",
210215
"selected_backend": "cpu-rust",
216+
"selected_kernel_or_runtime": "dense-qwen-cpu-reference",
211217
"session_artifact_path": "ci\\hardware\\intel-258v\\2026-05-08\\slm-phase-warm-session-qwen25-cpu.json",
212218
"speedup_claim": false,
213219
"strict_provenance": {
@@ -321,6 +327,7 @@
321327
"strict": true,
322328
"type": "gguf_metadata"
323329
},
330+
"tokenizer_source": "gguf_metadata",
324331
"tokens": {
325332
"generated": 128,
326333
"generated_ids": [
@@ -625,4 +632,4 @@
625632
],
626633
"total": 165
627634
}
628-
}
635+
}

ci/hardware/intel-258v/2026-05-08/slm-phase-warm-session-qwen25-cpu-profiles/prefill_512.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"arc140v_claim": false,
33
"artifact_kind": "dense_slm_cpu_phase_profile",
44
"artifact_path": "ci\\hardware\\intel-258v\\2026-05-08\\slm-phase-warm-session-qwen25-cpu-profiles\\prefill_512.json",
5+
"backend_lane": "dense_slm_cpu",
56
"bitnet_qk256_i2s_claim": false,
67
"counts": {
78
"n_kv": 26,
@@ -92,6 +93,8 @@
9293
"tokenizer": "gguf_metadata",
9394
"vocab_size": 151936
9495
},
96+
"model_architecture": "qwen2",
97+
"model_family": "qwen",
9598
"profile": {
9699
"backend": {
97100
"fallback_reason": null,
@@ -197,6 +200,8 @@
197200
},
198201
"profile_id": "prefill_512",
199202
"prompt": "benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token 
benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token 
benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token benchmark token ",
203+
"prompt_template": "qwen2.5",
204+
"quantization": "Q8_0",
200205
"prompt_render": {
201206
"add_bos": false,
202207
"parse_special": true,
@@ -208,6 +213,7 @@
208213
"runtime_api": "cpu",
209214
"schema_version": "1.0.0",
210215
"selected_backend": "cpu-rust",
216+
"selected_kernel_or_runtime": "dense-qwen-cpu-reference",
211217
"session_artifact_path": "ci\\hardware\\intel-258v\\2026-05-08\\slm-phase-warm-session-qwen25-cpu.json",
212218
"speedup_claim": false,
213219
"strict_provenance": {
@@ -321,6 +327,7 @@
321327
"strict": true,
322328
"type": "gguf_metadata"
323329
},
330+
"tokenizer_source": "gguf_metadata",
324331
"tokens": {
325332
"generated": 1,
326333
"generated_ids": [
@@ -875,4 +882,4 @@
875882
],
876883
"total": 542
877884
}
878-
}
885+
}

ci/hardware/intel-258v/2026-05-08/slm-phase-warm-session-qwen25-cpu.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"artifact_kind": "dense_slm_cpu_phase_warm_session",
33
"artifact_path": "ci\\hardware\\intel-258v\\2026-05-08\\slm-phase-warm-session-qwen25-cpu.json",
4+
"backend_lane": "dense_slm_cpu",
45
"claim_boundary": {
56
"arc140v_claim": false,
67
"bitnet_answer_quality_claim": false,
@@ -62,6 +63,10 @@
6263
"tokenizer": "gguf_metadata",
6364
"vocab_size": 151936
6465
},
66+
"model_architecture": "qwen2",
67+
"model_family": "qwen",
68+
"prompt_template": "qwen2.5",
69+
"quantization": "Q8_0",
6570
"profiles": [
6671
{
6772
"decode_total_ms": 175.035,
@@ -92,6 +97,7 @@
9297
"runtime_api": "cpu",
9398
"schema_version": "1.0.0",
9499
"selected_backend": "cpu-rust",
100+
"selected_kernel_or_runtime": "dense-qwen-cpu-reference",
95101
"session": {
96102
"model_loaded_once": true,
97103
"per_profile_receipt_dir": "ci\\hardware\\intel-258v\\2026-05-08\\slm-phase-warm-session-qwen25-cpu-profiles",
@@ -115,5 +121,6 @@
115121
"source": "gguf_metadata",
116122
"strict": true,
117123
"type": "gguf_metadata"
118-
}
119-
}
124+
},
125+
"tokenizer_source": "gguf_metadata"
126+
}

crates/bitnet-cli/src/commands/answer_corpus.rs

Lines changed: 91 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,46 @@ impl AnswerCorpusCommand {
174174
row["kernel"]["selected_kernel"] == "dense-qwen-cpu-reference"
175175
&& row["kernel"]["family"] == "dense_qwen"
176176
});
177+
let top_level_selected_backend =
178+
aggregate_case_str(&rows, &["backend", "selected_backend"])
179+
.unwrap_or(device.as_str())
180+
.to_string();
181+
let top_level_runtime_api = aggregate_case_str(&rows, &["backend", "runtime_api"])
182+
.unwrap_or_else(|| answer_corpus_runtime_api(&device))
183+
.to_string();
184+
let top_level_fallback_used =
185+
rows.iter().any(|row| row["backend"]["fallback_used"].as_bool().unwrap_or(true));
186+
let top_level_model_family =
187+
corpus.model.family.as_deref().unwrap_or("unknown").to_string();
188+
let top_level_model_architecture =
189+
corpus.model.architecture.as_deref().unwrap_or("unknown").to_string();
190+
let top_level_quantization =
191+
corpus.model.quant_format.as_deref().unwrap_or("unknown").to_string();
192+
let top_level_tokenizer_source = aggregate_case_str(&rows, &["tokenizer", "source"])
193+
.unwrap_or(aggregate_tokenizer)
194+
.to_string();
195+
let top_level_selected_kernel_or_runtime =
196+
aggregate_case_str(&rows, &["kernel", "selected_kernel"])
197+
.unwrap_or(&top_level_runtime_api)
198+
.to_string();
199+
let top_level_backend_lane =
200+
answer_corpus_backend_lane(&device, slm_answer_path, &top_level_model_family);
177201

178202
let receipt = json!({
179203
"schema_version": "1.0.0",
180204
"artifact_kind": artifact_kind,
181205
"timestamp": chrono::Utc::now().to_rfc3339(),
206+
"requested_backend": device.as_str(),
207+
"selected_backend": top_level_selected_backend,
208+
"runtime_api": top_level_runtime_api,
209+
"fallback_used": top_level_fallback_used,
210+
"backend_lane": top_level_backend_lane,
211+
"model_family": top_level_model_family,
212+
"model_architecture": top_level_model_architecture,
213+
"quantization": top_level_quantization,
214+
"tokenizer_source": top_level_tokenizer_source,
215+
"prompt_template": corpus.defaults.prompt_template.as_str(),
216+
"selected_kernel_or_runtime": top_level_selected_kernel_or_runtime,
182217
"corpus": {
183218
"path": self.corpus.display().to_string(),
184219
"name": corpus.name,
@@ -202,13 +237,13 @@ impl AnswerCorpusCommand {
202237
},
203238
"backend": {
204239
"requested_backend": device.as_str(),
205-
"selected_backend": device.as_str(),
206-
"runtime_api": answer_corpus_runtime_api(&device),
207-
"fallback_used": false,
240+
"selected_backend": top_level_selected_backend,
241+
"runtime_api": top_level_runtime_api,
242+
"fallback_used": top_level_fallback_used,
208243
},
209244
"execution_plan": aggregate_execution_plan,
210-
"prompt_template": {
211-
"family": corpus.defaults.prompt_template,
245+
"prompt_template_policy": {
246+
"family": corpus.defaults.prompt_template.as_str(),
212247
},
213248
"generation": {
214249
"mode": if corpus.defaults.greedy { "greedy" } else { "sampling" },
@@ -612,6 +647,32 @@ fn is_cuda_answer_corpus_device(device: &str) -> bool {
612647
matches!(device, "cuda" | RTX_5070_TI_CUDA)
613648
}
614649

650+
fn answer_corpus_backend_lane(
651+
device: &str,
652+
slm_answer_path: bool,
653+
model_family: &str,
654+
) -> &'static str {
655+
if slm_answer_path && device == "cpu" && model_family == "qwen" {
656+
"dense_slm_cpu"
657+
} else if is_cuda_answer_corpus_device(device) {
658+
"bitnet_cuda"
659+
} else if device == "apple-m4-cpu-neon" {
660+
"apple_m4_cpu_neon"
661+
} else {
662+
"bitnet_cpu"
663+
}
664+
}
665+
666+
fn aggregate_case_str<'a>(rows: &'a [Value], path: &[&str]) -> Option<&'a str> {
667+
rows.iter().find_map(|row| {
668+
let mut cursor = row;
669+
for key in path {
670+
cursor = cursor.get(*key)?;
671+
}
672+
cursor.as_str()
673+
})
674+
}
675+
615676
fn prompt_prefill_receipt(run_receipt: &Value) -> Value {
616677
let prompt_token_count = run_receipt["tokens"]["prompt"].as_u64().unwrap_or_else(|| {
617678
run_receipt["tokens"]["prompt_ids"]
@@ -1242,6 +1303,31 @@ mod tests {
12421303
assert_eq!(effective_default_timeout_seconds(Some(0), Some(300)), 1);
12431304
}
12441305

1306+
#[test]
1307+
fn slm_answer_aggregate_identity_uses_dense_cpu_lane() {
1308+
let rows = vec![json!({
1309+
"backend": {
1310+
"selected_backend": "cpu-rust",
1311+
"runtime_api": "cpu",
1312+
"fallback_used": false,
1313+
},
1314+
"kernel": {
1315+
"selected_kernel": "dense-qwen-cpu-reference",
1316+
},
1317+
"tokenizer": {
1318+
"source": "gguf_metadata",
1319+
},
1320+
})];
1321+
1322+
assert_eq!(aggregate_case_str(&rows, &["backend", "selected_backend"]), Some("cpu-rust"));
1323+
assert_eq!(
1324+
aggregate_case_str(&rows, &["kernel", "selected_kernel"]),
1325+
Some("dense-qwen-cpu-reference")
1326+
);
1327+
assert_eq!(aggregate_case_str(&rows, &["tokenizer", "source"]), Some("gguf_metadata"));
1328+
assert_eq!(answer_corpus_backend_lane("cpu", true, "qwen"), "dense_slm_cpu");
1329+
}
1330+
12451331
#[test]
12461332
fn cpu_answer_receipt_accepts_strict_cpu_truth() {
12471333
let receipt = json!({

crates/bitnet-cli/src/main.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5975,6 +5975,7 @@ async fn run_cpu_phase_warm_session(
59755975
pretokenizer_authority,
59765976
tokenizer.as_ref(),
59775977
dense_slm_model,
5978+
&prompt_template,
59785979
model_quant_format,
59795980
kernel_family,
59805981
kernel_implementation,
@@ -6021,6 +6022,13 @@ async fn run_cpu_phase_warm_session(
60216022
"runtime_api": backend_identity.runtime_api.as_str(),
60226023
"fallback_used": false,
60236024
"fallback_reason": serde_json::Value::Null,
6025+
"backend_lane": if dense_slm_model { "dense_slm_cpu" } else { hardware_lane },
6026+
"model_family": model_family,
6027+
"model_architecture": model_architecture,
6028+
"quantization": model_quant_format,
6029+
"tokenizer_source": tokenizer_source_str,
6030+
"prompt_template": prompt_template.as_str(),
6031+
"selected_kernel_or_runtime": selected_kernel.as_str(),
60246032
"session": {
60256033
"model_loaded_once": true,
60266034
"tokenizer_loaded_once": true,
@@ -6253,6 +6261,7 @@ fn cpu_phase_strict_profile_receipt(
62536261
pretokenizer_authority: &str,
62546262
tokenizer: &(dyn bitnet_tokenizers::Tokenizer + Send + Sync),
62556263
dense_slm_model: bool,
6264+
prompt_template_label: &str,
62566265
model_quant_format: &str,
62576266
kernel_family: &str,
62586267
kernel_implementation: &str,
@@ -6291,6 +6300,13 @@ fn cpu_phase_strict_profile_receipt(
62916300
"runtime_api": backend_identity.runtime_api.as_str(),
62926301
"fallback_used": false,
62936302
"fallback_reason": serde_json::Value::Null,
6303+
"backend_lane": if dense_slm_model { "dense_slm_cpu" } else { "bitnet_cpu" },
6304+
"model_family": model_family,
6305+
"model_architecture": model_architecture,
6306+
"quantization": model_quant_format,
6307+
"tokenizer_source": tokenizer_source_str,
6308+
"prompt_template": prompt_template_label,
6309+
"selected_kernel_or_runtime": selected_kernel,
62946310
"prompt": run.prompt.as_str(),
62956311
"prompt_render": {
62966312
"template_family": run.prompt_template_family.as_str(),

docs/tracking/campaigns/intel-258v-platform/CAMPAIGN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ Validate Core Ultra 7 258V as the BitNet CPU lead and tri-device platform while
7474
| SLM258V-004 | merged | Record dense Qwen SLM phase timing receipts on the 258V CPU path, keeping dense SLM phase evidence separate from BitNet QK256/I2_S receipts; merged in #4542. |
7575
| LNL258V-COMPARE-004 | merged | Refresh the same-machine comparison index after the corrected BitNet CPU bundle and dense Qwen SLM CPU answer/phase receipts, preserving independent BitNet CPU, dense SLM CPU, Arc 140V, and NPU claim boundaries; merged in #4545. |
7676
| CPU258V-032 | merged | Harden the 258V post-fix scalar-vs-AVX2 answer-parity receipt so top-level backend/runtime/fallback/kernel identity is explicit; merged in #4550. |
77+
| SLM258V-005 | pr_open | Harden dense Qwen SLM answer and phase receipts so top-level backend/runtime/fallback/model identity is explicit before OpenVINO CPU/GPU/NPU acceleration work; open in #4552. |
7778

7879
## Review Policy
7980

0 commit comments

Comments
 (0)