From d127e774db191ef19915eda47ed3c3836db1a002 Mon Sep 17 00:00:00 2001
From: tiennv
Date: Sat, 27 Sep 2025 10:43:46 +0000
Subject: [PATCH] feat: Replace `once_cell` with `std::sync::OnceLock`, and
 improve error handling in the backend initialization.

---
 reference_apps/llamacpp_bindings/Cargo.lock |  3 +--
 reference_apps/llamacpp_bindings/Cargo.toml |  1 -
 reference_apps/llamacpp_bindings/src/lib.rs | 18 +++++++++++-------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/reference_apps/llamacpp_bindings/Cargo.lock b/reference_apps/llamacpp_bindings/Cargo.lock
index 36295d3..c53f348 100644
--- a/reference_apps/llamacpp_bindings/Cargo.lock
+++ b/reference_apps/llamacpp_bindings/Cargo.lock
@@ -593,14 +593,13 @@ dependencies = [
 
 [[package]]
 name = "llamacpp_bindings"
-version = "0.6.2"
+version = "0.8.2"
 dependencies = [
  "anyhow",
  "bindgen",
  "encoding_rs",
  "hf-hub",
  "llama-cpp-2",
- "once_cell",
 ]
 
 [[package]]
diff --git a/reference_apps/llamacpp_bindings/Cargo.toml b/reference_apps/llamacpp_bindings/Cargo.toml
index 79bbd88..6b41769 100644
--- a/reference_apps/llamacpp_bindings/Cargo.toml
+++ b/reference_apps/llamacpp_bindings/Cargo.toml
@@ -8,7 +8,6 @@ llama-cpp-2 = { git = "https://github.com/utilityai/llama-cpp-rs", rev = "77af62
 hf-hub = { version = "0.3.2" }
 anyhow = "1.0.91"
 encoding_rs = "0.8.34"
-once_cell = "1.20.2"
 
 [features]
 cuda = ["llama-cpp-2/cuda"]
diff --git a/reference_apps/llamacpp_bindings/src/lib.rs b/reference_apps/llamacpp_bindings/src/lib.rs
index 38788e8..54c755c 100644
--- a/reference_apps/llamacpp_bindings/src/lib.rs
+++ b/reference_apps/llamacpp_bindings/src/lib.rs
@@ -1,4 +1,4 @@
-use anyhow::{ Context, Result};
+use anyhow::{anyhow, Context, Result};
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::model::{LlamaModel, params::LlamaModelParams, Special, AddBos};
 use llama_cpp_2::token::data_array::LlamaTokenDataArray;
@@ -7,12 +7,11 @@ use llama_cpp_2::llama_batch::LlamaBatch;
 use std::num::NonZeroU32;
 use std::path::PathBuf;
 use encoding_rs::UTF_8;
-use hf_hub::api::sync::ApiBuilder;
 use llama_cpp_2::ggml_time_us;
 use std::time::Duration;
-use once_cell::sync::OnceCell;
+use std::sync::OnceLock;
 
-static BACKEND: OnceCell<LlamaBackend> = OnceCell::new();
+static BACKEND: OnceLock<Result<LlamaBackend>> = OnceLock::new();
 
 // Enum for selecting model type
 #[derive(Debug, Clone)]
@@ -102,10 +101,11 @@ impl LLM {
     /// Loads the model based on the provided ModelType
     pub fn load(model_type: ModelType, load_params: LoadParams) -> Result<Self> {
         // Initialize backend only once
-        let backend = BACKEND.get_or_try_init(LlamaBackend::init)?;
+        let backend = BACKEND.get_or_init(|| LlamaBackend::init().map_err(anyhow::Error::from));
+        let backend = backend.as_ref().map_err(|e| anyhow!(e.to_string()))?;
 
         let model_path = match model_type {
-            ModelType::Local { path } => path
+            ModelType::Local { path } => path,
         };
 
         let model_params = LlamaModelParams::from(load_params);
@@ -122,7 +122,11 @@ impl LLM {
     where
         F: FnMut(&str),
     {
-        let backend = BACKEND.get().expect("Backend not initialized");
+        let backend = BACKEND
+            .get()
+            .expect("Backend not initialized")
+            .as_ref()
+            .map_err(|e| anyhow!(e.to_string()))?;
         let t_main_start = ggml_time_us();
         let max_tokens = inference_params.max_tokens;
         let ctx_params = LlamaContextParams::from(inference_params);
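
Note (not part of the patch): unlike once_cell's OnceCell::get_or_try_init, std::sync::OnceLock has no stable fallible initializer, which is why the patch caches the Result of LlamaBackend::init inside the lock and re-inspects it at each call site. A minimal self-contained sketch of that pattern, with a hypothetical fallible_init standing in for LlamaBackend::init:

    use std::sync::OnceLock;

    // Hypothetical stand-in for LlamaBackend::init: any fallible, run-once setup.
    fn fallible_init() -> Result<String, String> {
        Ok("backend ready".to_string())
    }

    // Cache the Result itself. get_or_init runs the closure at most once;
    // every caller then borrows the stored Ok value or reports the stored Err.
    static BACKEND: OnceLock<Result<String, String>> = OnceLock::new();

    fn backend() -> Result<&'static String, String> {
        BACKEND
            .get_or_init(fallible_init)
            .as_ref()
            .map_err(|e| e.clone())
    }

    fn main() {
        match backend() {
            Ok(b) => println!("initialized: {b}"),
            Err(e) => eprintln!("initialization failed: {e}"),
        }
    }

One consequence of this design: if initialization fails once, the Err stays cached and every later call fails without retrying. Also, because the patch rebuilds the error via anyhow!(e.to_string()), only the message survives, not the original error's source chain.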