Skip to content

Commit 023f205

Browse files
authored
Merge pull request #1267 from dcSpark/fix/capabilities
Update GPT-5 capabilities / OpenRouter.
2 parents 35508fc + c70142a commit 023f205

File tree

2 files changed

+83
-49
lines changed

2 files changed

+83
-49
lines changed

shinkai-bin/shinkai-node/src/llm_provider/providers/openrouter.rs

Lines changed: 60 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,19 @@ impl LLMService for OpenRouter {
9090
"max_tokens": result.remaining_output_tokens,
9191
});
9292

93-
// Conditionally add functions to the payload if tools_json is not empty
93+
// Conditionally add tools to the payload if tools_json is not empty
9494
if !tools_json.is_empty() {
95-
payload["functions"] = serde_json::Value::Array(tools_json.clone());
95+
// Remove tool_router_key from each tool before sending to OpenRouter
96+
let tools_payload = tools_json
97+
.clone()
98+
.into_iter()
99+
.map(|mut tool| {
100+
tool.as_object_mut().unwrap().remove("tool_router_key");
101+
tool
102+
})
103+
.collect::<Vec<JsonValue>>();
104+
105+
payload["tools"] = serde_json::Value::Array(tools_payload);
96106
}
97107

98108
// Add options to payload
@@ -252,38 +262,44 @@ async fn handle_streaming_response(
252262
if let Some(content) = message.get("content") {
253263
response_text.push_str(content.as_str().unwrap_or(""));
254264
}
255-
if let Some(fc) = message.get("function_call") {
256-
if let Some(name) = fc.get("name") {
257-
let fc_arguments = fc
258-
.get("arguments")
259-
.and_then(|args| args.as_str())
260-
.and_then(|args_str| serde_json::from_str(args_str).ok())
261-
.and_then(|args_value: serde_json::Value| {
262-
args_value.as_object().cloned()
263-
})
264-
.unwrap_or_else(|| serde_json::Map::new());
265-
266-
// Extract tool_router_key
267-
let tool_router_key = tools.as_ref().and_then(|tools_array| {
268-
tools_array.iter().find_map(|tool| {
269-
if tool.get("name")?.as_str()? == name.as_str().unwrap_or("") {
270-
tool.get("tool_router_key")
271-
.and_then(|key| key.as_str().map(|s| s.to_string()))
272-
} else {
273-
None
265+
if let Some(tool_calls) = message.get("tool_calls") {
266+
if let Some(tool_calls_array) = tool_calls.as_array() {
267+
for tool_call in tool_calls_array {
268+
if let Some(function) = tool_call.get("function") {
269+
if let Some(name) = function.get("name") {
270+
let fc_arguments = function
271+
.get("arguments")
272+
.and_then(|args| args.as_str())
273+
.and_then(|args_str| serde_json::from_str(args_str).ok())
274+
.and_then(|args_value: serde_json::Value| {
275+
args_value.as_object().cloned()
276+
})
277+
.unwrap_or_else(|| serde_json::Map::new());
278+
279+
// Extract tool_router_key
280+
let tool_router_key = tools.as_ref().and_then(|tools_array| {
281+
tools_array.iter().find_map(|tool| {
282+
if tool.get("name")?.as_str()? == name.as_str().unwrap_or("") {
283+
tool.get("tool_router_key")
284+
.and_then(|key| key.as_str().map(|s| s.to_string()))
285+
} else {
286+
None
287+
}
288+
})
289+
});
290+
291+
function_calls.push(FunctionCall {
292+
name: name.as_str().unwrap_or("").to_string(),
293+
arguments: fc_arguments.clone(),
294+
tool_router_key,
295+
response: None,
296+
index: function_calls.len() as u64,
297+
id: tool_call.get("id").and_then(|id| id.as_str()).map(|s| s.to_string()),
298+
call_type: tool_call.get("type").and_then(|t| t.as_str()).map(|s| s.to_string()).or(Some("function".to_string())),
299+
});
274300
}
275-
})
276-
});
277-
278-
function_calls.push(FunctionCall {
279-
name: name.as_str().unwrap_or("").to_string(),
280-
arguments: fc_arguments.clone(),
281-
tool_router_key,
282-
response: None,
283-
index: function_calls.len() as u64,
284-
id: None,
285-
call_type: Some("function".to_string()),
286-
});
301+
}
302+
}
287303
}
288304
}
289305
}
@@ -462,17 +478,17 @@ async fn handle_non_streaming_response(
462478
.collect::<Vec<String>>()
463479
.join(" ");
464480

465-
let function_call: Option<FunctionCall> = data.choices.iter().find_map(|choice| {
466-
choice.message.function_call.clone().map(|fc| {
467-
let arguments = serde_json::from_str::<serde_json::Value>(&fc.arguments)
481+
let function_calls: Vec<FunctionCall> = data.choices.iter().flat_map(|choice| {
482+
choice.message.tool_calls.as_ref().unwrap_or(&vec![]).iter().map(|tool_call| {
483+
let arguments = serde_json::from_str::<serde_json::Value>(&tool_call.function.arguments)
468484
.ok()
469485
.and_then(|args_value: serde_json::Value| args_value.as_object().cloned())
470486
.unwrap_or_else(|| serde_json::Map::new());
471487

472488
// Extract tool_router_key
473489
let tool_router_key = tools.as_ref().and_then(|tools_array| {
474490
tools_array.iter().find_map(|tool| {
475-
if tool.get("name")?.as_str()? == fc.name {
491+
if tool.get("name")?.as_str()? == tool_call.function.name {
476492
tool.get("tool_router_key").and_then(|key| key.as_str().map(|s| s.to_string()))
477493
} else {
478494
None
@@ -481,22 +497,22 @@ async fn handle_non_streaming_response(
481497
});
482498

483499
FunctionCall {
484-
name: fc.name,
500+
name: tool_call.function.name.clone(),
485501
arguments,
486502
tool_router_key,
487503
response: None,
488504
index: 0,
489-
id: None,
490-
call_type: Some("function".to_string()),
505+
id: Some(tool_call.id.clone()),
506+
call_type: Some(tool_call.call_type.clone()),
491507
}
492-
})
493-
});
494-
eprintln!("Function Call: {:?}", function_call);
508+
}).collect::<Vec<_>>()
509+
}).collect();
510+
eprintln!("Function Calls: {:?}", function_calls);
495511
eprintln!("Response String: {:?}", response_string);
496512
return Ok(LLMInferenceResponse::new(
497513
response_string,
498514
json!({}),
499-
function_call.map_or_else(Vec::new, |fc| vec![fc]),
515+
function_calls,
500516
None,
501517
));
502518
}

shinkai-bin/shinkai-node/src/managers/model_capabilities_manager.rs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ impl ModelCapabilitiesManager {
134134
match model {
135135
LLMProviderInterface::OpenAI(openai) => match openai.model_type.as_str() {
136136
"gpt-5" => vec![ModelCapability::ImageAnalysis, ModelCapability::TextInference],
137+
"gpt-5-mini" => vec![ModelCapability::ImageAnalysis, ModelCapability::TextInference],
138+
"gpt-5-nano" => vec![ModelCapability::ImageAnalysis, ModelCapability::TextInference],
139+
"gpt-5-chat-latest" => vec![ModelCapability::ImageAnalysis, ModelCapability::TextInference],
137140
"gpt-4o" => vec![ModelCapability::ImageAnalysis, ModelCapability::TextInference],
138141
"gpt-4o-mini" => vec![ModelCapability::ImageAnalysis, ModelCapability::TextInference],
139142
"gpt-4.1-nano" => vec![ModelCapability::ImageAnalysis, ModelCapability::TextInference],
@@ -387,7 +390,10 @@ impl ModelCapabilitiesManager {
387390
pub fn get_llm_provider_cost(model: &LLMProviderInterface) -> ModelCost {
388391
match model {
389392
LLMProviderInterface::OpenAI(openai) => match openai.model_type.as_str() {
390-
"gpt-5" => ModelCost::Expensive,
393+
"gpt-5" => ModelCost::GoodValue,
394+
"gpt-5-mini" => ModelCost::Cheap,
395+
"gpt-5-nano" => ModelCost::VeryCheap,
396+
"gpt-5-chat-latest" => ModelCost::GoodValue,
391397
"gpt-4o" => ModelCost::GoodValue,
392398
"gpt-3.5-turbo-1106" => ModelCost::VeryCheap,
393399
"gpt-4o-mini" => ModelCost::VeryCheap,
@@ -617,7 +623,7 @@ impl ModelCapabilitiesManager {
617623
} else if openai.model_type.starts_with("gpt-5") {
618624
400_000
619625
} else if openai.model_type.starts_with("gpt-3.5") {
620-
16384
626+
16_384
621627
} else {
622628
200_000 // New default for OpenAI models
623629
}
@@ -791,6 +797,12 @@ impl ModelCapabilitiesManager {
791797
65_536
792798
} else if openai.model_type.starts_with("o3") || openai.model_type.starts_with("o4-mini") {
793799
100_000
800+
} else if openai.model_type.starts_with("gpt-5-chat-latest") {
801+
16_384
802+
} else if openai.model_type.starts_with("gpt-5-mini") {
803+
128_000
804+
} else if openai.model_type.starts_with("gpt-5-nano") {
805+
128_000
794806
} else if openai.model_type.starts_with("gpt-5") {
795807
128_000
796808
} else if openai.model_type.starts_with("gpt-3.5") {
@@ -982,8 +994,14 @@ impl ModelCapabilitiesManager {
982994
eprintln!("has tool capabilities model: {:?}", model);
983995
match model {
984996
LLMProviderInterface::OpenAI(openai) => {
985-
// o1-mini specifically does not support function calling
986-
!openai.model_type.starts_with("o1-mini")
997+
// o1-mini and gpt-5-chat-latest specifically do not support function calling
998+
if openai.model_type.starts_with("o1-mini") {
999+
false
1000+
} else if openai.model_type.starts_with("gpt-5-chat-latest") {
1001+
false
1002+
} else {
1003+
true
1004+
}
9871005
}
9881006
LLMProviderInterface::Ollama(model) => {
9891007
// For Ollama, check model type and respect the passed stream parameter
@@ -1073,7 +1091,7 @@ impl ModelCapabilitiesManager {
10731091
|| openai.model_type.starts_with("o3")
10741092
|| openai.model_type.starts_with("o4")
10751093
|| openai.model_type.starts_with("o5")
1076-
|| openai.model_type.starts_with("gpt-5")
1094+
|| (openai.model_type.starts_with("gpt-5") && openai.model_type != "gpt-5-chat-latest")
10771095
}
10781096
LLMProviderInterface::Ollama(ollama) => {
10791097
ollama.model_type.starts_with("deepseek-r1")

0 commit comments

Comments
 (0)