dcSpark
diff --git a/‎shinkai-bin/shinkai-node/src/llm_provider/providers/ollama.rs‎
Lines changed: 183 additions & 2 deletions b/‎shinkai-bin/shinkai-node/src/llm_provider/providers/ollama.rs‎
Lines changed: 183 additions & 2 deletions
diff --git a/‎shinkai-bin/shinkai-node/src/llm_provider/providers/openai.rs‎
Lines changed: 15 additions & 3 deletions b/‎shinkai-bin/shinkai-node/src/llm_provider/providers/openai.rs‎
Lines changed: 15 additions & 3 deletions
diff --git a/‎shinkai-bin/shinkai-node/src/llm_provider/providers/shared/ollama_api.rs‎
Lines changed: 5 additions & 0 deletions b/‎shinkai-bin/shinkai-node/src/llm_provider/providers/shared/ollama_api.rs‎
Lines changed: 5 additions & 0 deletions
@@ -243,6 +243,9 @@ async fn process_stream(
     tools: Option<Vec<JsonValue>>,
 ) -> Result<LLMInferenceResponse, LLMProviderError> {
     let mut response_text = String::new();
+    let mut thinking_content = String::new();
+    let mut thinking_started = false;
+    let mut thinking_ended = false;
     let mut previous_json_chunk: String = String::new();
     let mut final_eval_count = None;
     let mut final_eval_duration = None;
@@ -315,7 +318,106 @@ async fn process_stream(
                 match data_resp {
                     Ok(data) => {
                         previous_json_chunk = "".to_string();
-                        response_text.push_str(&data.message.content);
+                        
+                        // Handle thinking tokens
+                        if let Some(thinking) = &data.message.thinking {
+                            if !thinking.is_empty() {
+                                if !thinking_started {
+                                    thinking_started = true;
+                                    // Send opening <think> tag immediately via WebSocket
+                                    if let Some(ref manager) = ws_manager_trait {
+                                        if let Some(ref inbox_name) = inbox_name {
+                                            let m = manager.lock().await;
+                                            let inbox_name_string = inbox_name.to_string();
+                                            let metadata = WSMetadata {
+                                                id: Some(session_id.clone()),
+                                                is_done: false,
+                                                done_reason: None,
+                                                total_duration: None,
+                                                eval_count: None,
+                                            };
+                                            let ws_message_type = WSMessageType::Metadata(metadata);
+                                            let _ = m
+                                                .queue_message(
+                                                    WSTopic::Inbox,
+                                                    inbox_name_string,
+                                                    "<think>".to_string(),
+                                                    ws_message_type,
+                                                    true,
+                                                )
+                                                .await;
+                                        }
+                                    }
+                                    // Add to response text for final accumulation
+                                    response_text.push_str("<think>");
+                                }
+                                
+                                // Stream thinking content immediately via WebSocket
+                                if let Some(ref manager) = ws_manager_trait {
+                                    if let Some(ref inbox_name) = inbox_name {
+                                        let m = manager.lock().await;
+                                        let inbox_name_string = inbox_name.to_string();
+                                        let metadata = WSMetadata {
+                                            id: Some(session_id.clone()),
+                                            is_done: false,
+                                            done_reason: None,
+                                            total_duration: None,
+                                            eval_count: None,
+                                        };
+                                        let ws_message_type = WSMessageType::Metadata(metadata);
+                                        let _ = m
+                                            .queue_message(
+                                                WSTopic::Inbox,
+                                                inbox_name_string,
+                                                thinking.clone(),
+                                                ws_message_type,
+                                                true,
+                                            )
+                                            .await;
+                                    }
+                                }
+                                
+                                // Also accumulate for final response
+                                thinking_content.push_str(thinking);
+                                response_text.push_str(thinking);
+                            }
+                        }
+                        
+                        // Handle regular content tokens
+                        if !data.message.content.is_empty() {
+                            // If we were processing thinking and now we have content, 
+                            // close the thinking tags
+                            if thinking_started && !thinking_ended {
+                                thinking_ended = true;
+                                // Send closing </think> tag via WebSocket
+                                if let Some(ref manager) = ws_manager_trait {
+                                    if let Some(ref inbox_name) = inbox_name {
+                                        let m = manager.lock().await;
+                                        let inbox_name_string = inbox_name.to_string();
+                                        let metadata = WSMetadata {
+                                            id: Some(session_id.clone()),
+                                            is_done: false,
+                                            done_reason: None,
+                                            total_duration: None,
+                                            eval_count: None,
+                                        };
+                                        let ws_message_type = WSMessageType::Metadata(metadata);
+                                        let _ = m
+                                            .queue_message(
+                                                WSTopic::Inbox,
+                                                inbox_name_string,
+                                                "</think>".to_string(),
+                                                ws_message_type,
+                                                true,
+                                            )
+                                            .await;
+                                    }
+                                }
+                                // Add to response text for final accumulation
+                                response_text.push_str("</think>");
+                            }
+                            response_text.push_str(&data.message.content);
+                        }
 
                         if let Some(tool_calls) = data.message.tool_calls {
                             for tool_call in tool_calls {
@@ -461,6 +563,36 @@ async fn process_stream(
         }
     }
 
+    // If we ended with thinking content but no regular content, send closing tag
+    if thinking_started && !thinking_ended && !thinking_content.is_empty() {
+        // Send closing </think> tag via WebSocket
+        if let Some(ref manager) = ws_manager_trait {
+            if let Some(ref inbox_name) = inbox_name {
+                let m = manager.lock().await;
+                let inbox_name_string = inbox_name.to_string();
+                let metadata = WSMetadata {
+                    id: Some(session_id.clone()),
+                    is_done: true,
+                    done_reason: None,
+                    total_duration: None,
+                    eval_count: None,
+                };
+                let ws_message_type = WSMessageType::Metadata(metadata);
+                let _ = m
+                    .queue_message(
+                        WSTopic::Inbox,
+                        inbox_name_string,
+                        "</think>".to_string(),
+                        ws_message_type,
+                        true,
+                    )
+                    .await;
+            }
+        }
+        // Add to response text for final accumulation
+        response_text.push_str("</think>");
+    }
+
     let tps = if let (Some(eval_count), Some(eval_duration)) = (final_eval_count, final_eval_duration) {
         if eval_duration > 0 {
             Some(eval_count as f64 / eval_duration as f64 * 1e9)
@@ -530,6 +662,21 @@ async fn handle_non_streaming_response(
                 if let Some(message) = response_json.get("message") {
                     if let Some(content) = message.get("content") {
                         if let Some(content_str) = content.as_str() {
+                            // Handle thinking content in non-streaming response
+                            let mut final_content = String::new();
+                            
+                            // Check for thinking content and prepend it with tags
+                            if let Some(thinking) = message.get("thinking").and_then(|t| t.as_str()) {
+                                if !thinking.is_empty() {
+                                    final_content.push_str("<think>");
+                                    final_content.push_str(thinking);
+                                    final_content.push_str("</think>");
+                                }
+                            }
+                            
+                            // Add regular content
+                            final_content.push_str(content_str);
+                            
                             let mut function_calls = Vec::new();
 
                             if let Some(tool_calls) = message.get("tool_calls").and_then(|tc| tc.as_array()) {
@@ -606,6 +753,31 @@ async fn handle_non_streaming_response(
                                 format!("Function Calls: {:?}", function_calls).as_str(),
                             );
 
+                            // Send the final content (including thinking) via WebSocket in non-streaming mode
+                            if let Some(ref manager) = ws_manager_trait {
+                                if let Some(ref inbox_name) = inbox_name {
+                                    let m = manager.lock().await;
+                                    let inbox_name_string = inbox_name.to_string();
+                                    let metadata = WSMetadata {
+                                        id: None,
+                                        is_done: true,
+                                        done_reason: None,
+                                        total_duration: None,
+                                        eval_count: None,
+                                    };
+                                    let ws_message_type = WSMessageType::Metadata(metadata);
+                                    let _ = m
+                                        .queue_message(
+                                            WSTopic::Inbox,
+                                            inbox_name_string,
+                                            final_content.clone(),
+                                            ws_message_type,
+                                            true,
+                                        )
+                                        .await;
+                                }
+                            }
+
                             let eval_count = response_json.get("eval_count").and_then(|v| v.as_u64()).unwrap_or(0);
                             let eval_duration = response_json.get("eval_duration").and_then(|v| v.as_u64()).unwrap_or(1);
                             let tps = if eval_duration > 0 {
@@ -615,7 +787,7 @@ async fn handle_non_streaming_response(
                             };
 
                             break Ok(LLMInferenceResponse::new(
-                                content_str.to_string(),
+                                final_content,
                                 json!({}),
                                 function_calls,
                                 tps,
@@ -681,6 +853,15 @@ fn add_options_to_payload(
     let streaming = get_value("LLM_STREAMING", config.and_then(|c| c.stream.as_ref())).unwrap_or(true); // Default to true if not specified
     payload["stream"] = serde_json::json!(streaming);
 
+    // Set the `think` option for reasoning-capable models (defaults to true).
+    // Note: Ollama has open upstream issues with this feature:
+    // https://github.com/ollama/ollama/issues/11712
+    // https://github.com/ollama/ollama/issues/11751
+    // https://github.com/ollama/ollama/issues/10976
+    if ModelCapabilitiesManager::has_reasoning_capabilities(model) {
+        let thinking = get_value("LLM_THINKING", config.and_then(|c| c.thinking.as_ref())).unwrap_or(true);
+        payload["think"] = serde_json::json!(thinking);
+    }
+
     // Handle num_ctx setting
     let num_ctx_from_config = config
         .and_then(|c| c.other_model_params.as_ref())
 
@@ -97,7 +97,8 @@ impl LLMService for OpenAI {
                 let tools_json = result.functions.unwrap_or_else(Vec::new);
 
                 // Set up initial payload with appropriate token limit field based on model capabilities
-                let mut payload = if ModelCapabilitiesManager::has_reasoning_capabilities(&model) {
+                let mut payload = if ModelCapabilitiesManager::has_reasoning_capabilities(&model) 
+                {
                     json!({
                         "model": self.model_type,
                         "messages": messages_json,
@@ -118,8 +119,19 @@ impl LLMService for OpenAI {
                     payload["tools"] = serde_json::Value::Array(tools_json.clone());
                 }
 
-                // Only add options to payload for non-reasoning models
-                if !ModelCapabilitiesManager::has_reasoning_capabilities(&model) {
+                // Reasoning models: set reasoning_effort only when thinking is enabled, otherwise remove it. Non-reasoning models get the generic options instead.
+                if ModelCapabilitiesManager::has_reasoning_capabilities(&model) {
+                    let thinking_enabled = config.as_ref().and_then(|c| c.thinking).unwrap_or(false);
+                    if thinking_enabled {
+                        let effort = config
+                            .as_ref()
+                            .and_then(|c| c.reasoning_effort.clone())
+                            .unwrap_or("medium".to_string());
+                        payload["reasoning_effort"] = serde_json::json!(effort);
+                    } else if let Some(obj) = payload.as_object_mut() {
+                        obj.remove("reasoning_effort");
+                    }
+                } else {
                     add_options_to_payload(&mut payload, config.as_ref());
                 }
 
 
@@ -46,6 +46,8 @@ pub struct OllamaMessage {
     pub images: Option<Vec<String>>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_calls: Option<Vec<ToolCall>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking: Option<String>,
 }
 
 #[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
@@ -131,6 +133,7 @@ fn from_chat_completion_messages(
                 content,
                 images,
                 tool_calls: None,
+                thinking: None,
             });
         }
     }
@@ -358,12 +361,14 @@ mod tests {
                 content: "You are a very helpful assistant. You may be provided with documents or content to analyze and answer questions about them, in that case refer to the content provided in the user message for your responses.".to_string(),
                 images: None,
                 tool_calls: None,
+                thinking: None,
             },
             OllamaMessage {
                 role: "user".to_string(),
                 content: "tell me what's the response when using shinkai echo tool with: say hello".to_string(),
                 images: Some(vec![]),
                 tool_calls: None,
+                thinking: None,
             },
         ];