
Commit d324f6d

feat: adding num_ctx to set the size of the model context window (#75)

* feat: adding num_ctx to set the size of the model context window.
* docs: updating main README.md
* feat: update help removing ollama num_ctx default, since this can change in the future

1 parent 6e995b2 · commit d324f6d

File tree

4 files changed: +47 −8 lines changed

README.md

Lines changed: 8 additions & 4 deletions

```diff
@@ -56,19 +56,20 @@ MCP Client for Ollama (`ollmcp`) is a modern, interactive terminal application (
 - 🌐 **Multi-Server Support**: Connect to multiple MCP servers simultaneously
 - 🚀 **Multiple Transport Types**: Supports STDIO, SSE, and Streamable HTTP server connections
-- 🎨 **Rich Terminal Interface**: Interactive console UI
+- 🎨 **Rich Terminal Interface**: Interactive console UI with modern styling
 - 🌊 **Streaming Responses**: View model outputs in real-time as they're generated
 - 🛠️ **Tool Management**: Enable/disable specific tools or entire servers during chat sessions
 - 🧑‍💻 **Human-in-the-Loop (HIL)**: Review and approve tool executions before they run for enhanced control and safety
-- 🎮 **Advanced Model Configuration**: Fine-tune 10+ model parameters including temperature, sampling, repetition control, and more
+- 🎮 **Advanced Model Configuration**: Fine-tune 15+ model parameters including context window size, temperature, sampling, repetition control, and more
 - 💬 **System Prompt Customization**: Define and edit the system prompt to control model behavior and persona
+- 🧠 **Context Window Control**: Adjust the context window size (num_ctx) to handle longer conversations and complex tasks
 - 🎨 **Enhanced Tool Display**: Beautiful, structured visualization of tool executions with JSON syntax highlighting
 - 🧠 **Context Management**: Control conversation memory with configurable retention settings
 - 🤔 **Thinking Mode**: Advanced reasoning capabilities with visible thought processes for supported models (e.g., gpt-oss, deepseek-r1, qwen3, etc.)
 - 🗣️ **Cross-Language Support**: Seamlessly work with both Python and JavaScript MCP servers
 - 🔍 **Auto-Discovery**: Automatically find and use Claude's existing MCP server configurations
 - 🔁 **Dynamic Model Switching**: Switch between any installed Ollama model without restarting
-- 💾 **Configuration Persistence**: Save and load tool preferences between sessions
+- 💾 **Configuration Persistence**: Save and load tool preferences and model settings between sessions
 - 🔄 **Server Reloading**: Hot-reload MCP servers during development without restarting the client
 - **Fuzzy Autocomplete**: Interactive, arrow-key command autocomplete with descriptions
 - 🏷️ **Dynamic Prompt**: Shows current model, thinking mode, and enabled tools
@@ -298,6 +299,8 @@ The `model-config` (`mc`) command opens the advanced model settings interface, a
 #### Key Parameters
 
+- **System Prompt**: Set the model's role and behavior to guide responses.
+- **Context Window (num_ctx)**: Set how much chat history the model uses. Balance with memory usage and performance.
 - **Keep Tokens**: Prevent important tokens from being dropped
 - **Max Tokens**: Limit response length (0 = auto)
 - **Seed**: Make outputs reproducible (set to -1 for random)
@@ -309,7 +312,7 @@ The `model-config` (`mc`) command opens the advanced model settings interface, a
 #### Commands
 
-- Enter parameter numbers `1-14` to edit settings
+- Enter parameter numbers `1-15` to edit settings
 - Enter `sp` to edit the system prompt
 - Use `u1`, `u2`, etc. to unset parameters, or `uall` to reset all
 - `h`/`help`: Show parameter details and tips
@@ -324,6 +327,7 @@ The `model-config` (`mc`) command opens the advanced model settings interface, a
 - **Reduce Repeats:** `repeat_penalty: 1.1-1.3`, `presence_penalty: 0.2`, `frequency_penalty: 0.3`
 - **Balanced:** `temperature: 0.7`, `top_p: 0.9`, `typical_p: 0.7`
 - **Reproducible:** `seed: 42`, `temperature: 0.0`
+- **Large Context:** `num_ctx: 8192` or higher for complex conversations requiring more context
 
 > [!TIP]
 > All parameters default to unset, letting Ollama use its own optimized values. Use `help` in the config menu for details and recommendations. Changes are saved with your configuration.
```
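The README's preset recommendations (Balanced, Reproducible, Large Context) boil down to small dictionaries of Ollama option values. A minimal sketch of how such presets could be merged into one options dict — the `PRESETS` table and `build_options` helper are hypothetical illustrations, not part of the ollmcp codebase:

```python
# Hypothetical sketch of the README's presets (not ollmcp code). The merged
# dict is the shape Ollama accepts as its `options` payload.
PRESETS = {
    "balanced": {"temperature": 0.7, "top_p": 0.9, "typical_p": 0.7},
    "reproducible": {"seed": 42, "temperature": 0.0},
    "large_context": {"num_ctx": 8192},  # or higher for complex conversations
}

def build_options(*preset_names: str) -> dict:
    """Merge one or more presets into a single Ollama options dict."""
    options: dict = {}
    for name in preset_names:
        options.update(PRESETS[name])  # later presets override earlier ones
    return options

opts = build_options("balanced", "large_context")
```

Because only explicitly chosen keys appear in the result, anything left out stays at Ollama's own defaults, matching the tip above.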

mcp_client_for_ollama/config/defaults.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -37,7 +37,8 @@ def default_config() -> dict:
             "repeat_penalty": None,
             "presence_penalty": None,
             "frequency_penalty": None,
-            "stop": None
+            "stop": None,
+            "num_ctx": None
         },
         "displaySettings": {
             "showToolExecution": True,
```
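The convention in this diff is that every model parameter, including the new `num_ctx`, defaults to `None`, meaning "unset — let Ollama choose". A simplified standalone sketch of that pattern (the real `default_config()` contains many more keys):

```python
# Simplified sketch of the defaults pattern above (not the full function).
# None means "unset": the parameter is omitted from requests to Ollama.
def default_model_config() -> dict:
    return {
        "stop": None,
        "num_ctx": None,
    }

cfg = default_model_config()
unset = [key for key, value in cfg.items() if value is None]
```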

mcp_client_for_ollama/config/manager.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -241,6 +241,8 @@ def _validate_config(self, config_data: Dict[str, Any]) -> Dict[str, Any]:
                 validated["modelConfig"]["frequency_penalty"] = model_config["frequency_penalty"] if model_config["frequency_penalty"] is not None else None
             if "stop" in model_config:
                 validated["modelConfig"]["stop"] = model_config["stop"] if model_config["stop"] is not None else None
+            if "num_ctx" in model_config:
+                validated["modelConfig"]["num_ctx"] = model_config["num_ctx"] if model_config["num_ctx"] is not None else None
 
         if "displaySettings" in config_data and isinstance(config_data["displaySettings"], dict):
             if "showToolExecution" in config_data["displaySettings"]:
```
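The validator above follows a copy-if-present pattern: each known key is carried into the validated config only when it exists in the loaded file, so unknown keys are dropped and missing keys fall back to defaults. A standalone sketch of that pattern under assumed names (`KNOWN_MODEL_KEYS` and `validate_model_config` are illustrative, not the real API):

```python
# Illustrative sketch of the _validate_config pattern (names hypothetical).
# The real validator checks many more keys than these two.
KNOWN_MODEL_KEYS = ("stop", "num_ctx")

def validate_model_config(model_config: dict) -> dict:
    """Copy only recognized, present keys into the validated config."""
    validated: dict = {}
    for key in KNOWN_MODEL_KEYS:
        if key in model_config:
            validated[key] = model_config[key]
    return validated

result = validate_model_config({"num_ctx": 4096, "unknown_key": "ignored"})
```

This keeps a hand-edited or stale config file from injecting unexpected keys into the runtime configuration.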

mcp_client_for_ollama/models/config_manager.py

Lines changed: 35 additions & 3 deletions

```diff
@@ -40,6 +40,7 @@ def __init__(self, console: Optional[Console] = None):
         self.presence_penalty = None  # float
         self.frequency_penalty = None  # float
         self.stop = None  # list[str]
+        self.num_ctx = None  # int
 
         # Parameter explanations
         self.parameter_explanations = {
@@ -126,6 +127,12 @@ def __init__(self, console: Optional[Console] = None):
                 "range": "0–8 strings, each ≤ 255 characters",
                 "effect": "Provides precise control over where generation ends.",
                 "recommendation": "Use for controlling dialog format or preventing the model from continuing beyond desired points."
+            },
+            "num_ctx": {
+                "description": "Sets the size of the context window used to generate the next token.",
+                "range": "1 – model maximum (e.g., 1 – 32768 for qwen3:0.6b); model-dependent",
+                "effect": "Controls how much conversation history and context the model can access when generating responses.",
+                "recommendation": "Use higher values for complex conversations requiring more context; balance with memory usage and performance."
             }
         }
 
@@ -149,7 +156,8 @@ def get_config(self) -> Dict[str, Any]:
             "repeat_penalty": self.repeat_penalty,
             "presence_penalty": self.presence_penalty,
             "frequency_penalty": self.frequency_penalty,
-            "stop": self.stop
+            "stop": self.stop,
+            "num_ctx": self.num_ctx
         }
 
     def get_ollama_options(self) -> Dict[str, Any]:
@@ -188,6 +196,8 @@ def get_ollama_options(self) -> Dict[str, Any]:
             options["frequency_penalty"] = self.frequency_penalty
         if self.stop is not None:
             options["stop"] = self.stop
+        if self.num_ctx is not None:
+            options["num_ctx"] = self.num_ctx
         return options
 
     def get_system_prompt(self) -> str:
@@ -232,6 +242,8 @@ def set_config(self, config: Dict[str, Any]) -> None:
             self.frequency_penalty = config["frequency_penalty"]
         if "stop" in config:
             self.stop = config["stop"]
+        if "num_ctx" in config:
+            self.num_ctx = config["num_ctx"]
 
     def display_current_config(self) -> None:
         """Display the current model configuration."""
@@ -260,7 +272,8 @@ def format_value(value):
             f"[bold][orange3]10.[/orange3] repeat_penalty:[/bold] {format_value(self.repeat_penalty)}\n"
             f"[bold][orange3]11.[/orange3] presence_penalty:[/bold] {format_value(self.presence_penalty)}\n"
             f"[bold][orange3]12.[/orange3] frequency_penalty:[/bold] {format_value(self.frequency_penalty)}\n"
-            f"[bold][orange3]13.[/orange3] stop:[/bold] {format_value(self.stop)}",
+            f"[bold][orange3]13.[/orange3] stop:[/bold] {format_value(self.stop)}\n"
+            f"[bold][orange3]14.[/orange3] num_ctx:[/bold] {format_value(self.num_ctx)}",
             title="[bold blue]🎮 Model Parameters[/bold blue]",
             border_style="blue", expand=False))
         self.console.print("\n[bold yellow]Note:[/bold yellow] Unset values will use Ollama's defaults.")
@@ -312,7 +325,7 @@ def display_parameter_explanations(self) -> None:
         for param in [
             "num_keep", "seed", "num_predict", "top_k", "top_p", "min_p",
             "typical_p", "repeat_last_n", "temperature", "repeat_penalty",
-            "presence_penalty", "frequency_penalty", "stop"
+            "presence_penalty", "frequency_penalty", "stop", "num_ctx"
         ]:
             info = self.parameter_explanations[param]
             table.add_row(
@@ -433,6 +446,7 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = N
                 self.presence_penalty = None
                 self.frequency_penalty = None
                 self.stop = None
+                self.num_ctx = None
                 result_message = "[green]All parameters unset (using Ollama defaults).[/green]"
                 result_style = "green"
                 continue
@@ -505,6 +519,10 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = N
                         self.stop = None
                         result_message = "[green]stop unset (using Ollama default).[/green]"
                         result_style = "green"
+                    case 14:
+                        self.num_ctx = None
+                        result_message = "[green]num_ctx unset (using Ollama default).[/green]"
+                        result_style = "green"
                     case _:
                         result_message = "[red]Invalid parameter number.[/red]"
                         result_style = "red"
@@ -709,6 +727,20 @@ def configure_model_interactive(self, clear_console_func: Optional[Callable] = N
                     result_message = "[green]stop sequences cleared.[/green]"
                     result_style = "green"
 
+                case "14":
+                    try:
+                        new_value = IntPrompt.ask("Context Size (num_ctx, size of context window)", default=self.num_ctx)
+                        if new_value >= 1:
+                            self.num_ctx = new_value
+                            result_message = f"[green]num_ctx set to {new_value}.[/green]"
+                            result_style = "green"
+                        else:
+                            result_message = "[red]num_ctx must be a positive integer.[/red]"
+                            result_style = "red"
+                    except ValueError:
+                        result_message = "[red]Please enter a valid integer.[/red]"
+                        result_style = "red"
+
                 case _:
                     result_message = "[red]Invalid selection. Please choose a valid option.[/red]"
                     result_style = "red"
```
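Two behaviors recur throughout this diff: `num_ctx` is accepted only as a positive integer, and `get_ollama_options()` includes only parameters that have actually been set. A simplified standalone sketch of both, without the Rich prompt machinery (`ModelConfigSketch` and `set_num_ctx` are illustrative names, not the real class or API):

```python
# Illustrative, stripped-down sketch of the diff's behavior (not ollmcp code).
class ModelConfigSketch:
    def __init__(self):
        self.temperature = None  # unset by default
        self.num_ctx = None      # unset by default

    def set_num_ctx(self, value: int) -> bool:
        """Mirror the interactive check: accept only positive integers."""
        if value >= 1:
            self.num_ctx = value
            return True
        return False

    def get_ollama_options(self) -> dict:
        """Include only explicitly set parameters, as the real method does."""
        options = {}
        for name in ("temperature", "num_ctx"):
            value = getattr(self, name)
            if value is not None:
                options[name] = value
        return options

mc = ModelConfigSketch()
accepted = mc.set_num_ctx(8192)
opts = mc.get_ollama_options()
```

Keeping unset parameters out of the options dict is what lets Ollama fall back to its own model-specific defaults, as the README's tip describes.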
