
add support for thinking #521


Merged · 2 commits · May 29, 2025
3 changes: 3 additions & 0 deletions examples/README.md
@@ -62,3 +62,6 @@ Requirement: `pip install tqdm`
### Ollama Embed - Generate embeddings with a model
- [embed.py](embed.py)


### Thinking - Enable thinking mode for a model
- [thinking.py](thinking.py)
13 changes: 13 additions & 0 deletions examples/thinking.py
@@ -0,0 +1,13 @@
from ollama import chat
Member:
Could we add this example to the examples/README.md?

Contributor (Author):
oh thank you! I totally didn't see that file


messages = [
{
'role': 'user',
'content': 'What is 10 + 23?',
},
]

response = chat('deepseek-r1', messages=messages, think=True)

print('Thinking:\n========\n\n' + response.message.thinking)
Member:
Suggested change:
- print('Thinking:\n========\n\n' + response.message.thinking)
+ print('Thinking:')
+ print(response.message.thinking)

Member:
and can do the same below

Contributor (Author):
ha turns out I did have a version like that before, but I really wanted to highlight that there are two fields now, so having two print lines total feels like it slightly helps with that

Member:
Cool yeah that makes sense - could we just get rid of the === portion then and just do newline?

Contributor (Author):
the ===== is very intentional to do a markdown-style header that's much more noticeable in its plaintext form, like

Thinking:
========

<Here is the thinking content>

Response:
========

<Here is the response>

print('\nResponse:\n========\n\n' + response.message.content)
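
The example above covers only the non-streaming path. For reference, here is a hypothetical streaming counterpart (not part of this PR); it assumes that with `stream=True` and `think=True` each streamed chunk's `message` carries `thinking` and `content` fragments, consistent with the `Message` fields added in `ollama/_types.py` below:

```python
# Hypothetical streaming counterpart to examples/thinking.py (not in this PR).
# Assumes streamed chunks expose thinking/content fragments on chunk.message.
from ollama import chat

messages = [
    {
        'role': 'user',
        'content': 'What is 10 + 23?',
    },
]

for chunk in chat('deepseek-r1', messages=messages, think=True, stream=True):
    # Either field may be None on a given chunk; print whichever arrived.
    if chunk.message.thinking:
        print(chunk.message.thinking, end='', flush=True)
    if chunk.message.content:
        print(chunk.message.content, end='', flush=True)
print()
```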
12 changes: 12 additions & 0 deletions ollama/_client.py
@@ -270,6 +270,7 @@ def chat(
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[False] = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -283,6 +284,7 @@ def chat(
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[True] = True,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -295,6 +297,7 @@ def chat(
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: bool = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -341,6 +344,7 @@ def add_two_numbers(a: int, b: int) -> int:
messages=list(_copy_messages(messages)),
tools=list(_copy_tools(tools)),
stream=stream,
think=think,
format=format,
options=options,
keep_alive=keep_alive,
@@ -694,6 +698,7 @@ async def generate(
template: str = '',
context: Optional[Sequence[int]] = None,
stream: Literal[False] = False,
think: Optional[bool] = None,
raw: bool = False,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
@@ -712,6 +717,7 @@ async def generate(
template: str = '',
context: Optional[Sequence[int]] = None,
stream: Literal[True] = True,
think: Optional[bool] = None,
raw: bool = False,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
@@ -729,6 +735,7 @@ async def generate(
template: Optional[str] = None,
context: Optional[Sequence[int]] = None,
stream: bool = False,
think: Optional[bool] = None,
raw: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
images: Optional[Sequence[Union[str, bytes, Image]]] = None,
@@ -756,6 +763,7 @@ async def generate(
template=template,
context=context,
stream=stream,
think=think,
raw=raw,
format=format,
images=list(_copy_images(images)) if images else None,
@@ -773,6 +781,7 @@ async def chat(
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[False] = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -786,6 +795,7 @@ async def chat(
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: Literal[True] = True,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -798,6 +808,7 @@ async def chat(
*,
tools: Optional[Sequence[Union[Mapping[str, Any], Tool, Callable]]] = None,
stream: bool = False,
think: Optional[bool] = None,
format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
options: Optional[Union[Mapping[str, Any], Options]] = None,
keep_alive: Optional[Union[float, str]] = None,
@@ -845,6 +856,7 @@ def add_two_numbers(a: int, b: int) -> int:
messages=list(_copy_messages(messages)),
tools=list(_copy_tools(tools)),
stream=stream,
think=think,
format=format,
options=options,
keep_alive=keep_alive,
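
As the diff above shows, `think` is threaded through `generate` as well as `chat`. A rough sketch (not from the PR) of a `generate` call with thinking enabled, with the reasoning surfaced on `response.thinking` per the `GenerateResponse` field added in `ollama/_types.py` below; `deepseek-r1` is assumed as in the example:

```python
# Sketch (not from this PR): think=True on generate(); the reasoning comes
# back on response.thinking and the answer on response.response.
from ollama import generate

response = generate('deepseek-r1', 'What is 10 + 23?', think=True)

print('Thinking:\n========\n\n' + (response.thinking or ''))
print('\nResponse:\n========\n\n' + response.response)
```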
12 changes: 12 additions & 0 deletions ollama/_types.py
@@ -207,6 +207,9 @@ class GenerateRequest(BaseGenerateRequest):
images: Optional[Sequence[Image]] = None
'Image data for multimodal models.'

think: Optional[bool] = None
'Enable thinking mode (for thinking models).'


class BaseGenerateResponse(SubscriptableBaseModel):
model: Optional[str] = None
@@ -248,6 +251,9 @@ class GenerateResponse(BaseGenerateResponse):
response: str
'Response content. When streaming, this contains a fragment of the response.'

thinking: Optional[str] = None
'Thinking content. Only present when thinking is enabled.'

context: Optional[Sequence[int]] = None
'Tokenized history up to the point of the response.'

@@ -263,6 +269,9 @@ class Message(SubscriptableBaseModel):
content: Optional[str] = None
'Content of the message. Response messages contain message fragments when streaming.'

thinking: Optional[str] = None
'Thinking content. Only present when thinking is enabled.'

images: Optional[Sequence[Image]] = None
"""
Optional list of image data for multimodal models.
@@ -345,6 +354,9 @@ def serialize_model(self, nxt):
tools: Optional[Sequence[Tool]] = None
'Tools to use for the chat.'

think: Optional[bool] = None
'Enable thinking mode (for thinking models).'


class ChatResponse(BaseGenerateResponse):
"""
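
Both new `thinking` fields are `Optional[str]`, so code that also runs non-thinking models (or leaves `think` unset) should guard against `None` before printing. A minimal sketch, assuming the same model as the example:

```python
# Sketch: Message.thinking is Optional[str], so guard before printing.
from ollama import chat

messages = [{'role': 'user', 'content': 'What is 10 + 23?'}]
response = chat('deepseek-r1', messages=messages, think=True)

if response.message.thinking is not None:
    print('Thinking:\n========\n\n' + response.message.thinking)

print('\nResponse:\n========\n\n' + (response.message.content or ''))
```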