Skip to content

Commit 2a62658

Browse files
FEATURE: support configurable thinking tokens for Gemini (#1322)
1 parent 851ca57 · commit 2a62658

File tree

4 files changed

+88
-0
lines changed

4 files changed

+88
-0
lines changed

app/models/llm_model.rb

+2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ def self.provider_params
6363
},
6464
google: {
6565
disable_native_tools: :checkbox,
66+
enable_thinking: :checkbox,
67+
thinking_tokens: :number,
6668
},
6769
azure: {
6870
disable_native_tools: :checkbox,

config/locales/client.en.yml

+2
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,8 @@ en:
533533
disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
534534
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
535535
enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
536+
enable_thinking: "Enable thinking (only on applicable models eg: flash 2.5)"
537+
thinking_tokens: "Number of tokens used for thinking"
536538
reasoning_tokens: "Number of tokens used for reasoning"
537539
disable_temperature: "Disable temperature (some thinking models don't support temperature)"
538540
disable_top_p: "Disable top P (some thinking models don't support top P)"

lib/completions/endpoints/gemini.rb

+6
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ def prepare_payload(prompt, model_params, dialect)
9494
end
9595
end
9696

97+
if llm_model.lookup_custom_param("enable_thinking")
98+
thinking_tokens = llm_model.lookup_custom_param("thinking_tokens").to_i
99+
thinking_tokens = thinking_tokens.clamp(0, 24_576)
100+
payload[:generationConfig][:thinkingConfig] = { thinkingBudget: thinking_tokens }
101+
end
102+
97103
payload
98104
end
99105

spec/lib/completions/endpoints/gemini_spec.rb

+78
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,84 @@ def tool_response
153153
}
154154
end
155155

156+
it "correctly configures thinking when enabled" do
157+
model.update!(provider_params: { enable_thinking: "true", thinking_tokens: "10000" })
158+
159+
response = gemini_mock.response("Using thinking mode").to_json
160+
161+
req_body = nil
162+
163+
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
164+
url = "#{model.url}:generateContent?key=123"
165+
166+
stub_request(:post, url).with(
167+
body:
168+
proc do |_req_body|
169+
req_body = _req_body
170+
true
171+
end,
172+
).to_return(status: 200, body: response)
173+
174+
response = llm.generate("Hello", user: user)
175+
176+
parsed = JSON.parse(req_body, symbolize_names: true)
177+
178+
# Verify thinking config is properly set with the token limit
179+
expect(parsed.dig(:generationConfig, :thinkingConfig)).to eq({ thinkingBudget: 10_000 })
180+
end
181+
182+
it "clamps thinking tokens within allowed limits" do
183+
model.update!(provider_params: { enable_thinking: "true", thinking_tokens: "30000" })
184+
185+
response = gemini_mock.response("Thinking tokens clamped").to_json
186+
187+
req_body = nil
188+
189+
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
190+
url = "#{model.url}:generateContent?key=123"
191+
192+
stub_request(:post, url).with(
193+
body:
194+
proc do |_req_body|
195+
req_body = _req_body
196+
true
197+
end,
198+
).to_return(status: 200, body: response)
199+
200+
response = llm.generate("Hello", user: user)
201+
202+
parsed = JSON.parse(req_body, symbolize_names: true)
203+
204+
# Verify thinking tokens are clamped to 24_576
205+
expect(parsed.dig(:generationConfig, :thinkingConfig)).to eq({ thinkingBudget: 24_576 })
206+
end
207+
208+
it "does not add thinking config when disabled" do
209+
model.update!(provider_params: { enable_thinking: false, thinking_tokens: "10000" })
210+
211+
response = gemini_mock.response("No thinking mode").to_json
212+
213+
req_body = nil
214+
215+
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
216+
url = "#{model.url}:generateContent?key=123"
217+
218+
stub_request(:post, url).with(
219+
body:
220+
proc do |_req_body|
221+
req_body = _req_body
222+
true
223+
end,
224+
).to_return(status: 200, body: response)
225+
226+
response = llm.generate("Hello", user: user)
227+
228+
parsed = JSON.parse(req_body, symbolize_names: true)
229+
230+
# Verify thinking config is not present
231+
expect(parsed.dig(:generationConfig, :thinkingConfig)).to be_nil
232+
end
233+
156234
# by default gemini is meant to use AUTO mode, however new experimental models
157235
# appear to require this to be explicitly set
158236
it "Explicitly specifies tool config" do

0 commit comments

Comments (0)