
Commit a14936c

[Firebase AI] Add support for configuring a thinking budget (#14909)

1 parent 79d8cf6

File tree

7 files changed: +144 -1 lines changed


FirebaseAI/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -2,6 +2,8 @@
 - [fixed] Fixed `Sendable` warnings introduced in the Xcode 26 beta. (#14947)
 - [added] Added support for setting `title` in string, number and array `Schema`
   types. (#14971)
+- [added] Added support for configuring the "thinking" budget when using Gemini
+  2.5 series models. (#14909)
 
 # 11.13.0
 - [feature] Initial release of the Firebase AI Logic SDK (`FirebaseAI`). This

FirebaseAI/Sources/GenerateContentResponse.swift

Lines changed: 12 additions & 0 deletions
@@ -26,6 +26,16 @@ public struct GenerateContentResponse: Sendable {
     /// The total number of tokens across the generated response candidates.
     public let candidatesTokenCount: Int
 
+    /// The number of tokens used by the model's internal "thinking" process.
+    ///
+    /// For models that support thinking (like Gemini 2.5 Pro and Flash), this represents the actual
+    /// number of tokens consumed for reasoning before the model generated a response. For models
+    /// that do not support thinking, this value will be `0`.
+    ///
+    /// When thinking is used, this count will be less than or equal to the `thinkingBudget` set in
+    /// the ``ThinkingConfig``.
+    public let thoughtsTokenCount: Int
+
     /// The total number of tokens in both the request and response.
     public let totalTokenCount: Int
 
@@ -330,6 +340,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
   enum CodingKeys: CodingKey {
     case promptTokenCount
     case candidatesTokenCount
+    case thoughtsTokenCount
     case totalTokenCount
     case promptTokensDetails
     case candidatesTokensDetails
@@ -340,6 +351,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
     promptTokenCount = try container.decodeIfPresent(Int.self, forKey: .promptTokenCount) ?? 0
     candidatesTokenCount =
       try container.decodeIfPresent(Int.self, forKey: .candidatesTokenCount) ?? 0
+    thoughtsTokenCount = try container.decodeIfPresent(Int.self, forKey: .thoughtsTokenCount) ?? 0
     totalTokenCount = try container.decodeIfPresent(Int.self, forKey: .totalTokenCount) ?? 0
     promptTokensDetails =
       try container.decodeIfPresent([ModalityTokenCount].self, forKey: .promptTokensDetails) ?? []
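
For context, a minimal sketch of how the new `thoughtsTokenCount` field surfaces to callers; it is not code from this commit, and it assumes `model` is an already-configured `GenerativeModel`:

```swift
// Sketch only: `model` is assumed to be an already-configured `GenerativeModel`.
let response = try await model.generateContent("Explain how quicksort works.")
if let usage = response.usageMetadata {
  // `thoughtsTokenCount` decodes to `0` when the backend omits the field,
  // e.g. for models that do not support thinking.
  print("Thinking tokens: \(usage.thoughtsTokenCount)")
  print("Total tokens: \(usage.totalTokenCount)")
}
```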

FirebaseAI/Sources/GenerationConfig.swift

Lines changed: 9 additions & 1 deletion
@@ -51,6 +51,9 @@ public struct GenerationConfig: Sendable {
   /// Supported modalities of the response.
   let responseModalities: [ResponseModality]?
 
+  /// Configuration for controlling the "thinking" behavior of compatible Gemini models.
+  let thinkingConfig: ThinkingConfig?
+
   /// Creates a new `GenerationConfig` value.
   ///
   /// See the
@@ -152,11 +155,14 @@ public struct GenerationConfig: Sendable {
   ///     > Warning: Specifying response modalities is a **Public Preview** feature, which means
   ///     > that it is not subject to any SLA or deprecation policy and could change in
   ///     > backwards-incompatible ways.
+  ///   - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini
+  ///     models; see ``ThinkingConfig`` for more details.
   public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
               candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
               presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
               stopSequences: [String]? = nil, responseMIMEType: String? = nil,
-              responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil) {
+              responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil,
+              thinkingConfig: ThinkingConfig? = nil) {
     // Explicit init because otherwise if we re-arrange the above variables it changes the API
     // surface.
     self.temperature = temperature
@@ -170,6 +176,7 @@ public struct GenerationConfig: Sendable {
     self.responseMIMEType = responseMIMEType
     self.responseSchema = responseSchema
     self.responseModalities = responseModalities
+    self.thinkingConfig = thinkingConfig
   }
 }
 
@@ -189,5 +196,6 @@ extension GenerationConfig: Encodable {
     case responseMIMEType = "responseMimeType"
     case responseSchema
     case responseModalities
+    case thinkingConfig
   }
 }
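
Combined with the new `ThinkingConfig` type (added in the next file), the expanded initializer can be exercised as follows. This is a sketch rather than code from the commit: it assumes a configured default Firebase app, the budget value is arbitrary, and the model name is the Gemini 2.5 Flash preview used in this commit's test constants.

```swift
import FirebaseAI

// Cap the model's internal reasoning at 1024 tokens (an illustrative value).
let generationConfig = GenerationConfig(
  thinkingConfig: ThinkingConfig(thinkingBudget: 1024)
)
let model = FirebaseAI.firebaseAI(backend: .googleAI()).generativeModel(
  modelName: "gemini-2.5-flash-preview-05-20",
  generationConfig: generationConfig
)
```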
FirebaseAI/Sources/ThinkingConfig.swift

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Configuration for controlling the "thinking" behavior of compatible Gemini models.
+///
+/// Certain models, like Gemini 2.5 Flash and Pro, utilize a thinking process before generating a
+/// response. This allows them to reason through complex problems and plan a more coherent and
+/// accurate answer.
+public struct ThinkingConfig: Sendable {
+  /// The thinking budget in tokens.
+  ///
+  /// This parameter sets an upper limit on the number of tokens the model can use for its internal
+  /// "thinking" process. A higher budget may result in better quality responses for complex tasks
+  /// but can also increase latency and cost.
+  ///
+  /// If you don't specify a budget (`nil`), the model will automatically determine the appropriate
+  /// amount of thinking based on the complexity of the prompt.
+  ///
+  /// **Model-Specific Behavior:**
+  /// - **Gemini 2.5 Flash:** The budget can range from `0` to `24576`. Setting the budget to `0`
+  ///   disables the thinking process, which prioritizes the lowest latency and cost.
+  /// - **Gemini 2.5 Pro:** The budget must be an integer between `128` and `32768`. Thinking cannot
+  ///   be disabled for this model.
+  ///
+  /// An error will be thrown if you set a thinking budget for a model that does not support this
+  /// feature or if the specified budget is not within the model's supported range.
+  let thinkingBudget: Int?
+
+  /// Initializes a new `ThinkingConfig`.
+  ///
+  /// - Parameters:
+  ///   - thinkingBudget: The maximum number of tokens to be used for the model's thinking process.
+  public init(thinkingBudget: Int? = nil) {
+    self.thinkingBudget = thinkingBudget
+  }
+}
+
+// MARK: - Codable Conformances
+
+extension ThinkingConfig: Encodable {}
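
The budget semantics documented above map to three usage patterns; the following sketch picks example values from the ranges in the doc comment:

```swift
// Gemini 2.5 Flash: a budget of 0 disables thinking entirely, prioritizing
// latency and cost (supported range: 0 through 24576).
let flashNoThinking = ThinkingConfig(thinkingBudget: 0)

// Gemini 2.5 Pro: thinking cannot be disabled; the budget must fall
// within 128 through 32768.
let proMinimalThinking = ThinkingConfig(thinkingBudget: 128)

// No budget (`nil`): the model decides how much thinking the prompt needs.
let automaticThinking = ThinkingConfig()
```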

FirebaseAI/Tests/TestApp/Sources/Constants.swift

Lines changed: 2 additions & 0 deletions
@@ -24,5 +24,7 @@ public enum ModelNames {
   public static let gemini2Flash = "gemini-2.0-flash-001"
   public static let gemini2FlashLite = "gemini-2.0-flash-lite-001"
   public static let gemini2FlashExperimental = "gemini-2.0-flash-exp"
+  public static let gemini2_5_FlashPreview = "gemini-2.5-flash-preview-05-20"
+  public static let gemini2_5_ProPreview = "gemini-2.5-pro-preview-06-05"
   public static let gemma3_4B = "gemma-3-4b-it"
 }

FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift

Lines changed: 64 additions & 0 deletions
@@ -76,6 +76,7 @@ struct GenerateContentIntegrationTests {
     let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
     #expect(promptTokensDetails.modality == .text)
     #expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
+    #expect(usageMetadata.thoughtsTokenCount == 0)
     // The fields `candidatesTokenCount` and `candidatesTokensDetails` are not included when using
     // Gemma models.
     if modelName.hasPrefix("gemma") {
@@ -119,6 +120,7 @@ struct GenerateContentIntegrationTests {
     let usageMetadata = try #require(response.usageMetadata)
     #expect(usageMetadata.promptTokenCount.isEqual(to: 15, accuracy: tokenCountAccuracy))
     #expect(usageMetadata.candidatesTokenCount.isEqual(to: 1, accuracy: tokenCountAccuracy))
+    #expect(usageMetadata.thoughtsTokenCount == 0)
     #expect(usageMetadata.totalTokenCount
       == usageMetadata.promptTokenCount + usageMetadata.candidatesTokenCount)
     #expect(usageMetadata.promptTokensDetails.count == 1)
@@ -131,6 +133,68 @@ struct GenerateContentIntegrationTests {
     #expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
   }
 
+  @Test(arguments: [
+    (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
+    // TODO: Add Vertex AI Gemini 2.5 Pro tests when available.
+    // (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
+    // (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
+    (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 24576),
+  ])
+  func generateContentThinking(_ config: InstanceConfig, modelName: String,
+                               thinkingBudget: Int) async throws {
+    let model = FirebaseAI.componentInstance(config).generativeModel(
+      modelName: modelName,
+      generationConfig: GenerationConfig(
+        temperature: 0.0,
+        topP: 0.0,
+        topK: 1,
+        thinkingConfig: ThinkingConfig(thinkingBudget: thinkingBudget)
+      ),
+      safetySettings: safetySettings
+    )
+    let prompt = "Where is Google headquarters located? Answer with the city name only."
+
+    let response = try await model.generateContent(prompt)
+
+    let text = try #require(response.text).trimmingCharacters(in: .whitespacesAndNewlines)
+    #expect(text == "Mountain View")
+
+    let usageMetadata = try #require(response.usageMetadata)
+    #expect(usageMetadata.promptTokenCount.isEqual(to: 13, accuracy: tokenCountAccuracy))
+    #expect(usageMetadata.promptTokensDetails.count == 1)
+    let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
+    #expect(promptTokensDetails.modality == .text)
+    #expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
+    if thinkingBudget == 0 {
+      #expect(usageMetadata.thoughtsTokenCount == 0)
+    } else {
+      #expect(usageMetadata.thoughtsTokenCount <= thinkingBudget)
+    }
+    #expect(usageMetadata.candidatesTokenCount.isEqual(to: 3, accuracy: tokenCountAccuracy))
+    // The `candidatesTokensDetails` field is erroneously omitted when using the Google AI (Gemini
+    // Developer API) backend.
+    if case .googleAI = config.apiConfig.service {
+      #expect(usageMetadata.candidatesTokensDetails.isEmpty)
+    } else {
+      #expect(usageMetadata.candidatesTokensDetails.count == 1)
+      let candidatesTokensDetails = try #require(usageMetadata.candidatesTokensDetails.first)
+      #expect(candidatesTokensDetails.modality == .text)
+      #expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
+    }
+    #expect(usageMetadata.totalTokenCount > 0)
+    #expect(usageMetadata.totalTokenCount == (
+      usageMetadata.promptTokenCount
+        + usageMetadata.thoughtsTokenCount
+        + usageMetadata.candidatesTokenCount
+    ))
+  }
+
   @Test(arguments: [
     InstanceConfig.vertexAI_v1beta,
     InstanceConfig.googleAI_v1beta,

FirebaseAI/Tests/TestApp/Tests/Utilities/InstanceConfig.swift

Lines changed: 4 additions & 0 deletions
@@ -32,6 +32,10 @@ struct InstanceConfig: Equatable, Encodable {
   static let googleAI_v1beta_staging = InstanceConfig(
     apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyStaging), version: .v1beta)
   )
+  static let googleAI_v1beta_freeTier = InstanceConfig(
+    appName: FirebaseAppNames.spark,
+    apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyProd), version: .v1beta)
+  )
   static let googleAI_v1beta_freeTier_bypassProxy = InstanceConfig(
     appName: FirebaseAppNames.spark,
     apiConfig: APIConfig(service: .googleAI(endpoint: .googleAIBypassProxy), version: .v1beta)
