Skip to content

[Firebase AI] Add support for configuring a thinking budget #14909

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 16, 2025
2 changes: 2 additions & 0 deletions FirebaseAI/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
- [fixed] Fixed `Sendable` warnings introduced in the Xcode 26 beta. (#14947)
- [added] Added support for setting `title` in string, number and array `Schema`
types. (#14971)
- [added] Added support for configuring the "thinking" budget when using Gemini
2.5 series models. (#14909)

# 11.13.0
- [feature] Initial release of the Firebase AI Logic SDK (`FirebaseAI`). This
Expand Down
12 changes: 12 additions & 0 deletions FirebaseAI/Sources/GenerateContentResponse.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ public struct GenerateContentResponse: Sendable {
/// The total number of tokens across the generated response candidates.
public let candidatesTokenCount: Int

/// The number of tokens used by the model's internal "thinking" process.
///
/// For models that support thinking (like Gemini 2.5 Pro and Flash), this represents the actual
/// number of tokens consumed for reasoning before the model generated a response. For models
/// that do not support thinking, this value will be `0`.
///
/// When thinking is used, this count will be less than or equal to the `thinkingBudget` set in
/// the ``ThinkingConfig``.
public let thoughtsTokenCount: Int

/// The total number of tokens in both the request and response.
public let totalTokenCount: Int

Expand Down Expand Up @@ -330,6 +340,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
enum CodingKeys: CodingKey {
case promptTokenCount
case candidatesTokenCount
case thoughtsTokenCount
case totalTokenCount
case promptTokensDetails
case candidatesTokensDetails
Expand All @@ -340,6 +351,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
promptTokenCount = try container.decodeIfPresent(Int.self, forKey: .promptTokenCount) ?? 0
candidatesTokenCount =
try container.decodeIfPresent(Int.self, forKey: .candidatesTokenCount) ?? 0
thoughtsTokenCount = try container.decodeIfPresent(Int.self, forKey: .thoughtsTokenCount) ?? 0
totalTokenCount = try container.decodeIfPresent(Int.self, forKey: .totalTokenCount) ?? 0
promptTokensDetails =
try container.decodeIfPresent([ModalityTokenCount].self, forKey: .promptTokensDetails) ?? []
Expand Down
10 changes: 9 additions & 1 deletion FirebaseAI/Sources/GenerationConfig.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ public struct GenerationConfig: Sendable {
/// Supported modalities of the response.
let responseModalities: [ResponseModality]?

/// Configuration for controlling the "thinking" behavior of compatible Gemini models.
let thinkingConfig: ThinkingConfig?

/// Creates a new `GenerationConfig` value.
///
/// See the
Expand Down Expand Up @@ -152,11 +155,14 @@ public struct GenerationConfig: Sendable {
/// > Warning: Specifying response modalities is a **Public Preview** feature, which means
/// > that it is not subject to any SLA or deprecation policy and could change in
/// > backwards-incompatible ways.
/// - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini
/// models; see ``ThinkingConfig`` for more details.
public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
stopSequences: [String]? = nil, responseMIMEType: String? = nil,
responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil) {
responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil,
thinkingConfig: ThinkingConfig? = nil) {
// Explicit init because otherwise if we re-arrange the above variables it changes the API
// surface.
self.temperature = temperature
Expand All @@ -170,6 +176,7 @@ public struct GenerationConfig: Sendable {
self.responseMIMEType = responseMIMEType
self.responseSchema = responseSchema
self.responseModalities = responseModalities
self.thinkingConfig = thinkingConfig
}
}

Expand All @@ -189,5 +196,6 @@ extension GenerationConfig: Encodable {
case responseMIMEType = "responseMimeType"
case responseSchema
case responseModalities
case thinkingConfig
}
}
51 changes: 51 additions & 0 deletions FirebaseAI/Sources/Types/Public/ThinkingConfig.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/// Configuration for controlling the "thinking" behavior of compatible Gemini models.
///
/// Models such as Gemini 2.5 Flash and Pro can spend an internal reasoning ("thinking") phase
/// before producing output, which helps them work through complex prompts and plan a more
/// coherent, accurate answer.
public struct ThinkingConfig: Sendable {
  /// The thinking budget in tokens.
  ///
  /// This caps how many tokens the model may spend on its internal "thinking" phase. A larger
  /// budget can improve answer quality on complex tasks at the expense of additional latency
  /// and cost.
  ///
  /// Leaving this `nil` lets the model automatically choose an appropriate amount of thinking
  /// based on the complexity of the prompt.
  ///
  /// **Model-Specific Behavior:**
  /// - **Gemini 2.5 Flash:** Accepts budgets from `0` through `24576`; a budget of `0` turns
  ///   thinking off entirely, which prioritizes the lowest latency and cost.
  /// - **Gemini 2.5 Pro:** Accepts budgets from `128` through `32768`; thinking cannot be
  ///   disabled on this model.
  ///
  /// Supplying a thinking budget on a model that does not support this feature, or a budget
  /// outside the model's supported range, results in an error being thrown.
  let thinkingBudget: Int?

  /// Initializes a new `ThinkingConfig`.
  ///
  /// - Parameters:
  ///   - thinkingBudget: The maximum number of tokens to be used for the model's thinking process.
  public init(thinkingBudget: Int? = nil) {
    self.thinkingBudget = thinkingBudget
  }
}

// MARK: - Codable Conformances

extension ThinkingConfig: Encodable {}
2 changes: 2 additions & 0 deletions FirebaseAI/Tests/TestApp/Sources/Constants.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,7 @@ public enum ModelNames {
public static let gemini2Flash = "gemini-2.0-flash-001"
public static let gemini2FlashLite = "gemini-2.0-flash-lite-001"
public static let gemini2FlashExperimental = "gemini-2.0-flash-exp"
public static let gemini2_5_FlashPreview = "gemini-2.5-flash-preview-05-20"
public static let gemini2_5_ProPreview = "gemini-2.5-pro-preview-06-05"
public static let gemma3_4B = "gemma-3-4b-it"
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct GenerateContentIntegrationTests {
let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
#expect(promptTokensDetails.modality == .text)
#expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
#expect(usageMetadata.thoughtsTokenCount == 0)
// The fields `candidatesTokenCount` and `candidatesTokensDetails` are not included when using
// Gemma models.
if modelName.hasPrefix("gemma") {
Expand Down Expand Up @@ -119,6 +120,7 @@ struct GenerateContentIntegrationTests {
let usageMetadata = try #require(response.usageMetadata)
#expect(usageMetadata.promptTokenCount.isEqual(to: 15, accuracy: tokenCountAccuracy))
#expect(usageMetadata.candidatesTokenCount.isEqual(to: 1, accuracy: tokenCountAccuracy))
#expect(usageMetadata.thoughtsTokenCount == 0)
#expect(usageMetadata.totalTokenCount
== usageMetadata.promptTokenCount + usageMetadata.candidatesTokenCount)
#expect(usageMetadata.promptTokensDetails.count == 1)
Expand All @@ -131,6 +133,68 @@ struct GenerateContentIntegrationTests {
#expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
}

/// Integration test verifying end-to-end support for the "thinking" budget on Gemini 2.5
/// series models.
///
/// Each argument tuple is `(backend instance config, model name, thinking budget in tokens)`.
/// The budgets exercise the documented range boundaries: `0` (thinking disabled) and `24576`
/// (maximum) for Flash, and `128` (minimum) and `32768` (maximum) for Pro, across both the
/// Vertex AI and Google AI (Gemini Developer API) backends.
@Test(arguments: [
(InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
(InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
// TODO: Add Vertex AI Gemini 2.5 Pro tests when available.
// (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
// (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
(InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
(InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
(InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
(InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
(InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 0),
(InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 24576),
])
func generateContentThinking(_ config: InstanceConfig, modelName: String,
thinkingBudget: Int) async throws {
// Greedy sampling settings (temperature 0, topP 0, topK 1) to make the model's response as
// deterministic as possible for the exact-match text expectation below.
let model = FirebaseAI.componentInstance(config).generativeModel(
modelName: modelName,
generationConfig: GenerationConfig(
temperature: 0.0,
topP: 0.0,
topK: 1,
thinkingConfig: ThinkingConfig(thinkingBudget: thinkingBudget)
),
safetySettings: safetySettings
)
let prompt = "Where is Google headquarters located? Answer with the city name only."

let response = try await model.generateContent(prompt)

#expect(text == "Mountain View")
let text = try #require(response.text).trimmingCharacters(in: .whitespacesAndNewlines)

// The prompt count is approximate (`tokenCountAccuracy`) since tokenization may vary slightly
// between model versions.
let usageMetadata = try #require(response.usageMetadata)
#expect(usageMetadata.promptTokenCount.isEqual(to: 13, accuracy: tokenCountAccuracy))
#expect(usageMetadata.promptTokensDetails.count == 1)
let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
#expect(promptTokensDetails.modality == .text)
#expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
// No thinking tokens may be consumed when the budget is 0 (thinking disabled); otherwise the
// thinking token count must not exceed the configured budget.
if thinkingBudget == 0 {
#expect(usageMetadata.thoughtsTokenCount == 0)
} else {
#expect(usageMetadata.thoughtsTokenCount <= thinkingBudget)
}
#expect(usageMetadata.candidatesTokenCount.isEqual(to: 3, accuracy: tokenCountAccuracy))
// The `candidatesTokensDetails` field is erroneously omitted when using the Google AI (Gemini
// Developer API) backend.
if case .googleAI = config.apiConfig.service {
#expect(usageMetadata.candidatesTokensDetails.isEmpty)
} else {
#expect(usageMetadata.candidatesTokensDetails.count == 1)
let candidatesTokensDetails = try #require(usageMetadata.candidatesTokensDetails.first)
#expect(candidatesTokensDetails.modality == .text)
#expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
}
// The total must account for prompt, thinking, and response tokens.
#expect(usageMetadata.totalTokenCount > 0)
#expect(usageMetadata.totalTokenCount == (
usageMetadata.promptTokenCount
+ usageMetadata.thoughtsTokenCount
+ usageMetadata.candidatesTokenCount
))
}

@Test(arguments: [
InstanceConfig.vertexAI_v1beta,
InstanceConfig.googleAI_v1beta,
Expand Down
4 changes: 4 additions & 0 deletions FirebaseAI/Tests/TestApp/Tests/Utilities/InstanceConfig.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ struct InstanceConfig: Equatable, Encodable {
static let googleAI_v1beta_staging = InstanceConfig(
apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyStaging), version: .v1beta)
)
static let googleAI_v1beta_freeTier = InstanceConfig(
appName: FirebaseAppNames.spark,
apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyProd), version: .v1beta)
)
static let googleAI_v1beta_freeTier_bypassProxy = InstanceConfig(
appName: FirebaseAppNames.spark,
apiConfig: APIConfig(service: .googleAI(endpoint: .googleAIBypassProxy), version: .v1beta)
Expand Down
Loading