
Commit a14936c

[Firebase AI] Add support for configuring a thinking budget (#14909)

1 parent 79d8cf6

File tree

7 files changed: +144 -1 lines changed


FirebaseAI/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -2,6 +2,8 @@
 - [fixed] Fixed `Sendable` warnings introduced in the Xcode 26 beta. (#14947)
 - [added] Added support for setting `title` in string, number and array `Schema`
   types. (#14971)
+- [added] Added support for configuring the "thinking" budget when using Gemini
+  2.5 series models. (#14909)
 
 # 11.13.0
 - [feature] Initial release of the Firebase AI Logic SDK (`FirebaseAI`). This

FirebaseAI/Sources/GenerateContentResponse.swift

Lines changed: 12 additions & 0 deletions
@@ -26,6 +26,16 @@ public struct GenerateContentResponse: Sendable {
     /// The total number of tokens across the generated response candidates.
     public let candidatesTokenCount: Int
 
+    /// The number of tokens used by the model's internal "thinking" process.
+    ///
+    /// For models that support thinking (like Gemini 2.5 Pro and Flash), this represents the actual
+    /// number of tokens consumed for reasoning before the model generated a response. For models
+    /// that do not support thinking, this value will be `0`.
+    ///
+    /// When thinking is used, this count will be less than or equal to the `thinkingBudget` set in
+    /// the ``ThinkingConfig``.
+    public let thoughtsTokenCount: Int
+
     /// The total number of tokens in both the request and response.
     public let totalTokenCount: Int
 
@@ -330,6 +340,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
   enum CodingKeys: CodingKey {
     case promptTokenCount
     case candidatesTokenCount
+    case thoughtsTokenCount
     case totalTokenCount
     case promptTokensDetails
     case candidatesTokensDetails
@@ -340,6 +351,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
     promptTokenCount = try container.decodeIfPresent(Int.self, forKey: .promptTokenCount) ?? 0
     candidatesTokenCount =
       try container.decodeIfPresent(Int.self, forKey: .candidatesTokenCount) ?? 0
+    thoughtsTokenCount = try container.decodeIfPresent(Int.self, forKey: .thoughtsTokenCount) ?? 0
     totalTokenCount = try container.decodeIfPresent(Int.self, forKey: .totalTokenCount) ?? 0
     promptTokensDetails =
       try container.decodeIfPresent([ModalityTokenCount].self, forKey: .promptTokensDetails) ?? []
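
For context, a minimal sketch of how the new `thoughtsTokenCount` field surfaces to callers; it is not code from this commit, and it assumes `model` is an already-configured `GenerativeModel`:

```swift
// Sketch only: `model` is assumed to be an already-configured `GenerativeModel`.
let response = try await model.generateContent("Explain how quicksort works.")
if let usage = response.usageMetadata {
  // `thoughtsTokenCount` decodes to `0` when the backend omits the field,
  // e.g. for models that do not support thinking.
  print("Thinking tokens: \(usage.thoughtsTokenCount)")
  print("Total tokens: \(usage.totalTokenCount)")
}
```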

FirebaseAI/Sources/GenerationConfig.swift

Lines changed: 9 additions & 1 deletion
@@ -51,6 +51,9 @@ public struct GenerationConfig: Sendable {
   /// Supported modalities of the response.
   let responseModalities: [ResponseModality]?
 
+  /// Configuration for controlling the "thinking" behavior of compatible Gemini models.
+  let thinkingConfig: ThinkingConfig?
+
   /// Creates a new `GenerationConfig` value.
   ///
   /// See the
@@ -152,11 +155,14 @@ public struct GenerationConfig: Sendable {
   ///     > Warning: Specifying response modalities is a **Public Preview** feature, which means
   ///     > that it is not subject to any SLA or deprecation policy and could change in
   ///     > backwards-incompatible ways.
+  ///   - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini
+  ///     models; see ``ThinkingConfig`` for more details.
   public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
               candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
               presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
               stopSequences: [String]? = nil, responseMIMEType: String? = nil,
-              responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil) {
+              responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil,
+              thinkingConfig: ThinkingConfig? = nil) {
     // Explicit init because otherwise if we re-arrange the above variables it changes the API
     // surface.
     self.temperature = temperature
@@ -170,6 +176,7 @@ public struct GenerationConfig: Sendable {
     self.responseMIMEType = responseMIMEType
     self.responseSchema = responseSchema
     self.responseModalities = responseModalities
+    self.thinkingConfig = thinkingConfig
   }
 }
 
@@ -189,5 +196,6 @@ extension GenerationConfig: Encodable {
     case responseMIMEType = "responseMimeType"
     case responseSchema
     case responseModalities
+    case thinkingConfig
   }
 }
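
Combined with the new `ThinkingConfig` type (added in the next file), the expanded initializer can be exercised as follows. This is a sketch rather than code from the commit: it assumes a configured default Firebase app, the budget value is arbitrary, and the model name is the Gemini 2.5 Flash preview used in this commit's test constants.

```swift
import FirebaseAI

// Cap the model's internal reasoning at 1024 tokens (an illustrative value).
let generationConfig = GenerationConfig(
  thinkingConfig: ThinkingConfig(thinkingBudget: 1024)
)
let model = FirebaseAI.firebaseAI(backend: .googleAI()).generativeModel(
  modelName: "gemini-2.5-flash-preview-05-20",
  generationConfig: generationConfig
)
```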
FirebaseAI/Sources/ThinkingConfig.swift

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Configuration for controlling the "thinking" behavior of compatible Gemini models.
+///
+/// Certain models, like Gemini 2.5 Flash and Pro, utilize a thinking process before generating a
+/// response. This allows them to reason through complex problems and plan a more coherent and
+/// accurate answer.
+public struct ThinkingConfig: Sendable {
+  /// The thinking budget in tokens.
+  ///
+  /// This parameter sets an upper limit on the number of tokens the model can use for its internal
+  /// "thinking" process. A higher budget may result in better quality responses for complex tasks
+  /// but can also increase latency and cost.
+  ///
+  /// If you don't specify a budget (`nil`), the model will automatically determine the appropriate
+  /// amount of thinking based on the complexity of the prompt.
+  ///
+  /// **Model-Specific Behavior:**
+  /// - **Gemini 2.5 Flash:** The budget can range from `0` to `24576`. Setting the budget to `0`
+  ///   disables the thinking process, which prioritizes the lowest latency and cost.
+  /// - **Gemini 2.5 Pro:** The budget must be an integer between `128` and `32768`. Thinking cannot
+  ///   be disabled for this model.
+  ///
+  /// An error will be thrown if you set a thinking budget for a model that does not support this
+  /// feature or if the specified budget is not within the model's supported range.
+  let thinkingBudget: Int?
+
+  /// Initializes a new `ThinkingConfig`.
+  ///
+  /// - Parameters:
+  ///   - thinkingBudget: The maximum number of tokens to be used for the model's thinking process.
+  public init(thinkingBudget: Int? = nil) {
+    self.thinkingBudget = thinkingBudget
+  }
+}
+
+// MARK: - Codable Conformances
+
+extension ThinkingConfig: Encodable {}
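
The budget semantics documented above map to three usage patterns; the following sketch picks example values from the ranges in the doc comment:

```swift
// Gemini 2.5 Flash: a budget of 0 disables thinking entirely, prioritizing
// latency and cost (supported range: 0 through 24576).
let flashNoThinking = ThinkingConfig(thinkingBudget: 0)

// Gemini 2.5 Pro: thinking cannot be disabled; the budget must fall
// within 128 through 32768.
let proMinimalThinking = ThinkingConfig(thinkingBudget: 128)

// No budget (`nil`): the model decides how much thinking the prompt needs.
let automaticThinking = ThinkingConfig()
```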

FirebaseAI/Tests/TestApp/Sources/Constants.swift

Lines changed: 2 additions & 0 deletions
@@ -24,5 +24,7 @@ public enum ModelNames {
   public static let gemini2Flash = "gemini-2.0-flash-001"
   public static let gemini2FlashLite = "gemini-2.0-flash-lite-001"
   public static let gemini2FlashExperimental = "gemini-2.0-flash-exp"
+  public static let gemini2_5_FlashPreview = "gemini-2.5-flash-preview-05-20"
+  public static let gemini2_5_ProPreview = "gemini-2.5-pro-preview-06-05"
   public static let gemma3_4B = "gemma-3-4b-it"
 }

FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift

Lines changed: 64 additions & 0 deletions
@@ -76,6 +76,7 @@ struct GenerateContentIntegrationTests {
     let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
     #expect(promptTokensDetails.modality == .text)
     #expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
+    #expect(usageMetadata.thoughtsTokenCount == 0)
     // The fields `candidatesTokenCount` and `candidatesTokensDetails` are not included when using
     // Gemma models.
     if modelName.hasPrefix("gemma") {
@@ -119,6 +120,7 @@ struct GenerateContentIntegrationTests {
     let usageMetadata = try #require(response.usageMetadata)
     #expect(usageMetadata.promptTokenCount.isEqual(to: 15, accuracy: tokenCountAccuracy))
     #expect(usageMetadata.candidatesTokenCount.isEqual(to: 1, accuracy: tokenCountAccuracy))
+    #expect(usageMetadata.thoughtsTokenCount == 0)
     #expect(usageMetadata.totalTokenCount
       == usageMetadata.promptTokenCount + usageMetadata.candidatesTokenCount)
     #expect(usageMetadata.promptTokensDetails.count == 1)
@@ -131,6 +133,68 @@ struct GenerateContentIntegrationTests {
     #expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
   }
 
+  @Test(arguments: [
+    (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
+    // TODO: Add Vertex AI Gemini 2.5 Pro tests when available.
+    // (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
+    // (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
+    (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 24576),
+  ])
+  func generateContentThinking(_ config: InstanceConfig, modelName: String,
+                               thinkingBudget: Int) async throws {
+    let model = FirebaseAI.componentInstance(config).generativeModel(
+      modelName: modelName,
+      generationConfig: GenerationConfig(
+        temperature: 0.0,
+        topP: 0.0,
+        topK: 1,
+        thinkingConfig: ThinkingConfig(thinkingBudget: thinkingBudget)
+      ),
+      safetySettings: safetySettings
+    )
+    let prompt = "Where is Google headquarters located? Answer with the city name only."
+
+    let response = try await model.generateContent(prompt)
+
+    let text = try #require(response.text).trimmingCharacters(in: .whitespacesAndNewlines)
+    #expect(text == "Mountain View")
+
+    let usageMetadata = try #require(response.usageMetadata)
+    #expect(usageMetadata.promptTokenCount.isEqual(to: 13, accuracy: tokenCountAccuracy))
+    #expect(usageMetadata.promptTokensDetails.count == 1)
+    let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
+    #expect(promptTokensDetails.modality == .text)
+    #expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
+    if thinkingBudget == 0 {
+      #expect(usageMetadata.thoughtsTokenCount == 0)
+    } else {
+      #expect(usageMetadata.thoughtsTokenCount <= thinkingBudget)
+    }
+    #expect(usageMetadata.candidatesTokenCount.isEqual(to: 3, accuracy: tokenCountAccuracy))
+    // The `candidatesTokensDetails` field is erroneously omitted when using the Google AI (Gemini
+    // Developer API) backend.
+    if case .googleAI = config.apiConfig.service {
+      #expect(usageMetadata.candidatesTokensDetails.isEmpty)
+    } else {
+      #expect(usageMetadata.candidatesTokensDetails.count == 1)
+      let candidatesTokensDetails = try #require(usageMetadata.candidatesTokensDetails.first)
+      #expect(candidatesTokensDetails.modality == .text)
+      #expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
+    }
+    #expect(usageMetadata.totalTokenCount > 0)
+    #expect(usageMetadata.totalTokenCount == (
+      usageMetadata.promptTokenCount
+        + usageMetadata.thoughtsTokenCount
+        + usageMetadata.candidatesTokenCount
+    ))
+  }
+
   @Test(arguments: [
     InstanceConfig.vertexAI_v1beta,
     InstanceConfig.googleAI_v1beta,

FirebaseAI/Tests/TestApp/Tests/Utilities/InstanceConfig.swift

Lines changed: 4 additions & 0 deletions
@@ -32,6 +32,10 @@ struct InstanceConfig: Equatable, Encodable {
   static let googleAI_v1beta_staging = InstanceConfig(
     apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyStaging), version: .v1beta)
   )
+  static let googleAI_v1beta_freeTier = InstanceConfig(
+    appName: FirebaseAppNames.spark,
+    apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyProd), version: .v1beta)
+  )
   static let googleAI_v1beta_freeTier_bypassProxy = InstanceConfig(
     appName: FirebaseAppNames.spark,
     apiConfig: APIConfig(service: .googleAI(endpoint: .googleAIBypassProxy), version: .v1beta)
