Commit d147cad

fixup finish part provider metadata
1 parent 6303395 commit d147cad

File tree

8 files changed: +100 −33 lines changed

.changeset/five-colts-eat.md

Lines changed: 22 additions & 6 deletions
@@ -48,12 +48,31 @@ const main = Effect.gen(function*() {
 
 The `AiInput` and `AiResponse` types have been refactored to allow inclusion of more information and metadata from model providers where possible, such as reasoning output and prompt cache token utilization.
 
+In addition, for an `AiResponse` you can now access metadata that is specific to a given provider. For example, when using OpenAi to generate audio, you can check the input and output audio tokens used:
+
+```ts
+import { OpenAiLanguageModel } from "@effect/ai-openai"
+import { Effect, Option } from "effect"
+
+const getDadJoke = OpenAiLanguageModel.generateText({
+  prompt: "Generate a hilarious dad joke"
+})
+
+Effect.gen(function*() {
+  const model = yield* OpenAiLanguageModel.model("gpt-4o")
+  const response = yield* model.use(getDadJoke)
+  const metadata = response.getProviderMetadata(OpenAiLanguageModel.ProviderMetadata)
+  if (Option.isSome(metadata)) {
+    console.log(metadata.value)
+  }
+})
+```
+
 ### `AiTool` and `AiToolkit`
 
 The `AiToolkit` has been completely refactored to simplify creating a collection of tools and using those tools in requests to model providers. A new `AiTool` data type has also been introduced to simplify defining tools for a toolkit. `AiToolkit.implement` has been renamed to `AiToolkit.toLayer` for clarity, and defining handlers is now very similar to the way handlers are defined in the `@effect/rpc` library.
 
-In addition, you can now control how many sequential steps are performed by `AiLanguageModel.generateText` and `AiLanguageModel.streamText` via the `maxSteps` option. For example, if `maxSteps` is set to `> 1` and any tools are invoked by the language model, these methods will take care of resolving the tool call and returning the results to the language model for subsequent generation (up to the maximum number of steps specified).
-
 A complete example of an `AiToolkit` implementation and usage can be found below:
 
 
@@ -156,10 +175,7 @@ const makeDadJoke = Effect.gen(function*() {
 
   const response = yield* languageModel.generateText({
     prompt: "Come up with a dad joke about pirates",
-    toolkit,
-    // Allow a maximum of two sequential interactions with the language model
-    // before returning the response
-    maxSteps: 2
+    toolkit
   })
 
   return yield* languageModel.generateText({
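For orientation, the refactored toolkit API described in the changeset pairs `AiTool.make` with `AiToolkit.make` and `toLayer`. The following is a minimal sketch, not the changeset's own complete example (which is elided from this view): the tool name, schema, and handler body are illustrative, and the exact `AiTool.make` option names are assumed from the description above.

```ts
import { AiTool, AiToolkit } from "@effect/ai"
import { Effect, Schema } from "effect"

// Hypothetical tool: a single "GetDadJoke" tool with one string parameter
class DadJokeTools extends AiToolkit.make(
  AiTool.make("GetDadJoke", {
    description: "Fetch a hilarious dad joke",
    parameters: { searchTerm: Schema.String },
    success: Schema.String
  })
) {}

// Handlers are supplied when converting the toolkit into a Layer,
// much like handlers in `@effect/rpc` (`AiToolkit.implement` is gone)
const DadJokeToolsLive = DadJokeTools.toLayer({
  GetDadJoke: ({ searchTerm }) =>
    Effect.succeed(`Here is a dad joke about ${searchTerm}`)
})
```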

package.json

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
     "clean": "node scripts/clean.mjs",
     "codegen": "pnpm --recursive --parallel --filter \"./packages/**/*\" run codegen",
     "codemod": "node scripts/codemod.mjs",
-    "build": "tsc -b tsconfig.build.json && pnpm --recursive --parallel run build",
+    "build": "tsc -b tsconfig.build.json && pnpm --recursive --parallel --filter \"./packages/**/*\" run build",
     "circular": "node scripts/circular.mjs",
     "test": "vitest",
     "coverage": "vitest --coverage",

packages/ai/anthropic/src/AnthropicClient.ts

Lines changed: 11 additions & 9 deletions
@@ -21,7 +21,7 @@ import * as Redacted from "effect/Redacted"
 import * as Stream from "effect/Stream"
 import { AnthropicConfig } from "./AnthropicConfig.js"
 import * as Generated from "./Generated.js"
-import { resolveFinishReason } from "./internal/utilities.js"
+import * as InternalUtilities from "./internal/utilities.js"
 
 const constDisableValidation = { disableValidation: true } as const
 
@@ -124,6 +124,7 @@ export const make = (options: {
     cacheReadInputTokens: 0,
     cacheWriteInputTokens: 0
   }
+  const metadata: Record<string, unknown> = {}
   return streamRequest<MessageStreamEvent>(
     HttpClientRequest.post("/v1/messages", {
       body: HttpBody.unsafeJson({ ...request, stream: true })
@@ -158,18 +159,19 @@
           ...usage,
           outputTokens: chunk.usage.output_tokens
         }
-        finishReason = resolveFinishReason(chunk.delta.stop_reason)
+        if (Predicate.isNotNullable(chunk.delta.stop_sequence)) {
+          metadata.stopSequence = chunk.delta.stop_sequence
+        }
+        finishReason = InternalUtilities.resolveFinishReason(chunk.delta.stop_reason)
         break
       }
       case "message_stop": {
         parts.push(
-          new AiResponse.FinishPart(
-            {
-              reason: finishReason,
-              usage
-            },
-            constDisableValidation
-          )
+          new AiResponse.FinishPart({
+            usage,
+            reason: finishReason,
+            providerMetadata: { [InternalUtilities.ProviderMetadataKey]: metadata }
+          }, constDisableValidation)
         )
         break
       }

packages/ai/anthropic/src/AnthropicLanguageModel.ts

Lines changed: 10 additions & 6 deletions
@@ -21,7 +21,7 @@ import type { Mutable, Simplify } from "effect/Types"
 import { AnthropicClient } from "./AnthropicClient.js"
 import * as AnthropicTokenizer from "./AnthropicTokenizer.js"
 import type * as Generated from "./Generated.js"
-import { resolveFinishReason } from "./internal/utilities.js"
+import * as InternalUtilities from "./internal/utilities.js"
 
 const constDisableValidation = { disableValidation: true } as const
 
@@ -80,7 +80,7 @@ export declare namespace Config {
  * @since 1.0.0
  * @category Context
  */
-export class ProviderMetadata extends Context.Tag("@effect/ai-anthropic/AnthropicLanguageModel/ProviderMetadata")<
+export class ProviderMetadata extends Context.Tag(InternalUtilities.ProviderMetadataKey)<
   ProviderMetadata,
   ProviderMetadata.Service
 >() {}
@@ -94,6 +94,12 @@ export declare namespace ProviderMetadata {
    * @category Provider Metadata
    */
   export interface Service {
+    /**
+     * Which custom stop sequence was generated, if any.
+     *
+     * Will be a non-null string if one of your custom stop sequences was
+     * generated.
+     */
     readonly stopSequence?: string
   }
 }
@@ -496,7 +502,7 @@ const makeResponse = Effect.fnUntraced(
   parts.push(
     new AiResponse.FinishPart({
       // Anthropic always returns a non-null `stop_reason` for non-streaming responses
-      reason: resolveFinishReason(response.stop_reason!),
+      reason: InternalUtilities.resolveFinishReason(response.stop_reason!),
       usage: new AiResponse.Usage({
         inputTokens: response.usage.input_tokens,
         outputTokens: response.usage.output_tokens,
@@ -505,9 +511,7 @@ const makeResponse = Effect.fnUntraced(
         cacheReadInputTokens: response.usage.cache_read_input_tokens ?? 0,
         cacheWriteInputTokens: response.usage.cache_creation_input_tokens ?? 0
       }),
-      providerMetadata: {
-        [ProviderMetadata.key]: metadata
-      }
+      providerMetadata: { [InternalUtilities.ProviderMetadataKey]: metadata }
     }, constDisableValidation)
   )
   return new AiResponse.AiResponse({

packages/ai/anthropic/src/internal/utilities.ts

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,9 @@
 import type * as AiResponse from "@effect/ai/AiResponse"
 import * as Predicate from "effect/Predicate"
 
+/** @internal */
+export const ProviderMetadataKey = "@effect/ai-anthropic/AnthropicLanguageModel/ProviderMetadata"
+
 const finishReasonMap: Record<string, AiResponse.FinishReason> = {
   end_turn: "stop",
   max_tokens: "length",
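With the `stopSequence` metadata wired up above, reading it from an Anthropic response would follow the same pattern as the OpenAi example in the changeset. A sketch under that assumption: the model name is illustrative, and `AnthropicLanguageModel.model`/`generateText` are assumed to mirror the OpenAi equivalents shown earlier.

```ts
import { AnthropicLanguageModel } from "@effect/ai-anthropic"
import { Effect, Option } from "effect"

const program = Effect.gen(function*() {
  const model = yield* AnthropicLanguageModel.model("claude-3-5-sonnet-latest")
  const response = yield* model.use(
    AnthropicLanguageModel.generateText({
      prompt: "Generate a hilarious dad joke"
    })
  )
  // Look up the Anthropic-specific metadata attached to the FinishPart
  const metadata = response.getProviderMetadata(AnthropicLanguageModel.ProviderMetadata)
  if (Option.isSome(metadata)) {
    // Set only when one of your custom stop sequences halted generation
    console.log(metadata.value.stopSequence)
  }
})
```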

packages/ai/openai/src/OpenAiClient.ts

Lines changed: 11 additions & 2 deletions
@@ -19,7 +19,7 @@ import * as Predicate from "effect/Predicate"
 import * as Redacted from "effect/Redacted"
 import * as Stream from "effect/Stream"
 import * as Generated from "./Generated.js"
-import { resolveFinishReason } from "./internal/utilities.js"
+import * as InternalUtilities from "./internal/utilities.js"
 import { OpenAiConfig } from "./OpenAiConfig.js"
 
 const constDisableValidation = { disableValidation: true } as const
@@ -167,10 +167,19 @@ export const make = (options: {
 
         // Track the finish reason for the response
         if (Predicate.isNotNullable(choice.finish_reason)) {
-          finishReason = resolveFinishReason(choice.finish_reason)
+          finishReason = InternalUtilities.resolveFinishReason(choice.finish_reason)
           if (finishReason === "tool-calls" && Predicate.isNotUndefined(toolCallIndex)) {
             finishToolCall(toolCalls[toolCallIndex], parts)
           }
+          if (finishReason === "stop") {
+            parts.push(
+              new AiResponse.FinishPart({
+                usage,
+                reason: finishReason,
+                providerMetadata: { [InternalUtilities.ProviderMetadataKey]: metadata }
+              }, constDisableValidation)
+            )
+          }
         }
 
         // Handle text deltas

packages/ai/openai/src/OpenAiLanguageModel.ts

Lines changed: 39 additions & 9 deletions
@@ -19,6 +19,7 @@ import type { Span } from "effect/Tracer"
 import type { Simplify } from "effect/Types"
 import type * as Generated from "./Generated.js"
 import { resolveFinishReason } from "./internal/utilities.js"
+import * as InternalUtilities from "./internal/utilities.js"
 import { OpenAiClient } from "./OpenAiClient.js"
 import { addGenAIAnnotations } from "./OpenAiTelemetry.js"
 import * as OpenAiTokenizer from "./OpenAiTokenizer.js"
@@ -80,7 +81,7 @@ export declare namespace Config {
  * @since 1.0.0
  * @category Context
  */
-export class ProviderMetadata extends Context.Tag("@effect/ai-openai/OpenAiLanguageModel/ProviderMetadata")<
+export class ProviderMetadata extends Context.Tag(InternalUtilities.ProviderMetadataKey)<
   ProviderMetadata,
   ProviderMetadata.Service
 >() {}
@@ -94,7 +95,38 @@ export declare namespace ProviderMetadata {
    * @category Provider Metadata
    */
   export interface Service {
-    readonly stopSequence?: string
+    /**
+     * Specifies the latency tier that was used for processing the request.
+     */
+    readonly serviceTier?: string
+    /**
+     * This fingerprint represents the backend configuration that the model
+     * executes with.
+     *
+     * Can be used in conjunction with the seed request parameter to understand
+     * when backend changes have been made that might impact determinism.
+     */
+    readonly systemFingerprint: string
+    /**
+     * When using predicted outputs, the number of tokens in the prediction
+     * that appeared in the completion.
+     */
+    readonly acceptedPredictionTokens: number
+    /**
+     * When using predicted outputs, the number of tokens in the prediction
+     * that did not appear in the completion. However, like reasoning tokens,
+     * these tokens are still counted in the total completion tokens for
+     * purposes of billing, output, and context window limits.
+     */
+    readonly rejectedPredictionTokens: number
+    /**
+     * Audio tokens present in the prompt.
+     */
+    readonly inputAudioTokens: number
+    /**
+     * Audio tokens generated by the model.
+     */
+    readonly outputAudioTokens: number
   }
 }
 
@@ -425,16 +457,16 @@ const makeResponse = Effect.fnUntraced(function*(
     metadata.systemFingerprint = response.system_fingerprint
   }
   if (Predicate.isNotUndefined(response.usage?.completion_tokens_details?.accepted_prediction_tokens)) {
-    metadata.acceptedPredictionTokens = response.usage?.completion_tokens_details?.accepted_prediction_tokens
+    metadata.acceptedPredictionTokens = response.usage?.completion_tokens_details?.accepted_prediction_tokens ?? 0
   }
   if (Predicate.isNotUndefined(response.usage?.completion_tokens_details?.rejected_prediction_tokens)) {
-    metadata.rejectedPredictionTokens = response.usage?.completion_tokens_details?.rejected_prediction_tokens
+    metadata.rejectedPredictionTokens = response.usage?.completion_tokens_details?.rejected_prediction_tokens ?? 0
  }
   if (Predicate.isNotUndefined(response.usage?.prompt_tokens_details?.audio_tokens)) {
-    metadata.inputAudioTokens = response.usage?.prompt_tokens_details?.audio_tokens
+    metadata.inputAudioTokens = response.usage?.prompt_tokens_details?.audio_tokens ?? 0
   }
   if (Predicate.isNotUndefined(response.usage?.completion_tokens_details?.audio_tokens)) {
-    metadata.outputAudioTokens = response.usage?.completion_tokens_details?.audio_tokens
+    metadata.outputAudioTokens = response.usage?.completion_tokens_details?.audio_tokens ?? 0
   }
   parts.push(
     new AiResponse.FinishPart({
@@ -447,9 +479,7 @@ const makeResponse = Effect.fnUntraced(function*(
       cacheReadInputTokens: response.usage?.prompt_tokens_details?.cached_tokens ?? 0,
       cacheWriteInputTokens: 0
     }, constDisableValidation),
-    providerMetadata: {
-      [ProviderMetadata.key]: metadata
-    }
+    providerMetadata: { [InternalUtilities.ProviderMetadataKey]: metadata }
   }, constDisableValidation)
 )
 if (Predicate.isNotNullable(choice.message.content)) {
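Note how the `ProviderMetadata` tag is now constructed from the same `InternalUtilities.ProviderMetadataKey` string that keys the `providerMetadata` record on the `FinishPart`, which is presumably how `getProviderMetadata` locates the entry for a given tag. Reading the expanded OpenAi fields would then look like this sketch, mirroring the changeset example (the model name is illustrative):

```ts
import { OpenAiLanguageModel } from "@effect/ai-openai"
import { Effect, Option } from "effect"

const program = Effect.gen(function*() {
  const model = yield* OpenAiLanguageModel.model("gpt-4o")
  const response = yield* model.use(
    OpenAiLanguageModel.generateText({
      prompt: "Generate a hilarious dad joke"
    })
  )
  const metadata = response.getProviderMetadata(OpenAiLanguageModel.ProviderMetadata)
  if (Option.isSome(metadata)) {
    // Fields documented on ProviderMetadata.Service above
    const { systemFingerprint, acceptedPredictionTokens, rejectedPredictionTokens } = metadata.value
    console.log(systemFingerprint, acceptedPredictionTokens, rejectedPredictionTokens)
  }
})
```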

packages/ai/openai/src/internal/utilities.ts

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,9 @@
 import type * as AiResponse from "@effect/ai/AiResponse"
 import * as Predicate from "effect/Predicate"
 
+/** @internal */
+export const ProviderMetadataKey = "@effect/ai-openai/OpenAiLanguageModel/ProviderMetadata"
+
 const finishReasonMap: Record<string, AiResponse.FinishReason> = {
   content_filter: "content-filter",
   function_call: "tool-calls",
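The body of `resolveFinishReason` is outside this diff, but from the map above its shape is presumably a lookup over the provider's raw `finish_reason` string. A sketch under that assumption; only the two map entries visible in this hunk are reproduced, and the `"unknown"` fallback is a guess:

```ts
import type * as AiResponse from "@effect/ai/AiResponse"

// Only the entries visible in this hunk; the real map has more
const finishReasonMap: Record<string, AiResponse.FinishReason> = {
  content_filter: "content-filter",
  function_call: "tool-calls"
}

// Assumed shape: normalize the raw OpenAi finish_reason, falling back
// to "unknown" for anything unmapped (the fallback value is a guess)
export const resolveFinishReason = (reason: string): AiResponse.FinishReason =>
  finishReasonMap[reason] ?? "unknown"
```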
