Skip to content

Commit c47cefe

Browse files
authored
streaming_usage: add support for including usage info in streaming requests (#59)
1 parent cdda2af commit c47cefe

File tree

2 files changed

+31
-2
lines changed

2 files changed

+31
-2
lines changed

pkg/adapter/chat.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ func (g *GeminiAdapter) GenerateStreamContent(
8888
iter := cs.SendMessageStream(ctx, messages[len(messages)-1].Parts...)
8989

9090
dataChan := make(chan string)
91-
go handleStreamIter(g.model, iter, dataChan)
91+
go handleStreamIter(g.model, iter, dataChan, req.StreamOptions.IncludeUsage)
9292

9393
return dataChan, nil
9494
}
9595

96-
func handleStreamIter(model string, iter *genai.GenerateContentResponseIterator, dataChan chan string) {
96+
func handleStreamIter(model string, iter *genai.GenerateContentResponseIterator, dataChan chan string, sendUsage bool) {
9797
defer close(dataChan)
9898

9999
respID := util.GetUUID()
@@ -157,6 +157,28 @@ func handleStreamIter(model string, iter *genai.GenerateContentResponseIterator,
157157
dataChan <- string(resp)
158158
}
159159

160+
sendUsageMetadata := func(usage *genai.UsageMetadata) {
161+
if usage == nil || !sendUsage {
162+
return
163+
}
164+
openaiResp := &CompletionResponse{
165+
ID: fmt.Sprintf("chatcmpl-%s", respID),
166+
Object: "chat.completion.chunk",
167+
Created: created,
168+
Model: GetMappedModel(model),
169+
Choices: []CompletionChoice{},
170+
Usage: openai.Usage{
171+
PromptTokens: int(usage.PromptTokenCount),
172+
CompletionTokens: int(usage.CandidatesTokenCount),
173+
TotalTokens: int(usage.TotalTokenCount),
174+
},
175+
}
176+
resp, _ := json.Marshal(openaiResp)
177+
dataChan <- string(resp)
178+
}
179+
180+
var usageMetadata *genai.UsageMetadata
181+
160182
for {
161183
genaiResp, err := iter.Next()
162184
if err == iterator.Done {
@@ -165,6 +187,9 @@ func handleStreamIter(model string, iter *genai.GenerateContentResponseIterator,
165187
// Send all remaining text at once when done
166188
sendFullText(textBuffer)
167189
}
190+
// per https://community.openai.com/t/usage-stats-now-available-when-using-streaming-with-the-chat-completions-api-or-completions-api/738156
191+
// the usage is sent after everything else
192+
sendUsageMetadata(usageMetadata)
168193
break
169194
}
170195

@@ -208,6 +233,8 @@ func handleStreamIter(model string, iter *genai.GenerateContentResponseIterator,
208233
dataChan <- string(resp)
209234
break
210235
}
236+
// gemini returns the usage data on each response, adding to it each time, so always get the latest
237+
usageMetadata = genaiResp.UsageMetadata
211238

212239
// Process each candidate's text content
213240
for _, candidate := range genaiResp.Candidates {

pkg/adapter/struct.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ type ChatCompletionRequest struct {
2525
TopP float32 `json:"top_p" binding:"omitempty"`
2626
N int32 `json:"n" binding:"omitempty"`
2727
Stream bool `json:"stream" binding:"omitempty"`
28+
StreamOptions openai.StreamOptions `json:"stream_options,omitempty"`
2829
Stop []string `json:"stop,omitempty"`
2930
Tools []openai.Tool `json:"tools,omitempty"`
3031
ToolChoice any `json:"tool_choice,omitempty"`
@@ -151,6 +152,7 @@ type CompletionResponse struct {
151152
Created int64 `json:"created"`
152153
Model string `json:"model"`
153154
Choices []CompletionChoice `json:"choices"`
155+
Usage openai.Usage `json:"usage"`
154156
}
155157

156158
type StringArray []string

0 commit comments

Comments (0)