Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 12f95c6

Browse files
committed
Add proper support for array types in structured outputs
1 parent 5682e8d commit 12f95c6

File tree

11 files changed

+209
-36
lines changed

11 files changed

+209
-36
lines changed

assets/javascripts/discourse/components/modal/ai-persona-response-format-editor.gjs

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,34 @@ import { i18n } from "discourse-i18n";
1010
export default class AiPersonaResponseFormatEditor extends Component {
1111
@tracked showJsonEditorModal = false;
1212

13-
jsonSchema = {
14-
type: "array",
15-
uniqueItems: true,
16-
title: i18n("discourse_ai.ai_persona.response_format.modal.root_title"),
17-
items: {
18-
type: "object",
19-
title: i18n("discourse_ai.ai_persona.response_format.modal.key_title"),
20-
properties: {
21-
key: {
22-
type: "string",
23-
},
24-
type: {
25-
type: "string",
26-
enum: ["string", "integer", "boolean"],
27-
},
13+
jsonSchema = {
14+
type: "array",
15+
uniqueItems: true,
16+
title: i18n("discourse_ai.ai_persona.response_format.modal.root_title"),
17+
items: {
18+
type: "object",
19+
title: i18n("discourse_ai.ai_persona.response_format.modal.key_title"),
20+
properties: {
21+
key: {
22+
type: "string",
2823
},
24+
type: {
25+
type: "string",
26+
enum: ["string", "integer", "boolean", "array"],
27+
},
28+
array_type: {
29+
type: "string",
30+
enum: ["string", "integer", "boolean"],
31+
options: {
32+
dependencies: {
33+
type: "array"
34+
},
35+
},
36+
}
2937
},
30-
};
38+
required: ["key", "type"],
39+
},
40+
};
3141

3242
get editorTitle() {
3343
return i18n("discourse_ai.ai_persona.response_format.title");
@@ -41,7 +51,11 @@ export default class AiPersonaResponseFormatEditor extends Component {
4151
const toDisplay = {};
4252

4353
this.args.data.response_format.forEach((keyDesc) => {
44-
toDisplay[keyDesc.key] = keyDesc.type;
54+
if (keyDesc.type === "array") {
55+
toDisplay[keyDesc.key] = `[${keyDesc.array_type}]`;
56+
} else {
57+
toDisplay[keyDesc.key] = keyDesc.type;
58+
}
4559
});
4660

4761
return prettyJSON(toDisplay);

lib/completions/json_streaming_tracker.rb

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,36 @@ def initialize(stream_consumer)
99
@stream_consumer = stream_consumer
1010
@current_key = nil
1111
@current_value = nil
12+
@tracking_array = false
1213
@parser = DiscourseAi::Completions::JsonStreamingParser.new
1314

1415
@parser.key do |k|
1516
@current_key = k
1617
@current_value = nil
1718
end
1819

19-
@parser.value do |v|
20+
@parser.value do |value|
2021
if @current_key
21-
stream_consumer.notify_progress(@current_key, v)
22-
@current_key = nil
22+
if @tracking_array
23+
@current_value << value
24+
stream_consumer.notify_progress(@current_key, @current_value)
25+
else
26+
stream_consumer.notify_progress(@current_key, value)
27+
@current_key = nil
28+
end
2329
end
2430
end
31+
32+
@parser.start_array do
33+
@tracking_array = true
34+
@current_value = []
35+
end
36+
37+
@parser.end_array do
38+
@tracking_array = false
39+
@current_key = nil
40+
@current_value = nil
41+
end
2542
end
2643

2744
def broken?
@@ -46,8 +63,9 @@ def <<(raw_json)
4663
end
4764

4865
if @parser.state == :start_string && @current_key
66+
buffered = @tracking_array ? [@parser.buf] : @parser.buf
4967
# this is worth notifying
50-
stream_consumer.notify_progress(@current_key, @parser.buf)
68+
stream_consumer.notify_progress(@current_key, buffered)
5169
end
5270

5371
@current_key = nil if @parser.state == :end_value

lib/completions/structured_output.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def read_buffered_property(prop_name)
4545
@property_cursors[prop_name] = @tracked[prop_name].length
4646
unread
4747
else
48-
# Ints and bools are always returned as is.
48+
# Ints, bools, and arrays are always returned as is.
4949
@tracked[prop_name]
5050
end
5151
end

lib/inferred_concepts/applier.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,13 @@ def self.match_concepts_to_content(content, concept_list)
122122
)
123123

124124
bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)
125+
structured_output = nil
125126

126-
response = bot.reply(context)
127+
bot.reply(context) do |partial, _, type|
128+
structured_output = partial if type == :structured_output
129+
end
127130

128-
matching_concepts = JSON.parse(response[0][0]).dig("matching_concepts")
129-
130-
matching_concepts || []
131+
structured_output&.read_buffered_property(:matching_concepts) || []
131132
end
132133
end
133134
end

lib/inferred_concepts/finder.rb

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@ def self.identify_concepts(content)
2424
)
2525

2626
bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)
27+
structured_output = nil
2728

28-
response = bot.reply(context)
29+
bot.reply(context) do |partial, _, type|
30+
structured_output = partial if type == :structured_output
31+
end
2932

30-
concepts = JSON.parse(response[0][0]).dig("concepts")
31-
concepts || []
33+
structured_output&.read_buffered_property(:concepts) || []
3234
end
3335

3436
# Creates or finds concepts in the database from provided names
@@ -161,10 +163,13 @@ def self.deduplicate_concepts(concept_names)
161163
DiscourseAi::Personas::BotContext.new(messages: [input], user: Discourse.system_user)
162164

163165
bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)
166+
structured_output = nil
164167

165-
response = bot.reply(context)
168+
bot.reply(context) do |partial, _, type|
169+
structured_output = partial if type == :structured_output
170+
end
166171

167-
concepts = JSON.parse(response[0][0]).dig("streamlined_tags")
172+
structured_output&.read_buffered_property(:streamlined_tags) || []
168173
end
169174
end
170175
end

lib/personas/bot.rb

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,13 @@ def build_json_schema(response_format)
318318
response_format
319319
.to_a
320320
.reduce({}) do |memo, format|
321-
memo[format["key"].to_sym] = { type: format["type"] }
322-
memo[format["key"].to_sym][:items] = format["items"] if format["items"]
321+
type_desc = { type: format["type"] }
322+
323+
if format["type"] == "array"
324+
type_desc[:items] = { type: format["array_type"] || "string" }
325+
end
326+
327+
memo[format["key"].to_sym] = type_desc
323328
memo
324329
end
325330

lib/personas/concept_deduplicator.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def system_prompt
4646
end
4747

4848
def response_format
49-
[{ "key" => "streamlined_tags", "type" => "array" }]
49+
[{ "key" => "streamlined_tags", "type" => "array", "array_type" => "string" }]
5050
end
5151
end
5252
end

lib/personas/concept_finder.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def system_prompt
4242
end
4343

4444
def response_format
45-
[{ "key" => "concepts", "type" => "array", "items" => { "type" => "string" } }]
45+
[{ "key" => "concepts", "type" => "array", "array_type" => "string" }]
4646
end
4747
end
4848
end

lib/personas/concept_matcher.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def system_prompt
3636
end
3737

3838
def response_format
39-
[{ "key" => "matching_concepts", "type" => "array" }]
39+
[{ "key" => "matching_concepts", "type" => "array", "array_type" => "string" }]
4040
end
4141
end
4242
end

spec/lib/completions/endpoints/aws_bedrock_spec.rb

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,5 +672,87 @@ def encode_message(message)
672672
expect(structured_output.read_buffered_property(:key)).to eq("Hello!\n There")
673673
end
674674
end
675+
676+
it "works with JSON schema array types" do
677+
schema = {
678+
type: "json_schema",
679+
json_schema: {
680+
name: "reply",
681+
schema: {
682+
type: "object",
683+
properties: {
684+
plain: {
685+
type: "string",
686+
},
687+
key: {
688+
type: "array",
689+
items: {
690+
type: "string",
691+
},
692+
},
693+
},
694+
required: %w[plain key],
695+
additionalProperties: false,
696+
},
697+
strict: true,
698+
},
699+
}
700+
701+
messages =
702+
[
703+
{ type: "message_start", message: { usage: { input_tokens: 9 } } },
704+
{ type: "content_block_delta", delta: { text: "\"" } },
705+
{ type: "content_block_delta", delta: { text: "key" } },
706+
{ type: "content_block_delta", delta: { text: "\":" } },
707+
{ type: "content_block_delta", delta: { text: " [\"" } },
708+
{ type: "content_block_delta", delta: { text: "Hello!" } },
709+
{ type: "content_block_delta", delta: { text: " I am" } },
710+
{ type: "content_block_delta", delta: { text: " a " } },
711+
{ type: "content_block_delta", delta: { text: "chunk\"," } },
712+
{ type: "content_block_delta", delta: { text: "\"There" } },
713+
{ type: "content_block_delta", delta: { text: "\"]," } },
714+
{ type: "content_block_delta", delta: { text: " \"plain" } },
715+
{ type: "content_block_delta", delta: { text: "\":\"" } },
716+
{ type: "content_block_delta", delta: { text: "I'm here" } },
717+
{ type: "content_block_delta", delta: { text: " too\"}" } },
718+
{ type: "message_delta", delta: { usage: { output_tokens: 25 } } },
719+
].map { |message| encode_message(message) }
720+
721+
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
722+
request = nil
723+
bedrock_mock.with_chunk_array_support do
724+
stub_request(
725+
:post,
726+
"https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
727+
)
728+
.with do |inner_request|
729+
request = inner_request
730+
true
731+
end
732+
.to_return(status: 200, body: messages)
733+
734+
structured_output = nil
735+
proxy.generate("hello world", response_format: schema, user: user) do |partial|
736+
structured_output = partial
737+
end
738+
739+
expected = {
740+
"max_tokens" => 4096,
741+
"anthropic_version" => "bedrock-2023-05-31",
742+
"messages" => [
743+
{ "role" => "user", "content" => "hello world" },
744+
{ "role" => "assistant", "content" => "{" },
745+
],
746+
"system" => "You are a helpful bot",
747+
}
748+
expect(JSON.parse(request.body)).to eq(expected)
749+
750+
expect(structured_output.read_buffered_property(:key)).to contain_exactly(
751+
"Hello! I am a chunk",
752+
"There",
753+
)
754+
expect(structured_output.read_buffered_property(:plain)).to eq("I'm here too")
755+
end
756+
end
675757
end
676758
end

0 commit comments

Comments
 (0)