Skip to content

Commit f4fbf2e

Browse files
authored
Evaluation Framework (#12)
- Add evaluation framework
- Add logger package
- Improve configured agent prompts
- Improve schema extraction tool
- Add a new schema extraction tool
1 parent bbbda38 commit f4fbf2e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+10856
-1013
lines changed

config/gni/devtools_grd_files.gni

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,23 @@ grd_files_release_sources = [
640640
"front_end/panels/ai_chat/agent_framework/AgentRunner.js",
641641
"front_end/panels/ai_chat/agent_framework/ConfigurableAgentTool.js",
642642
"front_end/panels/ai_chat/agent_framework/implementation/ConfiguredAgents.js",
643+
"front_end/panels/ai_chat/common/MarkdownViewerUtil.js",
644+
"front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.js",
645+
"front_end/panels/ai_chat/evaluation/runner/EvaluationRunner.js",
646+
"front_end/panels/ai_chat/evaluation/framework/GenericToolEvaluator.js",
647+
"front_end/panels/ai_chat/evaluation/framework/judges/LLMEvaluator.js",
648+
"front_end/panels/ai_chat/evaluation/framework/MarkdownReportGenerator.js",
649+
"front_end/panels/ai_chat/evaluation/framework/types.js",
650+
"front_end/panels/ai_chat/evaluation/test-cases/action-agent-tests.js",
651+
"front_end/panels/ai_chat/evaluation/test-cases/research-agent-tests.js",
652+
"front_end/panels/ai_chat/evaluation/test-cases/schema-extractor-tests.js",
653+
"front_end/panels/ai_chat/evaluation/test-cases/streamlined-schema-extractor-tests.js",
654+
"front_end/panels/ai_chat/evaluation/utils/ErrorHandlingUtils.js",
655+
"front_end/panels/ai_chat/evaluation/utils/EvaluationTypes.js",
656+
"front_end/panels/ai_chat/evaluation/utils/PromptTemplates.js",
657+
"front_end/panels/ai_chat/evaluation/utils/ResponseParsingUtils.js",
658+
"front_end/panels/ai_chat/evaluation/utils/SanitizationUtils.js",
659+
"front_end/panels/ai_chat/ui/EvaluationDialog.js",
643660
"front_end/panels/animation/animation-meta.js",
644661
"front_end/panels/animation/animation.js",
645662
"front_end/panels/application/application-meta.js",

front_end/panels/ai_chat/BUILD.gn

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ devtools_module("ai_chat") {
2121
"ui/HelpDialog.ts",
2222
"ui/SettingsDialog.ts",
2323
"ui/PromptEditDialog.ts",
24+
"ui/EvaluationDialog.ts",
2425
"ai_chat_impl.ts",
2526
"core/Graph.ts",
2627
"core/State.ts",
2728
"core/Types.ts",
2829
"core/AgentService.ts",
30+
"core/Constants.ts",
2931
"core/GraphConfigs.ts",
3032
"core/OpenAIClient.ts",
3133
"core/LiteLLMClient.ts",
@@ -38,18 +40,36 @@ devtools_module("ai_chat") {
3840
"core/ChatLiteLLM.ts",
3941
"core/GraphHelpers.ts",
4042
"core/StateGraph.ts",
43+
"core/Logger.ts",
4144
"tools/Tools.ts",
4245
"tools/CritiqueTool.ts",
4346
"tools/FetcherTool.ts",
4447
"tools/FinalizeWithCritiqueTool.ts",
4548
"tools/VisitHistoryManager.ts",
4649
"tools/HTMLToMarkdownTool.ts",
4750
"tools/SchemaBasedExtractorTool.ts",
51+
"tools/StreamlinedSchemaExtractorTool.ts",
4852
"tools/CombinedExtractionTool.ts",
4953
"tools/FullPageAccessibilityTreeToMarkdownTool.ts",
5054
"agent_framework/ConfigurableAgentTool.ts",
5155
"agent_framework/AgentRunner.ts",
5256
"agent_framework/implementation/ConfiguredAgents.ts",
57+
"evaluation/framework/types.ts",
58+
"evaluation/framework/judges/LLMEvaluator.ts",
59+
"evaluation/framework/GenericToolEvaluator.ts",
60+
"evaluation/framework/MarkdownReportGenerator.ts",
61+
"evaluation/utils/SanitizationUtils.ts",
62+
"evaluation/utils/ErrorHandlingUtils.ts",
63+
"evaluation/utils/EvaluationTypes.ts",
64+
"evaluation/utils/PromptTemplates.ts",
65+
"evaluation/utils/ResponseParsingUtils.ts",
66+
"evaluation/test-cases/schema-extractor-tests.ts",
67+
"evaluation/test-cases/streamlined-schema-extractor-tests.ts",
68+
"evaluation/test-cases/research-agent-tests.ts",
69+
"evaluation/test-cases/action-agent-tests.ts",
70+
"evaluation/runner/EvaluationRunner.ts",
71+
"evaluation/runner/VisionAgentEvaluationRunner.ts",
72+
"common/MarkdownViewerUtil.ts",
5373
"common/utils.ts",
5474
"common/log.ts",
5575
"common/context.ts",
@@ -79,11 +99,13 @@ _ai_chat_sources = [
7999
"ui/HelpDialog.ts",
80100
"ui/PromptEditDialog.ts",
81101
"ui/SettingsDialog.ts",
102+
"ui/EvaluationDialog.ts",
82103
"ai_chat_impl.ts",
83104
"core/Graph.ts",
84105
"core/State.ts",
85106
"core/Types.ts",
86107
"core/AgentService.ts",
108+
"core/Constants.ts",
87109
"core/GraphConfigs.ts",
88110
"core/OpenAIClient.ts",
89111
"core/LiteLLMClient.ts",
@@ -108,6 +130,22 @@ _ai_chat_sources = [
108130
"agent_framework/ConfigurableAgentTool.ts",
109131
"agent_framework/AgentRunner.ts",
110132
"agent_framework/implementation/ConfiguredAgents.ts",
133+
"evaluation/framework/types.ts",
134+
"evaluation/framework/judges/LLMEvaluator.ts",
135+
"evaluation/framework/GenericToolEvaluator.ts",
136+
"evaluation/framework/MarkdownReportGenerator.ts",
137+
"evaluation/utils/SanitizationUtils.ts",
138+
"evaluation/utils/ErrorHandlingUtils.ts",
139+
"evaluation/utils/EvaluationTypes.ts",
140+
"evaluation/utils/PromptTemplates.ts",
141+
"evaluation/utils/ResponseParsingUtils.ts",
142+
"evaluation/test-cases/schema-extractor-tests.ts",
143+
"evaluation/test-cases/streamlined-schema-extractor-tests.ts",
144+
"evaluation/test-cases/research-agent-tests.ts",
145+
"evaluation/test-cases/action-agent-tests.ts",
146+
"evaluation/runner/EvaluationRunner.ts",
147+
"evaluation/runner/VisionAgentEvaluationRunner.ts",
148+
"common/MarkdownViewerUtil.ts",
111149
"common/utils.ts",
112150
"common/log.ts",
113151
"common/context.ts",

0 commit comments

Comments
 (0)