Skip to content

Commit df1089c

Browse files
committed
work in progress
1 parent 6ed6b52 commit df1089c

File tree

6 files changed

+268
-110
lines changed

6 files changed

+268
-110
lines changed

config/locales/server.en.yml

+8
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,9 @@ en:
296296
designer:
297297
name: Designer
298298
description: "AI Bot specialized in generating and editing images"
299+
forum_researcher:
300+
name: Forum Researcher
301+
description: "AI Bot specialized in deep research for the forum"
299302
sql_helper:
300303
name: SQL Helper
301304
description: "AI Bot specialized in helping craft SQL queries on this Discourse instance"
@@ -385,6 +388,7 @@ en:
385388
javascript_evaluator: "Evaluate JavaScript"
386389
create_image: "Creating image"
387390
edit_image: "Editing image"
391+
researcher: "Researching"
388392
tool_help:
389393
read_artifact: "Read a web artifact using the AI Bot"
390394
update_artifact: "Update a web artifact using the AI Bot"
@@ -411,6 +415,7 @@ en:
411415
dall_e: "Generate image using DALL-E 3"
412416
search_meta_discourse: "Search Meta Discourse"
413417
javascript_evaluator: "Evaluate JavaScript"
418+
researcher: "Research forum information using the AI Bot"
414419
tool_description:
415420
read_artifact: "Read a web artifact using the AI Bot"
416421
update_artifact: "Updated a web artifact using the AI Bot"
@@ -445,6 +450,9 @@ en:
445450
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
446451
setting_context: "Reading context for: %{setting_name}"
447452
schema: "%{tables}"
453+
researcher:
454+
one: "Found %{count} result for '%{filter}'"
455+
other: "Found %{count} results for '%{filter}'"
448456
search_settings:
449457
one: "Found %{count} result for '%{query}'"
450458
other: "Found %{count} results for '%{query}'"

lib/personas/forum_researcher.rb

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# frozen_string_literal: true

module DiscourseAi
  module Personas
    # Persona focused on forum research. It exposes a single tool
    # (Tools::Researcher) and a system prompt that tells the model to help the
    # user settle on research criteria and to begin with a dry run.
    class ForumResearcher < Persona
      # @return [Array<Class>] the tool classes available to this persona
      def tools
        [Tools::Researcher]
      end

      # The curly-brace tokens ({site_url}, {site_title}, ...) are left as-is
      # here; presumably they are substituted elsewhere before the prompt is
      # sent -- confirm against the Persona base class.
      #
      # @return [String] the system prompt template
      def system_prompt
        <<~PROMPT
          You are a helpful Discourse assistant specializing in forum research.
          You _understand_ and **generate** Discourse Markdown.

          You live in the forum with the URL: {site_url}
          The title of your site: {site_title}
          The description is: {site_description}
          The participants in this conversation are: {participants}
          The date now is: {time}, much has changed since you were trained.

          As a forum researcher, you will help users come up with the correct research criteria to
          properly analyze the forum data.

          You will always start with a dry_run of the proposed research criteria.
        PROMPT
      end
    end
  end
end

lib/personas/persona.rb

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def system_personas
4747
Summarizer => -11,
4848
ShortSummarizer => -12,
4949
Designer => -13,
50+
ForumResearcher => -14,
5051
}
5152
end
5253

@@ -99,6 +100,7 @@ def all_available_tools
99100
Tools::GithubSearchFiles,
100101
Tools::WebBrowser,
101102
Tools::JavascriptEvaluator,
103+
Tools::Researcher,
102104
]
103105

104106
if SiteSetting.ai_artifact_security.in?(%w[lax strict])

lib/personas/tools/researcher.rb

+7-28
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def name
4141

4242
def custom_system_message
4343
<<~TEXT
44-
Use the researcher tool to analyze patterns and extract insights from forum content.
45-
For complex research tasks, start with a dry run to gauge the scope before processing.
44+
Use the researcher tool to analyze patterns and extract insights from forum content.
45+
For complex research tasks, start with a dry run to gauge the scope before processing.
4646
TEXT
4747
end
4848

@@ -54,26 +54,14 @@ def accepted_options
5454
# Runs the research filter and reports how many records it matches.
#
# Reads :filter and :goal from `parameters` (each falls back to ""), stores
# the filter in @last_filter and the resulting count in @result_count.
#
# NOTE: handling of the :dry_run parameter and the progress yield are
# switched off for now (left commented out below), so every call is
# reported as a dry run.
#
# @return [Hash] with :dry_run, :goal, :filter and :number_of_results
def invoke
  @last_filter = parameters[:filter] || ""
  research_goal = parameters[:goal] || ""

  #dry_run = parameters[:dry_run].nil? ? true : parameters[:dry_run]
  #yield(I18n.t("discourse_ai.ai_bot.researching", filter: @last_filter, goal: goal))

  @result_count = DiscourseAi::Utils::Research::Filter.new(@last_filter).search.count

  {
    dry_run: true,
    goal: research_goal,
    filter: @last_filter,
    number_of_results: @result_count,
  }
end
7866

7967
protected
@@ -84,15 +72,6 @@ def description_args
8472

8573
private
8674

87-
def parse_filter(filter_string)
88-
# This would parse the filter string into components
89-
# For example, extracting username, date ranges, categories, tags, etc.
90-
# Simplified implementation for now
91-
components = {}
92-
components[:raw] = filter_string
93-
components
94-
end
95-
9675
def simulate_count(filter_components)
9776
# In a real implementation, this would query the database to get a count
9877
# For now, return a simulated count

lib/utils/research/filter.rb

+154-82
Original file line numberDiff line numberDiff line change
@@ -4,103 +4,175 @@ module DiscourseAi
44
module Utils
55
module Research
66
# Parses a research filter string such as
#   "status:open @sam tag:bug after:2024-01-01 some words"
# into a list of query refinements and applies them to a posts relation.
#
# Each whitespace-separated token is checked against the registered matchers
# (first match wins, in registration order). Tokens that match no filter are
# kept, space-joined, in #term.
#
# NOTE: #term and #order are parsed and exposed, but #search does not use
# either of them yet.
class Filter
  # Registers a handler for tokens matching +matcher+.
  #
  # The block is called as block.(relation, captured, filter) where
  # +captured+ is the matcher's first capture group (nil when it has none)
  # and +filter+ is the Filter instance; it must return a relation.
  #
  # @param matcher [Regexp]
  def self.register_filter(matcher, &block)
    registered_filters[matcher] = block
  end

  # @return [Hash{Regexp => Proc}] handlers keyed by matcher, in registration order
  def self.registered_filters
    @registered_filters ||= {}
  end

  # Delegates to ::Search.word_to_date. The date handlers below treat a
  # falsy result as "no usable date" and leave the relation untouched.
  def self.word_to_date(str)
    ::Search.word_to_date(str)
  end

  attr_reader :term, :filters, :order, :guardian, :limit, :offset

  # Topic status filters
  register_filter(/\Astatus:open\z/i) do |relation, _captured, _filter|
    relation.where("topics.closed = false AND topics.archived = false")
  end

  register_filter(/\Astatus:closed\z/i) do |relation, _captured, _filter|
    relation.where("topics.closed = true")
  end

  register_filter(/\Astatus:archived\z/i) do |relation, _captured, _filter|
    relation.where("topics.archived = true")
  end

  register_filter(/\Astatus:noreplies\z/i) do |relation, _captured, _filter|
    relation.where("topics.posts_count = 1")
  end

  register_filter(/\Astatus:single_user\z/i) do |relation, _captured, _filter|
    relation.where("topics.participant_count = 1")
  end

  # Date filters
  register_filter(/\Abefore:(.*)\z/i) do |relation, date_str, _filter|
    if date = Filter.word_to_date(date_str)
      relation.where("posts.created_at < ?", date)
    else
      relation
    end
  end

  register_filter(/\Aafter:(.*)\z/i) do |relation, date_str, _filter|
    if date = Filter.word_to_date(date_str)
      relation.where("posts.created_at > ?", date)
    else
      relation
    end
  end

  # Category filter
  register_filter(/\Acategory:([a-zA-Z0-9_\-]+)\z/i) do |relation, slug, _filter|
    category = Category.find_by("LOWER(slug) = LOWER(?)", slug)
    if category
      category_ids = [category.id]
      category_ids +=
        Category.subcategory_ids(category.id) if category.subcategory_ids.present?
      relation.where("topics.category_id IN (?)", category_ids)
    else
      relation.where("1 = 0") # No results if category doesn't exist
    end
  end

  # Tag filter
  register_filter(/\Atag:([a-zA-Z0-9_\-]+)\z/i) do |relation, name, _filter|
    tag = Tag.find_by_name(name)
    if tag
      relation.joins("INNER JOIN topic_tags ON topic_tags.topic_id = topics.id").where(
        "topic_tags.tag_id = ?",
        tag.id,
      )
    else
      relation.where("1 = 0") # No results if tag doesn't exist
    end
  end

  # User filter. Dots and dashes are accepted in addition to word
  # characters so that tokens like "@john.doe" or "@mary-ann" are treated
  # as a user filter instead of leaking into the free-text term.
  register_filter(/\A\@([\w.\-]+)\z/i) do |relation, username, _filter|
    user = User.find_by(username_lower: username.downcase)
    if user
      relation.where("posts.user_id = ?", user.id)
    else
      relation.where("1 = 0") # No results if user doesn't exist
    end
  end

  # Posted by current user
  register_filter(/\Ain:posted\z/i) do |relation, _captured, filter|
    if filter.guardian.user
      relation.where("posts.user_id = ?", filter.guardian.user.id)
    else
      relation.where("1 = 0") # No results if not logged in
    end
  end

  # Group filter. Uses an exact case-insensitive comparison rather than
  # ILIKE: the matcher allows "_", which ILIKE would interpret as a
  # single-character wildcard and could select the wrong group.
  register_filter(/\Agroup:([a-zA-Z0-9_\-]+)\z/i) do |relation, name, _filter|
    group = Group.find_by("LOWER(name) = LOWER(?)", name)
    if group
      relation.where(
        "posts.user_id IN (
          SELECT gu.user_id FROM group_users gu
          WHERE gu.group_id = ?
        )",
        group.id,
      )
    else
      relation.where("1 = 0") # No results if group doesn't exist
    end
  end

  # @param term [#to_s] raw filter string; nil is treated as ""
  # @param guardian [Guardian, nil] defaults to a new Guardian
  # @param limit [Integer, nil] applied by #search only when > 0
  # @param offset [Integer, nil] applied by #search only when > 0
  def initialize(term, guardian: nil, limit: nil, offset: nil)
    @term = term.to_s
    @guardian = guardian || Guardian.new
    @limit = limit
    @offset = offset
    @filters = []

    @term = process_filters(@term)
  end

  # Builds the posts relation: posts and their topics visible to the
  # guardian, narrowed by every parsed filter in the order it appeared,
  # then limited/offset when those were given.
  #
  # @return the filtered relation (not loaded)
  def search
    base = Post.secured(@guardian).joins(:topic).merge(Topic.secured(@guardian))

    filtered =
      @filters.reduce(base) do |relation, (handler, captured)|
        handler.call(relation, captured, self)
      end

    filtered = filtered.limit(@limit) if @limit.to_i > 0
    filtered = filtered.offset(@offset) if @offset.to_i > 0

    filtered
  end

  private

  # Splits +term+ into tokens (bare words, optionally followed by a
  # double-quoted phrase), records "order:xxx" in @order, queues matching
  # registered filters in @filters, and returns the leftover tokens joined
  # by a single space.
  def process_filters(term)
    return "" if term.blank?

    term
      .to_s
      .scan(/(([^" \t\n\x0B\f\r]+)?(("[^"]+")?))/)
      .filter_map do |(word, _)|
        # The pattern is fully optional, so scan also yields empty matches.
        next if word.blank?

        # Check for order:xxx syntax
        if (order_match = word.match(/\Aorder:(\w+)\z/i))
          @order = order_match[1].downcase.to_sym
          next
        end

        next if capture_registered_filter(word)

        word
      end
      .join(" ")
  end

  # Queues the first registered handler whose matcher accepts +word+,
  # together with the matcher's first capture group.
  #
  # @return [Boolean] true when a handler was queued
  def capture_registered_filter(word)
    self.class.registered_filters.each do |matcher, handler|
      match = matcher.match(word)
      next unless match

      @filters << [handler, match[1]]
      return true
    end

    false
  end
end
106178
end

0 commit comments

Comments
 (0)