4 changes: 4 additions & 0 deletions config/locales/client.en.yml
@@ -222,6 +222,10 @@ en:
name: "Search"
description: "Enhances search experience by providing AI-generated answers to queries"
discoveries: "Discoveries"
embeddings:
name: "Embeddings"
description: "Powers features like Related Topics and AI Search by generating semantic representations of text"
hyde: "HyDE"
discord:
name: "Discord integration"
description: "Adds the ability to search Discord channels"
3 changes: 3 additions & 0 deletions config/locales/server.en.yml
@@ -394,6 +394,9 @@ en:
spam_detector:
name: "Spam detector"
description: "Default persona powering our Spam detection feature"
content_creator:
name: "Content creator"
description: "Default persona powering HyDE search"

topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic"
23 changes: 20 additions & 3 deletions config/settings.yml
@@ -222,34 +222,45 @@ discourse_ai:
default: false
client: true
validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
area: "ai-features/embeddings"
ai_embeddings_selected_model:
type: enum
default: ""
allow_any: false
enum: "DiscourseAi::Configuration::EmbeddingDefsEnumerator"
validator: "DiscourseAi::Configuration::EmbeddingDefsValidator"
area: "ai-features/embeddings"
ai_embeddings_per_post_enabled:
default: false
hidden: true
ai_embeddings_generate_for_pms: false
ai_embeddings_generate_for_pms:
default: false
area: "ai-features/embeddings"
ai_embeddings_semantic_related_topics_enabled:
default: false
client: true
ai_embeddings_semantic_related_topics: 5
ai_embeddings_semantic_related_include_closed_topics: true
area: "ai-features/embeddings"
ai_embeddings_semantic_related_topics:
default: 5
area: "ai-features/embeddings"
ai_embeddings_semantic_related_include_closed_topics:
default: true
area: "ai-features/embeddings"
ai_embeddings_backfill_batch_size:
default: 250
hidden: true
ai_embeddings_semantic_search_enabled:
default: false
client: true
validator: "DiscourseAi::Configuration::LlmDependencyValidator"
area: "ai-features/embeddings"
ai_embeddings_semantic_search_hyde_model:
default: ""
type: enum
allow_any: false
enum: "DiscourseAi::Configuration::LlmEnumerator"
validator: "DiscourseAi::Configuration::LlmValidator"
area: "ai-features/embeddings"
ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
default: ""
hidden: true
@@ -259,6 +270,12 @@ discourse_ai:
default: false
client: true
hidden: true
area: "ai-features/embeddings"
ai_embeddings_semantic_search_hyde_persona:
default: "-32"
type: enum
enum: "DiscourseAi::Configuration::PersonaEnumerator"
area: "ai-features/embeddings"

ai_embeddings_discourse_service_api_endpoint:
default: ""
2 changes: 2 additions & 0 deletions db/fixtures/personas/603_ai_personas.rb
@@ -36,6 +36,8 @@ def from_setting(setting_name)
setting_name = "ai_helper_custom_prompts_allowed_groups"
default_groups = [Group::AUTO_GROUPS[:staff]]
persona.allowed_group_ids = from_setting(setting_name) || default_groups
elsif persona_class == DiscourseAi::Personas::ContentCreator
persona.allowed_group_ids = [Group::AUTO_GROUPS[:everyone]]
else
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
end
14 changes: 14 additions & 0 deletions lib/configuration/feature.rb
@@ -144,6 +144,17 @@ def spam_features
]
end

def embeddings_features
feature_cache[:embeddings] ||= [
new(
"hyde",
"ai_embeddings_semantic_search_hyde_persona",
DiscourseAi::Configuration::Module::EMBEDDINGS_ID,
DiscourseAi::Configuration::Module::EMBEDDINGS,
),
]
end

def lookup_bot_persona_ids
AiPersona
.where(enabled: true)
@@ -196,6 +207,7 @@ def all
translation_features,
bot_features,
spam_features,
embeddings_features,
].flatten
end

@@ -241,6 +253,8 @@ def llm_models
DiscourseAi::AiHelper::Assistant.find_ai_helper_model(name, persona_klass)
when DiscourseAi::Configuration::Module::TRANSLATION
DiscourseAi::Translation::BaseTranslator.preferred_llm_model(persona_klass)
when DiscourseAi::Configuration::Module::EMBEDDINGS
DiscourseAi::Embeddings::SemanticSearch.new(nil).find_ai_hyde_model(persona_klass)
end

if llm_model.blank? && persona.default_llm_id
21 changes: 20 additions & 1 deletion lib/configuration/module.rb
@@ -11,8 +11,19 @@ class Module
TRANSLATION = "translation"
BOT = "bot"
SPAM = "spam"
EMBEDDINGS = "embeddings"

NAMES = [SUMMARIZATION, SEARCH, DISCORD, INFERENCE, AI_HELPER, TRANSLATION, BOT, SPAM].freeze
NAMES = [
SUMMARIZATION,
SEARCH,
DISCORD,
INFERENCE,
AI_HELPER,
TRANSLATION,
BOT,
SPAM,
EMBEDDINGS,
].freeze

SUMMARIZATION_ID = 1
SEARCH_ID = 2
@@ -22,6 +33,7 @@ class Module
TRANSLATION_ID = 6
BOT_ID = 7
SPAM_ID = 8
EMBEDDINGS_ID = 9

class << self
def all
@@ -75,6 +87,13 @@ def all
enabled_by_setting: "ai_spam_detection_enabled",
features: DiscourseAi::Configuration::Feature.spam_features,
),
new(
EMBEDDINGS_ID,
EMBEDDINGS,
enabled_by_setting: "ai_embeddings_enabled",
features: DiscourseAi::Configuration::Feature.embeddings_features,
extra_check: -> { SiteSetting.ai_embeddings_semantic_search_enabled },
),
]
end

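Note: the embeddings entry is the only module here that pairs `enabled_by_setting` with an `extra_check` lambda, so it should surface only when both `ai_embeddings_enabled` and `ai_embeddings_semantic_search_enabled` are on. A minimal sketch of how those two inputs could combine, assuming accessors named after the keyword arguments above (the actual `enabled?` logic is outside this diff):

```ruby
# Sketch only, not the implementation in lib/configuration/module.rb.
# Assumes the Module instance exposes enabled_by_setting and extra_check.
def enabled?
  SiteSetting.get(enabled_by_setting) && (extra_check.nil? || extra_check.call)
end

# For the embeddings entry this amounts to:
#   SiteSetting.ai_embeddings_enabled && SiteSetting.ai_embeddings_semantic_search_enabled
```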
71 changes: 55 additions & 16 deletions lib/embeddings/semantic_search.rb
@@ -78,7 +78,9 @@ def search_for_topics(query, page = 1, hyde: true)
return Post.none
end

search_embedding = hyde ? hyde_embedding(search_term) : embedding(search_term)
search_embedding = nil
search_embedding = hyde_embedding(search_term) if hyde
search_embedding = embedding(search_term) if search_embedding.blank?

over_selection_limit = limit * OVER_SELECTION_FACTOR

@@ -176,26 +178,51 @@ def quick_search(query)
end

def hypothetical_post_from(search_term)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a content creator for a forum. The forum description is as follows:
#{SiteSetting.title}
#{SiteSetting.site_description}
context =
DiscourseAi::Personas::BotContext.new(
user: @guardian.user,
skip_tool_details: true,
feature_name: "semantic_search_hyde",
messages: [{ type: :user, content: <<~TEXT.strip }],
Using this description, write a forum post about the subject inside the <input></input> XML tags:

<input>#{search_term}</input>
TEXT
)

Put the forum post between <ai></ai> tags.
TEXT
bot = build_bot(@guardian.user)
return nil if bot.nil?

prompt.push(type: :user, content: <<~TEXT.strip)
Using this description, write a forum post about the subject inside the <input></input> XML tags:
structured_output = nil
raw_response = +""
hyde_schema_key = bot.persona.response_format&.first.to_h

<input>#{search_term}</input>
TEXT
buffer_blk =
Proc.new do |partial, _, type|
if type == :structured_output
structured_output = partial
elsif type.blank?
# Assume response is a regular completion.
raw_response << partial
end
end

llm_response =
DiscourseAi::Completions::Llm.proxy(
SiteSetting.ai_embeddings_semantic_search_hyde_model,
).generate(prompt, user: @guardian.user, feature_name: "semantic_search_hyde")
bot.reply(context, &buffer_blk)

structured_output&.read_buffered_property(hyde_schema_key["key"]&.to_sym) || raw_response
end

# Priorities are:
# 1. Persona's default LLM
# 2. `ai_embeddings_semantic_search_hyde_model` setting.
def find_ai_hyde_model(persona_klass)
model_id =
persona_klass.default_llm_id ||
SiteSetting.ai_embeddings_semantic_search_hyde_model&.split(":")&.last

Nokogiri::HTML5.fragment(llm_response).at("ai")&.text.presence || llm_response
return if model_id.blank?

LlmModel.find_by(id: model_id)
end

private
@@ -209,6 +236,18 @@ def build_hyde_key(digest, hyde_model)
def build_embedding_key(digest, hyde_model, embedding_model)
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
end

def build_bot(user)
persona_id = SiteSetting.ai_embeddings_semantic_search_hyde_persona

persona_klass = AiPersona.find_by(id: persona_id)&.class_instance
return if persona_klass.nil?

llm_model = find_ai_hyde_model(persona_klass)
return if llm_model.nil?

DiscourseAi::Personas::Bot.as(user, persona: persona_klass.new, model: llm_model)
end
end
end
end
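Note the fallback order in `find_ai_hyde_model`: the persona's `default_llm_id` wins, and only when it is blank does the `ai_embeddings_semantic_search_hyde_model` setting apply; the `split(":")&.last` suggests the setting stores a prefixed value along the lines of `"custom:<id>"`. A hedged usage sketch, with illustrative ids:

```ruby
# Sketch: resolving the HyDE model (ids here are assumptions for illustration).
SiteSetting.ai_embeddings_semantic_search_hyde_model = "custom:42"

persona_klass = AiPersona.find_by(id: -32).class_instance # seeded ContentCreator persona
search = DiscourseAi::Embeddings::SemanticSearch.new(Guardian.new)

# With no default_llm_id on the persona, the setting is consulted and this
# returns LlmModel.find_by(id: 42); it returns nil when neither source is set.
search.find_ai_hyde_model(persona_klass)
```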
2 changes: 1 addition & 1 deletion lib/personas/bot.rb
@@ -171,7 +171,7 @@ def reply(context, llm_args: {}, &update_blk)
text = +""
result.each { |item| text << item if item.is_a?(String) }
end
raw_context << [text, bot_user.username]
raw_context << [text, bot_user&.username]
end

total_completions += 1
31 changes: 31 additions & 0 deletions lib/personas/content_creator.rb
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module DiscourseAi
module Personas
class ContentCreator < Persona
def self.default_enabled
false
end

def system_prompt
<<~PROMPT.strip
You are a content creator for a forum. The forum title and description are as follows:
* Title: {site_title}
* Description: {site_description}

Format your response as a JSON object with a single key named "output", which has the created content.
Your output should be in the following format:
<output>
{"output": "xx"}
</output>

Where "xx" is replaced by the content.
PROMPT
end

def response_format
[{ "key" => "output", "type" => "string" }]
end
end
end
end
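The `response_format` declaration is what lets `hypothetical_post_from` above drop the old `<ai>` tag scraping: the completion arrives as structured output and is read back by key. A rough sketch of the round trip, assuming the model honors the declared schema:

```ruby
# The model is expected to reply with JSON matching response_format, e.g.:
#   {"output": "A hypothetical forum post about the search term..."}

schema_key = bot.persona.response_format&.first.to_h
# => { "key" => "output", "type" => "string" }

# The streamed structured output is then read back by key, falling back to the
# raw completion when the model ignored the schema:
structured_output&.read_buffered_property(schema_key["key"]&.to_sym) || raw_response
```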
1 change: 1 addition & 0 deletions lib/personas/persona.rb
@@ -69,6 +69,7 @@ def system_personas
TopicTitleTranslator => -29,
ShortTextTranslator => -30,
SpamDetector => -31,
ContentCreator => -32,
}
end

8 changes: 4 additions & 4 deletions spec/lib/modules/embeddings/semantic_search_spec.rb
@@ -27,7 +27,7 @@ def insert_candidate(candidate)
end

def trigger_search(query)
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{hypothetical_post}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
subject.search_for_topics(query)
end
end
@@ -123,9 +123,9 @@ def trigger_search(query)
context "while searching as anon" do
it "returns an empty list" do
posts =
DiscourseAi::Completions::Llm.with_prepared_responses(
["<ai>#{hypothetical_post}</ai>"],
) { described_class.new(Guardian.new(nil)).search_for_topics(query) }
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
described_class.new(Guardian.new(nil)).search_for_topics(query)
end

expect(posts).to be_empty
end
4 changes: 2 additions & 2 deletions spec/lib/personas/tools/search_spec.rb
@@ -125,7 +125,7 @@
DiscourseAi::Embeddings::Schema.for(Topic).store(post1.topic, hyde_embedding, "digest")

results =
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
search.invoke(&progress_blk)
end

@@ -144,7 +144,7 @@

# results will be expanded by semantic search, but it will find nothing
results =
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
search.invoke(&progress_blk)
end

2 changes: 1 addition & 1 deletion spec/lib/utils/search_spec.rb
@@ -154,7 +154,7 @@

# Using a completely different search query, should still find via semantic search
results =
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
described_class.perform_search(
search_query: "totally different query",
current_user: admin,
2 changes: 1 addition & 1 deletion spec/requests/admin/ai_features_controller_spec.rb
@@ -19,7 +19,7 @@
get "/admin/plugins/discourse-ai/ai-features.json"

expect(response.status).to eq(200)
expect(response.parsed_body["ai_features"].count).to eq(8)
expect(response.parsed_body["ai_features"].count).to eq(9)
end
end

2 changes: 1 addition & 1 deletion spec/system/admin_ai_features_spec.rb
@@ -28,7 +28,7 @@
ai_features_page.toggle_unconfigured

# this changes as we add more AI features
expect(ai_features_page).to have_listed_modules(7)
expect(ai_features_page).to have_listed_modules(8)
end

it "lists the persona used for the corresponding AI feature" do