Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
303c637
FAQ HANDLER ready to be merged
PrithvijitBose Aug 11, 2025
cc93f48
Added pydantic schema
PrithvijitBose Aug 11, 2025
7410fd5
refactor(faq_handler): improve site filter parsing, env handling, and…
PrithvijitBose Aug 11, 2025
59784ec
refactor: use centralized app_settings for org identity and handles
PrithvijitBose Aug 11, 2025
8d71d82
fix: adjust iteration stop condition to allow max iteration count
PrithvijitBose Aug 11, 2025
de63397
made changes on setting.py
PrithvijitBose Aug 13, 2025
34ec22a
fix: truncate long string content to 500 chars with ellipsis
PrithvijitBose Aug 13, 2025
b05f020
feat: add organization identity fields (name, website, github, twitte…
PrithvijitBose Aug 13, 2025
25675a8
refactor(faq): pass search_tool and llm dependencies to FAQTool
PrithvijitBose Oct 13, 2025
295065c
refactor(imports): switch FAQ node import to absolute path
PrithvijitBose Oct 13, 2025
5299f17
Add a new file 'faq_prompt.py'
PrithvijitBose Oct 13, 2025
eb56748
feat(faq): implement dynamic FAQTool with search and LLM integration
PrithvijitBose Oct 13, 2025
bd98139
chore(faq): remove standalone script execution block
PrithvijitBose Oct 13, 2025
44ef616
chore(faq): remove standalone script execution block
PrithvijitBose Oct 13, 2025
70f760f
chore(faq): remove standalone script execution block
PrithvijitBose Oct 13, 2025
d16769f
refactor: remove search_tool dependency from FAQTool initialization
PrithvijitBose Oct 13, 2025
ef640d2
refactor(faq): integrate TavilySearchTool and add domain filtering
PrithvijitBose Oct 13, 2025
345a68a
Merge branch 'main' into FAQ-HANDLER
PrithvijitBose Oct 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions backend/app/agents/devrel/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self, config: Dict[str, Any] = None):
google_api_key=settings.gemini_api_key
)
self.search_tool = DuckDuckGoSearchTool()
self.faq_tool = FAQTool()
self.faq_tool = FAQTool(search_tool=self.search_tool, llm=self.llm)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

πŸ› οΈ Refactor suggestion | 🟠 Major

🧩 Analysis chain

Remove unused FAQTool instance.

The self.faq_tool instance is created but never used. The workflow now calls handle_faq_node directly via the tool wrapper, bypassing the FAQTool class. This line (and potentially the import on line 9) can be removed.

Verify that FAQTool is not used elsewhere in the codebase:

If no other usage is found, apply this diff:

-from .tools.faq_tool import FAQTool
-        self.faq_tool = FAQTool(search_tool=self.search_tool, llm=self.llm)

🏁 Script executed:

#!/bin/bash
# Search for usage of self.faq_tool in the DevRel agent and related files
rg -n 'self\.faq_tool' --type py -g '!__pycache__' -g '!*.pyc'

Length of output: 113


🏁 Script executed:

#!/bin/bash
# Search for any usage of FAQTool instantiation or calls elsewhere
rg -n 'FAQTool\(' --type py -g '!__pycache__' -g '!*.pyc'

Length of output: 113


Remove unused FAQTool import and instantiation

-from .tools.faq_tool import FAQTool
…
-        self.faq_tool = FAQTool(search_tool=self.search_tool, llm=self.llm)
πŸ“ Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
self.faq_tool = FAQTool(search_tool=self.search_tool, llm=self.llm)
--- a/backend/app/agents/devrel/agent.py
@@
-from .tools.faq_tool import FAQTool
@@ class DevrelAgent:
def __init__(self, config, llm, search_tool):
# existing initialization...
- self.faq_tool = FAQTool(search_tool=self.search_tool, llm=self.llm)
πŸ€– Prompt for AI Agents
In backend/app/agents/devrel/agent.py around line 31, the FAQTool is imported
and instantiated but not used; remove the unused import and delete the
instantiation (self.faq_tool = FAQTool(...)) to avoid dead code, and run
tests/linting to ensure no other references remainβ€”if other code expects
faq_tool, either wire it up properly or remove those references as well.

self.github_toolkit = GitHubToolkit()
self.checkpointer = InMemorySaver()
super().__init__("DevRelAgent", self.config)
Expand All @@ -43,7 +43,12 @@ def _build_graph(self):
# Phase 2: ReAct Supervisor - Decide what to do next
workflow.add_node("react_supervisor", partial(react_supervisor_node, llm=self.llm))
workflow.add_node("web_search_tool", partial(web_search_tool_node, search_tool=self.search_tool, llm=self.llm))
workflow.add_node("faq_handler_tool", partial(faq_handler_tool_node, faq_tool=self.faq_tool))

workflow.add_node("faq_handler_tool", partial(
faq_handler_tool_node, search_tool=self.search_tool, llm=self.llm))

workflow.add_node("faq_handler_tool", partial(faq_handler_tool_node, search_tool=self.search_tool, llm=self.llm))

workflow.add_node("onboarding_tool", onboarding_tool_node)
workflow.add_node("github_toolkit_tool", partial(github_toolkit_tool_node, github_toolkit=self.github_toolkit))

Expand Down
241 changes: 235 additions & 6 deletions backend/app/agents/devrel/nodes/handlers/faq.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,255 @@
import logging
from typing import List, Dict, Any
from langchain_core.messages import HumanMessage
from app.agents.state import AgentState
from app.core.config.settings import settings as app_settings


# Configure logger for this module
logger = logging.getLogger(__name__)

async def handle_faq_node(state: AgentState, faq_tool) -> dict:
"""Handle FAQ requests"""
logger.info(f"Handling FAQ for session {state.session_id}")

# Organization identity and official handles from centralized settings
# ORG_NAME: human-readable organization name, interpolated into prompts and fallback text.
ORG_NAME = app_settings.org_name
# OFFICIAL_HANDLES: official URLs (website, GitHub, Twitter) later parsed into
# site:<domain> filters to restrict FAQ web searches to official sources.
OFFICIAL_HANDLES = [app_settings.org_website, app_settings.org_github, app_settings.org_twitter]


async def handle_faq_node(state: AgentState, search_tool: Any, llm: Any) -> dict:
"""
Handle FAQ requests dynamically using web search and AI synthesis.
Pass official handles to search tool if it supports site-restricted queries.
"""
logger.info(f"[FAQ_HANDLER] Handling dynamic FAQ for session {state.session_id}")

latest_message = ""
if state.messages:
latest_message = state.messages[-1].get("content", "")
elif state.context.get("original_message"):
latest_message = state.context["original_message"]

# faq_tool will be passed from the agent, similar to llm for classify_intent
faq_response = await faq_tool.get_response(latest_message)
# Early exit if no message
if not latest_message:
logger.warning("[FAQ_HANDLER] Empty latest user message; returning fallback")
return {
"task_result": {
"type": "faq",
"response": _generate_fallback_response(latest_message, ORG_NAME),
"source": "dynamic_web_search"
},
"current_task": "faq_handled"
}
Comment on lines +31 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if this section is really required...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

faq.py is not required now. I have merged its functions into faq_tool.py and will remove the commit.


# Append site restrictions to the query if search tool supports it
try:
from urllib.parse import urlparse
domains = []
for u in OFFICIAL_HANDLES:
try:
parsed = urlparse(u)
domain = parsed.netloc or parsed.path # handles bare domains
if domain:
domains.append(domain)
except Exception:
continue
site_filters = " OR ".join([f"site:{d}" for d in domains])
except Exception:
site_filters = ""
logger.info(f"[FAQ_HANDLER] Applying site filters for search: {site_filters or '(none)'}")

faq_response = await _dynamic_faq_process(
latest_message,
search_tool,
llm,
org_name=ORG_NAME,
site_filters=site_filters,
)

return {
"task_result": {
"type": "faq",
"response": faq_response,
"source": "faq_database"
"source": "dynamic_web_search"
},
"current_task": "faq_handled"
}


async def _dynamic_faq_process(
message: str,
search_tool: Any,
llm: Any,
org_name: str = ORG_NAME,
site_filters: str = "",
) -> str:
"""
Dynamic FAQ handler implementing:
1. Intent Detection & Query Refinement
2. Web Search (with site restrictions)
3. AI-Powered Synthesis
4. Generate Final Response
5. Format with Sources
"""
try:
# Step 1: Intent Detection & Query Refinement
logger.info(f"[FAQ_HANDLER] Step 1: Refining FAQ query for org '{org_name}'")
refined_query = await _refine_faq_query(message, llm, org_name)

# Append site filters for restricting to official handles
if site_filters:
refined_query = f"({refined_query}) AND ({site_filters})"
logger.info(f"[FAQ_HANDLER] Refined and filtered query: {refined_query}")

Comment on lines +94 to +101
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Reduce user-content logging to DEBUG and truncate to lower PII exposure and noise

Refined queries and synthesized responses may contain user-provided content. Logging them at INFO can leak PII and increase noise.

Apply:

-        logger.info(f"[FAQ_HANDLER] Step 1: Refining FAQ query for org '{org_name}'")
+        logger.debug(f"[FAQ_HANDLER] Step 1: Refining FAQ query for org '{org_name}'")
@@
-        logger.info(f"[FAQ_HANDLER] Refined and filtered query: {refined_query}")
+        logger.debug(f"[FAQ_HANDLER] Refined and filtered query: {refined_query[:200]}{'...' if len(refined_query) > 200 else ''}")
@@
-    logger.info(f"[FAQ_HANDLER] Synthesized FAQ response: {synthesized_answer[:100]}...")
+    logger.debug(f"[FAQ_HANDLER] Synthesized FAQ response: {synthesized_answer[:100]}...")

If you need INFO-level observability, consider redacting with a sanitizer (emails, phone numbers) before logging.

Also applies to: 196-196

# Step 2: Dynamic Web Search
logger.info(f"[FAQ_HANDLER] Step 2: Searching for: {refined_query}")
try:
search_results = await search_tool.search(refined_query)
except Exception as search_err:
logger.error(f"[FAQ_HANDLER] Search tool error: {search_err}")
return _generate_fallback_response(message, org_name)
Comment on lines +103 to +108
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

πŸ› οΈ Refactor suggestion

Add timeouts around external search calls to prevent hangs

Wrap the search call with asyncio.wait_for using configurable timeouts; otherwise a network stall can block the node.

Apply:

-        try:
-            search_results = await search_tool.search(refined_query)
+        try:
+            search_results = await asyncio.wait_for(
+                search_tool.search(refined_query),
+                timeout=FAQ_SEARCH_TIMEOUT
+            )

Add these near the top of the module (outside the selected range):

import asyncio  # at the imports

# Timeouts with safe defaults; override via settings if available
FAQ_SEARCH_TIMEOUT = getattr(app_settings, "faq_search_timeout", 10.0)
FAQ_LLM_TIMEOUT = getattr(app_settings, "faq_llm_timeout", 15.0)
πŸ€– Prompt for AI Agents
In backend/app/agents/devrel/nodes/handlers/faq.py around lines 103 to 108, the
external search call can hang; add an asyncio timeout: import asyncio at the top
and define FAQ_SEARCH_TIMEOUT = getattr(app_settings, "faq_search_timeout",
10.0) (and FAQ_LLM_TIMEOUT similarly if present), then replace the direct await
search_tool.search(refined_query) with await
asyncio.wait_for(search_tool.search(refined_query), FAQ_SEARCH_TIMEOUT); catch
asyncio.TimeoutError (and still catch general Exception) and log a
timeout-specific error before returning _generate_fallback_response(message,
org_name).


if not search_results:
logger.warning(f"[FAQ_HANDLER] No results found for query: {refined_query}")
return _generate_fallback_response(message, org_name)

# Step 3 & 4: AI-Powered Synthesis & Response Generation
logger.info("[FAQ_HANDLER] Step 3-4: Synthesizing search results into FAQ response")
synthesized_response = await _synthesize_faq_response(message, search_results, llm, org_name)

# Step 5: Format Final Response with Sources
logger.info("[FAQ_HANDLER] Step 5: Formatting final response with sources")
final_response = _format_faq_response(synthesized_response, search_results)

return final_response

except Exception as e:
logger.error(f"[FAQ_HANDLER] Error in dynamic FAQ process: {e}")
return _generate_fallback_response(message, org_name)


async def _refine_faq_query(message: str, llm: Any, org_name: str) -> str:
    """
    Step 1: Refine the raw user question into an organization-specific search query.

    Args:
        message: The user's original question.
        llm: Async chat model exposing ``ainvoke`` (LangChain-style interface).
        org_name: Organization name used to scope the query to official sources.

    Returns:
        The refined search query string produced by the LLM (stripped of whitespace).
    """
    refinement_prompt = f"""
You are helping someone find information about {org_name}.
Transform their question into an effective search query that will find official information about the organization.
User Question: "{message}"
Create a search query that focuses on:
- Official {org_name} information
- The organization's website, blog, or documentation
- Adding terms like "about", "mission", "projects" if relevant
Return only the refined search query, nothing else.
Examples:
- "What does this org do?" β†’ "{org_name} about mission what we do"
- "How do you work?" β†’ "{org_name} how it works process methodology"
- "What projects do you have?" β†’ "{org_name} projects portfolio what we build"
"""
    response = await llm.ainvoke([HumanMessage(content=refinement_prompt)])
    refined_query = response.content.strip()
    # Log at DEBUG (not INFO) and truncate: the refined query embeds
    # user-provided content and could otherwise leak PII into logs.
    logger.debug("[FAQ_HANDLER] Refined query: %s", refined_query[:200])
    return refined_query


async def _synthesize_faq_response(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really required? We already have a response synthesizer prompt

message: str,
search_results: List[Dict[str, Any]],
llm: Any,
org_name: str
) -> str:
"""
Step 3-4: Use LLM to synthesize search results into a comprehensive FAQ answer.
"""
results_context = ""
for i, result in enumerate(search_results[:5]): # Limit to top 5 results
title = result.get('title', 'N/A')
content = result.get('content', 'N/A')
if isinstance(content, str) and len(content) > 500:
content = content[:500] + "..."
url = result.get('url', 'N/A')
results_context += f"\nResult {i+1}:\nTitle: {title}\nContent: {content}\nURL: {url}\n"

synthesis_prompt = f"""
You are an AI assistant representing {org_name}. A user asked: "{message}"
Based on the following search results from official sources, provide a comprehensive, helpful answer about {org_name}.
Search Results:
{results_context}
Instructions:
1. Answer the user's question directly and conversationally
2. Focus on the most relevant and recent information
3. Be informative but concise (2-3 paragraphs max)
4. If the search results don't fully answer the question, acknowledge what you found
5. Sound helpful and knowledgeable about {org_name}
6. Don't mention "search results" in your response - speak as if you know about the organization
Your response:
"""

response = await llm.ainvoke([HumanMessage(content=synthesis_prompt)])
synthesized_answer = response.content.strip()
Comment on lines +194 to +195
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

πŸ› οΈ Refactor suggestion

Apply timeout to LLM synthesis calls

Similar to search operations, LLM calls should have timeouts to prevent hanging.

-    response = await llm.ainvoke([HumanMessage(content=synthesis_prompt)])
+    response = await asyncio.wait_for(
+        llm.ainvoke([HumanMessage(content=synthesis_prompt)]),
+        timeout=FAQ_LLM_TIMEOUT
+    )

Add exception handling:

except asyncio.TimeoutError:
    logger.error(f"[FAQ_HANDLER] LLM timeout after {FAQ_LLM_TIMEOUT}s")
    return _generate_fallback_response(message, org_name)
πŸ€– Prompt for AI Agents
In backend/app/agents/devrel/nodes/handlers/faq.py around lines 194-195, wrap
the LLM synthesis call in asyncio.wait_for using the FAQ_LLM_TIMEOUT constant
and add exception handling: put the await llm.ainvoke(...) inside a try block
with response = await asyncio.wait_for(llm.ainvoke(...), FAQ_LLM_TIMEOUT), catch
asyncio.TimeoutError, log the timeout with logger.error(f"[FAQ_HANDLER] LLM
timeout after {FAQ_LLM_TIMEOUT}s") and return
_generate_fallback_response(message, org_name); ensure asyncio is imported if
not already.

logger.info(f"[FAQ_HANDLER] Synthesized FAQ response: {synthesized_answer[:100]}...")
return synthesized_answer


def _format_faq_response(synthesized_answer: str, search_results: List[Dict[str, Any]]) -> str:
"""
Step 5: Format the final response with sources.
"""
formatted_response = synthesized_answer

if search_results:
formatted_response += "\n\n**πŸ“š Sources:**"
for i, result in enumerate(search_results[:3]): # Show top 3 sources
title = result.get('title', 'Source')
url = result.get('url', '#')
formatted_response += f"\n{i+1}. [{title}]({url})"

return formatted_response


def _generate_fallback_response(message: str, org_name: str) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not required. In the anomalous case where there are no results, the bot itself handles that scenario and responds well.

"""
Generate a helpful fallback when search fails.
"""
return (
f"I'd be happy to help you learn about {org_name}, but I couldn't find current information to answer your question: \"{message}\"\n\n"
"This might be because:\n"
"- The information isn't publicly available yet\n"
"- The search terms need to be more specific\n"
"- There might be connectivity issues\n\n"
"Try asking a more specific question, or check out our official website and documentation for the most up-to-date information about "
f"{org_name}."
)


# Example usage for testing
if __name__ == "__main__":
    import asyncio
    from unittest.mock import AsyncMock

    class MockState:
        # Minimal stand-in for AgentState carrying only the fields the handler reads.
        session_id = "test_session"
        messages = [{"content": "What projects does your organization have?"}]
        context = {}

    async def _run_faq_demo():
        """Drive handle_faq_node once with mocked search and LLM dependencies."""
        fake_search = AsyncMock()
        fake_search.search.return_value = [
            {"title": "Project A", "content": "Details about Project A.", "url": "https://aossie.org/projects/a"},
            {"title": "Project B", "content": "Details about Project B.", "url": "https://aossie.org/projects/b"},
        ]
        fake_llm = AsyncMock()
        fake_llm.ainvoke.return_value = AsyncMock(content="We have Project A and Project B focusing on AI and Web.")

        result = await handle_faq_node(MockState(), fake_search, fake_llm)
        print("FAQ Handler response:")
        print(result)

    asyncio.run(_run_faq_demo())
Loading